diff options
Diffstat (limited to 'sys/fs')
-rw-r--r-- | sys/fs/nandfs/bmap.c | 621 | ||||
-rw-r--r-- | sys/fs/nandfs/bmap.h | 40 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs.h | 310 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_alloc.c | 364 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_bmap.c | 230 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_buffer.c | 83 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_cleaner.c | 621 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_cpfile.c | 776 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_dat.c | 344 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_dir.c | 314 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_fs.h | 565 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_ifile.c | 213 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_mount.h | 50 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_segment.c | 1329 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_subr.c | 1120 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_subr.h | 238 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_sufile.c | 569 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_vfsops.c | 1590 | ||||
-rw-r--r-- | sys/fs/nandfs/nandfs_vnops.c | 2455 |
19 files changed, 11832 insertions, 0 deletions
diff --git a/sys/fs/nandfs/bmap.c b/sys/fs/nandfs/bmap.c new file mode 100644 index 0000000..84e4a9e --- /dev/null +++ b/sys/fs/nandfs/bmap.c @@ -0,0 +1,621 @@ +/*- + * Copyright (c) 2012 Semihalf + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/signalvar.h> +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/lockf.h> +#include <sys/ktr.h> +#include <sys/kdb.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + +#include <machine/_inttypes.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + +#include "nandfs_mount.h" +#include "nandfs.h" +#include "nandfs_subr.h" +#include "bmap.h" + +static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t, + struct nandfs_indir *, int *); + +int +bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk) +{ + struct nandfs_inode *ip; + struct nandfs_indir a[NIADDR + 1], *ap; + nandfs_daddr_t daddr; + struct buf *bp; + int error; + int num, *nump; + + DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk)); + ip = &node->nn_inode; + + ap = a; + nump = # + + error = bmap_getlbns(node, lblk, ap, nump); + if (error) + return (error); + + if (num == 0) { + *vblk = ip->i_db[lblk]; + return (0); + } + + DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__, + node, lblk, ap->in_off)); + daddr = ip->i_ib[ap->in_off]; + for (bp = NULL, ++ap; --num; ap++) { + if (daddr == 0) { + DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with " + "vblk 0\n", __func__, node, lblk)); + *vblk = 0; + return (0); + } + if (ap->in_lbn == lblk) { + DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx " + "returning address of indirect block (%jx)\n", + __func__, node, lblk, ap->in_lbn, daddr)); + *vblk = daddr; + return (0); + } + + DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block " + "ap->in_lbn=%jx\n", __func__, node, 
lblk, ap->in_lbn)); + + error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; + brelse(bp); + } + + DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__, + node, lblk, daddr)); + *vblk = daddr; + + return (0); +} + +int +bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force) +{ + struct nandfs_indir a[NIADDR+1], *ap; +#ifdef DEBUG + nandfs_daddr_t daddr; +#endif + struct buf *bp; + int error; + int num, *nump; + + DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk)); + + ap = a; + nump = # + + error = bmap_getlbns(node, lblk, ap, nump); + if (error) + return (error); + + /* + * Direct block, nothing to do + */ + if (num == 0) + return (0); + + DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node)); + + for (bp = NULL, ++ap; --num; ap++) { + error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + +#ifdef DEBUG + daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; + MPASS(daddr != 0 || node->nn_ino == 3); +#endif + + error = nandfs_dirty_buf_meta(bp, force); + if (error) + return (error); + } + + return (0); +} + +int +bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk, + nandfs_daddr_t vblk) +{ + struct nandfs_inode *ip; + struct nandfs_indir a[NIADDR+1], *ap; + struct buf *bp; + nandfs_daddr_t daddr; + int error; + int num, *nump, i; + + DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk, + vblk)); + + ip = &node->nn_inode; + + ap = a; + nump = # + + error = bmap_getlbns(node, lblk, ap, nump); + if (error) + return (error); + + DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__, + node, lblk, vblk, num)); + + if (num == 0) { + DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__, + node, lblk)); + ip->i_db[lblk] = vblk; + return (0); + } + + DPRINTF(BMAP, ("%s: node %p lblk=%jx 
indirect block level %d\n", + __func__, node, lblk, ap->in_off)); + + if (num == 1) { + DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting " + "%jx as vblk for indirect block %d\n", __func__, node, + lblk, vblk, ap->in_off)); + ip->i_ib[ap->in_off] = vblk; + return (0); + } + + bp = NULL; + daddr = ip->i_ib[a[0].in_off]; + for (i = 1; i < num; i++) { + if (bp) + brelse(bp); + if (daddr == 0) { + DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create " + "block %jx %d\n", __func__, node, lblk, vblk, + a[i].in_lbn, a[i].in_off)); + error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED, + 0, &bp); + if (error) + return (error); + } else { + DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read " + "block %jx %d\n", __func__, node, daddr, vblk, + a[i].in_lbn, a[i].in_off)); + error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + } + daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off]; + } + i--; + + DPRINTF(BMAP, + ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at " + "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off, + daddr)); + + if (!bp) { + nandfs_error("%s: cannot find indirect block\n", __func__); + return (-1); + } + ((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk; + + error = nandfs_dirty_buf_meta(bp, 0); + if (error) { + nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp); + return (error); + } + DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__, + node, lblk, vblk)); + + return (error); +} + +CTASSERT(NIADDR <= 3); +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ + +static __inline nandfs_lbn_t +lbn_offset(struct nandfs_device *fsdev, int level) +{ + nandfs_lbn_t res; + + for (res = 1; level > 0; level--) + res *= MNINDIR(fsdev); + return (res); +} + +static nandfs_lbn_t +blocks_inside(struct nandfs_device *fsdev, int 
level, struct nandfs_indir *nip) +{ + nandfs_lbn_t blocks; + + for (blocks = 1; level >= SINGLE; level--, nip++) { + MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev)); + blocks += nip->in_off * lbn_offset(fsdev, level); + } + + return (blocks); +} + +static int +bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left, + int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp, + nandfs_daddr_t *copy) +{ + struct buf *bp; + nandfs_lbn_t i, lbn, nlbn, factor, tosub; + struct nandfs_device *fsdev; + int error, lcleaned, modified; + + DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__, + node, level, *left)); + + fsdev = node->nn_nandfsdev; + + MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev)); + + factor = lbn_offset(fsdev, level); + lbn = ap->in_lbn; + + error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + bcopy(bp->b_data, copy, fsdev->nd_blocksize); + bqrelse(bp); + + modified = 0; + + i = ap->in_off; + + if (ap != fp) + ap++; + for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--, + nlbn += factor) { + lcleaned = 0; + + DPRINTF(BMAP, + ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n", + __func__, node, i, nlbn, *left, ap, copy[i])); + + if (copy[i] == 0) { + tosub = blocks_inside(fsdev, level - 1, ap); + if (tosub > *left) + tosub = 0; + + *left -= tosub; + } else { + if (level > SINGLE) { + if (ap == fp) + ap->in_lbn = nlbn; + + error = bmap_truncate_indirect(node, level - 1, + left, &lcleaned, ap, fp, + copy + MNINDIR(fsdev)); + if (error) + return (error); + } else { + error = nandfs_bdestroy(node, copy[i]); + if (error) + return (error); + lcleaned = 1; + *left -= 1; + } + } + + if (lcleaned) { + if (level > SINGLE) { + error = nandfs_vblock_end(fsdev, copy[i]); + if (error) + return (error); + } + copy[i] = 0; + modified++; + } + + ap = fp; + } + + if (i == -1) + *cleaned = 1; + + error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); + if 
(error) { + brelse(bp); + return (error); + } + if (modified) + bcopy(copy, bp->b_data, fsdev->nd_blocksize); + + error = nandfs_dirty_buf_meta(bp, 0); + if (error) + return (error); + + return (error); +} + +int +bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk, + nandfs_lbn_t todo) +{ + struct nandfs_inode *ip; + struct nandfs_indir a[NIADDR + 1], f[NIADDR], *ap; + nandfs_daddr_t indir_lbn[NIADDR]; + nandfs_daddr_t *copy; + int error, level; + nandfs_lbn_t left, tosub; + struct nandfs_device *fsdev; + int cleaned, i; + int num, *nump; + + DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__, + node, lastblk, todo)); + + ip = &node->nn_inode; + fsdev = node->nn_nandfsdev; + + ap = a; + nump = # + + error = bmap_getlbns(node, lastblk, ap, nump); + if (error) + return (error); + + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev) + * MNINDIR(fsdev) - 1; + + for (i = 0; i < NIADDR; i++) { + f[i].in_off = MNINDIR(fsdev) - 1; + f[i].in_lbn = 0xdeadbeef; + } + + left = todo; + +#ifdef DEBUG + a[num].in_off = -1; +#endif + + ap++; + num -= 2; + + if (num < 0) + goto direct; + + copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1), + M_NANDFSTEMP, M_WAITOK); + + for (level = num; level >= SINGLE && left > 0; level--) { + cleaned = 0; + + if (ip->i_ib[level] == 0) { + tosub = blocks_inside(fsdev, level, ap); + if (tosub > left) + left = 0; + else + left -= tosub; + } else { + if (ap == f) + ap->in_lbn = indir_lbn[level]; + error = bmap_truncate_indirect(node, level, &left, + &cleaned, ap, f, copy); + if (error) { + nandfs_error("%s: error %d when truncate " + "at level %d\n", __func__, error, level); + return (error); + } + } + + if (cleaned) { + nandfs_vblock_end(fsdev, ip->i_ib[level]); + ip->i_ib[level] = 0; + } + + ap = f; + } + + free(copy, M_NANDFSTEMP); + +direct: + if (num < 0) + i = lastblk; + else + i = NDADDR - 1; + 
+ for (; i >= 0 && left > 0; i--) { + if (ip->i_db[i] != 0) { + error = nandfs_bdestroy(node, ip->i_db[i]); + if (error) { + nandfs_error("%s: cannot destroy " + "block %jx, error %d\n", __func__, + (uintmax_t)ip->i_db[i], error); + return (error); + } + ip->i_db[i] = 0; + } + + left--; + } + + KASSERT(left == 0, + ("truncated wrong number of blocks (%jd should be 0)", left)); + + return (error); +} + +nandfs_lbn_t +get_maxfilesize(struct nandfs_device *fsdev) +{ + struct nandfs_indir f[NIADDR]; + nandfs_lbn_t max; + int i; + + max = NDADDR; + + for (i = 0; i < NIADDR; i++) { + f[i].in_off = MNINDIR(fsdev) - 1; + max += blocks_inside(fsdev, i, f); + } + + max *= fsdev->nd_blocksize; + + return (max); +} + +/* + * This is ufs_getlbns with minor modifications. + */ +/* + * Create an array of logical block number/offset pairs which represent the + * path of indirect blocks required to access a data block. The first "pair" + * contains the logical block number of the appropriate single, double or + * triple indirect block and the offset into the inode indirect block array. + * Note, the logical block number of the inode single/double/triple indirect + * block appears twice in the array, once with the offset into the i_ib and + * once with the offset into the page itself. + */ +static int +bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump) +{ + nandfs_daddr_t blockcnt; + nandfs_lbn_t metalbn, realbn; + struct nandfs_device *fsdev; + int i, numlevels, off; + + fsdev = node->nn_nandfsdev; + + DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__, + node, bn, MNINDIR(fsdev))); + + *nump = 0; + numlevels = 0; + realbn = bn; + + if (bn < 0) + bn = -bn; + + /* The first NDADDR blocks are direct blocks. */ + if (bn < NDADDR) + return (0); + + /* + * Determine the number of levels of indirection. 
After this loop + * is done, blockcnt indicates the number of data blocks possible + * at the previous level of indirection, and NIADDR - i is the number + * of levels of indirection needed to locate the requested block. + */ + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { + DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__, + blockcnt, i, bn)); + if (i == 0) + return (EFBIG); + blockcnt *= MNINDIR(fsdev); + if (bn < blockcnt) + break; + } + + /* Calculate the address of the first meta-block. */ + if (realbn >= 0) + metalbn = -(realbn - bn + NIADDR - i); + else + metalbn = -(-realbn - bn + NIADDR - i); + + /* + * At each iteration, off is the offset into the bap array which is + * an array of disk addresses at the current level of indirection. + * The logical block number and the offset in that block are stored + * into the argument array. + */ + ap->in_lbn = metalbn; + ap->in_off = off = NIADDR - i; + + DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__, + metalbn, off)); + + ap++; + for (++numlevels; i <= NIADDR; i++) { + /* If searching for a meta-data block, quit when found. */ + if (metalbn == realbn) + break; + + blockcnt /= MNINDIR(fsdev); + off = (bn / blockcnt) % MNINDIR(fsdev); + + ++numlevels; + ap->in_lbn = metalbn; + ap->in_off = off; + + DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__, + ap->in_lbn, ap->in_off)); + ++ap; + + metalbn -= -1 + off * blockcnt; + } + if (nump) + *nump = numlevels; + + DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels)); + + return (0); +} diff --git a/sys/fs/nandfs/bmap.h b/sys/fs/nandfs/bmap.h new file mode 100644 index 0000000..c27c61c --- /dev/null +++ b/sys/fs/nandfs/bmap.h @@ -0,0 +1,40 @@ +/*- + * Copyright (c) 2012 Semihalf + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _BMAP_H +#define _BMAP_H + +#include "nandfs_fs.h" + +int bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *); +int bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t); +int bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, nandfs_lbn_t); +int bmap_dirty_meta(struct nandfs_node *, nandfs_lbn_t, int); + +nandfs_lbn_t get_maxfilesize(struct nandfs_device *); + +#endif /* _BMAP_H */ diff --git a/sys/fs/nandfs/nandfs.h b/sys/fs/nandfs/nandfs.h new file mode 100644 index 0000000..beb4e16 --- /dev/null +++ b/sys/fs/nandfs/nandfs.h @@ -0,0 +1,310 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * From: NetBSD: nilfs.h,v 1.1 2009/07/18 16:31:42 reinoud + * + * $FreeBSD$ + */ + +#ifndef _FS_NANDFS_NANDFS_H_ +#define _FS_NANDFS_NANDFS_H_ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/condvar.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include <sys/queue.h> +#include <sys/uio.h> +#include <sys/mutex.h> + +#include <sys/disk.h> +#include <sys/kthread.h> +#include "nandfs_fs.h" + +MALLOC_DECLARE(M_NANDFSTEMP); + +/* Debug categories */ +#define NANDFS_DEBUG_VOLUMES 0x000001 +#define NANDFS_DEBUG_BLOCK 0x000004 +#define NANDFS_DEBUG_LOCKING 0x000008 +#define NANDFS_DEBUG_NODE 0x000010 +#define NANDFS_DEBUG_LOOKUP 0x000020 +#define NANDFS_DEBUG_READDIR 0x000040 +#define NANDFS_DEBUG_TRANSLATE 0x000080 +#define NANDFS_DEBUG_STRATEGY 0x000100 +#define NANDFS_DEBUG_READ 0x000200 +#define NANDFS_DEBUG_WRITE 0x000400 +#define NANDFS_DEBUG_IFILE 0x000800 +#define NANDFS_DEBUG_ATTR 0x001000 +#define NANDFS_DEBUG_EXTATTR 0x002000 +#define NANDFS_DEBUG_ALLOC 0x004000 +#define NANDFS_DEBUG_CPFILE 0x008000 +#define NANDFS_DEBUG_DIRHASH 0x010000 +#define NANDFS_DEBUG_NOTIMPL 0x020000 +#define NANDFS_DEBUG_SHEDULE 0x040000 +#define NANDFS_DEBUG_SEG 0x080000 +#define NANDFS_DEBUG_SYNC 0x100000 +#define NANDFS_DEBUG_PARANOIA 0x200000 +#define NANDFS_DEBUG_VNCALL 0x400000 +#define NANDFS_DEBUG_BUF 0x1000000 +#define NANDFS_DEBUG_BMAP 0x2000000 +#define NANDFS_DEBUG_DAT 0x4000000 +#define NANDFS_DEBUG_GENERIC 0x8000000 +#define NANDFS_DEBUG_CLEAN 0x10000000 + +extern int nandfs_verbose; + +#define DPRINTF(name, arg) { \ + if (nandfs_verbose & NANDFS_DEBUG_##name) {\ + printf arg;\ + };\ + } +#define DPRINTFIF(name, cond, arg) { \ + if (nandfs_verbose & NANDFS_DEBUG_##name) { \ + if (cond) printf arg;\ + };\ + } + +#define VFSTONANDFS(mp) ((struct nandfsmount *)((mp)->mnt_data)) +#define VTON(vp) ((struct nandfs_node *)(vp)->v_data) +#define NTOV(xp) ((xp)->nn_vnode) + +int nandfs_init(struct vfsconf *); +int nandfs_uninit(struct vfsconf *); + 
+extern struct vop_vector nandfs_vnodeops; +extern struct vop_vector nandfs_system_vnodeops; + +struct nandfs_node; + +/* Structure and derivatives */ +struct nandfs_mdt { + uint32_t entries_per_block; + uint32_t entries_per_group; + uint32_t blocks_per_group; + uint32_t groups_per_desc_block; /* desc is super group */ + uint32_t blocks_per_desc_block; /* desc is super group */ +}; + +struct nandfs_segment { + LIST_ENTRY(nandfs_segment) seg_link; + + struct nandfs_device *fsdev; + + TAILQ_HEAD(, buf) segsum; + TAILQ_HEAD(, buf) data; + + uint64_t seg_num; + uint64_t seg_next; + uint64_t start_block; + uint32_t num_blocks; + + uint32_t nblocks; + uint32_t nbinfos; + uint32_t segsum_blocks; + uint32_t segsum_bytes; + uint32_t bytes_left; + char *current_off; +}; + +struct nandfs_seginfo { + LIST_HEAD( ,nandfs_segment) seg_list; + struct nandfs_segment *curseg; + struct nandfs_device *fsdev; + uint32_t blocks; + uint8_t reiterate; +}; + +#define NANDFS_FSSTOR_FAILED 1 +struct nandfs_fsarea { + int offset; + int flags; + int last_used; +}; + +extern int nandfs_cleaner_enable; +extern int nandfs_cleaner_interval; +extern int nandfs_cleaner_segments; + +struct nandfs_device { + struct vnode *nd_devvp; + struct g_consumer *nd_gconsumer; + + struct thread *nd_syncer; + struct thread *nd_cleaner; + int nd_syncer_exit; + int nd_cleaner_exit; + + int nd_is_nand; + + struct nandfs_fsarea nd_fsarea[NANDFS_NFSAREAS]; + int nd_last_fsarea; + + STAILQ_HEAD(nandfs_mnts, nandfsmount) nd_mounts; + SLIST_ENTRY(nandfs_device) nd_next_device; + + /* FS structures */ + struct nandfs_fsdata nd_fsdata; + struct nandfs_super_block nd_super; + struct nandfs_segment_summary nd_last_segsum; + struct nandfs_super_root nd_super_root; + struct nandfs_node *nd_dat_node; + struct nandfs_node *nd_cp_node; + struct nandfs_node *nd_su_node; + struct nandfs_node *nd_gc_node; + + struct nandfs_mdt nd_dat_mdt; + struct nandfs_mdt nd_ifile_mdt; + + struct timespec nd_ts; + + /* Synchronization */ + struct 
mtx nd_mutex; + struct mtx nd_sync_mtx; + struct cv nd_sync_cv; + struct mtx nd_clean_mtx; + struct cv nd_clean_cv; + struct lock nd_seg_const; + + struct nandfs_seginfo *nd_seginfo; + + /* FS geometry */ + uint64_t nd_devsize; + uint64_t nd_maxfilesize; + uint32_t nd_blocksize; + uint32_t nd_erasesize; + + uint32_t nd_devblocksize; + + /* Segment usage */ + uint64_t nd_clean_segs; + uint64_t *nd_free_base; + uint64_t nd_free_count; + uint64_t nd_dirty_bufs; + + /* Running values */ + uint64_t nd_seg_sequence; + uint64_t nd_seg_num; + uint64_t nd_next_seg_num; + uint64_t nd_last_pseg; + uint64_t nd_last_cno; + uint64_t nd_last_ino; + uint64_t nd_fakevblk; + + int nd_mount_state; + int nd_refcnt; + int nd_syncing; + int nd_cleaning; +}; + +extern SLIST_HEAD(_nandfs_devices, nandfs_device) nandfs_devices; + +#define NANDFS_FORCE_SYNCER 0x1 +#define NANDFS_UMOUNT 0x2 + +#define SYNCER_UMOUNT 0x0 +#define SYNCER_VFS_SYNC 0x1 +#define SYNCER_BDFLUSH 0x2 +#define SYNCER_FFORCE 0x3 +#define SYNCER_FSYNC 0x4 +#define SYNCER_ROUPD 0x5 + +static __inline int +nandfs_writelockflags(struct nandfs_device *fsdev, int flags) +{ + int error = 0; + + if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) + error = lockmgr(&fsdev->nd_seg_const, flags | LK_SHARED, NULL); + + return (error); +} + +static __inline void +nandfs_writeunlock(struct nandfs_device *fsdev) +{ + + if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) + lockmgr(&(fsdev)->nd_seg_const, LK_RELEASE, NULL); +} + +#define NANDFS_WRITELOCKFLAGS(fsdev, flags) nandfs_writelockflags(fsdev, flags) + +#define NANDFS_WRITELOCK(fsdev) NANDFS_WRITELOCKFLAGS(fsdev, 0) + +#define NANDFS_WRITEUNLOCK(fsdev) nandfs_writeunlock(fsdev) + +#define NANDFS_WRITEASSERT(fsdev) lockmgr_assert(&(fsdev)->nd_seg_const, KA_LOCKED) + +/* Specific mountpoint; head or a checkpoint/snapshot */ +struct nandfsmount { + STAILQ_ENTRY(nandfsmount) nm_next_mount; + + struct mount *nm_vfs_mountp; + struct nandfs_device *nm_nandfsdev; + struct 
nandfs_args nm_mount_args; + struct nandfs_node *nm_ifile_node; + + uint8_t nm_flags; + int8_t nm_ronly; +}; + +struct nandfs_node { + struct vnode *nn_vnode; + struct nandfsmount *nn_nmp; + struct nandfs_device *nn_nandfsdev; + struct lockf *nn_lockf; + + uint64_t nn_ino; + struct nandfs_inode nn_inode; + + uint64_t nn_diroff; + uint32_t nn_flags; +}; + +#define IN_ACCESS 0x0001 /* Inode access time update request */ +#define IN_CHANGE 0x0002 /* Inode change time update request */ +#define IN_UPDATE 0x0004 /* Inode was written to; update mtime*/ +#define IN_MODIFIED 0x0008 /* node has been modified */ +#define IN_RENAME 0x0010 /* node is being renamed. */ + +/* File permissions. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +#define PRINT_NODE_FLAGS \ + "\10\1IN_ACCESS\2IN_CHANGE\3IN_UPDATE\4IN_MODIFIED\5IN_RENAME" + +#define NANDFS_GATHER(x) ((x)->b_flags |= B_00800000) +#define NANDFS_UNGATHER(x) ((x)->b_flags &= ~B_00800000) +#define NANDFS_ISGATHERED(x) ((x)->b_flags & B_00800000) + +#endif /* !_FS_NANDFS_NANDFS_H_ */ diff --git a/sys/fs/nandfs/nandfs_alloc.c b/sys/fs/nandfs/nandfs_alloc.c new file mode 100644 index 0000000..3417266 --- /dev/null +++ b/sys/fs/nandfs/nandfs_alloc.c @@ -0,0 +1,364 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +static void +nandfs_get_desc_block_nr(struct nandfs_mdt *mdt, uint64_t desc, + uint64_t *desc_block) +{ + + *desc_block = desc * mdt->blocks_per_desc_block; +} + +static void +nandfs_get_group_block_nr(struct nandfs_mdt *mdt, uint64_t group, + uint64_t *group_block) +{ + uint64_t desc, group_off; + + desc = group / mdt->groups_per_desc_block; + group_off = group % mdt->groups_per_desc_block; + *group_block = desc * 
mdt->blocks_per_desc_block + + 1 + group_off * mdt->blocks_per_group; +} + +static void +init_desc_block(struct nandfs_mdt *mdt, uint8_t *block_data) +{ + struct nandfs_block_group_desc *desc; + uint32_t i; + + desc = (struct nandfs_block_group_desc *) block_data; + for (i = 0; i < mdt->groups_per_desc_block; i++) + desc[i].bg_nfrees = mdt->entries_per_group; +} + +int +nandfs_find_free_entry(struct nandfs_mdt *mdt, struct nandfs_node *node, + struct nandfs_alloc_request *req) +{ + nandfs_daddr_t desc, group, maxgroup, maxdesc, pos = 0; + nandfs_daddr_t start_group, start_desc; + nandfs_daddr_t desc_block, group_block; + nandfs_daddr_t file_blocks; + struct nandfs_block_group_desc *descriptors; + struct buf *bp, *bp2; + uint32_t *mask, i, mcount, msize; + int error; + + file_blocks = node->nn_inode.i_blocks; + maxgroup = 0x100000000ull / mdt->entries_per_group; + maxdesc = maxgroup / mdt->groups_per_desc_block; + start_group = req->entrynum / mdt->entries_per_group; + start_desc = start_group / mdt->groups_per_desc_block; + + bp = bp2 = NULL; +restart: + for (desc = start_desc; desc < maxdesc; desc++) { + nandfs_get_desc_block_nr(mdt, desc, &desc_block); + + if (bp) + brelse(bp); + if (desc_block < file_blocks) { + error = nandfs_bread(node, desc_block, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + } else { + error = nandfs_bcreate(node, desc_block, NOCRED, 0, + &bp); + if (error) + return (error); + file_blocks++; + init_desc_block(mdt, bp->b_data); + } + + descriptors = (struct nandfs_block_group_desc *) bp->b_data; + for (group = start_group; group < mdt->groups_per_desc_block; + group++) { + if (descriptors[group].bg_nfrees > 0) { + nandfs_get_group_block_nr(mdt, group, + &group_block); + + if (bp2) + brelse(bp2); + if (group_block < file_blocks) { + error = nandfs_bread(node, group_block, + NOCRED, 0, &bp2); + if (error) { + brelse(bp); + return (error); + } + } else { + error = nandfs_bcreate(node, + group_block, NOCRED, 0, &bp2); + if 
(error) + return (error); + file_blocks++; + } + mask = (uint32_t *)bp2->b_data; + msize = (sizeof(uint32_t) * __CHAR_BIT); + mcount = mdt->entries_per_group / msize; + for (i = 0; i < mcount; i++) { + if (mask[i] == UINT32_MAX) + continue; + + pos = ffs(~mask[i]) - 1; + pos += (msize * i); + pos += (group * mdt->entries_per_group); + pos += desc * group * + mdt->groups_per_desc_block * + mdt->entries_per_group; + goto found; + } + } + } + start_group = 0; + } + + if (start_desc != 0) { + maxdesc = start_desc; + start_desc = 0; + req->entrynum = 0; + goto restart; + } + + return (ENOENT); + +found: + req->entrynum = pos; + req->bp_desc = bp; + req->bp_bitmap = bp2; + DPRINTF(ALLOC, ("%s: desc: %p bitmap: %p entry: %#jx\n", + __func__, req->bp_desc, req->bp_bitmap, (uintmax_t)pos)); + + return (0); +} + +int +nandfs_find_entry(struct nandfs_mdt* mdt, struct nandfs_node *nnode, + struct nandfs_alloc_request *req) +{ + uint64_t dblock, bblock, eblock; + uint32_t offset; + int error; + + nandfs_mdt_trans_blk(mdt, req->entrynum, &dblock, &bblock, &eblock, + &offset); + + error = nandfs_bread(nnode, dblock, NOCRED, 0, &req->bp_desc); + if (error) { + brelse(req->bp_desc); + return (error); + } + + error = nandfs_bread(nnode, bblock, NOCRED, 0, &req->bp_bitmap); + if (error) { + brelse(req->bp_desc); + brelse(req->bp_bitmap); + return (error); + } + + error = nandfs_bread(nnode, eblock, NOCRED, 0, &req->bp_entry); + if (error) { + brelse(req->bp_desc); + brelse(req->bp_bitmap); + brelse(req->bp_entry); + return (error); + } + + DPRINTF(ALLOC, + ("%s: desc_buf: %p bitmap_buf %p entry_buf %p offset %x\n", + __func__, req->bp_desc, req->bp_bitmap, req->bp_entry, offset)); + + return (0); +} + +static __inline void +nandfs_calc_idx_entry(struct nandfs_mdt* mdt, uint32_t entrynum, + uint64_t *group, uint64_t *bitmap_idx, uint64_t *bitmap_off) +{ + + /* Find group_desc index */ + entrynum = entrynum % + (mdt->entries_per_group * mdt->groups_per_desc_block); + *group = entrynum 
/ mdt->entries_per_group; + /* Find bitmap index and bit offset */ + entrynum = entrynum % mdt->entries_per_group; + *bitmap_idx = entrynum / (sizeof(uint32_t) * __CHAR_BIT); + *bitmap_off = entrynum % (sizeof(uint32_t) * __CHAR_BIT); +} + +int +nandfs_free_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req) +{ + struct nandfs_block_group_desc *descriptors; + uint64_t bitmap_idx, bitmap_off; + uint64_t group; + uint32_t *mask, maskrw; + + nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx, + &bitmap_off); + + DPRINTF(ALLOC, ("nandfs_free_entry: req->entrynum=%jx bitmap_idx=%jx" + " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum, + (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group)); + + /* Update counter of free entries for group */ + descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data; + descriptors[group].bg_nfrees++; + + /* Set bit to indicate that entry is taken */ + mask = (uint32_t *)req->bp_bitmap->b_data; + maskrw = mask[bitmap_idx]; + KASSERT(maskrw & (1 << bitmap_off), ("freeing unallocated vblock")); + maskrw &= ~(1 << bitmap_off); + mask[bitmap_idx] = maskrw; + + /* Make descriptor, bitmap and entry buffer dirty */ + if (nandfs_dirty_buf(req->bp_desc, 0) == 0) { + nandfs_dirty_buf(req->bp_bitmap, 1); + nandfs_dirty_buf(req->bp_entry, 1); + } else { + brelse(req->bp_bitmap); + brelse(req->bp_entry); + return (-1); + } + + return (0); +} + +int +nandfs_alloc_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req) +{ + struct nandfs_block_group_desc *descriptors; + uint64_t bitmap_idx, bitmap_off; + uint64_t group; + uint32_t *mask, maskrw; + + nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx, + &bitmap_off); + + DPRINTF(ALLOC, ("nandfs_alloc_entry: req->entrynum=%jx bitmap_idx=%jx" + " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum, + (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group)); + + /* Update counter of free entries for group */ + descriptors = 
(struct nandfs_block_group_desc *) req->bp_desc->b_data; + descriptors[group].bg_nfrees--; + + /* Clear bit to indicate that entry is free */ + mask = (uint32_t *)req->bp_bitmap->b_data; + maskrw = mask[bitmap_idx]; + maskrw |= 1 << bitmap_off; + mask[bitmap_idx] = maskrw; + + /* Make descriptor, bitmap and entry buffer dirty */ + if (nandfs_dirty_buf(req->bp_desc, 0) == 0) { + nandfs_dirty_buf(req->bp_bitmap, 1); + nandfs_dirty_buf(req->bp_entry, 1); + } else { + brelse(req->bp_bitmap); + brelse(req->bp_entry); + return (-1); + } + + return (0); +} + +void +nandfs_abort_entry(struct nandfs_alloc_request *req) +{ + + brelse(req->bp_desc); + brelse(req->bp_bitmap); + brelse(req->bp_entry); +} + +int +nandfs_get_entry_block(struct nandfs_mdt *mdt, struct nandfs_node *node, + struct nandfs_alloc_request *req, uint32_t *entry, int create) +{ + struct buf *bp; + nandfs_lbn_t blocknr; + int error; + + /* Find buffer number for given entry */ + nandfs_mdt_trans(mdt, req->entrynum, &blocknr, entry); + DPRINTF(ALLOC, ("%s: ino %#jx entrynum:%#jx block:%#jx entry:%x\n", + __func__, (uintmax_t)node->nn_ino, (uintmax_t)req->entrynum, + (uintmax_t)blocknr, *entry)); + + /* Read entry block or create if 'create' parameter is not zero */ + bp = NULL; + + if (blocknr < node->nn_inode.i_blocks) + error = nandfs_bread(node, blocknr, NOCRED, 0, &bp); + else if (create) + error = nandfs_bcreate(node, blocknr, NOCRED, 0, &bp); + else + error = E2BIG; + + if (error) { + DPRINTF(ALLOC, ("%s: ino %#jx block %#jx entry %x error %d\n", + __func__, (uintmax_t)node->nn_ino, (uintmax_t)blocknr, + *entry, error)); + if (bp) + brelse(bp); + return (error); + } + + MPASS(nandfs_vblk_get(bp) != 0 || node->nn_ino == NANDFS_DAT_INO); + + req->bp_entry = bp; + return (0); +} diff --git a/sys/fs/nandfs/nandfs_bmap.c b/sys/fs/nandfs/nandfs_bmap.c new file mode 100644 index 0000000..9f800b8 --- /dev/null +++ b/sys/fs/nandfs/nandfs_bmap.c @@ -0,0 +1,230 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * 
Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/signalvar.h> +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/lockf.h> +#include <sys/ktr.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + +#include <machine/_inttypes.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + +#include "nandfs_mount.h" +#include "nandfs.h" +#include "nandfs_subr.h" +#include "bmap.h" + +nandfs_lbn_t +nandfs_get_maxfilesize(struct nandfs_device *fsdev) +{ + + return (get_maxfilesize(fsdev)); +} + +int +nandfs_bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, + nandfs_daddr_t *vblk) +{ + int error = 0; + + if (node->nn_ino == NANDFS_GC_INO && lblk >= 0) + *vblk = lblk; + else + error = bmap_lookup(node, lblk, vblk); + + DPRINTF(TRANSLATE, ("%s: error %d ino %#jx lblocknr %#jx -> %#jx\n", + __func__, error, (uintmax_t)node->nn_ino, (uintmax_t)lblk, + (uintmax_t)*vblk)); + + if (error) + nandfs_error("%s: returned %d", __func__, error); + + return (error); +} + +int +nandfs_bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk, + struct buf *bp) +{ + struct nandfs_device *fsdev; + nandfs_daddr_t vblk; + int error; + + fsdev = node->nn_nandfsdev; + + vblk = 0; + if (node->nn_ino != NANDFS_DAT_INO) { + error = nandfs_vblock_alloc(fsdev, &vblk); + if (error) + return (error); + } + + nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED); + nandfs_vblk_set(bp, vblk); + + error = bmap_insert_block(node, lblk, vblk); + if (error) { + nandfs_vblock_free(fsdev, vblk); + return (error); + } + + return (0); +} + +int 
+nandfs_bmap_dirty_blocks(struct nandfs_node *node, struct buf *bp, int force) +{ + int error; + + error = bmap_dirty_meta(node, bp->b_lblkno, force); + if (error) + nandfs_error("%s: cannot dirty buffer %p\n", + __func__, bp); + + return (error); +} + +static int +nandfs_bmap_update_mapping(struct nandfs_node *node, nandfs_lbn_t lblk, + nandfs_daddr_t blknr) +{ + int error; + + DPRINTF(BMAP, + ("%s: node: %p ino: %#jx lblk: %#jx vblk: %#jx\n", + __func__, node, (uintmax_t)node->nn_ino, (uintmax_t)lblk, + (uintmax_t)blknr)); + + error = bmap_insert_block(node, lblk, blknr); + + return (error); +} + +int +nandfs_bmap_update_block(struct nandfs_node *node, struct buf *bp, + nandfs_lbn_t blknr) +{ + nandfs_lbn_t lblk; + int error; + + lblk = bp->b_lblkno; + nandfs_vblk_set(bp, blknr); + + DPRINTF(BMAP, ("%s: node: %p ino: %#jx bp: %p lblk: %#jx blk: %#jx\n", + __func__, node, (uintmax_t)node->nn_ino, bp, + (uintmax_t)lblk, (uintmax_t)blknr)); + + error = nandfs_bmap_update_mapping(node, lblk, blknr); + if (error) { + nandfs_error("%s: cannot update lblk:%jx to blk:%jx for " + "node:%p, error:%d\n", __func__, (uintmax_t)lblk, + (uintmax_t)blknr, node, error); + return (error); + } + + return (error); +} + +int +nandfs_bmap_update_dat(struct nandfs_node *node, nandfs_daddr_t oldblk, + struct buf *bp) +{ + struct nandfs_device *fsdev; + nandfs_daddr_t vblk = 0; + int error; + + if (node->nn_ino == NANDFS_DAT_INO) + return (0); + + if (nandfs_buf_check(bp, NANDFS_VBLK_ASSIGNED)) { + nandfs_buf_clear(bp, NANDFS_VBLK_ASSIGNED); + return (0); + } + + fsdev = node->nn_nandfsdev; + + /* First alloc new virtual block.... 
*/ + error = nandfs_vblock_alloc(fsdev, &vblk); + if (error) + return (error); + + error = nandfs_bmap_update_block(node, bp, vblk); + if (error) + return (error); + + /* Then we can end up with old one */ + nandfs_vblock_end(fsdev, oldblk); + + DPRINTF(BMAP, + ("%s: ino %#jx block %#jx: update vblk %#jx to %#jx\n", + __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno, + (uintmax_t)oldblk, (uintmax_t)vblk)); + return (error); +} + +int +nandfs_bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t oblk, + nandfs_lbn_t nblk) +{ + nandfs_lbn_t todo; + int error; + + todo = oblk - nblk; + + DPRINTF(BMAP, ("%s: node %p oblk %jx nblk %jx truncate by %jx\n", + __func__, node, oblk, nblk, todo)); + + error = bmap_truncate_mapping(node, oblk, todo); + if (error) + return (error); + + return (error); +} diff --git a/sys/fs/nandfs/nandfs_buffer.c b/sys/fs/nandfs/nandfs_buffer.c new file mode 100644 index 0000000..b0d72668 --- /dev/null +++ b/sys/fs/nandfs/nandfs_buffer.c @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/buf.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/bio.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +struct buf * +nandfs_geteblk(int size, int flags) +{ + struct buf *bp; + + /* + * XXX + * Right now we can call geteblk with GB_NOWAIT_BD flag, which means + * it can return NULL. But we cannot afford to get NULL, hence this panic. 
+ */ + bp = geteblk(size, flags); + if (bp == NULL) + panic("geteblk returned NULL"); + + return (bp); +} + +void +nandfs_dirty_bufs_increment(struct nandfs_device *fsdev) +{ + + mtx_lock(&fsdev->nd_mutex); + KASSERT(fsdev->nd_dirty_bufs >= 0, ("negative nd_dirty_bufs")); + fsdev->nd_dirty_bufs++; + mtx_unlock(&fsdev->nd_mutex); +} + +void +nandfs_dirty_bufs_decrement(struct nandfs_device *fsdev) +{ + + mtx_lock(&fsdev->nd_mutex); + KASSERT(fsdev->nd_dirty_bufs > 0, + ("decrementing not-positive nd_dirty_bufs")); + fsdev->nd_dirty_bufs--; + mtx_unlock(&fsdev->nd_mutex); +} diff --git a/sys/fs/nandfs/nandfs_cleaner.c b/sys/fs/nandfs/nandfs_cleaner.c new file mode 100644 index 0000000..9257c10 --- /dev/null +++ b/sys/fs/nandfs/nandfs_cleaner.c @@ -0,0 +1,621 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/buf.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/bio.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +#define NANDFS_CLEANER_KILL 1 + +static void nandfs_cleaner(struct nandfs_device *); +static int nandfs_cleaner_clean_segments(struct nandfs_device *, + struct nandfs_vinfo *, uint32_t, struct nandfs_period *, uint32_t, + struct nandfs_bdesc *, uint32_t, uint64_t *, uint32_t); + +static int +nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, + uint64_t nmembs); + +static void +nandfs_wakeup_wait_cleaner(struct nandfs_device *fsdev, int reason) +{ + + mtx_lock(&fsdev->nd_clean_mtx); + if (reason == NANDFS_CLEANER_KILL) + fsdev->nd_cleaner_exit = 1; + if (fsdev->nd_cleaning == 0) { + fsdev->nd_cleaning = 1; + wakeup(&fsdev->nd_cleaning); + } + cv_wait(&fsdev->nd_clean_cv, &fsdev->nd_clean_mtx); + mtx_unlock(&fsdev->nd_clean_mtx); +} + +int +nandfs_start_cleaner(struct nandfs_device *fsdev) +{ + int error; + + MPASS(fsdev->nd_cleaner == NULL); + + fsdev->nd_cleaner_exit = 0; + + error = kthread_add((void(*)(void *))nandfs_cleaner, fsdev, NULL, + &fsdev->nd_cleaner, 0, 0, 
"nandfs_cleaner"); + if (error) + printf("nandfs: could not start cleaner: %d\n", error); + + return (error); +} + +int +nandfs_stop_cleaner(struct nandfs_device *fsdev) +{ + + MPASS(fsdev->nd_cleaner != NULL); + nandfs_wakeup_wait_cleaner(fsdev, NANDFS_CLEANER_KILL); + fsdev->nd_cleaner = NULL; + + DPRINTF(CLEAN, ("cleaner stopped\n")); + return (0); +} + +static int +nandfs_cleaner_finished(struct nandfs_device *fsdev) +{ + int exit; + + mtx_lock(&fsdev->nd_clean_mtx); + fsdev->nd_cleaning = 0; + if (!fsdev->nd_cleaner_exit) { + DPRINTF(CLEAN, ("%s: sleep\n", __func__)); + msleep(&fsdev->nd_cleaning, &fsdev->nd_clean_mtx, PRIBIO, "-", + hz * nandfs_cleaner_interval); + } + exit = fsdev->nd_cleaner_exit; + cv_broadcast(&fsdev->nd_clean_cv); + mtx_unlock(&fsdev->nd_clean_mtx); + if (exit) { + DPRINTF(CLEAN, ("%s: no longer active\n", __func__)); + return (1); + } + + return (0); +} + +static void +print_suinfo(struct nandfs_suinfo *suinfo, int nsegs) +{ + int i; + + for (i = 0; i < nsegs; i++) { + DPRINTF(CLEAN, ("%jx %jd %c%c%c %10u\n", + suinfo[i].nsi_num, suinfo[i].nsi_lastmod, + (suinfo[i].nsi_flags & + (NANDFS_SEGMENT_USAGE_ACTIVE) ? 'a' : '-'), + (suinfo[i].nsi_flags & + (NANDFS_SEGMENT_USAGE_DIRTY) ? 'd' : '-'), + (suinfo[i].nsi_flags & + (NANDFS_SEGMENT_USAGE_ERROR) ? 
'e' : '-'), + suinfo[i].nsi_blocks)); + } +} + +static int +nandfs_cleaner_vblock_is_alive(struct nandfs_device *fsdev, + struct nandfs_vinfo *vinfo, struct nandfs_cpinfo *cp, uint32_t ncps) +{ + int64_t idx, min, max; + + if (vinfo->nvi_end >= fsdev->nd_last_cno) + return (1); + + if (ncps == 0) + return (0); + + if (vinfo->nvi_end < cp[0].nci_cno || + vinfo->nvi_start > cp[ncps - 1].nci_cno) + return (0); + + idx = min = 0; + max = ncps - 1; + while (min <= max) { + idx = (min + max) / 2; + if (vinfo->nvi_start == cp[idx].nci_cno) + return (1); + if (vinfo->nvi_start < cp[idx].nci_cno) + max = idx - 1; + else + min = idx + 1; + } + + return (vinfo->nvi_end >= cp[idx].nci_cno); +} + +static void +nandfs_cleaner_vinfo_mark_alive(struct nandfs_device *fsdev, + struct nandfs_vinfo *vinfo, uint32_t nmembs, struct nandfs_cpinfo *cp, + uint32_t ncps) +{ + uint32_t i; + + for (i = 0; i < nmembs; i++) + vinfo[i].nvi_alive = + nandfs_cleaner_vblock_is_alive(fsdev, &vinfo[i], cp, ncps); +} + +static int +nandfs_cleaner_bdesc_is_alive(struct nandfs_device *fsdev, + struct nandfs_bdesc *bdesc) +{ + int alive; + + alive = bdesc->bd_oblocknr == bdesc->bd_blocknr; + if (!alive) + MPASS(abs(bdesc->bd_oblocknr - bdesc->bd_blocknr) > 2); + + return (alive); +} + +static void +nandfs_cleaner_bdesc_mark_alive(struct nandfs_device *fsdev, + struct nandfs_bdesc *bdesc, uint32_t nmembs) +{ + uint32_t i; + + for (i = 0; i < nmembs; i++) + bdesc[i].bd_alive = nandfs_cleaner_bdesc_is_alive(fsdev, + &bdesc[i]); +} + +static void +nandfs_cleaner_iterate_psegment(struct nandfs_device *fsdev, + struct nandfs_segment_summary *segsum, union nandfs_binfo *binfo, + nandfs_daddr_t blk, struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp) +{ + int i; + + DPRINTF(CLEAN, ("%s nbinfos %x\n", __func__, segsum->ss_nbinfos)); + for (i = 0; i < segsum->ss_nbinfos; i++) { + if (binfo[i].bi_v.bi_ino == NANDFS_DAT_INO) { + (*bdpp)->bd_oblocknr = blk + segsum->ss_nblocks - + segsum->ss_nbinfos + i; + /* + * 
XXX Hack + */ + if (segsum->ss_flags & NANDFS_SS_SR) + (*bdpp)->bd_oblocknr--; + (*bdpp)->bd_level = binfo[i].bi_dat.bi_level; + (*bdpp)->bd_offset = binfo[i].bi_dat.bi_blkoff; + (*bdpp)++; + } else { + (*vipp)->nvi_ino = binfo[i].bi_v.bi_ino; + (*vipp)->nvi_vblocknr = binfo[i].bi_v.bi_vblocknr; + (*vipp)++; + } + } +} + +static int +nandfs_cleaner_iterate_segment(struct nandfs_device *fsdev, uint64_t segno, + struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp, int *select) +{ + struct nandfs_segment_summary *segsum; + union nandfs_binfo *binfo; + struct buf *bp; + uint32_t nblocks; + nandfs_daddr_t curr, start, end; + int error = 0; + + nandfs_get_segment_range(fsdev, segno, &start, &end); + + DPRINTF(CLEAN, ("%s: segno %jx start %jx end %jx\n", __func__, segno, + start, end)); + + *select = 0; + + for (curr = start; curr < end; curr += nblocks) { + error = nandfs_dev_bread(fsdev, curr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + nandfs_error("%s: couldn't load segment summary of %jx: %d\n", + __func__, segno, error); + return (error); + } + + segsum = (struct nandfs_segment_summary *)bp->b_data; + binfo = (union nandfs_binfo *)(bp->b_data + segsum->ss_bytes); + + if (!nandfs_segsum_valid(segsum)) { + brelse(bp); + nandfs_error("nandfs: invalid summary of segment %jx\n", segno); + return (error); + } + + DPRINTF(CLEAN, ("%s: %jx magic %x bytes %x nblocks %x nbinfos " + "%x\n", __func__, segno, segsum->ss_magic, segsum->ss_bytes, + segsum->ss_nblocks, segsum->ss_nbinfos)); + + nandfs_cleaner_iterate_psegment(fsdev, segsum, binfo, curr, + vipp, bdpp); + nblocks = segsum->ss_nblocks; + brelse(bp); + } + + if (error == 0) + *select = 1; + + return (error); +} + +static int +nandfs_cleaner_choose_segment(struct nandfs_device *fsdev, uint64_t **segpp, + uint64_t nsegs, uint64_t *rseg) +{ + struct nandfs_suinfo *suinfo; + uint64_t i, ssegs; + int error; + + suinfo = malloc(sizeof(*suinfo) * nsegs, M_NANDFSTEMP, + M_ZERO | M_WAITOK); + + if (*rseg >= 
fsdev->nd_fsdata.f_nsegments) + *rseg = 0; + +retry: + error = nandfs_get_segment_info_filter(fsdev, suinfo, nsegs, *rseg, + &ssegs, NANDFS_SEGMENT_USAGE_DIRTY, + NANDFS_SEGMENT_USAGE_ACTIVE | NANDFS_SEGMENT_USAGE_ERROR | + NANDFS_SEGMENT_USAGE_GC); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + goto out; + } + + if (ssegs == 0 && *rseg != 0) { + *rseg = 0; + goto retry; + } + + print_suinfo(suinfo, ssegs); + + for (i = 0; i < ssegs; i++) { + (**segpp) = suinfo[i].nsi_num; + (*segpp)++; + } + + *rseg = suinfo[i - 1].nsi_num + 1; +out: + free(suinfo, M_NANDFSTEMP); + + return (error); +} + +static int +nandfs_cleaner_body(struct nandfs_device *fsdev, uint64_t *rseg) +{ + struct nandfs_vinfo *vinfo, *vip, *vipi; + struct nandfs_bdesc *bdesc, *bdp, *bdpi; + struct nandfs_cpstat cpstat; + struct nandfs_cpinfo *cpinfo = NULL; + uint64_t *segnums, *segp; + int select, selected; + int error = 0; + int nsegs; + int i; + + nsegs = nandfs_cleaner_segments; + + vip = vinfo = malloc(sizeof(*vinfo) * + fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP, + M_ZERO | M_WAITOK); + bdp = bdesc = malloc(sizeof(*bdesc) * + fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP, + M_ZERO | M_WAITOK); + segp = segnums = malloc(sizeof(*segnums) * nsegs, M_NANDFSTEMP, + M_WAITOK); + + error = nandfs_cleaner_choose_segment(fsdev, &segp, nsegs, rseg); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + goto out; + } + + if (segnums == segp) + goto out; + + selected = 0; + for (i = 0; i < segp - segnums; i++) { + error = nandfs_cleaner_iterate_segment(fsdev, segnums[i], &vip, + &bdp, &select); + if (error) { + /* + * XXX deselect (see below)? 
+ */ + goto out; + } + if (!select) + segnums[i] = NANDFS_NOSEGMENT; + else { + error = nandfs_markgc_segment(fsdev, segnums[i]); + if (error) { + nandfs_error("%s:%d\n", __FILE__, __LINE__); + goto out; + } + selected++; + } + } + + if (selected == 0) { + MPASS(vinfo == vip); + MPASS(bdesc == bdp); + goto out; + } + + error = nandfs_get_cpstat(fsdev->nd_cp_node, &cpstat); + if (error) { + nandfs_error("%s:%d\n", __FILE__, __LINE__); + goto out; + } + + if (cpstat.ncp_nss != 0) { + cpinfo = malloc(sizeof(struct nandfs_cpinfo) * cpstat.ncp_nss, + M_NANDFSTEMP, M_WAITOK); + error = nandfs_get_cpinfo(fsdev->nd_cp_node, 1, NANDFS_SNAPSHOT, + cpinfo, cpstat.ncp_nss, NULL); + if (error) { + nandfs_error("%s:%d\n", __FILE__, __LINE__); + goto out_locked; + } + } + + NANDFS_WRITELOCK(fsdev); + DPRINTF(CLEAN, ("%s: got lock\n", __func__)); + + error = nandfs_get_dat_vinfo(fsdev, vinfo, vip - vinfo); + if (error) { + nandfs_error("%s:%d\n", __FILE__, __LINE__); + goto out_locked; + } + + nandfs_cleaner_vinfo_mark_alive(fsdev, vinfo, vip - vinfo, cpinfo, + cpstat.ncp_nss); + + error = nandfs_get_dat_bdescs(fsdev, bdesc, bdp - bdesc); + if (error) { + nandfs_error("%s:%d\n", __FILE__, __LINE__); + goto out_locked; + } + + nandfs_cleaner_bdesc_mark_alive(fsdev, bdesc, bdp - bdesc); + + DPRINTF(CLEAN, ("got:\n")); + for (vipi = vinfo; vipi < vip; vipi++) { + DPRINTF(CLEAN, ("v ino %jx vblocknr %jx start %jx end %jx " + "alive %d\n", vipi->nvi_ino, vipi->nvi_vblocknr, + vipi->nvi_start, vipi->nvi_end, vipi->nvi_alive)); + } + for (bdpi = bdesc; bdpi < bdp; bdpi++) { + DPRINTF(CLEAN, ("b oblocknr %jx blocknr %jx offset %jx " + "alive %d\n", bdpi->bd_oblocknr, bdpi->bd_blocknr, + bdpi->bd_offset, bdpi->bd_alive)); + } + DPRINTF(CLEAN, ("end list\n")); + + error = nandfs_cleaner_clean_segments(fsdev, vinfo, vip - vinfo, NULL, + 0, bdesc, bdp - bdesc, segnums, segp - segnums); + if (error) + nandfs_error("%s:%d\n", __FILE__, __LINE__); + +out_locked: + NANDFS_WRITEUNLOCK(fsdev); 
+out: + free(cpinfo, M_NANDFSTEMP); + free(segnums, M_NANDFSTEMP); + free(bdesc, M_NANDFSTEMP); + free(vinfo, M_NANDFSTEMP); + + return (error); +} + +static void +nandfs_cleaner(struct nandfs_device *fsdev) +{ + uint64_t checked_seg = 0; + int error; + + while (!nandfs_cleaner_finished(fsdev)) { + if (!nandfs_cleaner_enable || rebooting) + continue; + + DPRINTF(CLEAN, ("%s: run started\n", __func__)); + + fsdev->nd_cleaning = 1; + + error = nandfs_cleaner_body(fsdev, &checked_seg); + + DPRINTF(CLEAN, ("%s: run finished error %d\n", __func__, + error)); + } + + DPRINTF(CLEAN, ("%s: exiting\n", __func__)); + kthread_exit(); +} + +static int +nandfs_cleaner_clean_segments(struct nandfs_device *nffsdev, + struct nandfs_vinfo *vinfo, uint32_t nvinfo, + struct nandfs_period *pd, uint32_t npd, + struct nandfs_bdesc *bdesc, uint32_t nbdesc, + uint64_t *segments, uint32_t nsegs) +{ + struct nandfs_node *gc; + struct buf *bp; + uint32_t i; + int error = 0; + + gc = nffsdev->nd_gc_node; + + DPRINTF(CLEAN, ("%s: enter\n", __func__)); + + VOP_LOCK(NTOV(gc), LK_EXCLUSIVE); + for (i = 0; i < nvinfo; i++) { + if (!vinfo[i].nvi_alive) + continue; + DPRINTF(CLEAN, ("%s: read vblknr:%#jx blk:%#jx\n", + __func__, (uintmax_t)vinfo[i].nvi_vblocknr, + (uintmax_t)vinfo[i].nvi_blocknr)); + error = nandfs_bread(nffsdev->nd_gc_node, vinfo[i].nvi_blocknr, + NULL, 0, &bp); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + VOP_UNLOCK(NTOV(gc), 0); + goto out; + } + nandfs_vblk_set(bp, vinfo[i].nvi_vblocknr); + nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED); + nandfs_dirty_buf(bp, 1); + } + VOP_UNLOCK(NTOV(gc), 0); + + /* Delete checkpoints */ + for (i = 0; i < npd; i++) { + DPRINTF(CLEAN, ("delete checkpoint: %jx\n", + (uintmax_t)pd[i].p_start)); + error = nandfs_delete_cp(nffsdev->nd_cp_node, pd[i].p_start, + pd[i].p_end); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + goto out; + } + } + + /* Update vblocks */ + for (i = 0; i < nvinfo; i++) { + if (vinfo[i].nvi_alive) + 
continue; + DPRINTF(CLEAN, ("freeing vblknr: %jx\n", vinfo[i].nvi_vblocknr)); + error = nandfs_vblock_free(nffsdev, vinfo[i].nvi_vblocknr); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + goto out; + } + } + + error = nandfs_process_bdesc(nffsdev, bdesc, nbdesc); + if (error) { + nandfs_error("%s:%d", __FILE__, __LINE__); + goto out; + } + + /* Add segments to clean */ + if (nffsdev->nd_free_count) { + nffsdev->nd_free_base = realloc(nffsdev->nd_free_base, + (nffsdev->nd_free_count + nsegs) * sizeof(uint64_t), + M_NANDFSTEMP, M_WAITOK | M_ZERO); + memcpy(&nffsdev->nd_free_base[nffsdev->nd_free_count], segments, + nsegs * sizeof(uint64_t)); + nffsdev->nd_free_count += nsegs; + } else { + nffsdev->nd_free_base = malloc(nsegs * sizeof(uint64_t), + M_NANDFSTEMP, M_WAITOK|M_ZERO); + memcpy(nffsdev->nd_free_base, segments, + nsegs * sizeof(uint64_t)); + nffsdev->nd_free_count = nsegs; + } + +out: + + DPRINTF(CLEAN, ("%s: exit error %d\n", __func__, error)); + + return (error); +} + +static int +nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, + uint64_t nmembs) +{ + struct nandfs_node *dat_node; + struct buf *bp; + uint64_t i; + int error; + + dat_node = nffsdev->nd_dat_node; + + VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE); + + for (i = 0; i < nmembs; i++) { + if (!bd[i].bd_alive) + continue; + DPRINTF(CLEAN, ("%s: idx %jx offset %jx\n", + __func__, i, bd[i].bd_offset)); + if (bd[i].bd_level) { + error = nandfs_bread_meta(dat_node, bd[i].bd_offset, + NULL, 0, &bp); + if (error) { + nandfs_error("%s: cannot read dat node " + "level:%d\n", __func__, bd[i].bd_level); + brelse(bp); + VOP_UNLOCK(NTOV(dat_node), 0); + return (error); + } + nandfs_dirty_buf_meta(bp, 1); + nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1); + } else { + error = nandfs_bread(dat_node, bd[i].bd_offset, NULL, + 0, &bp); + if (error) { + nandfs_error("%s: cannot read dat node\n", + __func__); + brelse(bp); + VOP_UNLOCK(NTOV(dat_node), 0); + return (error); + } + 
nandfs_dirty_buf(bp, 1); + } + DPRINTF(CLEAN, ("%s: bp: %p\n", __func__, bp)); + } + + VOP_UNLOCK(NTOV(dat_node), 0); + + return (0); +} diff --git a/sys/fs/nandfs/nandfs_cpfile.c b/sys/fs/nandfs/nandfs_cpfile.c new file mode 100644 index 0000000..8814fc0 --- /dev/null +++ b/sys/fs/nandfs/nandfs_cpfile.c @@ -0,0 +1,776 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include "nandfs_mount.h" +#include "nandfs.h" +#include "nandfs_subr.h" + + +static int +nandfs_checkpoint_size(struct nandfs_device *fsdev) +{ + + return (fsdev->nd_fsdata.f_checkpoint_size); +} + +static int +nandfs_checkpoint_blk_offset(struct nandfs_device *fsdev, uint64_t cn, + uint64_t *blk, uint64_t *offset) +{ + uint64_t off; + uint16_t cp_size, cp_per_blk; + + KASSERT((cn), ("checkpoing cannot be zero")); + + cp_size = fsdev->nd_fsdata.f_checkpoint_size; + cp_per_blk = fsdev->nd_blocksize / cp_size; + off = roundup(sizeof(struct nandfs_cpfile_header), cp_size) / cp_size; + off += (cn - 1); + + *blk = off / cp_per_blk; + *offset = (off % cp_per_blk) * cp_size; + + return (0); +} + +static int +nandfs_checkpoint_blk_remaining(struct nandfs_device *fsdev, uint64_t cn, + uint64_t blk, uint64_t offset) +{ + uint16_t cp_size, cp_remaining; + + cp_size = fsdev->nd_fsdata.f_checkpoint_size; + cp_remaining = (fsdev->nd_blocksize - offset) / cp_size; + + return (cp_remaining); +} + +int +nandfs_get_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node, + uint64_t cn) +{ + struct buf *bp; + uint64_t blk, offset; + int error; + + if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) { + return (-1); + } + + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (-1); + } + + error = nandfs_dirty_buf(bp, 0); + if (error) + return (-1); + + + nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset); + + if (blk != 0) { + if (blk < 
cp_node->nn_inode.i_blocks) + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + else + error = nandfs_bcreate(cp_node, blk, NOCRED, 0, &bp); + if (error) { + if (bp) + brelse(bp); + return (-1); + } + + nandfs_dirty_buf(bp, 1); + } + + DPRINTF(CPFILE, ("%s: cn:%#jx entry block:%#jx offset:%#jx\n", + __func__, (uintmax_t)cn, (uintmax_t)blk, (uintmax_t)offset)); + + return (0); +} + +int +nandfs_set_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node, + uint64_t cn, struct nandfs_inode *ifile_inode, uint64_t nblocks) +{ + struct nandfs_cpfile_header *cnh; + struct nandfs_checkpoint *cnp; + struct buf *bp; + uint64_t blk, offset; + int error; + + if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) { + nandfs_error("%s: trying to set invalid chekpoint %jx - %jx\n", + __func__, cn, fsdev->nd_last_cno); + return (-1); + } + + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return error; + } + + cnh = (struct nandfs_cpfile_header *) bp->b_data; + cnh->ch_ncheckpoints++; + + nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset); + + if(blk != 0) { + brelse(bp); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return error; + } + } + + cnp = (struct nandfs_checkpoint *)((uint8_t *)bp->b_data + offset); + cnp->cp_flags = 0; + cnp->cp_checkpoints_count = 1; + memset(&cnp->cp_snapshot_list, 0, sizeof(struct nandfs_snapshot_list)); + cnp->cp_cno = cn; + cnp->cp_create = fsdev->nd_ts.tv_sec; + cnp->cp_nblk_inc = nblocks; + cnp->cp_blocks_count = 0; + memcpy (&cnp->cp_ifile_inode, ifile_inode, sizeof(cnp->cp_ifile_inode)); + + DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx nblk:%#jx\n", + __func__, (uintmax_t)cn, (uintmax_t)cnp->cp_create, + (uintmax_t)nblocks)); + + brelse(bp); + return (0); +} + +static int +nandfs_cp_mounted(struct nandfs_device *nandfsdev, uint64_t cno) +{ + struct nandfsmount *nmp; + int mounted = 0; + + mtx_lock(&nandfsdev->nd_mutex); + /* No double-mounting of 
the same checkpoint */ + STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { + if (nmp->nm_mount_args.cpno == cno) { + mounted = 1; + break; + } + } + mtx_unlock(&nandfsdev->nd_mutex); + + return (mounted); +} + +static int +nandfs_cp_set_snapshot(struct nandfs_node *cp_node, uint64_t cno) +{ + struct nandfs_device *fsdev; + struct nandfs_cpfile_header *cnh; + struct nandfs_checkpoint *cnp; + struct nandfs_snapshot_list *list; + struct buf *bp; + uint64_t blk, prev_blk, offset; + uint64_t curr, prev; + int error; + + fsdev = cp_node->nn_nandfsdev; + + /* Get snapshot data */ + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) { + brelse(bp); + return (ENOENT); + } + if ((cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) { + brelse(bp); + return (EINVAL); + } + + brelse(bp); + /* Get list from header */ + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + cnh = (struct nandfs_cpfile_header *) bp->b_data; + list = &cnh->ch_snapshot_list; + prev = list->ssl_prev; + brelse(bp); + prev_blk = ~(0); + curr = 0; + while (prev > cno) { + curr = prev; + nandfs_checkpoint_blk_offset(fsdev, prev, &prev_blk, &offset); + error = nandfs_bread(cp_node, prev_blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + prev = list->ssl_prev; + brelse(bp); + } + + if (curr == 0) { + nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + cnh = (struct nandfs_cpfile_header *) bp->b_data; + list = &cnh->ch_snapshot_list; + } else { + nandfs_checkpoint_blk_offset(fsdev, curr, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct 
nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + } + + list->ssl_prev = cno; + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + + /* Update snapshot for cno */ + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + list->ssl_prev = prev; + list->ssl_next = curr; + cnp->cp_flags |= NANDFS_CHECKPOINT_SNAPSHOT; + nandfs_dirty_buf(bp, 1); + + if (prev == 0) { + nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + cnh = (struct nandfs_cpfile_header *) bp->b_data; + list = &cnh->ch_snapshot_list; + } else { + /* Update snapshot list for prev */ + nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + } + list->ssl_next = cno; + nandfs_dirty_buf(bp, 1); + + /* Update header */ + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnh = (struct nandfs_cpfile_header *) bp->b_data; + cnh->ch_nsnapshots++; + nandfs_dirty_buf(bp, 1); + + return (0); +} + +static int +nandfs_cp_clr_snapshot(struct nandfs_node *cp_node, uint64_t cno) +{ + struct nandfs_device *fsdev; + struct nandfs_cpfile_header *cnh; + struct nandfs_checkpoint *cnp; + struct nandfs_snapshot_list *list; + struct buf *bp; + uint64_t blk, offset, snapshot_cnt; + uint64_t next, prev; + int error; + + fsdev = cp_node->nn_nandfsdev; + + /* Get snapshot data */ + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) { + 
brelse(bp); + return (ENOENT); + } + if (!(cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) { + brelse(bp); + return (EINVAL); + } + + list = &cnp->cp_snapshot_list; + next = list->ssl_next; + prev = list->ssl_prev; + brelse(bp); + + /* Get previous snapshot */ + if (prev != 0) { + nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + } else { + nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + cnh = (struct nandfs_cpfile_header *) bp->b_data; + list = &cnh->ch_snapshot_list; + } + + list->ssl_next = next; + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + /* Get next snapshot */ + if (next != 0) { + nandfs_checkpoint_blk_offset(fsdev, next, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + } else { + nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + cnh = (struct nandfs_cpfile_header *) bp->b_data; + list = &cnh->ch_snapshot_list; + } + list->ssl_prev = prev; + nandfs_dirty_buf(bp, 1); + + /* Update snapshot list for cno */ + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + list = &cnp->cp_snapshot_list; + list->ssl_prev = 0; + list->ssl_next = 0; + cnp->cp_flags &= !NANDFS_CHECKPOINT_SNAPSHOT; + nandfs_dirty_buf(bp, 1); + + /* Update header */ + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnh = (struct nandfs_cpfile_header *) bp->b_data; + snapshot_cnt = cnh->ch_nsnapshots; + snapshot_cnt--; + cnh->ch_nsnapshots = snapshot_cnt; + nandfs_dirty_buf(bp, 1); + + return (0); 
+} + +int +nandfs_chng_cpmode(struct nandfs_node *node, struct nandfs_cpmode *ncpm) +{ + struct nandfs_device *fsdev; + uint64_t cno = ncpm->ncpm_cno; + int mode = ncpm->ncpm_mode; + int ret; + + fsdev = node->nn_nandfsdev; + VOP_LOCK(NTOV(node), LK_EXCLUSIVE); + switch (mode) { + case NANDFS_CHECKPOINT: + if (nandfs_cp_mounted(fsdev, cno)) { + ret = EBUSY; + } else + ret = nandfs_cp_clr_snapshot(node, cno); + break; + case NANDFS_SNAPSHOT: + ret = nandfs_cp_set_snapshot(node, cno); + break; + default: + ret = EINVAL; + break; + } + VOP_UNLOCK(NTOV(node), 0); + + return (ret); +} + +static void +nandfs_cpinfo_fill(struct nandfs_checkpoint *cnp, struct nandfs_cpinfo *nci) +{ + + nci->nci_flags = cnp->cp_flags; + nci->nci_pad = 0; + nci->nci_cno = cnp->cp_cno; + nci->nci_create = cnp->cp_create; + nci->nci_nblk_inc = cnp->cp_nblk_inc; + nci->nci_blocks_count = cnp->cp_blocks_count; + nci->nci_next = cnp->cp_snapshot_list.ssl_next; + DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx\n", + __func__, (uintmax_t)cnp->cp_cno, + (uintmax_t)cnp->cp_create)); +} + +static int +nandfs_get_cpinfo_cp(struct nandfs_node *node, uint64_t cno, + struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs) +{ + struct nandfs_device *fsdev; + struct buf *bp; + uint64_t blk, offset, last_cno, i; + uint16_t remaining; + int error; +#ifdef INVARIANTS + uint64_t testblk, testoffset; +#endif + + if (cno == 0) { + return (ENOENT); + } + + if (mnmembs < 1) { + return (EINVAL); + } + + fsdev = node->nn_nandfsdev; + last_cno = fsdev->nd_last_cno; + DPRINTF(CPFILE, ("%s: cno:%#jx mnmembs: %#jx last:%#jx\n", __func__, + (uintmax_t)cno, (uintmax_t)mnmembs, + (uintmax_t)fsdev->nd_last_cno)); + + /* + * do { + * get block + * read checkpoints until we hit last checkpoint, end of block or + * requested number + * } while (last read checkpoint <= last checkpoint on fs && + * read checkpoints < request number); + */ + *nmembs = i = 0; + do { + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + 
remaining = nandfs_checkpoint_blk_remaining(fsdev, cno, + blk, offset); + error = nandfs_bread(node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + while (cno <= last_cno && i < mnmembs && remaining) { +#ifdef INVARIANTS + nandfs_checkpoint_blk_offset(fsdev, cno, &testblk, + &testoffset); + KASSERT(testblk == blk, ("testblk != blk")); + KASSERT(testoffset == offset, ("testoffset != offset")); +#endif + DPRINTF(CPFILE, ("%s: cno %#jx\n", __func__, + (uintmax_t)cno)); + + nandfs_cpinfo_fill((struct nandfs_checkpoint *) + (bp->b_data + offset), nci); + offset += nandfs_checkpoint_size(fsdev); + i++; + nci++; + cno++; + (*nmembs)++; + remaining--; + } + brelse(bp); + } while (cno <= last_cno && i < mnmembs); + + return (0); +} + +static int +nandfs_get_cpinfo_sp(struct nandfs_node *node, uint64_t cno, + struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs) +{ + struct nandfs_checkpoint *cnp; + struct nandfs_cpfile_header *cnh; + struct nandfs_device *fsdev; + struct buf *bp = NULL; + uint64_t curr = 0; + uint64_t blk, offset, curr_cno; + uint32_t flag; + int i, error; + + if (cno == 0 || cno == ~(0)) + return (ENOENT); + + fsdev = node->nn_nandfsdev; + curr_cno = cno; + + if (nmembs) + *nmembs = 0; + if (curr_cno == 1) { + /* Get list from header */ + error = nandfs_bread(node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + cnh = (struct nandfs_cpfile_header *) bp->b_data; + curr_cno = cnh->ch_snapshot_list.ssl_next; + brelse(bp); + bp = NULL; + + /* No snapshots */ + if (curr_cno == 0) + return (0); + } + + for (i = 0; i < mnmembs; i++, nci++) { + nandfs_checkpoint_blk_offset(fsdev, curr_cno, &blk, &offset); + if (i == 0 || curr != blk) { + if (bp) + brelse(bp); + error = nandfs_bread(node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (ENOENT); + } + curr = blk; + } + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + flag = cnp->cp_flags; + if (!(flag & NANDFS_CHECKPOINT_SNAPSHOT) 
|| + (flag & NANDFS_CHECKPOINT_INVALID)) + break; + + nci->nci_flags = flag; + nci->nci_pad = 0; + nci->nci_cno = cnp->cp_cno; + nci->nci_create = cnp->cp_create; + nci->nci_nblk_inc = cnp->cp_nblk_inc; + nci->nci_blocks_count = cnp->cp_blocks_count; + nci->nci_next = cnp->cp_snapshot_list.ssl_next; + if (nmembs) + (*nmembs)++; + + curr_cno = nci->nci_next; + if (!curr_cno) + break; + } + + brelse(bp); + + return (0); +} + +int +nandfs_get_cpinfo(struct nandfs_node *node, uint64_t cno, uint16_t flags, + struct nandfs_cpinfo *nci, uint32_t nmembs, uint32_t *nnmembs) +{ + int error; + + VOP_LOCK(NTOV(node), LK_EXCLUSIVE); + switch (flags) { + case NANDFS_CHECKPOINT: + error = nandfs_get_cpinfo_cp(node, cno, nci, nmembs, nnmembs); + break; + case NANDFS_SNAPSHOT: + error = nandfs_get_cpinfo_sp(node, cno, nci, nmembs, nnmembs); + break; + default: + error = EINVAL; + break; + } + VOP_UNLOCK(NTOV(node), 0); + + return (error); +} + +int +nandfs_get_cpinfo_ioctl(struct nandfs_node *node, struct nandfs_argv *nargv) +{ + struct nandfs_cpinfo *nci; + uint64_t cno = nargv->nv_index; + void *buf = (void *)((uintptr_t)nargv->nv_base); + uint16_t flags = nargv->nv_flags; + uint32_t nmembs = 0; + int error; + + if (nargv->nv_nmembs > NANDFS_CPINFO_MAX) + return (EINVAL); + + nci = malloc(sizeof(struct nandfs_cpinfo) * nargv->nv_nmembs, + M_NANDFSTEMP, M_WAITOK | M_ZERO); + + error = nandfs_get_cpinfo(node, cno, flags, nci, nargv->nv_nmembs, &nmembs); + + if (error == 0) { + nargv->nv_nmembs = nmembs; + error = copyout(nci, buf, + sizeof(struct nandfs_cpinfo) * nmembs); + } + + free(nci, M_NANDFSTEMP); + return (error); +} + +int +nandfs_delete_cp(struct nandfs_node *node, uint64_t start, uint64_t end) +{ + struct nandfs_checkpoint *cnp; + struct nandfs_device *fsdev; + struct buf *bp; + uint64_t cno = start, blk, offset; + int error; + + DPRINTF(CPFILE, ("%s: delete cno %jx-%jx\n", __func__, start, end)); + VOP_LOCK(NTOV(node), LK_EXCLUSIVE); + fsdev = node->nn_nandfsdev; + for 
(cno = start; cno <= end; cno++) { + if (!cno) + continue; + + nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); + error = nandfs_bread(node, blk, NOCRED, 0, &bp); + if (error) { + VOP_UNLOCK(NTOV(node), 0); + brelse(bp); + return (error); + } + + cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); + if (cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) { + brelse(bp); + VOP_UNLOCK(NTOV(node), 0); + return (0); + } + + cnp->cp_flags |= NANDFS_CHECKPOINT_INVALID; + + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + } + VOP_UNLOCK(NTOV(node), 0); + + return (0); +} + +int +nandfs_make_snap(struct nandfs_device *fsdev, uint64_t *cno) +{ + struct nandfs_cpmode cpm; + int error; + + *cno = cpm.ncpm_cno = fsdev->nd_last_cno; + cpm.ncpm_mode = NANDFS_SNAPSHOT; + error = nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm); + return (error); +} + +int +nandfs_delete_snap(struct nandfs_device *fsdev, uint64_t cno) +{ + struct nandfs_cpmode cpm; + int error; + + cpm.ncpm_cno = cno; + cpm.ncpm_mode = NANDFS_CHECKPOINT; + error = nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm); + return (error); +} + +int nandfs_get_cpstat(struct nandfs_node *cp_node, struct nandfs_cpstat *ncp) +{ + struct nandfs_device *fsdev; + struct nandfs_cpfile_header *cnh; + struct buf *bp; + int error; + + VOP_LOCK(NTOV(cp_node), LK_EXCLUSIVE); + fsdev = cp_node->nn_nandfsdev; + + /* Get header */ + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + return (error); + } + cnh = (struct nandfs_cpfile_header *) bp->b_data; + ncp->ncp_cno = fsdev->nd_last_cno; + ncp->ncp_ncps = cnh->ch_ncheckpoints; + ncp->ncp_nss = cnh->ch_nsnapshots; + DPRINTF(CPFILE, ("%s: cno:%#jx ncps:%#jx nss:%#jx\n", + __func__, ncp->ncp_cno, ncp->ncp_ncps, ncp->ncp_nss)); + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + + return (0); +} diff --git a/sys/fs/nandfs/nandfs_dat.c b/sys/fs/nandfs/nandfs_dat.c new file mode 100644 index 0000000..799113d --- /dev/null 
+++ b/sys/fs/nandfs/nandfs_dat.c @@ -0,0 +1,344 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +int +nandfs_vblock_alloc(struct nandfs_device *nandfsdev, nandfs_daddr_t *vblock) +{ + struct nandfs_node *dat; + struct nandfs_mdt *mdt; + struct nandfs_alloc_request req; + struct nandfs_dat_entry *dat_entry; + uint64_t start; + uint32_t entry; + int locked, error; + + dat = nandfsdev->nd_dat_node; + mdt = &nandfsdev->nd_dat_mdt; + start = nandfsdev->nd_last_cno + 1; + + locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); + if (!locked) + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + req.entrynum = 0; + + /* Alloc vblock number */ + error = nandfs_find_free_entry(mdt, dat, &req); + if (error) { + nandfs_error("%s: cannot find free vblk entry\n", + __func__); + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + return (error); + } + + /* Read/create buffer */ + error = nandfs_get_entry_block(mdt, dat, &req, &entry, 1); + if (error) { + nandfs_error("%s: cannot get free vblk entry\n", + __func__); + nandfs_abort_entry(&req); + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + return (error); + } + + /* Fill out vblock data */ + dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; + dat_entry[entry].de_start = start; + dat_entry[entry].de_end = UINTMAX_MAX; + dat_entry[entry].de_blocknr = 0; + + /* Commit allocation */ + error = nandfs_alloc_entry(mdt, &req); + if (error) { + nandfs_error("%s: cannot get free vblk entry\n", + __func__); + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + return (error); + } + + /* Return allocated 
vblock */ + *vblock = req.entrynum; + DPRINTF(DAT, ("%s: allocated vblock %#jx\n", + __func__, (uintmax_t)*vblock)); + + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + return (error); +} + +int +nandfs_vblock_assign(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock, + nandfs_lbn_t block) +{ + struct nandfs_node *dat; + struct nandfs_mdt *mdt; + struct nandfs_alloc_request req; + struct nandfs_dat_entry *dat_entry; + uint32_t entry; + int locked, error; + + dat = nandfsdev->nd_dat_node; + mdt = &nandfsdev->nd_dat_mdt; + + locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); + if (!locked) + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + req.entrynum = vblock; + + error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0); + if (!error) { + dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; + dat_entry[entry].de_blocknr = block; + + DPRINTF(DAT, ("%s: assing vblock %jx->%jx\n", + __func__, (uintmax_t)vblock, (uintmax_t)block)); + + /* + * It is mostly called from syncer() so + * we want to force making buf dirty + */ + error = nandfs_dirty_buf(req.bp_entry, 1); + } + + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + + return (error); +} + +int +nandfs_vblock_end(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock) +{ + struct nandfs_node *dat; + struct nandfs_mdt *mdt; + struct nandfs_alloc_request req; + struct nandfs_dat_entry *dat_entry; + uint64_t end; + uint32_t entry; + int locked, error; + + dat = nandfsdev->nd_dat_node; + mdt = &nandfsdev->nd_dat_mdt; + end = nandfsdev->nd_last_cno; + + locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); + if (!locked) + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + req.entrynum = vblock; + + error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0); + if (!error) { + dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; + dat_entry[entry].de_end = end; + DPRINTF(DAT, ("%s: end vblock %#jx at checkpoint %#jx\n", + __func__, (uintmax_t)vblock, (uintmax_t)end)); + + /* + * It is mostly called from syncer() so + * we want to force making buf dirty + */ 
+ error = nandfs_dirty_buf(req.bp_entry, 1); + } + + if (!locked) + VOP_UNLOCK(NTOV(dat), 0); + + return (error); +} + +int +nandfs_vblock_free(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock) +{ + struct nandfs_node *dat; + struct nandfs_mdt *mdt; + struct nandfs_alloc_request req; + int error; + + dat = nandfsdev->nd_dat_node; + mdt = &nandfsdev->nd_dat_mdt; + + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + req.entrynum = vblock; + + error = nandfs_find_entry(mdt, dat, &req); + if (!error) { + DPRINTF(DAT, ("%s: vblk %#jx\n", __func__, (uintmax_t)vblock)); + nandfs_free_entry(mdt, &req); + } + + VOP_UNLOCK(NTOV(dat), 0); + return (error); +} + +int +nandfs_get_dat_vinfo_ioctl(struct nandfs_device *nandfsdev, struct nandfs_argv *nargv) +{ + struct nandfs_vinfo *vinfo; + size_t size; + int error; + + if (nargv->nv_nmembs > NANDFS_VINFO_MAX) + return (EINVAL); + + size = sizeof(struct nandfs_vinfo) * nargv->nv_nmembs; + vinfo = malloc(size, M_NANDFSTEMP, M_WAITOK|M_ZERO); + + error = copyin((void *)(uintptr_t)nargv->nv_base, vinfo, size); + if (error) { + free(vinfo, M_NANDFSTEMP); + return (error); + } + + error = nandfs_get_dat_vinfo(nandfsdev, vinfo, nargv->nv_nmembs); + if (error == 0) + error = copyout(vinfo, (void *)(uintptr_t)nargv->nv_base, size); + free(vinfo, M_NANDFSTEMP); + return (error); +} + +int +nandfs_get_dat_vinfo(struct nandfs_device *nandfsdev, struct nandfs_vinfo *vinfo, + uint32_t nmembs) +{ + struct nandfs_node *dat; + struct nandfs_mdt *mdt; + struct nandfs_alloc_request req; + struct nandfs_dat_entry *dat_entry; + uint32_t i, idx; + int error = 0; + + dat = nandfsdev->nd_dat_node; + mdt = &nandfsdev->nd_dat_mdt; + + DPRINTF(DAT, ("%s: nmembs %#x\n", __func__, nmembs)); + + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + + for (i = 0; i < nmembs; i++) { + req.entrynum = vinfo[i].nvi_vblocknr; + + error = nandfs_get_entry_block(mdt, dat,&req, &idx, 0); + if (error) + break; + + dat_entry = ((struct nandfs_dat_entry *) req.bp_entry->b_data); + 
vinfo[i].nvi_start = dat_entry[idx].de_start; + vinfo[i].nvi_end = dat_entry[idx].de_end; + vinfo[i].nvi_blocknr = dat_entry[idx].de_blocknr; + + DPRINTF(DAT, ("%s: vinfo: %jx[%jx-%jx]->%jx\n", + __func__, vinfo[i].nvi_vblocknr, vinfo[i].nvi_start, + vinfo[i].nvi_end, vinfo[i].nvi_blocknr)); + + brelse(req.bp_entry); + } + + VOP_UNLOCK(NTOV(dat), 0); + return (error); +} + +int +nandfs_get_dat_bdescs_ioctl(struct nandfs_device *nffsdev, + struct nandfs_argv *nargv) +{ + struct nandfs_bdesc *bd; + size_t size; + int error; + + size = nargv->nv_nmembs * sizeof(struct nandfs_bdesc); + bd = malloc(size, M_NANDFSTEMP, M_WAITOK); + error = copyin((void *)(uintptr_t)nargv->nv_base, bd, size); + if (error) { + free(bd, M_NANDFSTEMP); + return (error); + } + + error = nandfs_get_dat_bdescs(nffsdev, bd, nargv->nv_nmembs); + + if (error == 0) + error = copyout(bd, (void *)(uintptr_t)nargv->nv_base, size); + + free(bd, M_NANDFSTEMP); + return (error); +} + +int +nandfs_get_dat_bdescs(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, + uint32_t nmembs) +{ + struct nandfs_node *dat_node; + uint64_t map; + uint32_t i; + int error = 0; + + dat_node = nffsdev->nd_dat_node; + + VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE); + + for (i = 0; i < nmembs; i++) { + DPRINTF(CLEAN, + ("%s: bd ino:%#jx oblk:%#jx blocknr:%#jx off:%#jx\n", + __func__, (uintmax_t)bd[i].bd_ino, + (uintmax_t)bd[i].bd_oblocknr, (uintmax_t)bd[i].bd_blocknr, + (uintmax_t)bd[i].bd_offset)); + + error = nandfs_bmap_lookup(dat_node, bd[i].bd_offset, &map); + if (error) + break; + bd[i].bd_blocknr = map; + } + + VOP_UNLOCK(NTOV(dat_node), 0); + return (error); +} diff --git a/sys/fs/nandfs/nandfs_dir.c b/sys/fs/nandfs/nandfs_dir.c new file mode 100644 index 0000000..e279510 --- /dev/null +++ b/sys/fs/nandfs/nandfs_dir.c @@ -0,0 +1,314 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/signalvar.h> +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/lockf.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include "nandfs_mount.h" +#include "nandfs.h" +#include "nandfs_subr.h" + +int +nandfs_add_dirent(struct vnode *dvp, uint64_t ino, char *nameptr, long namelen, + uint8_t type) +{ + struct nandfs_node *dir_node = VTON(dvp); + struct nandfs_dir_entry *dirent, *pdirent; + uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize; + uint64_t filesize = dir_node->nn_inode.i_size; + uint64_t inode_blks = dir_node->nn_inode.i_blocks; + uint32_t off, rest; + uint8_t *pos; + struct buf *bp; + int error; + + pdirent = NULL; + bp = NULL; + if (inode_blks) { + error = nandfs_bread(dir_node, inode_blks - 1, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + pos = bp->b_data; + off = 0; + while (off < blocksize) { + pdirent = (struct nandfs_dir_entry *) (pos + off); + if (!pdirent->rec_len) { + pdirent = NULL; + break; + } + off += pdirent->rec_len; + } + + if (pdirent) + rest = pdirent->rec_len - + NANDFS_DIR_REC_LEN(pdirent->name_len); + else + rest = blocksize; + + if (rest < NANDFS_DIR_REC_LEN(namelen)) { + /* Do not update pdirent as new block is created */ + pdirent = NULL; + brelse(bp); + /* Set to NULL to create new */ + bp = NULL; + filesize += rest; + } + } + + /* If no bp found create new */ + if (!bp) { + error = nandfs_bcreate(dir_node, inode_blks, NOCRED, 0, &bp); + if (error) + return (error); + off = 0; + pos = bp->b_data; + } + + /* Modify pdirent if exists */ + if (pdirent) { + DPRINTF(LOOKUP, ("modify pdirent %p\n", pdirent)); + /* 
modify last de */ + off -= pdirent->rec_len; + pdirent->rec_len = + NANDFS_DIR_REC_LEN(pdirent->name_len); + off += pdirent->rec_len; + } + + /* Create new dirent */ + dirent = (struct nandfs_dir_entry *) (pos + off); + dirent->rec_len = blocksize - off; + dirent->inode = ino; + dirent->name_len = namelen; + memset(dirent->name, 0, NANDFS_DIR_NAME_LEN(namelen)); + memcpy(dirent->name, nameptr, namelen); + dirent->file_type = type; + + filesize += NANDFS_DIR_REC_LEN(dirent->name_len); + + DPRINTF(LOOKUP, ("create dir_entry '%.*s' at %p with size %x " + "new filesize: %jx\n", + (int)namelen, dirent->name, dirent, dirent->rec_len, + (uintmax_t)filesize)); + + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + dir_node->nn_inode.i_size = filesize; + dir_node->nn_flags |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, filesize); + + return (0); +} + +int +nandfs_remove_dirent(struct vnode *dvp, struct nandfs_node *node, + struct componentname *cnp) +{ + struct nandfs_node *dir_node; + struct nandfs_dir_entry *dirent, *pdirent; + struct buf *bp; + uint64_t filesize, blocknr, ino, offset; + uint32_t blocksize, limit, off; + uint16_t newsize; + uint8_t *pos; + int error, found; + + dir_node = VTON(dvp); + filesize = dir_node->nn_inode.i_size; + if (!filesize) + return (0); + + if (node) { + offset = node->nn_diroff; + ino = node->nn_ino; + } else { + offset = dir_node->nn_diroff; + ino = NANDFS_WHT_INO; + } + + dirent = pdirent = NULL; + blocksize = dir_node->nn_nandfsdev->nd_blocksize; + blocknr = offset / blocksize; + + DPRINTF(LOOKUP, ("rm direntry dvp %p node %p ino %#jx at off %#jx\n", + dvp, node, (uintmax_t)ino, (uintmax_t)offset)); + + error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + pos = bp->b_data; + off = 0; + found = 0; + limit = offset % blocksize; + pdirent = (struct nandfs_dir_entry *) bp->b_data; + while (off <= limit) { + dirent = (struct nandfs_dir_entry *) (pos + off); + 
+ if ((off == limit) && + (dirent->inode == ino)) { + found = 1; + break; + } + if (dirent->inode != 0) + pdirent = dirent; + off += dirent->rec_len; + } + + if (!found) { + nandfs_error("cannot find entry to remove"); + brelse(bp); + return (error); + } + DPRINTF(LOOKUP, + ("rm dirent ino %#jx at %#x with size %#x\n", + (uintmax_t)dirent->inode, off, dirent->rec_len)); + + newsize = (uintptr_t)dirent - (uintptr_t)pdirent; + newsize += dirent->rec_len; + pdirent->rec_len = newsize; + dirent->inode = 0; + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + dir_node->nn_flags |= IN_CHANGE | IN_UPDATE; + /* If last one modify filesize */ + if ((offset + NANDFS_DIR_REC_LEN(dirent->name_len)) == filesize) { + filesize = blocknr * blocksize + + ((uintptr_t)pdirent - (uintptr_t)pos) + + NANDFS_DIR_REC_LEN(pdirent->name_len); + dir_node->nn_inode.i_size = filesize; + } + + return (0); +} + +int +nandfs_update_parent_dir(struct vnode *dvp, uint64_t newparent) +{ + struct nandfs_dir_entry *dirent; + struct nandfs_node *dir_node; + struct buf *bp; + int error; + + dir_node = VTON(dvp); + error = nandfs_bread(dir_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + dirent = (struct nandfs_dir_entry *)bp->b_data; + dirent->inode = newparent; + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + return (0); +} + +int +nandfs_update_dirent(struct vnode *dvp, struct nandfs_node *fnode, + struct nandfs_node *tnode) +{ + struct nandfs_node *dir_node; + struct nandfs_dir_entry *dirent; + struct buf *bp; + uint64_t file_size, blocknr; + uint32_t blocksize, off; + uint8_t *pos; + int error; + + dir_node = VTON(dvp); + file_size = dir_node->nn_inode.i_size; + if (!file_size) + return (0); + + DPRINTF(LOOKUP, + ("chg direntry dvp %p ino %#jx to in %#jx at off %#jx\n", + dvp, (uintmax_t)tnode->nn_ino, (uintmax_t)fnode->nn_ino, + (uintmax_t)tnode->nn_diroff)); + + blocksize = dir_node->nn_nandfsdev->nd_blocksize; + blocknr = 
tnode->nn_diroff / blocksize; + off = tnode->nn_diroff % blocksize; + error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + pos = bp->b_data; + dirent = (struct nandfs_dir_entry *) (pos + off); + KASSERT((dirent->inode == tnode->nn_ino), + ("direntry mismatch")); + + dirent->inode = fnode->nn_ino; + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + + return (0); +} + +int +nandfs_init_dir(struct vnode *dvp, uint64_t ino, uint64_t parent_ino) +{ + + if (nandfs_add_dirent(dvp, parent_ino, "..", 2, DT_DIR) || + nandfs_add_dirent(dvp, ino, ".", 1, DT_DIR)) { + nandfs_error("%s: cannot initialize dir ino:%jd(pino:%jd)\n", + __func__, ino, parent_ino); + return (-1); + } + return (0); +} diff --git a/sys/fs/nandfs/nandfs_fs.h b/sys/fs/nandfs/nandfs_fs.h new file mode 100644 index 0000000..b72be40 --- /dev/null +++ b/sys/fs/nandfs/nandfs_fs.h @@ -0,0 +1,565 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Original definitions written by Koji Sato <koji@osrg.net> + * and Ryusuke Konishi <ryusuke@osrg.net> + * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud + * + * $FreeBSD$ + */ + +#ifndef _NANDFS_FS_H +#define _NANDFS_FS_H + +#include <sys/uuid.h> + +#define MNINDIR(fsdev) ((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t)) + +/* + * Inode structure. There are a few dedicated inode numbers that are + * defined here first. + */ +#define NANDFS_WHT_INO 1 /* Whiteout ino */ +#define NANDFS_ROOT_INO 2 /* Root file inode */ +#define NANDFS_DAT_INO 3 /* DAT file */ +#define NANDFS_CPFILE_INO 4 /* checkpoint file */ +#define NANDFS_SUFILE_INO 5 /* segment usage file */ +#define NANDFS_IFILE_INO 6 /* ifile */ +#define NANDFS_GC_INO 7 /* Cleanerd node */ +#define NANDFS_ATIME_INO 8 /* Atime file (reserved) */ +#define NANDFS_XATTR_INO 9 /* Xattribute file (reserved) */ +#define NANDFS_SKETCH_INO 10 /* Sketch file (obsolete) */ +#define NANDFS_USER_INO 11 /* First user's file inode number */ + +#define NANDFS_SYS_NODE(ino) \ + (((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO)) + +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. 
*/ + +typedef int64_t nandfs_daddr_t; +typedef int64_t nandfs_lbn_t; + +struct nandfs_inode { + uint64_t i_blocks; /* 0: size in device blocks */ + uint64_t i_size; /* 8: size in bytes */ + uint64_t i_ctime; /* 16: creation time in seconds */ + uint64_t i_mtime; /* 24: modification time in seconds part*/ + uint32_t i_ctime_nsec; /* 32: creation time nanoseconds part */ + uint32_t i_mtime_nsec; /* 36: modification time in nanoseconds */ + uint32_t i_uid; /* 40: user id */ + uint32_t i_gid; /* 44: group id */ + uint16_t i_mode; /* 48: file mode */ + uint16_t i_links_count; /* 50: number of references to the inode*/ + uint32_t i_flags; /* 52: NANDFS_*_FL flags */ + nandfs_daddr_t i_special; /* 56: special */ + nandfs_daddr_t i_db[NDADDR]; /* 64: Direct disk blocks. */ + nandfs_daddr_t i_ib[NIADDR]; /* 160: Indirect disk blocks. */ + uint64_t i_xattr; /* 184: reserved for extended attributes*/ + uint32_t i_generation; /* 192: file generation for NFS */ + uint32_t i_pad[15]; /* 196: make it 64 bits aligned */ +}; + +#ifdef _KERNEL +CTASSERT(sizeof(struct nandfs_inode) == 256); +#endif + +/* + * Each checkpoint/snapshot has a super root. + * + * The super root holds the inodes of the three system files: `dat', `cp' and + * 'su' files. All other FS state is defined by those. + * + * It is CRC checksum'ed and time stamped. + */ + +struct nandfs_super_root { + uint32_t sr_sum; /* check-sum */ + uint16_t sr_bytes; /* byte count of this structure */ + uint16_t sr_flags; /* reserved for flags */ + uint64_t sr_nongc_ctime; /* timestamp, not for cleaner(?) 
*/ + struct nandfs_inode sr_dat; /* DAT, virt->phys translation inode */ + struct nandfs_inode sr_cpfile; /* CP, checkpoints inode */ + struct nandfs_inode sr_sufile; /* SU, segment usage inode */ +}; + +/* + * Byte offset of the i-th metadata inode (0 = DAT, 1 = cpfile, 2 = sufile) + * from the start of the super root, given an inode size of inode_size bytes. + * offsetof() replaces the null-pointer cast, which truncates a pointer + * where pointers are wider than 32 bits. + */ +#define NANDFS_SR_MDT_OFFSET(inode_size, i) \ + ((uint32_t)offsetof(struct nandfs_super_root, sr_dat) + \ + (inode_size) * (i)) + +#define NANDFS_SR_DAT_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 0) +#define NANDFS_SR_CPFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 1) +#define NANDFS_SR_SUFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 2) +#define NANDFS_SR_BYTES (sizeof(struct nandfs_super_root)) + +/* + * The superblock describes the basic structure and mount history. It also + * records some sizes of structures found on the disc for sanity checks. + * + * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and + * NANDFS_SB2_OFFSET_BYTES. + */ + +/* File system states stored on media in superblock's sbp->s_state */ +#define NANDFS_VALID_FS 0x0001 /* cleanly unmounted and all is ok */ +#define NANDFS_ERROR_FS 0x0002 /* there were errors detected, fsck */ +#define NANDFS_RESIZE_FS 0x0004 /* resize required, XXX unknown flag*/ +#define NANDFS_MOUNT_STATE_BITS "\20\1VALID_FS\2ERROR_FS\3RESIZE_FS" + +/* + * Brief description of control structures: + * + * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks. + * Simple round-robin policy is used in order to choose which block will + * contain new super block. 
+ * + * Simple case with 2 blocks: + * 1: fsdata sblock1 [sblock3 [sblock5 ..]] + * 2: fsdata sblock2 [sblock4 [sblock6 ..]] + */ +struct nandfs_fsdata { + uint16_t f_magic; + uint16_t f_bytes; + + uint32_t f_sum; /* checksum of fsdata */ + uint32_t f_rev_level; /* major disk format revision */ + + uint64_t f_ctime; /* creation time (execution time + of newfs) */ + /* Block size represented as: blocksize = 1 << (f_log_block_size + 10) */ + uint32_t f_log_block_size; + + uint16_t f_inode_size; /* size of an inode */ + uint16_t f_dat_entry_size; /* size of a dat entry */ + uint16_t f_checkpoint_size; /* size of a checkpoint */ + uint16_t f_segment_usage_size; /* size of a segment usage */ + + uint16_t f_sbbytes; /* byte count of CRC calculation + for super blocks. s_reserved + is excluded! */ + + uint16_t f_errors; /* behaviour on detecting errors */ + + uint32_t f_erasesize; + uint64_t f_nsegments; /* number of segm. in filesystem */ + nandfs_daddr_t f_first_data_block; /* 1st seg disk block number */ + uint32_t f_blocks_per_segment; /* number of blocks per segment */ + uint32_t f_r_segments_percentage; /* reserved segments percentage */ + + struct uuid f_uuid; /* 128-bit uuid for volume */ + char f_volume_name[16]; /* volume name */ + uint32_t f_pad[104]; +} __packed; + +#ifdef _KERNEL +CTASSERT(sizeof(struct nandfs_fsdata) == 512); +#endif + +struct nandfs_super_block { + uint16_t s_magic; /* magic value for identification */ + + uint32_t s_sum; /* check sum of super block */ + + uint64_t s_last_cno; /* last checkpoint number */ + uint64_t s_last_pseg; /* addr part. segm. 
written last */ + uint64_t s_last_seq; /* seq.number of seg written last */ + uint64_t s_free_blocks_count; /* free blocks count */ + + uint64_t s_mtime; /* mount time */ + uint64_t s_wtime; /* write time */ + uint16_t s_state; /* file system state */ + + char s_last_mounted[64]; /* directory where last mounted */ + + uint32_t s_c_interval; /* commit interval of segment */ + uint32_t s_c_block_max; /* threshold of data amount for + the segment construction */ + uint32_t s_reserved[32]; /* padding to end of the block */ +} __packed; + +#ifdef _KERNEL +CTASSERT(sizeof(struct nandfs_super_block) == 256); +#endif + +#define NANDFS_FSDATA_MAGIC 0xf8da +#define NANDFS_SUPER_MAGIC 0x8008 + +#define NANDFS_NFSAREAS 4 +#define NANDFS_DATA_OFFSET_BYTES(esize) (NANDFS_NFSAREAS * (esize)) + +#define NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata)) + +#define NANDFS_DEF_BLOCKSIZE 4096 +#define NANDFS_MIN_BLOCKSIZE 512 + +#define NANDFS_DEF_ERASESIZE (2 << 16) + +#define NANDFS_MIN_SEGSIZE NANDFS_DEF_ERASESIZE + +#define NANDFS_CURRENT_REV 9 /* current major revision */ + +#define NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad) +/* Bytes count of super_block for CRC-calculation */ +#define NANDFS_SB_BYTES offsetof(struct nandfs_super_block, s_reserved) + +/* Maximal count of links to a file */ +#define NANDFS_LINK_MAX 32000 + +/* + * Structure of a directory entry. + * + * Note that they can't span blocks; the rec_len fills out. 
+ */ + +#define NANDFS_NAME_LEN 255 +struct nandfs_dir_entry { + uint64_t inode; /* inode number */ + uint16_t rec_len; /* directory entry length */ + uint8_t name_len; /* name length */ + uint8_t file_type; + char name[NANDFS_NAME_LEN]; /* file name */ + char pad; +}; + +/* + * NANDFS_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 8 + */ +#define NANDFS_DIR_PAD 8 +#define NANDFS_DIR_ROUND (NANDFS_DIR_PAD - 1) +#define NANDFS_DIR_NAME_OFFSET (offsetof(struct nandfs_dir_entry, name)) +#define NANDFS_DIR_REC_LEN(name_len) \ + (((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND) \ + & ~NANDFS_DIR_ROUND) +#define NANDFS_DIR_NAME_LEN(name_len) \ + (NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET) + +/* + * NiLFS/NANDFS divides the disc into fixed length segments. Each segment is + * filled with one or more partial segments of variable lengths. + * + * Each partial segment has a segment summary header followed by updates of + * files and optionally a super root. + */ + +/* + * Virtual to physical block translation information. For data blocks it maps + * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non + * datablocks it is the virtual block number assigned to an indirect block + * and has no bi_blkoff. The physical block number is the next + * available data block in the partial segment after all the binfo's. + */ +struct nandfs_binfo_v { + uint64_t bi_ino; /* file's inode */ + uint64_t bi_vblocknr; /* assigned virtual block number */ + uint64_t bi_blkoff; /* for file's logical block number */ +}; + +/* + * DAT allocation. For data blocks just the logical block number that maps on + * the next available data block in the partial segment after the binfo's. 
+ */ +struct nandfs_binfo_dat { + uint64_t bi_ino; + uint64_t bi_blkoff; /* DAT file's logical block number */ + uint8_t bi_level; /* whether this is meta block */ + uint8_t bi_pad[7]; +}; + +#ifdef _KERNEL +CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat)); +#endif + +/* Convenience union for both types of binfo's */ +union nandfs_binfo { + struct nandfs_binfo_v bi_v; + struct nandfs_binfo_dat bi_dat; +}; + +/* Indirect buffers path */ +struct nandfs_indir { + nandfs_daddr_t in_lbn; + int in_off; +}; + +/* The (partial) segment summary */ +struct nandfs_segment_summary { + uint32_t ss_datasum; /* CRC of complete data block */ + uint32_t ss_sumsum; /* CRC of segment summary only */ + uint32_t ss_magic; /* magic to identify segment summary */ + uint16_t ss_bytes; /* size of segment summary structure */ + uint16_t ss_flags; /* NANDFS_SS_* flags */ + uint64_t ss_seq; /* sequence number of this segm. sum */ + uint64_t ss_create; /* creation timestamp in seconds */ + uint64_t ss_next; /* blocknumber of next segment */ + uint32_t ss_nblocks; /* number of blocks used by summary */ + uint32_t ss_nbinfos; /* number of binfo structures */ + uint32_t ss_sumbytes; /* total size of segment summary */ + uint32_t ss_pad; + /* stream of binfo structures */ +}; + +#define NANDFS_SEGSUM_MAGIC 0x8e680011 /* segment summary magic number */ + +/* Segment summary flags */ +#define NANDFS_SS_LOGBGN 0x0001 /* begins a logical segment */ +#define NANDFS_SS_LOGEND 0x0002 /* ends a logical segment */ +#define NANDFS_SS_SR 0x0004 /* has super root */ +#define NANDFS_SS_SYNDT 0x0008 /* includes data only updates */ +#define NANDFS_SS_GC 0x0010 /* segment written for cleaner operation */ +#define NANDFS_SS_FLAG_BITS "\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC" + +/* Segment summary constraints */ +#define NANDFS_SEG_MIN_BLOCKS 16 /* minimum number of blocks in a + full segment */ +#define NANDFS_PSEG_MIN_BLOCKS 2 /* minimum number of blocks in a + partial segment */ +#define 
NANDFS_MIN_NRSVSEGS 8 /* minimum number of reserved + segments */ + +/* + * Structure of DAT/inode file. + * + * A DAT file is divided into groups. The maximum number of groups is the + * number of block group descriptors that fit into one block; this descriptor + * only gives the number of free entries in the associated group. + * + * Each group has a block sized bitmap indicating if an entry is taken or + * empty. Each bit stands for a DAT entry. + * + * The inode file has exactly the same format only the entries are inode + * entries. + */ + +struct nandfs_block_group_desc { + uint32_t bg_nfrees; /* num. free entries in block group */ +}; + +/* DAT entry in a super root's DAT file */ +struct nandfs_dat_entry { + uint64_t de_blocknr; /* block number */ + uint64_t de_start; /* valid from checkpoint */ + uint64_t de_end; /* valid till checkpoint */ + uint64_t de_rsv; /* reserved for future use */ +}; + +/* + * Structure of CP file. + * + * A snapshot is just a checkpoint only it's protected against removal by the + * cleaner. The snapshots are kept on a double linked list of checkpoints. + */ +struct nandfs_snapshot_list { + uint64_t ssl_next; /* checkpoint nr. forward */ + uint64_t ssl_prev; /* checkpoint nr. back */ +}; + +/* Checkpoint entry structure */ +struct nandfs_checkpoint { + uint32_t cp_flags; /* NANDFS_CHECKPOINT_* flags */ + uint32_t cp_checkpoints_count; /* ZERO, not used anymore? 
*/ + struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots */ + uint64_t cp_cno; /* checkpoint number */ + uint64_t cp_create; /* creation timestamp */ + uint64_t cp_nblk_inc; /* number of blocks incremented */ + uint64_t cp_blocks_count; /* reserved (might be deleted) */ + struct nandfs_inode cp_ifile_inode; /* inode file inode */ +}; + +/* Checkpoint flags */ +#define NANDFS_CHECKPOINT_SNAPSHOT 1 +#define NANDFS_CHECKPOINT_INVALID 2 +#define NANDFS_CHECKPOINT_SKETCH 4 +#define NANDFS_CHECKPOINT_MINOR 8 +#define NANDFS_CHECKPOINT_BITS "\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR" + +/* Header of the checkpoint file */ +struct nandfs_cpfile_header { + uint64_t ch_ncheckpoints; /* number of checkpoints */ + uint64_t ch_nsnapshots; /* number of snapshots */ + struct nandfs_snapshot_list ch_snapshot_list; /* snapshot list */ +}; + +#define NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET \ + ((sizeof(struct nandfs_cpfile_header) + \ + sizeof(struct nandfs_checkpoint) - 1) / \ + sizeof(struct nandfs_checkpoint)) + + +#define NANDFS_NOSEGMENT 0xffffffff + +/* + * Structure of SU file. + * + * The segment usage file sums up how each of the segments are used. They are + * indexed by their segment number. 
+ */ + +/* Segment usage entry */ +struct nandfs_segment_usage { + uint64_t su_lastmod; /* last modified timestamp */ + uint32_t su_nblocks; /* number of blocks in segment */ + uint32_t su_flags; /* NANDFS_SEGMENT_USAGE_* flags */ +}; + +/* Segment usage flag */ +#define NANDFS_SEGMENT_USAGE_ACTIVE 1 +#define NANDFS_SEGMENT_USAGE_DIRTY 2 +#define NANDFS_SEGMENT_USAGE_ERROR 4 +#define NANDFS_SEGMENT_USAGE_GC 8 +/* Names for the flag bits above, in bit order (GC is bit 4) */ +#define NANDFS_SEGMENT_USAGE_BITS "\20\1ACTIVE\2DIRTY\3ERROR\4GC" + +/* Header of the segment usage file */ +struct nandfs_sufile_header { + uint64_t sh_ncleansegs; /* number of segments marked clean */ + uint64_t sh_ndirtysegs; /* number of segments marked dirty */ + uint64_t sh_last_alloc; /* last allocated segment number */ +}; + +#define NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ + ((sizeof(struct nandfs_sufile_header) + \ + sizeof(struct nandfs_segment_usage) - 1) / \ + sizeof(struct nandfs_segment_usage)) + +struct nandfs_seg_stat { + uint64_t nss_nsegs; + uint64_t nss_ncleansegs; + uint64_t nss_ndirtysegs; + uint64_t nss_ctime; + uint64_t nss_nongc_ctime; + uint64_t nss_prot_seq; +}; + +enum { + NANDFS_CHECKPOINT, + NANDFS_SNAPSHOT +}; + +#define NANDFS_CPINFO_MAX 512 + +struct nandfs_cpinfo { + uint32_t nci_flags; + uint32_t nci_pad; + uint64_t nci_cno; + uint64_t nci_create; + uint64_t nci_nblk_inc; + uint64_t nci_blocks_count; + uint64_t nci_next; +}; + +#define NANDFS_SEGMENTS_MAX 512 + +struct nandfs_suinfo { + uint64_t nsi_num; + uint64_t nsi_lastmod; + uint32_t nsi_blocks; + uint32_t nsi_flags; +}; + +#define NANDFS_VINFO_MAX 512 + +struct nandfs_vinfo { + uint64_t nvi_ino; + uint64_t nvi_vblocknr; + uint64_t nvi_start; + uint64_t nvi_end; + uint64_t nvi_blocknr; + int nvi_alive; +}; + +struct nandfs_cpmode { + uint64_t ncpm_cno; + uint32_t ncpm_mode; + uint32_t ncpm_pad; +}; + +struct nandfs_argv { + uint64_t nv_base; + uint32_t nv_nmembs; + uint16_t nv_size; + uint16_t nv_flags; + uint64_t nv_index; +}; + +struct nandfs_cpstat { + uint64_t 
ncp_cno; + uint64_t ncp_ncps; + uint64_t ncp_nss; +}; + +struct nandfs_period { + uint64_t p_start; + uint64_t p_end; +}; + +struct nandfs_vdesc { + uint64_t vd_ino; + uint64_t vd_cno; + uint64_t vd_vblocknr; + struct nandfs_period vd_period; + uint64_t vd_blocknr; + uint64_t vd_offset; + uint32_t vd_flags; + uint32_t vd_pad; +}; + +struct nandfs_bdesc { + uint64_t bd_ino; + uint64_t bd_oblocknr; + uint64_t bd_blocknr; + uint64_t bd_offset; + uint32_t bd_level; + uint32_t bd_alive; +}; + +#ifndef _KERNEL +#ifndef MNAMELEN +#define MNAMELEN 88 +#endif +#endif + +struct nandfs_fsinfo { + struct nandfs_fsdata fs_fsdata; + struct nandfs_super_block fs_super; + char fs_dev[MNAMELEN]; +}; + +#define NANDFS_MAX_MOUNTS 65535 + +#define NANDFS_IOCTL_GET_SUSTAT _IOR('N', 100, struct nandfs_seg_stat) +#define NANDFS_IOCTL_CHANGE_CPMODE _IOWR('N', 101, struct nandfs_cpmode) +#define NANDFS_IOCTL_GET_CPINFO _IOWR('N', 102, struct nandfs_argv) +#define NANDFS_IOCTL_DELETE_CP _IOWR('N', 103, uint64_t[2]) +#define NANDFS_IOCTL_GET_CPSTAT _IOR('N', 104, struct nandfs_cpstat) +#define NANDFS_IOCTL_GET_SUINFO _IOWR('N', 105, struct nandfs_argv) +#define NANDFS_IOCTL_GET_VINFO _IOWR('N', 106, struct nandfs_argv) +#define NANDFS_IOCTL_GET_BDESCS _IOWR('N', 107, struct nandfs_argv) +#define NANDFS_IOCTL_GET_FSINFO _IOR('N', 108, struct nandfs_fsinfo) +#define NANDFS_IOCTL_MAKE_SNAP _IOWR('N', 109, uint64_t) +#define NANDFS_IOCTL_DELETE_SNAP _IOWR('N', 110, uint64_t) +#define NANDFS_IOCTL_SYNC _IOWR('N', 111, uint64_t) + +#endif /* _NANDFS_FS_H */ diff --git a/sys/fs/nandfs/nandfs_ifile.c b/sys/fs/nandfs/nandfs_ifile.c new file mode 100644 index 0000000..7e4db87 --- /dev/null +++ b/sys/fs/nandfs/nandfs_ifile.c @@ -0,0 +1,213 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +int +nandfs_node_create(struct nandfsmount *nmp, struct nandfs_node **node, + uint16_t mode) +{ + struct nandfs_alloc_request req; + struct nandfs_device *nandfsdev; + struct nandfs_mdt *mdt; + struct nandfs_node *ifile; + struct nandfs_inode *inode; + struct vnode *vp; + uint32_t entry; + int error = 0; + + nandfsdev = nmp->nm_nandfsdev; + mdt = &nandfsdev->nd_ifile_mdt; + ifile = nmp->nm_ifile_node; + vp = NTOV(ifile); + + VOP_LOCK(vp, LK_EXCLUSIVE); + /* Allocate new inode in ifile */ + req.entrynum = nandfsdev->nd_last_ino + 1; + error = nandfs_find_free_entry(mdt, ifile, &req); + if (error) { + VOP_UNLOCK(vp, 0); + return (error); + } + + error = nandfs_get_entry_block(mdt, ifile, &req, &entry, 1); + if (error) { + VOP_UNLOCK(vp, 0); + return (error); + } + + /* Inode initialization */ + inode = ((struct nandfs_inode *) req.bp_entry->b_data) + entry; + nandfs_inode_init(inode, mode); + + error = nandfs_alloc_entry(mdt, &req); + if (error) { + VOP_UNLOCK(vp, 0); + return (error); + } + + VOP_UNLOCK(vp, 0); + + nandfsdev->nd_last_ino = req.entrynum; + error = nandfs_get_node(nmp, req.entrynum, node); + DPRINTF(IFILE, ("%s: node: %p ino: %#jx\n", + __func__, node, (uintmax_t)((*node)->nn_ino))); + + return (error); +} + +int +nandfs_node_destroy(struct nandfs_node *node) +{ + struct nandfs_alloc_request req; + struct nandfsmount *nmp; + struct nandfs_mdt *mdt; + struct nandfs_node *ifile; + struct 
vnode *vp; + int error = 0; + + nmp = node->nn_nmp; + req.entrynum = node->nn_ino; + mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; + ifile = nmp->nm_ifile_node; + vp = NTOV(ifile); + + DPRINTF(IFILE, ("%s: destroy node: %p ino: %#jx\n", + __func__, node, (uintmax_t)node->nn_ino)); + VOP_LOCK(vp, LK_EXCLUSIVE); + + error = nandfs_find_entry(mdt, ifile, &req); + if (error) { + nandfs_error("%s: finding entry error:%d node %p(%jx)", + __func__, error, node, node->nn_ino); + VOP_UNLOCK(vp, 0); + return (error); + } + + nandfs_inode_destroy(&node->nn_inode); + + error = nandfs_free_entry(mdt, &req); + if (error) { + nandfs_error("%s: freing entry error:%d node %p(%jx)", + __func__, error, node, node->nn_ino); + VOP_UNLOCK(vp, 0); + return (error); + } + + VOP_UNLOCK(vp, 0); + DPRINTF(IFILE, ("%s: freed node %p ino %#jx\n", + __func__, node, (uintmax_t)node->nn_ino)); + return (error); +} + +int +nandfs_node_update(struct nandfs_node *node) +{ + struct nandfs_alloc_request req; + struct nandfsmount *nmp; + struct nandfs_mdt *mdt; + struct nandfs_node *ifile; + struct nandfs_inode *inode; + uint32_t index; + int error = 0; + + nmp = node->nn_nmp; + ifile = nmp->nm_ifile_node; + ASSERT_VOP_LOCKED(NTOV(ifile), __func__); + + req.entrynum = node->nn_ino; + mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; + + DPRINTF(IFILE, ("%s: node:%p ino:%#jx\n", + __func__, &node->nn_inode, (uintmax_t)node->nn_ino)); + + error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0); + if (error) { + printf("nandfs_get_entry_block returned with ERROR=%d\n", + error); + return (error); + } + + inode = ((struct nandfs_inode *) req.bp_entry->b_data) + index; + memcpy(inode, &node->nn_inode, sizeof(*inode)); + error = nandfs_dirty_buf(req.bp_entry, 0); + + return (error); +} + +int +nandfs_get_node_entry(struct nandfsmount *nmp, struct nandfs_inode **inode, + uint64_t ino, struct buf **bp) +{ + struct nandfs_alloc_request req; + struct nandfs_mdt *mdt; + struct nandfs_node *ifile; + struct vnode *vp; + 
uint32_t index; + int error = 0; + + req.entrynum = ino; + mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; + ifile = nmp->nm_ifile_node; + vp = NTOV(ifile); + + VOP_LOCK(vp, LK_EXCLUSIVE); + error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0); + if (error) { + VOP_UNLOCK(vp, 0); + return (error); + } + + *inode = ((struct nandfs_inode *) req.bp_entry->b_data) + index; + *bp = req.bp_entry; + VOP_UNLOCK(vp, 0); + return (0); +} + diff --git a/sys/fs/nandfs/nandfs_mount.h b/sys/fs/nandfs/nandfs_mount.h new file mode 100644 index 0000000..f733e22 --- /dev/null +++ b/sys/fs/nandfs/nandfs_mount.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the + * NetBSD Project. See http://www.NetBSD.org/ for + * information about NetBSD. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * From: NetBSD: nilfs_mount.h,v 1.1 2009/07/18 16:31:42 reinoud + * + * $FreeBSD$ + */ + +#ifndef _FS_NANDFS_NANDFS_MOUNT_H_ +#define _FS_NANDFS_NANDFS_MOUNT_H_ + +/* + * Arguments to mount NANDFS file system. + */ + +struct nandfs_args { + char *fspec; /* mount specifier */ + int64_t cpno; /* checkpoint number */ +}; + +#endif /* !_FS_NANDFS_NANDFS_MOUNT_H_ */ + diff --git a/sys/fs/nandfs/nandfs_segment.c b/sys/fs/nandfs/nandfs_segment.c new file mode 100644 index 0000000..836bead --- /dev/null +++ b/sys/fs/nandfs/nandfs_segment.c @@ -0,0 +1,1329 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/libkern.h> + +#include <ddb/ddb.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> + +#include <geom/geom.h> +#include <geom/geom_vfs.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +static int +nandfs_new_segment(struct nandfs_device *fsdev) +{ + int error = 0; + uint64_t new; + + error = nandfs_alloc_segment(fsdev, &new); + if (!error) { + fsdev->nd_seg_num = fsdev->nd_next_seg_num; + fsdev->nd_next_seg_num = new; + } + DPRINTF(SYNC, ("%s: new segment %jx next %jx error %d\n", + __func__, (uintmax_t)fsdev->nd_seg_num, (uintmax_t)new, error)); + if (error) + nandfs_error("%s: cannot create segment error %d\n", + __func__, error); + + return (error); +} + +static int +create_segment(struct nandfs_seginfo *seginfo) +{ + struct nandfs_segment *seg; + struct nandfs_device *fsdev; + struct nandfs_segment *prev; + struct buf *bp; + uint64_t start_block, curr; + uint32_t blks_per_seg, nblocks; + int error; + + fsdev = seginfo->fsdev; + prev 
= seginfo->curseg; + blks_per_seg = fsdev->nd_fsdata.f_blocks_per_segment; + nblocks = fsdev->nd_last_segsum.ss_nblocks; + + if (!prev) { + vfs_timestamp(&fsdev->nd_ts); + /* Touch current segment */ + error = nandfs_touch_segment(fsdev, fsdev->nd_seg_num); + if (error) { + nandfs_error("%s: cannot preallocate segment %jx\n", + __func__, fsdev->nd_seg_num); + return (error); + } + error = nandfs_touch_segment(fsdev, 0); + if (error) { + nandfs_error("%s: cannot dirty block with segment 0\n", + __func__); + return (error); + } + start_block = fsdev->nd_last_pseg + (uint64_t)nblocks; + /* + * XXX Hack + */ + if (blks_per_seg - (start_block % blks_per_seg) - 1 == 0) + start_block++; + curr = nandfs_get_segnum_of_block(fsdev, start_block); + /* Allocate new segment if last one is full */ + if (fsdev->nd_seg_num != curr) { + error = nandfs_new_segment(fsdev); + if (error) { + nandfs_error("%s: cannot create new segment\n", + __func__); + return (error); + } + /* + * XXX Hack + */ + nandfs_get_segment_range(fsdev, fsdev->nd_seg_num, &start_block, NULL); + } + } else { + nandfs_get_segment_range(fsdev, fsdev->nd_next_seg_num, + &start_block, NULL); + + /* Touch current segment and allocate and touch new one */ + error = nandfs_new_segment(fsdev); + if (error) { + nandfs_error("%s: cannot create next segment\n", + __func__); + return (error); + } + + /* Reiterate in case new buf is dirty */ + seginfo->reiterate = 1; + } + + /* Allocate and initialize nandfs_segment structure */ + seg = malloc(sizeof(*seg), M_DEVBUF, M_WAITOK|M_ZERO); + TAILQ_INIT(&seg->segsum); + TAILQ_INIT(&seg->data); + seg->fsdev = fsdev; + seg->start_block = start_block; + seg->num_blocks = blks_per_seg - (start_block % blks_per_seg) - 1; + seg->seg_num = fsdev->nd_seg_num; + seg->seg_next = fsdev->nd_next_seg_num; + seg->segsum_blocks = 1; + seg->bytes_left = fsdev->nd_blocksize - + sizeof(struct nandfs_segment_summary); + seg->segsum_bytes = sizeof(struct nandfs_segment_summary); + + /* Allocate 
buffer for segment summary */ + bp = getblk(fsdev->nd_devvp, nandfs_block_to_dblock(fsdev, + seg->start_block), fsdev->nd_blocksize, 0, 0, 0); + bzero(bp->b_data, seginfo->fsdev->nd_blocksize); + bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; + bp->b_flags |= B_MANAGED; + + /* Add buffer to segment */ + TAILQ_INSERT_TAIL(&seg->segsum, bp, b_cluster.cluster_entry); + seg->current_off = bp->b_data + sizeof(struct nandfs_segment_summary); + + DPRINTF(SYNC, ("%s: seg %p : initial settings: start %#jx size :%#x\n", + __func__, seg, (uintmax_t)seg->start_block, seg->num_blocks)); + DPRINTF(SYNC, ("%s: seg->seg_num %#jx cno %#jx next %#jx\n", __func__, + (uintmax_t)seg->seg_num, (uintmax_t)(fsdev->nd_last_cno + 1), + (uintmax_t)seg->seg_next)); + + if (!prev) + LIST_INSERT_HEAD(&seginfo->seg_list, seg, seg_link); + else + LIST_INSERT_AFTER(prev, seg, seg_link); + + seginfo->curseg = seg; + + return (0); +} +/* + * Free every segment on seginfo->seg_list: each segment-summary buffer is + * taken off the segsum queue, has B_MANAGED cleared and is released with + * brelse(); then the segment is unlinked and freed. Buffers on the + * segment's data queue are not touched here. Always returns 0. + */ +static int +delete_segment(struct nandfs_seginfo *seginfo) +{ + struct nandfs_segment *seg, *tseg; + struct buf *bp, *tbp; + + LIST_FOREACH_SAFE(seg, &seginfo->seg_list, seg_link, tseg) { + TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, + tbp) { + TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry); + bp->b_flags &= ~B_MANAGED; + brelse(bp); + }; + + LIST_REMOVE(seg, seg_link); + free(seg, M_DEVBUF); + } + + return (0); +} +/* + * Allocate a seginfo for fsdev, initialise its segment list, curseg and + * block counter, publish it in fsdev->nd_seginfo and hand it back through + * *seginfo. Always returns 0. + * NOTE(review): the allocation is not zeroed (no M_ZERO) and the + * 'reiterate' field is not initialised here. + */ +static int +create_seginfo(struct nandfs_device *fsdev, struct nandfs_seginfo **seginfo) +{ + struct nandfs_seginfo *info; + + info = malloc(sizeof(*info), M_DEVBUF, M_WAITOK); + + LIST_INIT(&info->seg_list); + info->fsdev = fsdev; + info->curseg = NULL; + info->blocks = 0; + *seginfo = info; + fsdev->nd_seginfo = info; + return (0); +} +/* + * Tear down a seginfo: free its segments with delete_segment(), clear + * fsdev->nd_seginfo and free the structure itself. Always returns 0. + */ +static int +delete_seginfo(struct nandfs_seginfo *seginfo) +{ + struct nandfs_device *nffsdev; + + nffsdev = seginfo->fsdev; + delete_segment(seginfo); + nffsdev->nd_seginfo = NULL; + free(seginfo, M_DEVBUF); + + return (0); +} +/* + * Get a zeroed, B_MANAGED buffer for the super root and append it to the + * data queue of the current segment, creating a new segment first when + * there is none or the current one has no blocks left. On success the + * buffer is returned through *newbp and the block counters are updated; + * on failure the buffer is released and the error returned. + */ +static int +nandfs_create_superroot_block(struct 
nandfs_seginfo *seginfo, + struct buf **newbp) +{ + struct buf *bp; + int error; + + bp = nandfs_geteblk(seginfo->fsdev->nd_blocksize, GB_NOWAIT_BD); + + bzero(bp->b_data, seginfo->fsdev->nd_blocksize); + bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; + bp->b_flags |= B_MANAGED; + + if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) { + error = create_segment(seginfo); + if (error) { + brelse(bp); + nandfs_error("%s: no segment for superroot\n", + __func__); + return (error); + } + } + + TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry); + + seginfo->curseg->nblocks++; + seginfo->curseg->num_blocks--; + seginfo->blocks++; + + *newbp = bp; + return (0); +} +/* + * Append a super root block to the segment under construction. The block + * gets sr_bytes, sr_flags and sr_nongc_ctime filled in, copies of the DAT, + * cpfile and sufile inodes, and in sr_sum the crc32 of the NANDFS_SR_BYTES + * area with the checksum field itself skipped. The device dirty-buffer + * count is incremented. Returns 0, or the error from + * nandfs_create_superroot_block(). + */ +static int +nandfs_add_superroot(struct nandfs_seginfo *seginfo) +{ + struct nandfs_device *fsdev; + struct nandfs_super_root *sr; + struct buf *bp = NULL; + uint64_t crc_skip; + uint32_t crc_calc; + int error; + + fsdev = seginfo->fsdev; + + error = nandfs_create_superroot_block(seginfo, &bp); + if (error) { + nandfs_error("%s: cannot add superroot\n", __func__); + return (error); + } + + sr = (struct nandfs_super_root *)bp->b_data; + /* Fill the superroot header; the CRC is computed and stored below */ + sr->sr_bytes = NANDFS_SR_BYTES; + sr->sr_flags = 0; + sr->sr_nongc_ctime = 0; + + memcpy(&sr->sr_dat, &fsdev->nd_dat_node->nn_inode, + sizeof(struct nandfs_inode)); + memcpy(&sr->sr_cpfile, &fsdev->nd_cp_node->nn_inode, + sizeof(struct nandfs_inode)); + memcpy(&sr->sr_sufile, &fsdev->nd_su_node->nn_inode, + sizeof(struct nandfs_inode)); +/* The checksum covers the bytes after sr_sum, up to NANDFS_SR_BYTES */ + crc_skip = sizeof(sr->sr_sum); + crc_calc = crc32((uint8_t *)sr + crc_skip, NANDFS_SR_BYTES - crc_skip); + + sr->sr_sum = crc_calc; + + bp->b_flags |= B_MANAGED; + bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; + + bp->b_flags &= ~B_INVAL; + nandfs_dirty_bufs_increment(fsdev); + DPRINTF(SYNC, ("%s: bp:%p\n", __func__, bp)); + + return (0); +} +/* + * Make room for more binfo entries. When there is no current segment or + * it has at most one block left, a new segment is created and its first + * summary buffer is returned. Otherwise the device block right after the + * existing summary blocks is taken as an additional zeroed summary buffer + * and bytes_left / current_off are reset to the start of that buffer. + * The chosen buffer is returned through *newbp. + */ +static int +nandfs_add_segsum_block(struct nandfs_seginfo *seginfo, struct buf **newbp) +{ + struct nandfs_device *fsdev; + nandfs_daddr_t 
blk; + struct buf *bp; + int error; + + if (!(seginfo->curseg) || seginfo->curseg->num_blocks <= 1) { + error = create_segment(seginfo); + if (error) { + nandfs_error("%s: error:%d when creating segment\n", + __func__, error); + return (error); + } + *newbp = TAILQ_FIRST(&seginfo->curseg->segsum); + return (0); + } + + fsdev = seginfo->fsdev; + blk = nandfs_block_to_dblock(fsdev, seginfo->curseg->start_block + + seginfo->curseg->segsum_blocks); + + bp = getblk(fsdev->nd_devvp, blk, fsdev->nd_blocksize, 0, 0, 0); + + bzero(bp->b_data, seginfo->fsdev->nd_blocksize); + bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; + bp->b_flags |= B_MANAGED; + + TAILQ_INSERT_TAIL(&seginfo->curseg->segsum, bp, + b_cluster.cluster_entry); + seginfo->curseg->num_blocks--; + + seginfo->curseg->segsum_blocks++; + seginfo->curseg->bytes_left = seginfo->fsdev->nd_blocksize; + seginfo->curseg->current_off = bp->b_data; + seginfo->blocks++; + + *newbp = bp; + + DPRINTF(SYNC, ("%s: bp %p\n", __func__, bp)); + + return (0); +} + +static int +nandfs_add_blocks(struct nandfs_seginfo *seginfo, struct nandfs_node *node, + struct buf *bp) +{ + union nandfs_binfo *binfo; + struct buf *seg_bp; + int error; + + if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) { + error = create_segment(seginfo); + if (error) { + nandfs_error("%s: error:%d when creating segment\n", + __func__, error); + return (error); + } + } + + if (seginfo->curseg->bytes_left < sizeof(union nandfs_binfo)) { + error = nandfs_add_segsum_block(seginfo, &seg_bp); + if (error) { + nandfs_error("%s: error:%d when adding segsum\n", + __func__, error); + return (error); + } + } + binfo = (union nandfs_binfo *)seginfo->curseg->current_off; + + if (node->nn_ino != NANDFS_DAT_INO) { + binfo->bi_v.bi_blkoff = bp->b_lblkno; + binfo->bi_v.bi_ino = node->nn_ino; + } else { + binfo->bi_dat.bi_blkoff = bp->b_lblkno; + binfo->bi_dat.bi_ino = node->nn_ino; + if (NANDFS_IS_INDIRECT(bp)) + binfo->bi_dat.bi_level = 1; + else + 
binfo->bi_dat.bi_level = 0; + } + binfo++; + + seginfo->curseg->bytes_left -= sizeof(union nandfs_binfo); + seginfo->curseg->segsum_bytes += sizeof(union nandfs_binfo); + seginfo->curseg->current_off = (char *)binfo; + + TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry); + + seginfo->curseg->nbinfos++; + seginfo->curseg->nblocks++; + seginfo->curseg->num_blocks--; + seginfo->blocks++; + + DPRINTF(SYNC, ("%s: bp (%p) number %x (left %x)\n", + __func__, bp, seginfo->curseg->nblocks, + seginfo->curseg->num_blocks)); + return (0); +} +/* + * Walk the dirty buffer list of vp, which must be exclusively locked. + * Every buffer that is not yet gathered gets its DAT entry updated, is + * marked gathered and is added to the segment under construction. + * Returns the first nandfs_bmap_update_dat() error, otherwise 0. + * NOTE(review): the return value of nandfs_add_blocks() is ignored and the + * 'hold' argument is not used in this function. + */ +static int +nandfs_iterate_dirty_buf(struct vnode *vp, struct nandfs_seginfo *seginfo, + uint8_t hold) +{ + struct buf *bp, *tbd; + struct bufobj *bo; + struct nandfs_node *node; + int error; + + node = VTON(vp); + bo = &vp->v_bufobj; + + ASSERT_VOP_ELOCKED(vp, __func__); + + /* Iterate dirty data bufs */ + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, tbd) { + DPRINTF(SYNC, ("%s: vp (%p): bp (%p) with lblkno %jx ino %jx " + "add buf\n", __func__, vp, bp, bp->b_lblkno, node->nn_ino)); + + if (!(NANDFS_ISGATHERED(bp))) { + error = nandfs_bmap_update_dat(node, + nandfs_vblk_get(bp), bp); + if (error) + return (error); + NANDFS_GATHER(bp); + nandfs_add_blocks(seginfo, node, bp); + } + } + + return (0); +} +/* + * Gather the dirty buffers of a system node, if it has any. Always + * returns 0: an error from nandfs_iterate_dirty_buf() is dropped. + */ +static int +nandfs_iterate_system_vnode(struct nandfs_node *node, + struct nandfs_seginfo *seginfo) +{ + struct vnode *vp; + int nblocks; + uint8_t hold = 0; + + if (node->nn_ino != NANDFS_IFILE_INO) + hold = 1; + + vp = NTOV(node); + + nblocks = vp->v_bufobj.bo_dirty.bv_cnt; + DPRINTF(SYNC, ("%s: vp (%p): nblocks %x ino %jx\n", + __func__, vp, nblocks, node->nn_ino)); + + if (nblocks) + nandfs_iterate_dirty_buf(vp, seginfo, hold); + + return (0); +} +/* + * Visit the vnodes of a mount, skipping the syncer vnode, vnodes that are + * currently locked and doomed vnodes. For each remaining vnode + * IN_MODIFIED is cleared, dirty buffers are gathered, and the node is + * updated when either applied. A vnode that had dirty buffers is not + * vput() here on success; presumably it is released later by the + * save/clean path called with 'unlock' set (confirm against callers). + */ +static int +nandfs_iterate_dirty_vnodes(struct mount *mp, struct nandfs_seginfo *seginfo) +{ + struct nandfs_node *nandfs_node; + struct vnode *vp, *mvp; + struct thread *td; + int error, lockreq, update; + + td = curthread; + lockreq = LK_EXCLUSIVE | 
LK_INTERLOCK | LK_RETRY; + + MNT_ILOCK(mp); + + MNT_VNODE_FOREACH(vp, mp, mvp) { + update = 0; + + if (mp->mnt_syncer == vp) + continue; + if (VOP_ISLOCKED(vp)) + continue; + + VI_LOCK(vp); + MNT_IUNLOCK(mp); + if (vp->v_iflag & VI_DOOMED) { + VI_UNLOCK(vp); + MNT_ILOCK(mp); + continue; + } + + if ((error = vget(vp, lockreq, td)) != 0) { + MNT_ILOCK(mp); + continue; + } + + if (vp->v_iflag & VI_DOOMED) { + vput(vp); + MNT_ILOCK(mp); + continue; + } + + nandfs_node = VTON(vp); + if (nandfs_node->nn_flags & IN_MODIFIED) { + nandfs_node->nn_flags &= ~(IN_MODIFIED); + update = 1; + } + + if (vp->v_bufobj.bo_dirty.bv_cnt) { + error = nandfs_iterate_dirty_buf(vp, seginfo, 0); + if (error) { + nandfs_error("%s: cannot iterate vnode:%p " + "err:%d\n", __func__, vp, error); + vput(vp); + return (error); + } + update = 1; + } else + vput(vp); + + if (update) + nandfs_node_update(nandfs_node); + + MNT_ILOCK(mp); + } + + MNT_IUNLOCK(mp); + + return (0); +} + +static int +nandfs_update_phys_block(struct nandfs_device *fsdev, struct buf *bp, + uint64_t phys_blknr, union nandfs_binfo *binfo) +{ + struct nandfs_node *node, *dat; + struct vnode *vp; + uint64_t new_blknr; + int error; + + vp = bp->b_vp; + node = VTON(vp); + new_blknr = nandfs_vblk_get(bp); + dat = fsdev->nd_dat_node; + + DPRINTF(BMAP, ("%s: ino %#jx lblk %#jx: vblk %#jx -> %#jx\n", + __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno, + (uintmax_t)new_blknr, (uintmax_t)phys_blknr)); + + if (node->nn_ino != NANDFS_DAT_INO) { + KASSERT((new_blknr != 0), ("vblk for bp %p is 0", bp)); + + nandfs_vblock_assign(fsdev, new_blknr, phys_blknr); + binfo->bi_v.bi_vblocknr = new_blknr; + binfo->bi_v.bi_blkoff = bp->b_lblkno; + binfo->bi_v.bi_ino = node->nn_ino; + } else { + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + error = nandfs_bmap_update_block(node, bp, phys_blknr); + if (error) { + nandfs_error("%s: error updating block:%jx for bp:%p\n", + __func__, (uintmax_t)phys_blknr, bp); + VOP_UNLOCK(NTOV(dat), 0); + return 
(error); + } + VOP_UNLOCK(NTOV(dat), 0); + binfo->bi_dat.bi_blkoff = bp->b_lblkno; + binfo->bi_dat.bi_ino = node->nn_ino; + if (NANDFS_IS_INDIRECT(bp)) + binfo->bi_dat.bi_level = 1; + else + binfo->bi_dat.bi_level = 0; + } + + return (0); +} +/* + * NBINFO(off) is the offset just past one more binfo entry placed at + * 'off'. + * + * nandfs_segment_assign_pblk() gives the data buffers of a segment + * consecutive physical block numbers, starting right after the segment's + * summary blocks, and fills the matching binfo entry in the summary + * buffers through nandfs_update_phys_block(). When the current summary + * buffer cannot hold another entry, the walk continues at the start of + * the next summary buffer. Returns 0 or the first update error. + */ +#define NBINFO(off) ((off) + sizeof(union nandfs_binfo)) +static int +nandfs_segment_assign_pblk(struct nandfs_segment *nfsseg) +{ + struct nandfs_device *fsdev; + union nandfs_binfo *binfo; + struct buf *bp, *seg_bp; + uint64_t blocknr; + uint32_t curr_off, blocksize; + int error; + + fsdev = nfsseg->fsdev; + blocksize = fsdev->nd_blocksize; + + blocknr = nfsseg->start_block + nfsseg->segsum_blocks; + seg_bp = TAILQ_FIRST(&nfsseg->segsum); + DPRINTF(SYNC, ("%s: seg:%p segsum bp:%p data:%p\n", + __func__, nfsseg, seg_bp, seg_bp->b_data)); + + binfo = (union nandfs_binfo *)(seg_bp->b_data + + sizeof(struct nandfs_segment_summary)); + curr_off = sizeof(struct nandfs_segment_summary); + + TAILQ_FOREACH(bp, &nfsseg->data, b_cluster.cluster_entry) { + KASSERT((bp->b_vp), ("bp %p has not vp", bp)); + + DPRINTF(BMAP, ("\n\n%s: assign buf %p for ino %#jx next %p\n", + __func__, bp, (uintmax_t)VTON(bp->b_vp)->nn_ino, + TAILQ_NEXT(bp, b_cluster.cluster_entry))); + + if (NBINFO(curr_off) > blocksize) { + seg_bp = TAILQ_NEXT(seg_bp, b_cluster.cluster_entry); + binfo = (union nandfs_binfo *)seg_bp->b_data; + curr_off = 0; + DPRINTF(SYNC, ("%s: next segsum %p data %p\n", + __func__, seg_bp, seg_bp->b_data)); + } + + error = nandfs_update_phys_block(fsdev, bp, blocknr, binfo); + if (error) { + nandfs_error("%s: err:%d when updatinng phys block:%jx" + " for bp:%p and binfo:%p\n", __func__, error, + (uintmax_t)blocknr, bp, binfo); + return (error); + } + binfo++; + curr_off = NBINFO(curr_off); + + blocknr++; + } + + return (0); +} +/* + * Run nandfs_segment_assign_pblk() on every segment of the seginfo, + * stopping at the first error, which is returned. + */ +static int +nandfs_seginfo_assign_pblk(struct nandfs_seginfo *seginfo) +{ + struct nandfs_segment *nfsseg; + int error = 0; + + LIST_FOREACH(nfsseg, &seginfo->seg_list, seg_link) { + error = 
nandfs_segment_assign_pblk(nfsseg); + if (error) + break; + } + + return (error); +} + +static struct nandfs_segment_summary * +nandfs_fill_segsum(struct nandfs_segment *seg, int has_sr) +{ + struct nandfs_segment_summary *ss; + struct nandfs_device *fsdev; + struct buf *bp; + uint32_t rest, segsum_size, blocksize, crc_calc; + uint16_t flags; + uint8_t *crc_area, crc_skip; + + DPRINTF(SYNC, ("%s: seg %#jx nblocks %#x sumbytes %#x\n", + __func__, (uintmax_t) seg->seg_num, + seg->nblocks + seg->segsum_blocks, + seg->segsum_bytes)); + + fsdev = seg->fsdev; + + flags = NANDFS_SS_LOGBGN | NANDFS_SS_LOGEND; + if (has_sr) + flags |= NANDFS_SS_SR; + + bp = TAILQ_FIRST(&seg->segsum); + ss = (struct nandfs_segment_summary *) bp->b_data; + ss->ss_magic = NANDFS_SEGSUM_MAGIC; + ss->ss_bytes = sizeof(struct nandfs_segment_summary); + ss->ss_flags = flags; + ss->ss_seq = ++(fsdev->nd_seg_sequence); + ss->ss_create = fsdev->nd_ts.tv_sec; + nandfs_get_segment_range(fsdev, seg->seg_next, &ss->ss_next, NULL); + ss->ss_nblocks = seg->nblocks + seg->segsum_blocks; + ss->ss_nbinfos = seg->nbinfos; + ss->ss_sumbytes = seg->segsum_bytes; + + crc_skip = sizeof(ss->ss_datasum) + sizeof(ss->ss_sumsum); + blocksize = seg->fsdev->nd_blocksize; + + segsum_size = seg->segsum_bytes - crc_skip; + rest = min(seg->segsum_bytes, blocksize) - crc_skip; + crc_area = (uint8_t *)ss + crc_skip; + crc_calc = ~0U; + while (segsum_size > 0) { + crc_calc = crc32_raw(crc_area, rest, crc_calc); + segsum_size -= rest; + if (!segsum_size) + break; + bp = TAILQ_NEXT(bp, b_cluster.cluster_entry); + crc_area = (uint8_t *)bp->b_data; + rest = segsum_size <= blocksize ? 
segsum_size : blocksize; + } + ss->ss_sumsum = crc_calc ^ ~0U; + + return (ss); + +} +/* + * Write one gathered buffer to file-system block 'blocknr': b_blkno and + * b_iooffset are set for that block, the buffer is locked when it does not + * belong to the device bufobj, its gathered state and the B_ASYNC, + * B_INVAL and B_MANAGED flags are cleared, and it is written with + * bwrite(). Returns the bwrite() result. + * NOTE(review): the BUF_LOCK() call uses LK_NOWAIT and its result is only + * checked by a KASSERT. + */ +static int +nandfs_save_buf(struct buf *bp, uint64_t blocknr, struct nandfs_device *fsdev) +{ + struct bufobj *bo; + int error; + + bo = &fsdev->nd_devvp->v_bufobj; + + bp->b_blkno = nandfs_block_to_dblock(fsdev, blocknr); + bp->b_iooffset = dbtob(bp->b_blkno); + + KASSERT(bp->b_bufobj != NULL, ("no bufobj for %p", bp)); + if (bp->b_bufobj != bo) { + BO_LOCK(bp->b_bufobj); + BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, + BO_MTX(bp->b_bufobj)); + KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer")); + } + + DPRINTF(SYNC, ("%s: buf: %p offset %#jx blk %#jx size %#x\n", + __func__, bp, (uintmax_t)bp->b_offset, (uintmax_t)blocknr, + fsdev->nd_blocksize)); + + NANDFS_UNGATHER(bp); + nandfs_buf_clear(bp, 0xffffffff); + bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED); + error = bwrite(bp); + if (error) { + nandfs_error("%s: error:%d when writing buffer:%p\n", + __func__, error, bp); + return (error); + } + return (error); +} +/* + * Drop a gathered buffer without writing it: the gathered state and the + * B_ASYNC, B_INVAL and B_MANAGED flags are cleared, then the buffer is + * handed to nandfs_undirty_buf_fsdev(). + */ +static void +nandfs_clean_buf(struct nandfs_device *fsdev, struct buf *bp) +{ + + DPRINTF(SYNC, ("%s: buf: %p\n", __func__, bp)); + + NANDFS_UNGATHER(bp); + nandfs_buf_clear(bp, 0xffffffff); + bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED); + nandfs_undirty_buf_fsdev(fsdev, bp); +} +/* + * Discard all summary and data buffers of a segment without writing them, + * with the device vnode locked for the duration. For each data buffer the + * dirty-buffer count is decremented. When 'unlock' is set, a non-system + * vnode is vput() once the following data buffer (in this or the next + * segment) belongs to a different vnode. + */ +static void +nandfs_clean_segblocks(struct nandfs_segment *seg, uint8_t unlock) +{ + struct nandfs_device *fsdev = seg->fsdev; + struct nandfs_segment *next_seg; + struct buf *bp, *tbp, *next_bp; + struct vnode *vp, *next_vp; + + VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE); + TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) { + TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry); + nandfs_clean_buf(fsdev, bp); + }; + + TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) { + TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry); + + /* + * If bp is not super-root and vnode is not currently + * locked lock it. 
+ */ + vp = bp->b_vp; + next_vp = NULL; + next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry); + if (!next_bp) { + next_seg = LIST_NEXT(seg, seg_link); + if (next_seg) + next_bp = TAILQ_FIRST(&next_seg->data); + } + + if (next_bp) + next_vp = next_bp->b_vp; + + nandfs_clean_buf(fsdev, bp); + + if (unlock && vp != NULL && next_vp != vp && + !NANDFS_SYS_NODE(VTON(vp)->nn_ino)) + vput(vp); + + nandfs_dirty_bufs_decrement(fsdev); + } + + VOP_UNLOCK(fsdev->nd_devvp, 0); +} + +static int +nandfs_save_segblocks(struct nandfs_segment *seg, uint8_t unlock) +{ + struct nandfs_device *fsdev = seg->fsdev; + struct nandfs_segment *next_seg; + struct buf *bp, *tbp, *next_bp; + struct vnode *vp, *next_vp; + uint64_t blocknr; + uint32_t i = 0; + int error = 0; + + VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE); + TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) { + TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry); + blocknr = seg->start_block + i; + error = nandfs_save_buf(bp, blocknr, fsdev); + if (error) { + nandfs_error("%s: error saving buf: %p blocknr:%jx\n", + __func__, bp, (uintmax_t)blocknr); + goto out; + } + i++; + }; + + i = 0; + TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) { + TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry); + + blocknr = seg->start_block + seg->segsum_blocks + i; + /* + * If bp is not super-root and vnode is not currently + * locked lock it. 
+ */ + vp = bp->b_vp; + next_vp = NULL; + next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry); + if (!next_bp) { + next_seg = LIST_NEXT(seg, seg_link); + if (next_seg) + next_bp = TAILQ_FIRST(&next_seg->data); + } + + if (next_bp) + next_vp = next_bp->b_vp; + + error = nandfs_save_buf(bp, blocknr, fsdev); + if (error) { + nandfs_error("%s: error saving buf: %p blknr: %jx\n", + __func__, bp, (uintmax_t)blocknr); + if (unlock && vp != NULL && next_vp != vp && + !NANDFS_SYS_NODE(VTON(vp)->nn_ino)) + vput(vp); + goto out; + } + + if (unlock && vp != NULL && next_vp != vp && + !NANDFS_SYS_NODE(VTON(vp)->nn_ino)) + vput(vp); + + i++; + nandfs_dirty_bufs_decrement(fsdev); + } +out: + if (error) { + nandfs_clean_segblocks(seg, unlock); + VOP_UNLOCK(fsdev->nd_devvp, 0); + return (error); + } + + VOP_UNLOCK(fsdev->nd_devvp, 0); + return (error); +} + +/* + * Discard the buffers of every segment in the seginfo by calling + * nandfs_clean_segblocks() on each; 'unlock' is passed through unchanged. + */ +static void +clean_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock) +{ + struct nandfs_segment *seg; + + DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo)); + + LIST_FOREACH(seg, &seginfo->seg_list, seg_link) { + nandfs_clean_segblocks(seg, unlock); + } +} +/* + * Write out all segments of the seginfo in list order. Each segment has + * its summary filled in before its blocks are saved; only the last + * segment is filled with has_sr set. For that last segment the summary is + * copied to nd_last_segsum, and after a successful write nd_last_cno is + * incremented and nd_last_pseg is set to the segment's start block. On + * any error the remaining buffers are discarded with clean_seginfo() and + * the error is returned. + */ +static int +save_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock) +{ + struct nandfs_segment *seg; + struct nandfs_device *fsdev; + struct nandfs_segment_summary *ss; + int error = 0; + + fsdev = seginfo->fsdev; + + DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo)); + + LIST_FOREACH(seg, &seginfo->seg_list, seg_link) { + if (LIST_NEXT(seg, seg_link)) { + nandfs_fill_segsum(seg, 0); + error = nandfs_save_segblocks(seg, unlock); + if (error) { + nandfs_error("%s: error:%d saving seg:%p\n", + __func__, error, seg); + goto out; + } + } else { + ss = nandfs_fill_segsum(seg, 1); + fsdev->nd_last_segsum = *ss; + error = nandfs_save_segblocks(seg, unlock); + if (error) { + nandfs_error("%s: error:%d saving seg:%p\n", + __func__, error, seg); + goto out; + } + fsdev->nd_last_cno++; + fsdev->nd_last_pseg = seg->start_block; + } + } +out: + if (error) + 
clean_seginfo(seginfo, unlock); + return (error); +} +/* + * Invalidate and release the clean buffers of the GC node whose logical + * block number lies within segment 'segno' (both ends inclusive). The + * scan restarts from the head of the clean list whenever a buffer lock + * cannot be taken without waiting. + * NOTE(review): the bufobj lock is dropped around brelse() while the + * saved next pointer of the SAFE iteration is kept; confirm the list + * cannot change underneath it. + */ +static void +nandfs_invalidate_bufs(struct nandfs_device *fsdev, uint64_t segno) +{ + uint64_t start, end; + struct buf *bp, *tbd; + struct bufobj *bo; + + nandfs_get_segment_range(fsdev, segno, &start, &end); + + bo = &NTOV(fsdev->nd_gc_node)->v_bufobj; + + BO_LOCK(bo); +restart_locked_gc: + TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, tbd) { + if (!(bp->b_lblkno >= start && bp->b_lblkno <= end)) + continue; + + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) + goto restart_locked_gc; + + bremfree(bp); + bp->b_flags |= (B_INVAL | B_RELBUF); + bp->b_flags &= ~(B_ASYNC | B_MANAGED); + BO_UNLOCK(bo); + brelse(bp); + BO_LOCK(bo); + } + BO_UNLOCK(bo); +} + +/* + * Process the segments marked to be freed by the cleaner (nd_free_base, + * nd_free_count): entries equal to NANDFS_NOSEGMENT are skipped, every + * other segment has its cached GC buffers invalidated and is cleared. + * The array is freed and reset afterwards. + */ +static void +nandfs_process_segments(struct nandfs_device *fsdev) +{ + uint64_t saved_segment; + int i; + + if (fsdev->nd_free_base) { + saved_segment = nandfs_get_segnum_of_block(fsdev, + fsdev->nd_super.s_last_pseg); + for (i = 0; i < fsdev->nd_free_count; i++) { + if (fsdev->nd_free_base[i] == NANDFS_NOSEGMENT) + continue; + /* Rewrite the superblock first if it still points into the segment being cleared */ + if (fsdev->nd_free_base[i] == saved_segment) { + nandfs_write_superblock(fsdev); + saved_segment = nandfs_get_segnum_of_block( + fsdev, fsdev->nd_super.s_last_pseg); + } + nandfs_invalidate_bufs(fsdev, fsdev->nd_free_base[i]); + nandfs_clear_segment(fsdev, fsdev->nd_free_base[i]); + } + + free(fsdev->nd_free_base, M_NANDFSTEMP); + fsdev->nd_free_base = NULL; + fsdev->nd_free_count = 0; + } +} + +/* + * Collect and write the dirty buffers of a single vnode, which the caller + * holds locked: the buffers are gathered into segments, the node is + * updated, checkpoint nd_last_cno + 1 is created and filled, physical + * block numbers are assigned and the segments are written. Returns 0 or + * the first error encountered. + */ +int +nandfs_sync_file(struct vnode *vp) +{ + struct nandfs_device *fsdev; + struct nandfs_node *nandfs_node; + struct nandfsmount *nmp; + struct nandfs_node *dat, *su, *ifile, *cp; + struct nandfs_seginfo *seginfo = NULL; + struct nandfs_segment *seg; + int update, error; + int cno_changed; + + ASSERT_VOP_LOCKED(vp, __func__); + DPRINTF(SYNC, ("%s: START\n", __func__)); + + error = 0; + nmp = 
VFSTONANDFS(vp->v_mount); + fsdev = nmp->nm_nandfsdev; + + dat = fsdev->nd_dat_node; + su = fsdev->nd_su_node; + cp = fsdev->nd_cp_node; + ifile = nmp->nm_ifile_node; + + NANDFS_WRITEASSERT(fsdev); + if (lockmgr(&fsdev->nd_seg_const, LK_UPGRADE, NULL) != 0) { + DPRINTF(SYNC, ("%s: lost shared lock\n", __func__)); + if (lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL) != 0) + panic("couldn't lock exclusive"); + } + DPRINTF(SYNC, ("%s: got lock\n", __func__)); + + VOP_LOCK(NTOV(su), LK_EXCLUSIVE); + create_seginfo(fsdev, &seginfo); + + update = 0; + + nandfs_node = VTON(vp); + if (nandfs_node->nn_flags & IN_MODIFIED) { + nandfs_node->nn_flags &= ~(IN_MODIFIED); + update = 1; + } + + if (vp->v_bufobj.bo_dirty.bv_cnt) { + error = nandfs_iterate_dirty_buf(vp, seginfo, 0); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + nandfs_error("%s: err:%d iterating dirty bufs vp:%p", + __func__, error, vp); + return (error); + } + update = 1; + } + + if (update) { + VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE); + error = nandfs_node_update(nandfs_node); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + VOP_UNLOCK(NTOV(ifile), 0); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + nandfs_error("%s: err:%d updating vp:%p", + __func__, error, vp); + return (error); + } + VOP_UNLOCK(NTOV(ifile), 0); + } + + cno_changed = 0; + if (seginfo->blocks) { + VOP_LOCK(NTOV(cp), LK_EXCLUSIVE); + cno_changed = 1; + /* Create new checkpoint */ + error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + VOP_UNLOCK(NTOV(cp), 0); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + nandfs_error("%s: err:%d getting cp:%jx", + __func__, error, fsdev->nd_last_cno + 1); + return (error); + } + + /* Reiterate all blocks and assign physical block 
number */ + nandfs_seginfo_assign_pblk(seginfo); + + /* Fill checkpoint data */ + error = nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1, + &ifile->nn_inode, seginfo->blocks); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + VOP_UNLOCK(NTOV(cp), 0); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + nandfs_error("%s: err:%d setting cp:%jx", + __func__, error, fsdev->nd_last_cno + 1); + return (error); + } + + VOP_UNLOCK(NTOV(cp), 0); + LIST_FOREACH(seg, &seginfo->seg_list, seg_link) + nandfs_update_segment(fsdev, seg->seg_num, + seg->nblocks + seg->segsum_blocks); + + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + error = save_seginfo(seginfo, 0); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + VOP_UNLOCK(NTOV(dat), 0); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + nandfs_error("%s: err:%d updating seg", + __func__, error); + return (error); + } + VOP_UNLOCK(NTOV(dat), 0); + } + + VOP_UNLOCK(NTOV(su), 0); + + delete_seginfo(seginfo); + lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL); + + if (cno_changed && !error) { + if (nandfs_cps_between_sblocks != 0 && + fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) + nandfs_write_superblock(fsdev); + } + + ASSERT_VOP_LOCKED(vp, __func__); + DPRINTF(SYNC, ("%s: END error %d\n", __func__, error)); + return (error); +} + +int +nandfs_segment_constructor(struct nandfsmount *nmp, int flags) +{ + struct nandfs_device *fsdev; + struct nandfs_seginfo *seginfo = NULL; + struct nandfs_segment *seg; + struct nandfs_node *dat, *su, *ifile, *cp, *gc; + int cno_changed, error; + + DPRINTF(SYNC, ("%s: START\n", __func__)); + fsdev = nmp->nm_nandfsdev; + + lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL); + DPRINTF(SYNC, ("%s: git lock\n", __func__)); +again: + create_seginfo(fsdev, &seginfo); + + dat = fsdev->nd_dat_node; + su = fsdev->nd_su_node; + cp = fsdev->nd_cp_node; + gc = fsdev->nd_gc_node; + ifile = 
nmp->nm_ifile_node; + + VOP_LOCK(NTOV(su), LK_EXCLUSIVE); + VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE); + VOP_LOCK(NTOV(gc), LK_EXCLUSIVE); + VOP_LOCK(NTOV(cp), LK_EXCLUSIVE); + + nandfs_iterate_system_vnode(gc, seginfo); + nandfs_iterate_dirty_vnodes(nmp->nm_vfs_mountp, seginfo); + nandfs_iterate_system_vnode(ifile, seginfo); + nandfs_iterate_system_vnode(su, seginfo); + + cno_changed = 0; + if (seginfo->blocks || flags) { + cno_changed = 1; + /* Create new checkpoint */ + error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + goto error_locks; + } + + /* Collect blocks from system files */ + nandfs_iterate_system_vnode(cp, seginfo); + nandfs_iterate_system_vnode(su, seginfo); + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + nandfs_iterate_system_vnode(dat, seginfo); + VOP_UNLOCK(NTOV(dat), 0); +reiterate: + seginfo->reiterate = 0; + nandfs_iterate_system_vnode(su, seginfo); + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + nandfs_iterate_system_vnode(dat, seginfo); + VOP_UNLOCK(NTOV(dat), 0); + if (seginfo->reiterate) + goto reiterate; + if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) { + error = create_segment(seginfo); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + goto error_locks; + } + goto reiterate; + } + + /* Reiterate all blocks and assign physical block number */ + nandfs_seginfo_assign_pblk(seginfo); + + /* Fill superroot */ + error = nandfs_add_superroot(seginfo); + if (error) { + clean_seginfo(seginfo, 0); + delete_seginfo(seginfo); + goto error_locks; + } + KASSERT(!(seginfo->reiterate), ("reiteration after superroot")); + + /* Fill checkpoint data */ + nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1, + &ifile->nn_inode, seginfo->blocks); + + LIST_FOREACH(seg, &seginfo->seg_list, seg_link) + nandfs_update_segment(fsdev, seg->seg_num, + seg->nblocks + seg->segsum_blocks); + + VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); + error = save_seginfo(seginfo, 1); + if 
(error) { + clean_seginfo(seginfo, 1); + delete_seginfo(seginfo); + goto error_dat; + } + VOP_UNLOCK(NTOV(dat), 0); + } + + VOP_UNLOCK(NTOV(cp), 0); + VOP_UNLOCK(NTOV(gc), 0); + VOP_UNLOCK(NTOV(ifile), 0); + + nandfs_process_segments(fsdev); + + VOP_UNLOCK(NTOV(su), 0); + + delete_seginfo(seginfo); + + /* + * XXX: a hack, will go away soon + */ + if ((NTOV(dat)->v_bufobj.bo_dirty.bv_cnt != 0 || + NTOV(cp)->v_bufobj.bo_dirty.bv_cnt != 0 || + NTOV(gc)->v_bufobj.bo_dirty.bv_cnt != 0 || + NTOV(ifile)->v_bufobj.bo_dirty.bv_cnt != 0 || + NTOV(su)->v_bufobj.bo_dirty.bv_cnt != 0) && + (flags & NANDFS_UMOUNT)) { + DPRINTF(SYNC, ("%s: RERUN\n", __func__)); + goto again; + } + + MPASS(fsdev->nd_free_base == NULL); + + lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); + + if (cno_changed) { + if ((nandfs_cps_between_sblocks != 0 && + fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) || + flags & NANDFS_UMOUNT) + nandfs_write_superblock(fsdev); + } + + DPRINTF(SYNC, ("%s: END\n", __func__)); + return (0); +error_dat: + VOP_UNLOCK(NTOV(dat), 0); +error_locks: + VOP_UNLOCK(NTOV(cp), 0); + VOP_UNLOCK(NTOV(gc), 0); + VOP_UNLOCK(NTOV(ifile), 0); + VOP_UNLOCK(NTOV(su), 0); + lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); + + return (error); +} + +#ifdef DDB +/* + * Show details about the given NANDFS mount point. 
+ */ +DB_SHOW_COMMAND(nandfs, db_show_nandfs) +{ + struct mount *mp; + struct nandfs_device *nffsdev; + struct nandfs_segment *seg; + struct nandfsmount *nmp; + struct buf *bp; + struct vnode *vp; + + if (!have_addr) { + db_printf("\nUsage: show nandfs <mount_addr>\n"); + return; + } + + mp = (struct mount *)addr; + db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, + mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); + + + nmp = (struct nandfsmount *)(mp->mnt_data); + nffsdev = nmp->nm_nandfsdev; + db_printf("dev vnode:%p\n", nffsdev->nd_devvp); + db_printf("blocksize:%jx last cno:%jx last pseg:%jx seg num:%jx\n", + (uintmax_t)nffsdev->nd_blocksize, (uintmax_t)nffsdev->nd_last_cno, + (uintmax_t)nffsdev->nd_last_pseg, (uintmax_t)nffsdev->nd_seg_num); + db_printf("system nodes: dat:%p cp:%p su:%p ifile:%p gc:%p\n", + nffsdev->nd_dat_node, nffsdev->nd_cp_node, nffsdev->nd_su_node, + nmp->nm_ifile_node, nffsdev->nd_gc_node); + + if (nffsdev->nd_seginfo != NULL) { + LIST_FOREACH(seg, &nffsdev->nd_seginfo->seg_list, seg_link) { + db_printf("seg: %p\n", seg); + TAILQ_FOREACH(bp, &seg->segsum, + b_cluster.cluster_entry) + db_printf("segbp %p\n", bp); + TAILQ_FOREACH(bp, &seg->data, + b_cluster.cluster_entry) { + vp = bp->b_vp; + db_printf("bp:%p bp->b_vp:%p ino:%jx\n", bp, vp, + (uintmax_t)(vp ? VTON(vp)->nn_ino : 0)); + } + } + } +} +#endif diff --git a/sys/fs/nandfs/nandfs_subr.c b/sys/fs/nandfs/nandfs_subr.c new file mode 100644 index 0000000..b485422 --- /dev/null +++ b/sys/fs/nandfs/nandfs_subr.c @@ -0,0 +1,1120 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/signalvar.h> +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/lockf.h> +#include <sys/libkern.h> + +#include <geom/geom.h> +#include <geom/geom_vfs.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include <machine/_inttypes.h> +#include "nandfs_mount.h" +#include "nandfs.h" +#include "nandfs_subr.h" + +MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");; +MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp"); + +uma_zone_t nandfs_node_zone; + +void nandfs_bdflush(struct bufobj *bo, struct buf *bp); +int nandfs_bufsync(struct bufobj *bo, int waitfor); + +struct buf_ops 
buf_ops_nandfs = { + .bop_name = "buf_ops_nandfs", + .bop_write = bufwrite, + .bop_strategy = bufstrategy, + .bop_sync = nandfs_bufsync, + .bop_bdflush = nandfs_bdflush, +}; + +int +nandfs_bufsync(struct bufobj *bo, int waitfor) +{ + struct vnode *vp; + int error = 0; + + vp = bo->__bo_vnode; + + ASSERT_VOP_LOCKED(vp, __func__); + error = nandfs_sync_file(vp); + if (error) + nandfs_warning("%s: cannot flush buffers err:%d\n", + __func__, error); + + return (error); +} + +void +nandfs_bdflush(bo, bp) + struct bufobj *bo; + struct buf *bp; +{ + struct vnode *vp; + int error; + + if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10)) + return; + + vp = bp->b_vp; + if (NANDFS_SYS_NODE(VTON(vp)->nn_ino)) + return; + + if (NANDFS_IS_INDIRECT(bp)) + return; + + error = nandfs_sync_file(vp); + if (error) + nandfs_warning("%s: cannot flush buffers err:%d\n", + __func__, error); +} + +int +nandfs_init(struct vfsconf *vfsp) +{ + + nandfs_node_zone = uma_zcreate("nandfs node zone", + sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0); + + return (0); +} + +int +nandfs_uninit(struct vfsconf *vfsp) +{ + + uma_zdestroy(nandfs_node_zone); + return (0); +} + +/* Basic calculators */ +uint64_t +nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev, + nandfs_daddr_t blocknr) +{ + uint64_t segnum, blks_per_seg; + + MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block); + + blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment; + + segnum = blocknr / blks_per_seg; + segnum -= nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg; + + DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__, + blocknr, segnum)); + + return (segnum); +} + +void +nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum, + uint64_t *seg_start, uint64_t *seg_end) +{ + uint64_t blks_per_seg; + + blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment; + *seg_start = nandfsdev->nd_fsdata.f_first_data_block + + blks_per_seg * segnum; + if (seg_end != NULL) + 
*seg_end = *seg_start + blks_per_seg -1; +} + +void nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev, + struct nandfs_mdt *mdt, int entry_size) +{ + uint32_t blocksize = nandfsdev->nd_blocksize; + + mdt->entries_per_group = blocksize * 8; + mdt->entries_per_block = blocksize / entry_size; + + mdt->blocks_per_group = + (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1; + mdt->groups_per_desc_block = + blocksize / sizeof(struct nandfs_block_group_desc); + mdt->blocks_per_desc_block = + mdt->groups_per_desc_block * mdt->blocks_per_group + 1; +} + +int +nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr, + struct ucred *cred, int flags, struct buf **bpp) +{ + int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE; + int error; + + DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__, + blocknr * blk2dev, nandfsdev->nd_devvp)); + error = bread(nandfsdev->nd_devvp, blocknr * blk2dev, + nandfsdev->nd_blocksize, NOCRED, bpp); + if (error) + nandfs_error("%s: cannot read from device - blk:%jx\n", + __func__, blocknr); + return (error); +} + +/* Read on a node */ +int +nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr, + struct ucred *cred, int flags, struct buf **bpp) +{ + nandfs_daddr_t vblk; + int error; + + DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), + blocknr)); + + error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize, + cred, bpp); + + KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__, + NTOV(node), blocknr, error)); + + if (!nandfs_vblk_get(*bpp) && + ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) { + nandfs_bmap_lookup(node, blocknr, &vblk); + nandfs_vblk_set(*bpp, vblk); + } + return (error); +} + +int +nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr, + struct ucred *cred, int flags, struct buf **bpp) +{ + nandfs_daddr_t vblk; + int error; + + DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), + blocknr)); + + error = 
bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize, + cred, bpp); + + KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__, + NTOV(node), blocknr, error)); + + if (!nandfs_vblk_get(*bpp) && + ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) { + nandfs_bmap_lookup(node, blocknr, &vblk); + nandfs_vblk_set(*bpp, vblk); + } + + return (error); +} + +int +nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk) +{ + int error; + + if (!NANDFS_SYS_NODE(node->nn_ino)) + NANDFS_WRITEASSERT(node->nn_nandfsdev); + + error = nandfs_vblock_end(node->nn_nandfsdev, vblk); + if (error) { + nandfs_error("%s: ending vblk: %jx failed\n", + __func__, (uintmax_t)vblk); + return (error); + } + node->nn_inode.i_blocks--; + + return (0); +} + +int +nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr, + struct ucred *cred, int flags, struct buf **bpp) +{ + int error; + + ASSERT_VOP_LOCKED(NTOV(node), __func__); + if (!NANDFS_SYS_NODE(node->nn_ino)) + NANDFS_WRITEASSERT(node->nn_nandfsdev); + + DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), + blocknr)); + + *bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize, + 0, 0, 0); + + KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__, + NTOV(node), blocknr)); + + if (*bpp) { + vfs_bio_clrbuf(*bpp); + (*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */ + error = nandfs_bmap_insert_block(node, blocknr, *bpp); + if (error) { + nandfs_warning("%s: failed bmap insert node:%p" + " blk:%jx\n", __func__, node, blocknr); + brelse(*bpp); + return (error); + } + node->nn_inode.i_blocks++; + + return (0); + } + + return (-1); +} + +int +nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr, + struct ucred *cred, int flags, struct buf **bpp) +{ + struct nandfs_device *fsdev; + nandfs_daddr_t vblk; + int error; + + ASSERT_VOP_LOCKED(NTOV(node), __func__); + NANDFS_WRITEASSERT(node->nn_nandfsdev); + + DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node), + 
blocknr)); + + fsdev = node->nn_nandfsdev; + + *bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize, + 0, 0, 0); + + KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__, + NTOV(node), blocknr)); + + memset((*bpp)->b_data, 0, fsdev->nd_blocksize); + + vfs_bio_clrbuf(*bpp); + (*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */ + + nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED); + + if (node->nn_ino != NANDFS_DAT_INO) { + error = nandfs_vblock_alloc(fsdev, &vblk); + if (error) { + nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED); + brelse(*bpp); + return (error); + } + } else + vblk = fsdev->nd_fakevblk++; + + nandfs_vblk_set(*bpp, vblk); + + nandfs_bmap_insert_block(node, blocknr, *bpp); + return (0); +} + +/* Translate index to a file block number and an entry */ +void +nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index, + nandfs_lbn_t *blocknr, uint32_t *entry_in_block) +{ + uint64_t blknr; + uint64_t group, group_offset, blocknr_in_group; + uint64_t desc_block, desc_offset; + + /* Calculate our offset in the file */ + group = index / mdt->entries_per_group; + group_offset = index % mdt->entries_per_group; + desc_block = group / mdt->groups_per_desc_block; + desc_offset = group % mdt->groups_per_desc_block; + blocknr_in_group = group_offset / mdt->entries_per_block; + + /* To descgroup offset */ + blknr = 1 + desc_block * mdt->blocks_per_desc_block; + + /* To group offset */ + blknr += desc_offset * mdt->blocks_per_group; + + /* To actual file block */ + blknr += 1 + blocknr_in_group; + + *blocknr = blknr; + *entry_in_block = group_offset % mdt->entries_per_block; +} + +void +nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index, + uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr, + uint32_t *entry_in_block) +{ + uint64_t blknr; + uint64_t group, group_offset, blocknr_in_group; + uint64_t desc_block, desc_offset; + + /* Calculate our offset in the file */ + group = index / mdt->entries_per_group; + group_offset = index % 
mdt->entries_per_group; + desc_block = group / mdt->groups_per_desc_block; + desc_offset = group % mdt->groups_per_desc_block; + blocknr_in_group = group_offset / mdt->entries_per_block; + + /* To descgroup offset */ + *desc = desc_block * mdt->blocks_per_desc_block; + blknr = 1 + desc_block * mdt->blocks_per_desc_block; + + /* To group offset */ + blknr += desc_offset * mdt->blocks_per_group; + *bitmap = blknr; + + /* To actual file block */ + blknr += 1 + blocknr_in_group; + + *blocknr = blknr; + *entry_in_block = group_offset % mdt->entries_per_block; + + DPRINTF(ALLOC, + ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n", + __func__, (uintmax_t)*desc, (uintmax_t)*bitmap, + (uintmax_t)*blocknr, *entry_in_block)); +} + +int +nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr, + nandfs_daddr_t *pblocknr) +{ + struct nandfs_node *dat_node; + struct nandfs_dat_entry *entry; + struct buf *bp; + nandfs_lbn_t ldatblknr; + uint32_t entry_in_block; + int locked, error; + + if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) { + *pblocknr = vblocknr; + return (0); + } + + /* only translate valid vblocknrs */ + if (vblocknr == 0) + return (0); + + dat_node = node->nn_nandfsdev->nd_dat_node; + nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr, + &entry_in_block); + + locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node)); + if (!locked) + VOP_LOCK(NTOV(dat_node), LK_SHARED); + error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp); + if (error) { + DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n", + (uintmax_t)ldatblknr)); + brelse(bp); + VOP_UNLOCK(NTOV(dat_node), 0); + return (error); + } + + /* Get our translation */ + entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block; + DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n", + entry, bp->b_data, entry_in_block)) + DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n", + (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr, + 
(uintmax_t)entry->de_start, (uintmax_t)entry->de_end)); + + *pblocknr = entry->de_blocknr; + brelse(bp); + if (!locked) + VOP_UNLOCK(NTOV(dat_node), 0); + + MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block || + *pblocknr == 0); + + return (0); +} + +int +nandfs_segsum_valid(struct nandfs_segment_summary *segsum) +{ + + return (segsum->ss_magic == NANDFS_SEGSUM_MAGIC); +} + +int +nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr, + struct nandfs_segment_summary *segsum) +{ + struct buf *bp; + int error; + + DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n", + (uintmax_t)blocknr)); + + error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp); + if (error) + return (error); + + memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary)); + brelse(bp); + + if (!nandfs_segsum_valid(segsum)) { + DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__, + blocknr)); + return (EINVAL); + } + + return (error); +} + +static int +nandfs_load_super_root(struct nandfs_device *nandfsdev, + struct nandfs_segment_summary *segsum, uint64_t pseg) +{ + struct nandfs_super_root super_root; + struct buf *bp; + uint64_t blocknr; + uint32_t super_root_crc, comp_crc; + int off, error; + + /* Check if there is a superroot */ + if ((segsum->ss_flags & NANDFS_SS_SR) == 0) { + DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__, + pseg)); + return (ENOENT); + } + + /* Get our super root, located at the end of the pseg */ + blocknr = pseg + segsum->ss_nblocks - 1; + DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr)); + + error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp); + if (error) + return (error); + + memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root)); + brelse(bp); + + /* Check super root CRC */ + super_root_crc = super_root.sr_sum; + off = sizeof(super_root.sr_sum); + comp_crc = crc32((uint8_t *)&super_root + off, + NANDFS_SR_BYTES - off); + + if (super_root_crc != comp_crc) { + 
DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n", + __func__, super_root_crc, comp_crc)); + return (EINVAL); + } + + nandfsdev->nd_super_root = super_root; + DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__)); + + return (0); +} + +/* + * Search for the last super root recorded. + */ +int +nandfs_search_super_root(struct nandfs_device *nandfsdev) +{ + struct nandfs_super_block *super; + struct nandfs_segment_summary segsum; + uint64_t seg_start, seg_end, cno, seq, create, pseg; + uint64_t segnum; + int error, found; + + error = found = 0; + + /* Search for last super root */ + pseg = nandfsdev->nd_super.s_last_pseg; + segnum = nandfs_get_segnum_of_block(nandfsdev, pseg); + + cno = nandfsdev->nd_super.s_last_cno; + create = seq = 0; + DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__, + (uintmax_t)pseg)); + + for (;;) { + error = nandfs_load_segsum(nandfsdev, pseg, &segsum); + if (error) + break; + + if (segsum.ss_seq < seq || segsum.ss_create < create) + break; + + /* Try to load super root */ + if (segsum.ss_flags & NANDFS_SS_SR) { + error = nandfs_load_super_root(nandfsdev, &segsum, pseg); + if (error) + break; /* confused */ + found = 1; + + super = &nandfsdev->nd_super; + nandfsdev->nd_last_segsum = segsum; + super->s_last_pseg = pseg; + super->s_last_cno = cno++; + super->s_last_seq = segsum.ss_seq; + super->s_state = NANDFS_VALID_FS; + seq = segsum.ss_seq; + create = segsum.ss_create; + } else { + seq = segsum.ss_seq; + create = segsum.ss_create; + } + + /* Calculate next partial segment location */ + pseg += segsum.ss_nblocks; + DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__, + (uintmax_t)pseg)); + + /* Did we reach the end of the segment? 
if so, go to the next */ + nandfs_get_segment_range(nandfsdev, segnum, &seg_start, + &seg_end); + if (pseg >= seg_end) { + pseg = segsum.ss_next; + DPRINTF(VOLUMES, + (" partial seg oor next is %jx[%jx - %jx]\n", + (uintmax_t)pseg, (uintmax_t)seg_start, + (uintmax_t)seg_end)); + } + segnum = nandfs_get_segnum_of_block(nandfsdev, pseg); + } + + if (error && !found) + return (error); + + return (0); +} + +int +nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp, + uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep) +{ + struct nandfs_node *node; + struct vnode *nvp; + struct mount *mp; + int error; + + *nodep = NULL; + + /* Associate with mountpoint if present */ + if (nmp) { + mp = nmp->nm_vfs_mountp; + error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp); + if (error) { + return (error); + } + } else { + mp = NULL; + error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops, + &nvp); + if (error) { + return (error); + } + } + + if (mp) + NANDFS_WRITELOCK(nandfsdev); + + DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n", + __func__, (uintmax_t)ino, nvp)); + /* Lock node */ + lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL); + + if (mp) { + error = insmntque(nvp, mp); + if (error != 0) { + *nodep = NULL; + return (error); + } + } + + node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO); + + /* Crosslink */ + node->nn_vnode = nvp; + nvp->v_bufobj.bo_ops = &buf_ops_nandfs; + node->nn_nmp = nmp; + node->nn_nandfsdev = nandfsdev; + nvp->v_data = node; + + /* Initiase NANDFS node */ + node->nn_ino = ino; + if (inode != NULL) + node->nn_inode = *inode; + + nandfs_vinit(nvp, ino); + + /* Return node */ + *nodep = node; + DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n", + __func__, (uintmax_t)ino, nvp, *nodep)); + + return (0); +} + +int +nandfs_get_node(struct nandfsmount *nmp, uint64_t ino, + struct nandfs_node **nodep) +{ + struct nandfs_device *nandfsdev; + struct nandfs_inode inode, *entry; + struct vnode *nvp, *vpp; + struct thread 
*td; + struct buf *bp; + uint64_t ivblocknr; + uint32_t entry_in_block; + int error; + + /* Look up node in hash table */ + td = curthread; + *nodep = NULL; + + if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) { + printf("nandfs_get_node: system ino %"PRIu64" not in mount " + "point!\n", ino); + return (ENOENT); + } + + error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp, + NULL, NULL); + if (error) + return (error); + + if (nvp != NULL) { + *nodep = (struct nandfs_node *)nvp->v_data; + return (0); + } + + /* Look up inode structure in mountpoints ifile */ + nandfsdev = nmp->nm_nandfsdev; + nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr, + &entry_in_block); + + VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED); + error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0); + return (ENOENT); + } + + /* Get inode entry */ + entry = (struct nandfs_inode *) bp->b_data + entry_in_block; + memcpy(&inode, entry, sizeof(struct nandfs_inode)); + brelse(bp); + VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0); + + /* Get node */ + error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep); + if (error) { + *nodep = NULL; + return (error); + } + + nvp = (*nodep)->nn_vnode; + error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL); + if (error) { + *nodep = NULL; + return (error); + } + + return (error); +} + +void +nandfs_dispose_node(struct nandfs_node **nodep) +{ + struct nandfs_node *node; + struct vnode *vp; + + /* Protect against rogue values */ + node = *nodep; + if (!node) { + return; + } + DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep)); + + vp = NTOV(node); + vp->v_data = NULL; + + /* Free our associated memory */ + uma_zfree(nandfs_node_zone, node); + + *nodep = NULL; +} + +int +nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen, + uint64_t *ino, int *found, uint64_t *off) +{ + struct nandfs_node *dir_node = VTON(dvp); 
+ struct nandfs_dir_entry *ndirent; + struct buf *bp; + uint64_t file_size, diroffset, blkoff; + uint64_t blocknr; + uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize; + uint8_t *pos, name_len; + int error; + + *found = 0; + + DPRINTF(VNCALL, ("%s: %s file\n", __func__, name)); + if (dvp->v_type != VDIR) { + return (ENOTDIR); + } + + /* Get directory filesize */ + file_size = dir_node->nn_inode.i_size; + + /* Walk the directory */ + diroffset = 0; + blocknr = 0; + blkoff = 0; + error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (EIO); + } + + while (diroffset < file_size) { + if (blkoff >= blocksize) { + blkoff = 0; blocknr++; + brelse(bp); + error = nandfs_bread(dir_node, blocknr, NOCRED, 0, + &bp); + if (error) { + brelse(bp); + return (EIO); + } + } + + /* Read in one dirent */ + pos = (uint8_t *) bp->b_data + blkoff; + ndirent = (struct nandfs_dir_entry *) pos; + name_len = ndirent->name_len; + + if ((name_len == namelen) && + (strncmp(name, ndirent->name, name_len) == 0) && + (ndirent->inode != 0)) { + *ino = ndirent->inode; + *off = diroffset; + DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n", + name_len, ndirent->name, *ino)); + *found = 1; + break; + } + + /* Advance */ + diroffset += ndirent->rec_len; + blkoff += ndirent->rec_len; + } + brelse(bp); + + return (error); +} + +int +nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo) +{ + struct nandfs_device *fsdev; + + fsdev = nmp->nm_nandfsdev; + + memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata)); + memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super)); + snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev), + "%s", nmp->nm_vfs_mountp->mnt_stat.f_mntfromname); + + return (0); +} + +void +nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode) +{ + struct timespec ts; + + vfs_timestamp(&ts); + + inode->i_blocks = 0; + inode->i_size = 0; + inode->i_ctime = ts.tv_sec; + inode->i_ctime_nsec = 
ts.tv_nsec; + inode->i_mtime = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; + inode->i_mode = mode; + inode->i_links_count = 1; + if (S_ISDIR(mode)) + inode->i_links_count = 2; + inode->i_flags = 0; + + inode->i_special = 0; + memset(inode->i_db, 0, sizeof(inode->i_db)); + memset(inode->i_ib, 0, sizeof(inode->i_ib)); +} + +void +nandfs_inode_destroy(struct nandfs_inode *inode) +{ + + MPASS(inode->i_blocks == 0); + bzero(inode, sizeof(*inode)); +} + +int +nandfs_fs_full(struct nandfs_device *nffsdev) +{ + uint64_t space, bps; + + bps = nffsdev->nd_fsdata.f_blocks_per_segment; + space = (nffsdev->nd_clean_segs - 1) * bps; + + DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__, + (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space)); + + if (nffsdev->nd_dirty_bufs + (10 * bps) >= space) + return (1); + + return (0); +} + +static int +_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force) +{ + struct nandfs_device *nffsdev; + struct nandfs_node *node; + uint64_t ino, bps; + + if (NANDFS_ISGATHERED(bp)) { + bqrelse(bp); + return (0); + } + if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) { + bqrelse(bp); + return (0); + } + + node = VTON(bp->b_vp); + nffsdev = node->nn_nandfsdev; + DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp)); + ino = node->nn_ino; + + if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) { + brelse(bp); + return (ENOSPC); + } + + bp->b_flags |= B_MANAGED; + bdwrite(bp); + + nandfs_dirty_bufs_increment(nffsdev); + + KASSERT((bp->b_vp), ("vp missing for bp")); + KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO), + ("bp vblk is 0")); + + /* + * To maintain consistency of FS we need to force making + * meta buffers dirty, even if free space is low. 
+ */ + if (dirty_meta && ino != NANDFS_GC_INO) + nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1); + + bps = nffsdev->nd_fsdata.f_blocks_per_segment; + + if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) { + mtx_lock(&nffsdev->nd_sync_mtx); + if (nffsdev->nd_syncing == 0) { + DPRINTF(SYNC, ("%s: wakeup gc\n", __func__)); + nffsdev->nd_syncing = 1; + wakeup(&nffsdev->nd_syncing); + } + mtx_unlock(&nffsdev->nd_sync_mtx); + } + + return (0); +} + +int +nandfs_dirty_buf(struct buf *bp, int force) +{ + + return (_nandfs_dirty_buf(bp, 1, force)); +} + +int +nandfs_dirty_buf_meta(struct buf *bp, int force) +{ + + return (_nandfs_dirty_buf(bp, 0, force)); +} + +void +nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp) +{ + + BUF_ASSERT_HELD(bp); + + if (bp->b_flags & B_DELWRI) { + bp->b_flags &= ~(B_DELWRI|B_MANAGED); + nandfs_dirty_bufs_decrement(nffsdev); + } + /* + * Since it is now being written, we can clear its deferred write flag. + */ + bp->b_flags &= ~B_DEFERRED; + + brelse(bp); +} + +void +nandfs_undirty_buf(struct buf *bp) +{ + struct nandfs_node *node; + + node = VTON(bp->b_vp); + + nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp); +} + +void +nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr) +{ + + nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1); + *vblk = blocknr; +} + +nandfs_daddr_t +nandfs_vblk_get(struct buf *bp) +{ + + nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1); + return (*vblk); +} + +void +nandfs_buf_set(struct buf *bp, uint32_t bits) +{ + uintptr_t flags; + + flags = (uintptr_t)bp->b_fsprivate3; + flags |= (uintptr_t)bits; + bp->b_fsprivate3 = (void *)flags; +} + +void +nandfs_buf_clear(struct buf *bp, uint32_t bits) +{ + uintptr_t flags; + + flags = (uintptr_t)bp->b_fsprivate3; + flags &= ~(uintptr_t)bits; + bp->b_fsprivate3 = (void *)flags; +} + +int +nandfs_buf_check(struct buf *bp, uint32_t bits) +{ + uintptr_t flags; + + flags = (uintptr_t)bp->b_fsprivate3; + if (flags & bits) + 
return (1); + return (0); +} + +int +nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size) +{ + struct buf *bp; + int read_size, error, i; + + DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n", + __func__, offset, size)); + + MPASS(size % fsdev->nd_erasesize == 0); + + if (fsdev->nd_is_nand) { + error = g_delete_data(fsdev->nd_gconsumer, offset, size); + return (error); + } + + if (size > MAXBSIZE) + read_size = MAXBSIZE; + else + read_size = size; + + error = 0; + for (i = 0; i < size / MAXBSIZE; i++) { + error = bread(fsdev->nd_devvp, btodb(offset + i * read_size), + read_size, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + memset(bp->b_data, 0xff, read_size); + error = bwrite(bp); + if (error) { + nandfs_error("%s: err:%d from bwrite\n", + __func__, error); + return (error); + } + } + + return (error); +} + +int +nandfs_vop_islocked(struct vnode *vp) +{ + int islocked; + + islocked = VOP_ISLOCKED(vp); + return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED); +} + +nandfs_daddr_t +nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block) +{ + + return (btodb(block * fsdev->nd_blocksize)); +} diff --git a/sys/fs/nandfs/nandfs_subr.h b/sys/fs/nandfs/nandfs_subr.h new file mode 100644 index 0000000..0bcda18 --- /dev/null +++ b/sys/fs/nandfs/nandfs_subr.h @@ -0,0 +1,238 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
/*
 * State carried through the entry allocation helpers declared below
 * (nandfs_find_free_entry(), nandfs_alloc_entry(), nandfs_free_entry(),
 * nandfs_abort_entry(), ...).
 *
 * NOTE(review): the field meanings below are inferred from their names
 * and from nandfs_mdt_trans_blk(), which computes a descriptor block, a
 * bitmap block and an entry block for an index - confirm against
 * nandfs_alloc.c.
 */
struct nandfs_alloc_request
{
	uint64_t	entrynum;	/* presumably the index of the entry */
	struct buf	*bp_desc;	/* presumably the group descriptor block */
	struct buf	*bp_bitmap;	/* presumably the group bitmap block */
	struct buf	*bp_entry;	/* presumably the block holding the entry */
};
nandfs_bcreate(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, + struct buf **); +int nandfs_bcreate_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *, + int, struct buf **); +int nandfs_bread_create(struct nandfs_node *, nandfs_lbn_t, struct ucred *, + int, struct buf **); + +/* vtop operations */ +int nandfs_vtop(struct nandfs_node *, nandfs_daddr_t, nandfs_daddr_t *); + +/* Node action implementators */ +int nandfs_vinit(struct vnode *, uint64_t); +int nandfs_get_node(struct nandfsmount *, uint64_t, struct nandfs_node **); +int nandfs_get_node_raw(struct nandfs_device *, struct nandfsmount *, uint64_t, + struct nandfs_inode *, struct nandfs_node **); +void nandfs_dispose_node(struct nandfs_node **); + +void nandfs_itimes(struct vnode *); +int nandfs_lookup_name_in_dir(struct vnode *, const char *, int, uint64_t *, + int *, uint64_t *); +int nandfs_create_node(struct vnode *, struct vnode **, struct vattr *, + struct componentname *); +void nandfs_delete_node(struct nandfs_node *); + +int nandfs_chsize(struct vnode *, u_quad_t, struct ucred *); +int nandfs_dir_detach(struct nandfsmount *, struct nandfs_node *, + struct nandfs_node *, struct componentname *); +int nandfs_dir_attach(struct nandfsmount *, struct nandfs_node *, + struct nandfs_node *, struct vattr *, struct componentname *); + +int nandfs_dirty_buf(struct buf *, int); +int nandfs_dirty_buf_meta(struct buf *, int); +int nandfs_fs_full(struct nandfs_device *); +void nandfs_undirty_buf_fsdev(struct nandfs_device *, struct buf *); +void nandfs_undirty_buf(struct buf *); + +void nandfs_clear_buf(struct buf *); +void nandfs_buf_set(struct buf *, uint32_t); +void nandfs_buf_clear(struct buf *, uint32_t); +int nandfs_buf_check(struct buf *, uint32_t); + +int nandfs_find_free_entry(struct nandfs_mdt *, struct nandfs_node *, + struct nandfs_alloc_request *); +int nandfs_find_entry(struct nandfs_mdt *, struct nandfs_node *, + struct nandfs_alloc_request *); +int nandfs_alloc_entry(struct 
nandfs_mdt *, struct nandfs_alloc_request *); +void nandfs_abort_entry(struct nandfs_alloc_request *); +int nandfs_free_entry(struct nandfs_mdt *, struct nandfs_alloc_request *); +int nandfs_get_entry_block(struct nandfs_mdt *, struct nandfs_node *, + struct nandfs_alloc_request *, uint32_t *, int); + +/* inode managment */ +int nandfs_node_create(struct nandfsmount *, struct nandfs_node **, uint16_t); +int nandfs_node_destroy(struct nandfs_node *); +int nandfs_node_update(struct nandfs_node *); +int nandfs_get_node_entry(struct nandfsmount *, struct nandfs_inode **, + uint64_t, struct buf **); +void nandfs_mdt_trans_blk(struct nandfs_mdt *, uint64_t, uint64_t *, + uint64_t *, nandfs_lbn_t *, uint32_t *); + +/* vblock management */ +void nandfs_mdt_trans(struct nandfs_mdt *, uint64_t, nandfs_lbn_t *, uint32_t *); +int nandfs_vblock_alloc(struct nandfs_device *, nandfs_daddr_t *); +int nandfs_vblock_end(struct nandfs_device *, nandfs_daddr_t); +int nandfs_vblock_assign(struct nandfs_device *, nandfs_daddr_t, + nandfs_lbn_t); +int nandfs_vblock_free(struct nandfs_device *, nandfs_daddr_t); + +/* Checkpoint management */ +int nandfs_get_checkpoint(struct nandfs_device *, struct nandfs_node *, + uint64_t); +int nandfs_set_checkpoint(struct nandfs_device *, struct nandfs_node *, + uint64_t, struct nandfs_inode *, uint64_t); + +/* Segment management */ +int nandfs_alloc_segment(struct nandfs_device *, uint64_t *); +int nandfs_update_segment(struct nandfs_device *, uint64_t, uint32_t); +int nandfs_free_segment(struct nandfs_device *, uint64_t); +int nandfs_clear_segment(struct nandfs_device *, uint64_t); +int nandfs_touch_segment(struct nandfs_device *, uint64_t); +int nandfs_markgc_segment(struct nandfs_device *, uint64_t); + +int nandfs_bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, struct buf *); +int nandfs_bmap_update_block(struct nandfs_node *, struct buf *, nandfs_lbn_t); +int nandfs_bmap_update_dat(struct nandfs_node *, nandfs_daddr_t, struct buf *); +int 
nandfs_bmap_dirty_blocks(struct nandfs_node *, struct buf *, int); +int nandfs_bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, + nandfs_lbn_t); +int nandfs_bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *); + +/* dirent */ +int nandfs_add_dirent(struct vnode *, uint64_t, char *, long, uint8_t); +int nandfs_remove_dirent(struct vnode *, struct nandfs_node *, + struct componentname *); +int nandfs_update_dirent(struct vnode *, struct nandfs_node *, + struct nandfs_node *); +int nandfs_init_dir(struct vnode *, uint64_t, uint64_t); +int nandfs_update_parent_dir(struct vnode *, uint64_t); + +void nandfs_vblk_set(struct buf *, nandfs_daddr_t); +nandfs_daddr_t nandfs_vblk_get(struct buf *); + +void nandfs_inode_init(struct nandfs_inode *, uint16_t); +void nandfs_inode_destroy(struct nandfs_inode *); + +/* ioctl */ +int nandfs_get_seg_stat(struct nandfs_device *, struct nandfs_seg_stat *); +int nandfs_chng_cpmode(struct nandfs_node *, struct nandfs_cpmode *); +int nandfs_get_cpinfo_ioctl(struct nandfs_node *, struct nandfs_argv *); +int nandfs_delete_cp(struct nandfs_node *, uint64_t start, uint64_t); +int nandfs_make_snap(struct nandfs_device *, uint64_t *); +int nandfs_delete_snap(struct nandfs_device *, uint64_t); +int nandfs_get_cpstat(struct nandfs_node *, struct nandfs_cpstat *); +int nandfs_get_segment_info_ioctl(struct nandfs_device *, struct nandfs_argv *); +int nandfs_get_dat_vinfo_ioctl(struct nandfs_device *, struct nandfs_argv *); +int nandfs_get_dat_bdescs_ioctl(struct nandfs_device *, struct nandfs_argv *); +int nandfs_get_fsinfo(struct nandfsmount *, struct nandfs_fsinfo *); + +int nandfs_get_cpinfo(struct nandfs_node *, uint64_t, uint16_t, + struct nandfs_cpinfo *, uint32_t, uint32_t *); + +nandfs_lbn_t nandfs_get_maxfilesize(struct nandfs_device *); + +int nandfs_write_superblock(struct nandfs_device *); + +extern int nandfs_sync_interval; +extern int nandfs_max_dirty_segs; +extern int nandfs_cps_between_sblocks; + +struct buf 
*nandfs_geteblk(int, int); + +void nandfs_dirty_bufs_increment(struct nandfs_device *); +void nandfs_dirty_bufs_decrement(struct nandfs_device *); + +int nandfs_start_cleaner(struct nandfs_device *); +int nandfs_stop_cleaner(struct nandfs_device *); + +int nandfs_segsum_valid(struct nandfs_segment_summary *); +int nandfs_load_segsum(struct nandfs_device *, nandfs_daddr_t, + struct nandfs_segment_summary *); +int nandfs_get_segment_info(struct nandfs_device *, struct nandfs_suinfo *, + uint32_t, uint64_t); +int nandfs_get_segment_info_filter(struct nandfs_device *, + struct nandfs_suinfo *, uint32_t, uint64_t, uint64_t *, uint32_t, uint32_t); +int nandfs_get_dat_vinfo(struct nandfs_device *, struct nandfs_vinfo *, + uint32_t); +int nandfs_get_dat_bdescs(struct nandfs_device *, struct nandfs_bdesc *, + uint32_t); + +#define NANDFS_VBLK_ASSIGNED 1 + +#define NANDFS_IS_INDIRECT(bp) ((bp)->b_lblkno < 0) + +int nandfs_erase(struct nandfs_device *, off_t, size_t); + +#define NANDFS_VOP_ISLOCKED(vp) nandfs_vop_islocked((vp)) +int nandfs_vop_islocked(struct vnode *vp); + +nandfs_daddr_t nandfs_block_to_dblock(struct nandfs_device *, nandfs_lbn_t); + +#define DEBUG_MODE +#if defined(DEBUG_MODE) +#define nandfs_error panic +#define nandfs_warning printf +#elif defined(TEST_MODE) +#define nandfs_error printf +#define nandfs_warning printf +#else +#define nandfs_error(...) +#define nandfs_warning(...) +#endif + +#endif /* !_FS_NANDFS_NANDFS_SUBR_H_ */ diff --git a/sys/fs/nandfs/nandfs_sufile.c b/sys/fs/nandfs/nandfs_sufile.c new file mode 100644 index 0000000..d4f4326 --- /dev/null +++ b/sys/fs/nandfs/nandfs_sufile.c @@ -0,0 +1,569 @@ +/*- + * Copyright (c) 2010-2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Pointer to the segment usage entry that lives `offset' bytes into the data
+ * area of buffer `bp'.  Both arguments are parenthesized so that expressions
+ * may be passed safely.
+ */
+#define	SU_USAGE_OFF(bp, offset) \
+	((struct nandfs_segment_usage *)((bp)->b_data + (offset)))
+
+/*
+ * Translate segment number `seg' into the location of its usage entry inside
+ * the segment usage file.  The entries start after the sufile header, which
+ * is rounded up to a multiple of the entry size.  On return `*blk' holds the
+ * file block number and `*offset' the byte offset inside that block.
+ * Always returns 0.
+ */
+static int
+nandfs_seg_usage_blk_offset(struct nandfs_device *fsdev, uint64_t seg,
+    uint64_t *blk, uint64_t *offset)
+{
+	uint64_t off;
+	uint16_t seg_size;
+
+	/* Size of a single usage entry, not of a segment. */
+	seg_size = fsdev->nd_fsdata.f_segment_usage_size;
+
+	off = roundup(sizeof(struct nandfs_sufile_header), seg_size);
+	off += (seg * seg_size);
+
+	*blk = off / fsdev->nd_blocksize;
+	*offset = off % fsdev->nd_blocksize;
+	return (0);
+}
+
+/*
+ * Alloc new segment.
+ *
+ * Scans the usage entries starting right after the last allocated segment
+ * (sh_last_alloc + 1), wrapping to segment 0 at the end of the device, and
+ * claims the first entry whose su_flags is zero.  On success the segment
+ * number is stored in `*seg', the clean/dirty counters in the sufile header
+ * and in `fsdev' are updated, the touched buffers are made dirty and 0 is
+ * returned.  A non-zero value is returned when a block of the usage file
+ * cannot be mapped/read/created or when no free segment exists.
+ *
+ * The su node vnode must be locked by the caller.
+ */
+int
+nandfs_alloc_segment(struct nandfs_device *fsdev, uint64_t *seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_sufile_header *su_header;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp_header, *bp;
+	uint64_t blk, vblk, offset, i, rest, nsegments, nchecked;
+	uint16_t seg_size;
+	int error, found;
+
+	seg_size = fsdev->nd_fsdata.f_segment_usage_size;
+	nsegments = fsdev->nd_fsdata.f_nsegments;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	/* Read header buffer */
+	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
+	if (error) {
+		brelse(bp_header);
+		return (error);
+	}
+
+	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
+
+	/* Get last allocated segment */
+	i = su_header->sh_last_alloc + 1;
+
+	found = 0;
+	bp = NULL;
+	nchecked = 0;
+	while (!found) {
+		nandfs_seg_usage_blk_offset(fsdev, i, &blk, &offset);
+		if (blk != 0) {
+			error = nandfs_bmap_lookup(su_node, blk, &vblk);
+			if (error) {
+				nandfs_error("%s: cannot find vblk for blk "
+				    "blk:%jx\n", __func__, (uintmax_t)blk);
+				/* The header buffer is still held here. */
+				brelse(bp_header);
+				return (error);
+			}
+			if (vblk)
+				error = nandfs_bread(su_node, blk, NOCRED, 0,
+				    &bp);
+			else
+				error = nandfs_bcreate(su_node, blk, NOCRED, 0,
+				    &bp);
+			if (error) {
+				nandfs_error("%s: cannot create/read "
+				    "vblk:%jx\n", __func__, (uintmax_t)vblk);
+				if (bp)
+					brelse(bp);
+				/* The header buffer is still held here. */
+				brelse(bp_header);
+				return (error);
+			}
+
+			su_usage = SU_USAGE_OFF(bp, offset);
+		} else {
+			su_usage = SU_USAGE_OFF(bp_header, offset);
+			bp = bp_header;
+		}
+
+		rest = (fsdev->nd_blocksize - offset) / seg_size;
+		/* Go through all su usage in block */
+		while (rest) {
+			/* When last check start from beginning */
+			if (i == nsegments)
+				break;
+
+			if (!su_usage->su_flags) {
+				su_usage->su_flags = 1;
+				found = 1;
+				break;
+			}
+			su_usage++;
+			i++;
+			nchecked++;
+
+			/*
+			 * If all checked return error.  The counter bounds
+			 * the scan when `i' can never meet sh_last_alloc
+			 * again (sh_last_alloc is 0 or out of range), which
+			 * would otherwise loop forever on a full device.
+			 */
+			if (i == su_header->sh_last_alloc ||
+			    nchecked >= nsegments) {
+				DPRINTF(SEG, ("%s: cannot allocate segment \n",
+				    __func__));
+				brelse(bp_header);
+				if (blk != 0)
+					brelse(bp);
+				return (1);
+			}
+			rest--;
+		}
+		if (!found) {
+			/* Otherwise read another block */
+			if (blk != 0)
+				brelse(bp);
+			/*
+			 * Forget the buffer so a later failure path cannot
+			 * release it (or the header buffer) a second time.
+			 */
+			bp = NULL;
+			if (i == nsegments) {
+				blk = 0;
+				i = 0;
+			} else
+				blk++;
+			offset = 0;
+		}
+	}
+
+	if (found) {
+		*seg = i;
+		su_header->sh_last_alloc = i;
+		su_header->sh_ncleansegs--;
+		su_header->sh_ndirtysegs++;
+
+		fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
+		    fsdev->nd_fsdata.f_blocks_per_segment;
+		fsdev->nd_clean_segs--;
+
+		/*
+		 * It is mostly called from syncer() so we want to force
+		 * making buf dirty.
+		 */
+		error = nandfs_dirty_buf(bp_header, 1);
+		if (error) {
+			if (bp && bp != bp_header)
+				brelse(bp);
+			return (error);
+		}
+		if (bp && bp != bp_header)
+			nandfs_dirty_buf(bp, 1);
+
+		DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)i));
+
+		return (0);
+	}
+
+	DPRINTF(SEG, ("%s: failed\n", __func__));
+
+	return (1);
+}
+
+/*
+ * Make buffer dirty, it will be updated soon but first it need to be
+ * gathered by syncer.
+ *
+ * Reads the usage file block holding the entry of segment `seg' and forces
+ * it dirty without modifying it.  Returns the nandfs_bread() error, or 0.
+ * The su node vnode must be locked by the caller.
+ */
+int
+nandfs_touch_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		nandfs_error("%s: cannot preallocate new segment\n", __func__);
+		return (error);
+	} else
+		nandfs_dirty_buf(bp, 1);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+	return (error);
+}
+
+/*
+ * Update block count of segment.
+ *
+ * Adds `nblks' to su_nblocks of segment `seg', stamps su_lastmod with the
+ * device timestamp, sets the entry flags to NANDFS_SEGMENT_USAGE_DIRTY and
+ * forces the buffer dirty.  Returns the nandfs_bread() error, or 0.
+ * The su node vnode must be locked by the caller.
+ */
+int
+nandfs_update_segment(struct nandfs_device *fsdev, uint64_t seg, uint32_t nblks)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		nandfs_error("%s: read block:%jx to update\n",
+		    __func__, (uintmax_t)blk);
+		brelse(bp);
+		return (error);
+	}
+
+	su_usage = SU_USAGE_OFF(bp, offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_flags = NANDFS_SEGMENT_USAGE_DIRTY;
+	su_usage->su_nblocks += nblks;
+
+	DPRINTF(SEG, ("%s: seg:%#jx inc:%#x cur:%#x\n", __func__,
+	    (uintmax_t)seg, nblks, su_usage->su_nblocks));
+
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/* Make segment free */
+int
+nandfs_free_segment(struct nandfs_device
*fsdev, uint64_t seg) +{ + struct nandfs_node *su_node; + struct nandfs_sufile_header *su_header; + struct nandfs_segment_usage *su_usage; + struct buf *bp_header, *bp; + uint64_t blk, offset; + int error; + + su_node = fsdev->nd_su_node; + ASSERT_VOP_LOCKED(NTOV(su_node), __func__); + + /* Read su header */ + error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header); + if (error) { + brelse(bp_header); + return (error); + } + + su_header = (struct nandfs_sufile_header *)bp_header->b_data; + nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset); + + /* Read su usage block if other than su header block */ + if (blk != 0) { + error = nandfs_bread(su_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + brelse(bp_header); + return (error); + } + } else + bp = bp_header; + + /* Reset su usage data */ + su_usage = SU_USAGE_OFF(bp, offset); + su_usage->su_lastmod = fsdev->nd_ts.tv_sec; + su_usage->su_nblocks = 0; + su_usage->su_flags = 0; + + /* Update clean/dirty counter in header */ + su_header->sh_ncleansegs++; + su_header->sh_ndirtysegs--; + + /* + * Make buffers dirty, called by cleaner + * so force dirty even if no much space left + * on device + */ + nandfs_dirty_buf(bp_header, 1); + if (bp != bp_header) + nandfs_dirty_buf(bp, 1); + + /* Update free block count */ + fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs * + fsdev->nd_fsdata.f_blocks_per_segment; + fsdev->nd_clean_segs++; + + DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg)); + + return (0); +} + +static int +nandfs_bad_segment(struct nandfs_device *fsdev, uint64_t seg) +{ + struct nandfs_node *su_node; + struct nandfs_segment_usage *su_usage; + struct buf *bp; + uint64_t blk, offset; + int error; + + su_node = fsdev->nd_su_node; + ASSERT_VOP_LOCKED(NTOV(su_node), __func__); + + nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset); + + error = nandfs_bread(su_node, blk, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (error); + } + + su_usage = SU_USAGE_OFF(bp, 
offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_flags = NANDFS_SEGMENT_USAGE_ERROR;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Set NANDFS_SEGMENT_USAGE_GC in the usage entry of segment `seg'.  Takes
+ * and drops the exclusive su node vnode lock itself.  Returns the
+ * nandfs_bread() error, or 0.
+ *
+ * NOTE(review): the buffer is released with brelse() and never made dirty,
+ * so the flag only lives in the cached buffer -- confirm this is intended.
+ */
+int
+nandfs_markgc_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(su_node), LK_EXCLUSIVE);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(su_node), 0);
+		return (error);
+	}
+
+	su_usage = SU_USAGE_OFF(bp, offset);
+	MPASS((su_usage->su_flags & NANDFS_SEGMENT_USAGE_GC) == 0);
+	su_usage->su_flags |= NANDFS_SEGMENT_USAGE_GC;
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(su_node), 0);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	return (0);
+}
+
+/*
+ * Erase the flash area of segment `seg' and account for the result in the
+ * segment usage file.  When the erase succeeds the segment is marked free.
+ * When the erase fails the segment is marked bad and is NOT freed:
+ * nandfs_free_segment() resets su_flags to 0, which would wipe the
+ * NANDFS_SEGMENT_USAGE_ERROR mark just set and hand the bad segment back to
+ * the allocator.  Returns the error of the bookkeeping call, or 0.
+ */
+int
+nandfs_clear_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	uint64_t offset, segsize;
+	uint32_t bps, bsize;
+
+	bps = fsdev->nd_fsdata.f_blocks_per_segment;
+	bsize = fsdev->nd_blocksize;
+	/* Widen before multiplying so the product cannot wrap at 32 bits. */
+	segsize = (uint64_t)bsize * bps;
+	nandfs_get_segment_range(fsdev, seg, &offset, NULL);
+	offset *= bsize;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	/* Erase it and mark it bad when fail */
+	if (nandfs_erase(fsdev, offset, segsize))
+		return (nandfs_bad_segment(fsdev, seg));
+
+	/* Mark it free */
+	return (nandfs_free_segment(fsdev, seg));
+}
+
+/*
+ * Fill `nss' with segment statistics: the segment count from the fsdata,
+ * the clean/dirty counters from the sufile header, the device timestamp
+ * and the current segment sequence number (nss_ctime is set to 0).
+ * Takes the device write lock and a shared su node vnode lock.
+ * Returns 0, or the errno reported by nandfs_bread() for the header block.
+ */
+int
+nandfs_get_seg_stat(struct nandfs_device *nandfsdev,
+    struct nandfs_seg_stat *nss)
+{
+	struct nandfs_sufile_header *suhdr;
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	int err;
+
+	su_node = nandfsdev->nd_su_node;
+
+	NANDFS_WRITELOCK(nandfsdev);
+	VOP_LOCK(NTOV(su_node), LK_SHARED);
+	err = nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp);
+	if (err) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(su_node), 0);
+		NANDFS_WRITEUNLOCK(nandfsdev);
+		/*
+		 * Hand back the real errno; -1 is not an errno value and
+		 * aliases ERESTART inside the kernel.
+		 */
+		return (err);
+	}
+
+	suhdr = (struct nandfs_sufile_header *)bp->b_data;
+	nss->nss_nsegs = nandfsdev->nd_fsdata.f_nsegments;
+	nss->nss_ncleansegs = suhdr->sh_ncleansegs;
+	nss->nss_ndirtysegs = suhdr->sh_ndirtysegs;
+	nss->nss_ctime = 0;
+	nss->nss_nongc_ctime = nandfsdev->nd_ts.tv_sec;
+	nss->nss_prot_seq = nandfsdev->nd_seg_sequence;
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(su_node), 0);
+
+	NANDFS_WRITEUNLOCK(nandfsdev);
+
+	return (0);
+}
+
+/*
+ * ioctl backend: collect usage info for up to nargv->nv_nmembs segments
+ * starting at segment nargv->nv_index and copy the array out to the user
+ * address nargv->nv_base.  Requests for more than NANDFS_SEGMENTS_MAX
+ * entries are rejected with EINVAL.  The temporary array is zeroed, so
+ * entries that were not filled in are copied out as zeroes.
+ */
+int
+nandfs_get_segment_info_ioctl(struct nandfs_device *fsdev,
+    struct nandfs_argv *nargv)
+{
+	struct nandfs_suinfo *nsi;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_SEGMENTS_MAX)
+		return (EINVAL);
+
+	nsi = malloc(sizeof(struct nandfs_suinfo) * nargv->nv_nmembs,
+	    M_NANDFSTEMP, M_WAITOK | M_ZERO);
+
+	error = nandfs_get_segment_info(fsdev, nsi, nargv->nv_nmembs,
+	    nargv->nv_index);
+
+	if (error == 0)
+		error = copyout(nsi, (void *)(uintptr_t)nargv->nv_base,
+		    sizeof(struct nandfs_suinfo) * nargv->nv_nmembs);
+
+	free(nsi, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Unfiltered variant of nandfs_get_segment_info_filter(): no flag filters
+ * and no count of returned segments.
+ */
+int
+nandfs_get_segment_info(struct nandfs_device *fsdev, struct nandfs_suinfo *nsi,
+    uint32_t nmembs, uint64_t segment)
+{
+
+	return (nandfs_get_segment_info_filter(fsdev, nsi, nmembs, segment,
+	    NULL, 0, 0));
+}
+
+int
+nandfs_get_segment_info_filter(struct nandfs_device *fsdev,
+    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment,
+    uint64_t *nsegs, uint32_t filter, uint32_t nfilter)
+{
+	struct nandfs_segment_usage *su;
+	struct nandfs_node *su_node;
+	struct buf *bp;
+	uint64_t curr, blocknr, blockoff, i;
+	uint32_t flags;
+	int err = 0;
+
+	curr = ~(0);
+
+	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
+	su_node = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(su_node), LK_SHARED);
+
+	bp = NULL;
+	if (nsegs != NULL)
+		*nsegs = 0;
+	for (i = 0; i < nmembs; segment++) {
+		if (segment == fsdev->nd_fsdata.f_nsegments)
+			break;
+
+		nandfs_seg_usage_blk_offset(fsdev, segment,
&blocknr, + &blockoff); + + if (i == 0 || curr != blocknr) { + if (bp != NULL) + brelse(bp); + err = nandfs_bread(su_node, blocknr, NOCRED, + 0, &bp); + if (err) { + goto out; + } + curr = blocknr; + } + + su = SU_USAGE_OFF(bp, blockoff); + flags = su->su_flags; + if (segment == fsdev->nd_seg_num || + segment == fsdev->nd_next_seg_num) + flags |= NANDFS_SEGMENT_USAGE_ACTIVE; + + if (nfilter != 0 && (flags & nfilter) != 0) + continue; + if (filter != 0 && (flags & filter) == 0) + continue; + + nsi->nsi_num = segment; + nsi->nsi_lastmod = su->su_lastmod; + nsi->nsi_blocks = su->su_nblocks; + nsi->nsi_flags = flags; + nsi++; + i++; + if (nsegs != NULL) + (*nsegs)++; + } + +out: + if (bp != NULL) + brelse(bp); + VOP_UNLOCK(NTOV(su_node), 0); + lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); + + return (err); +} diff --git a/sys/fs/nandfs/nandfs_vfsops.c b/sys/fs/nandfs/nandfs_vfsops.c new file mode 100644 index 0000000..ba53546 --- /dev/null +++ b/sys/fs/nandfs/nandfs_vfsops.c @@ -0,0 +1,1590 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/fcntl.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/priv.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/sysctl.h> +#include <sys/libkern.h> + +#include <geom/geom.h> +#include <geom/geom_vfs.h> + +#include <machine/_inttypes.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure"); + +#define NANDFS_SET_SYSTEMFILE(vp) { \ + (vp)->v_vflag |= VV_SYSTEM; \ + vref(vp); \ + vput(vp); } + +#define NANDFS_UNSET_SYSTEMFILE(vp) { \ + VOP_LOCK(vp, LK_EXCLUSIVE); \ + MPASS(vp->v_bufobj.bo_dirty.bv_cnt == 0); \ + (vp)->v_vflag &= ~VV_SYSTEM; \ + vgone(vp); \ + vput(vp); } + +/* Globals */ +struct _nandfs_devices nandfs_devices; + +/* Parameters */ +int nandfs_verbose = 0; + +static void +nandfs_tunable_init(void *arg) +{ + + TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose); +} +SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL); + +static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem"); +static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0, + "NANDFS 
mountpoints"); +SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, ""); + +#define NANDFS_CONSTR_INTERVAL 5 +int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */ +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW, + &nandfs_sync_interval, 0, ""); + +#define NANDFS_MAX_DIRTY_SEGS 5 +int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */ +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW, + &nandfs_max_dirty_segs, 0, ""); + +#define NANDFS_CPS_BETWEEN_SBLOCKS 5 +int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS; /* write superblock every 5 checkpoints */ +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW, + &nandfs_cps_between_sblocks, 0, ""); + +#define NANDFS_CLEANER_ENABLE 1 +int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE; +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW, + &nandfs_cleaner_enable, 0, ""); + +#define NANDFS_CLEANER_INTERVAL 5 +int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL; +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW, + &nandfs_cleaner_interval, 0, ""); + +#define NANDFS_CLEANER_SEGMENTS 5 +int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS; +SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW, + &nandfs_cleaner_segments, 0, ""); + +static int nandfs_mountfs(struct vnode *devvp, struct mount *mp); +static vfs_mount_t nandfs_mount; +static vfs_root_t nandfs_root; +static vfs_statfs_t nandfs_statfs; +static vfs_unmount_t nandfs_unmount; +static vfs_vget_t nandfs_vget; +static vfs_sync_t nandfs_sync; +static const char *nandfs_opts[] = { + "snap", "from", "noatime", NULL +}; + +/* System nodes */ +static int +nandfs_create_system_nodes(struct nandfs_device *nandfsdev) +{ + int error; + + error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO, + &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node); + if (error) + goto errorout; + + error = 
nandfs_get_node_raw(nandfsdev, NULL, NANDFS_CPFILE_INO, + &nandfsdev->nd_super_root.sr_cpfile, &nandfsdev->nd_cp_node); + if (error) + goto errorout; + + error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_SUFILE_INO, + &nandfsdev->nd_super_root.sr_sufile, &nandfsdev->nd_su_node); + if (error) + goto errorout; + + error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO, + NULL, &nandfsdev->nd_gc_node); + if (error) + goto errorout; + + NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node)); + NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node)); + NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node)); + NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node)); + + DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n", + NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node), + NTOV(nandfsdev->nd_su_node))); + return (0); + +errorout: + nandfs_dispose_node(&nandfsdev->nd_gc_node); + nandfs_dispose_node(&nandfsdev->nd_dat_node); + nandfs_dispose_node(&nandfsdev->nd_cp_node); + nandfs_dispose_node(&nandfsdev->nd_su_node); + + return (error); +} + +static void +nandfs_release_system_nodes(struct nandfs_device *nandfsdev) +{ + + if (!nandfsdev) + return; + if (nandfsdev->nd_refcnt > 0) + return; + + if (nandfsdev->nd_gc_node) + NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node)); + if (nandfsdev->nd_dat_node) + NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node)); + if (nandfsdev->nd_cp_node) + NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node)); + if (nandfsdev->nd_su_node) + NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node)); +} + +static int +nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata) +{ + uint32_t fsdata_crc, comp_crc; + + if (fsdata->f_magic != NANDFS_FSDATA_MAGIC) + return (0); + + /* Preserve CRC */ + fsdata_crc = fsdata->f_sum; + + /* Calculate */ + fsdata->f_sum = (0); + comp_crc = crc32((uint8_t *)fsdata, fsdata->f_bytes); + + /* Restore */ + fsdata->f_sum = fsdata_crc; + + /* Check CRC */ + return (fsdata_crc == comp_crc); +} + +static int 
+nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata, + struct nandfs_super_block *super) +{ + uint32_t super_crc, comp_crc; + + /* Check super block magic */ + if (super->s_magic != NANDFS_SUPER_MAGIC) + return (0); + + /* Preserve CRC */ + super_crc = super->s_sum; + + /* Calculate */ + super->s_sum = (0); + comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes); + + /* Restore */ + super->s_sum = super_crc; + + /* Check CRC */ + return (super_crc == comp_crc); +} + +static void +nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata, + struct nandfs_super_block *super) +{ + uint32_t comp_crc; + + /* Calculate */ + super->s_sum = 0; + comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes); + + /* Restore */ + super->s_sum = comp_crc; +} + +static int +nandfs_is_empty(u_char *area, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (area[i] != 0xff) + return (0); + + return (1); +} + +static __inline int +nandfs_sblocks_in_esize(struct nandfs_device *fsdev) +{ + + return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) / + sizeof(struct nandfs_super_block)); +} + +static __inline int +nandfs_max_sblocks(struct nandfs_device *fsdev) +{ + + return (NANDFS_NFSAREAS * nandfs_sblocks_in_esize(fsdev)); +} + +static __inline int +nandfs_sblocks_in_block(struct nandfs_device *fsdev) +{ + + return (fsdev->nd_devblocksize / sizeof(struct nandfs_super_block)); +} + +static __inline int +nandfs_sblocks_in_first_block(struct nandfs_device *fsdev) +{ + int n; + + n = nandfs_sblocks_in_block(fsdev) - + NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block); + if (n < 0) + n = 0; + + return (n); +} + +static int +nandfs_write_superblock_at(struct nandfs_device *fsdev, + struct nandfs_fsarea *fstp) +{ + struct nandfs_super_block *super, *supert; + struct buf *bp; + int sb_per_sector, sbs_in_fsd, read_block; + int index, pos, error; + off_t offset; + + DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n", + __func__, fstp->last_used, 
nandfs_sblocks_in_esize(fsdev))); + if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1) + index = 0; + else + index = fstp->last_used + 1; + + super = &fsdev->nd_super; + supert = NULL; + + sb_per_sector = nandfs_sblocks_in_block(fsdev); + sbs_in_fsd = sizeof(struct nandfs_fsdata) / + sizeof(struct nandfs_super_block); + index += sbs_in_fsd; + offset = fstp->offset; + + DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx " + "s_last_seq %#jx wtime %jd index %d\n", __func__, offset, + super->s_last_pseg, super->s_last_cno, super->s_last_seq, + super->s_wtime, index)); + + read_block = btodb(offset + ((index / sb_per_sector) * sb_per_sector) + * sizeof(struct nandfs_super_block)); + + DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block)); + + if (index == sbs_in_fsd) { + error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize); + if (error) + return (error); + + error = bread(fsdev->nd_devvp, btodb(offset), + fsdev->nd_devblocksize, NOCRED, &bp); + if (error) { + printf("NANDFS: couldn't read initial data: %d\n", + error); + brelse(bp); + return (error); + } + memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata)); + /* + * 0xff-out the rest. This bp could be cached, so potentially + * b_data contains stale super blocks. + * + * We don't mind cached bp since most of the time we just add + * super blocks to already 0xff-out b_data and don't need to + * perform actual read. 
+ */ + if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata)) + memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff, + fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata)); + error = bwrite(bp); + if (error) { + printf("NANDFS: cannot rewrite initial data at %jx\n", + offset); + return (error); + } + } + + error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize, + NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + + supert = (struct nandfs_super_block *)(bp->b_data); + pos = index % sb_per_sector; + + DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos)); + memcpy(&supert[pos], super, sizeof(struct nandfs_super_block)); + + /* + * See comment above in code that performs erase. + */ + if (pos == 0) + memset(&supert[1], 0xff, + (sb_per_sector - 1) * sizeof(struct nandfs_super_block)); + + error = bwrite(bp); + if (error) { + printf("NANDFS: cannot update superblock at %jx\n", offset); + return (error); + } + + DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__, + fstp->last_used, index - sbs_in_fsd)); + fstp->last_used = index - sbs_in_fsd; + + return (0); +} + +int +nandfs_write_superblock(struct nandfs_device *fsdev) +{ + struct nandfs_super_block *super; + struct timespec ts; + int error; + int i, j; + + vfs_timestamp(&ts); + + super = &fsdev->nd_super; + + super->s_last_pseg = fsdev->nd_last_pseg; + super->s_last_cno = fsdev->nd_last_cno; + super->s_last_seq = fsdev->nd_seg_sequence; + super->s_wtime = ts.tv_sec; + + nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super); + + error = 0; + for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS; + i++, j = (j + 1 % NANDFS_NFSAREAS)) { + if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) { + DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j)); + continue; + } + error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]); + if (error) { + printf("NANDFS: writing superblock at offset %d failed:" + "%d\n", j * fsdev->nd_erasesize, error); + fsdev->nd_fsarea[j].flags |= 
NANDFS_FSSTOR_FAILED; + } else + break; + } + + if (i == NANDFS_NFSAREAS) { + printf("NANDFS: superblock was not written\n"); + /* + * TODO: switch to read-only? + */ + return (error); + } else + fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS; + + return (0); +} + +static int +nandfs_select_fsdata(struct nandfs_device *fsdev, + struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds) +{ + int i; + + *fsdata = NULL; + for (i = 0; i < nfsds; i++) { + DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__, + i, fsdatat[i].f_magic, fsdatat[i].f_sum)); + if (!nandfs_check_fsdata_crc(&fsdatat[i])) + continue; + *fsdata = &fsdatat[i]; + break; + } + + return (*fsdata != NULL ? 0 : EINVAL); +} + +static int +nandfs_select_sb(struct nandfs_device *fsdev, + struct nandfs_super_block *supert, struct nandfs_super_block **super, + int nsbs) +{ + int i; + + *super = NULL; + for (i = 0; i < nsbs; i++) { + if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata, &supert[i])) + continue; + DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x " + "s_wtime %jd\n", __func__, i, supert[i].s_last_cno, + supert[i].s_magic, supert[i].s_wtime)); + if (*super == NULL || supert[i].s_last_cno > + (*super)->s_last_cno) + *super = &supert[i]; + } + + return (*super != NULL ? 
0 : EINVAL); +} + +static int +nandfs_read_structures_at(struct nandfs_device *fsdev, + struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata, + struct nandfs_super_block *super) +{ + struct nandfs_super_block *tsuper, *tsuperd; + struct buf *bp; + int error, read_size; + int i; + int offset; + + offset = fstp->offset; + + if (fsdev->nd_erasesize > MAXBSIZE) + read_size = MAXBSIZE; + else + read_size = fsdev->nd_erasesize; + + error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp); + if (error) { + printf("couldn't read: %d\n", error); + brelse(bp); + fstp->flags |= NANDFS_FSSTOR_FAILED; + return (error); + } + + tsuper = super; + + memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata)); + memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)), + read_size - sizeof(struct nandfs_fsdata)); + brelse(bp); + + tsuper += (read_size - sizeof(struct nandfs_fsdata)) / + sizeof(struct nandfs_super_block); + + for (i = 1; i < fsdev->nd_erasesize / read_size; i++) { + error = bread(fsdev->nd_devvp, btodb(offset + i * read_size), + read_size, NOCRED, &bp); + if (error) { + printf("couldn't read: %d\n", error); + brelse(bp); + fstp->flags |= NANDFS_FSSTOR_FAILED; + return (error); + } + memcpy(tsuper, bp->b_data, read_size); + tsuper += read_size / sizeof(struct nandfs_super_block); + brelse(bp); + } + + tsuper -= 1; + fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1; + for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) { + if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper))) + fstp->last_used--; + else + break; + } + + DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used)); + + return (0); +} + +static int +nandfs_read_structures(struct nandfs_device *fsdev) +{ + struct nandfs_fsdata *fsdata, *fsdatat; + struct nandfs_super_block *sblocks, *ssblock; + int nsbs, nfsds, i; + int error = 0; + int nrsbs; + + nfsds = NANDFS_NFSAREAS; + nsbs = nandfs_max_sblocks(fsdev); + + fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, 
M_NANDFSTEMP, + M_WAITOK | M_ZERO); + sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP, + M_WAITOK | M_ZERO); + + nrsbs = 0; + for (i = 0; i < NANDFS_NFSAREAS; i++) { + fsdev->nd_fsarea[i].offset = i * fsdev->nd_erasesize; + error = nandfs_read_structures_at(fsdev, &fsdev->nd_fsarea[i], + &fsdatat[i], sblocks + nrsbs); + if (error) + continue; + nrsbs += (fsdev->nd_fsarea[i].last_used + 1); + if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used > + fsdev->nd_fsarea[i].last_used) + fsdev->nd_last_fsarea = i; + } + + if (nrsbs == 0) { + printf("nandfs: no valid superblocks found\n"); + error = EINVAL; + goto out; + } + + error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds); + if (error) + goto out; + memcpy(&fsdev->nd_fsdata, fsdata, sizeof(struct nandfs_fsdata)); + + error = nandfs_select_sb(fsdev, sblocks, &ssblock, nsbs); + if (error) + goto out; + + memcpy(&fsdev->nd_super, ssblock, sizeof(struct nandfs_super_block)); +out: + free(fsdatat, M_NANDFSTEMP); + free(sblocks, M_NANDFSTEMP); + + if (error == 0) + DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd " + "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime, + fsdev->nd_super.s_last_pseg)); + + return (error); +} + +static void +nandfs_unmount_base(struct nandfs_device *nandfsdev) +{ + int error; + + if (!nandfsdev) + return; + + /* Remove all our information */ + error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0); + if (error) { + /* + * Flushing buffers failed when fs was umounting, can't do + * much now, just printf error and continue with umount. 
+ */ + nandfs_error("%s(): error:%d when umounting FS\n", + __func__, error); + } + + /* Release the device's system nodes */ + nandfs_release_system_nodes(nandfsdev); +} + +static void +nandfs_get_ncleanseg(struct nandfs_device *nandfsdev) +{ + struct nandfs_seg_stat nss; + + nandfs_get_seg_stat(nandfsdev, &nss); + nandfsdev->nd_clean_segs = nss.nss_ncleansegs; + DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n", + (uintmax_t)nandfsdev->nd_clean_segs)); +} + + +static int +nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp, + struct nandfs_args *args) +{ + uint32_t log_blocksize; + int error; + + /* Flush out any old buffers remaining from a previous use. */ + if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0))) + return (error); + + error = nandfs_read_structures(nandfsdev); + if (error) { + printf("nandfs: could not get valid filesystem structures\n"); + return (error); + } + + if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) { + printf("nandfs: unsupported file system revision: %d " + "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level, + NANDFS_CURRENT_REV); + return (EINVAL); + } + + if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) { + printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n", + nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize); + return (EINVAL); + } + + /* Get our blocksize */ + log_blocksize = nandfsdev->nd_fsdata.f_log_block_size; + nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10); + DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__, + nandfsdev->nd_blocksize)); + + DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__, + (uintmax_t)nandfsdev->nd_super.s_last_cno)); + + /* Calculate dat structure parameters */ + nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt, + nandfsdev->nd_fsdata.f_dat_entry_size); + nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt, + nandfsdev->nd_fsdata.f_inode_size); + + /* Search for the super root 
and roll forward when needed */ + if (nandfs_search_super_root(nandfsdev)) { + printf("Cannot find valid SuperRoot\n"); + return (EINVAL); + } + + nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state; + if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) { + printf("FS is seriously damaged, needs repairing\n"); + printf("aborting mount\n"); + return (EINVAL); + } + + /* + * FS should be ok now. The superblock and the last segsum could be + * updated from the repair so extract running values again. + */ + nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg; + nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq; + nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev, + nandfsdev->nd_last_pseg); + nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev, + nandfsdev->nd_last_segsum.ss_next); + nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create; + nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno; + nandfsdev->nd_fakevblk = 1; + nandfsdev->nd_last_ino = NANDFS_USER_INO; + DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n" + "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx\n", + __func__, (uintmax_t)nandfsdev->nd_last_pseg, + (uintmax_t)nandfsdev->nd_last_cno, + (uintmax_t)nandfsdev->nd_seg_sequence, + (uintmax_t)nandfsdev->nd_seg_sequence, + (uintmax_t)nandfsdev->nd_seg_num, + (uintmax_t)nandfsdev->nd_next_seg_num)); + + DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n")); + + /* Create system vnodes for DAT, CP and SEGSUM */ + error = nandfs_create_system_nodes(nandfsdev); + if (error) + nandfs_unmount_base(nandfsdev); + + nandfs_get_ncleanseg(nandfsdev); + + return (error); +} + +static void +nandfs_unmount_device(struct nandfs_device *nandfsdev) +{ + + /* Is there anything? 
*/ + if (nandfsdev == NULL) + return; + + /* Remove the device only if we're the last reference */ + nandfsdev->nd_refcnt--; + if (nandfsdev->nd_refcnt >= 1) + return; + + MPASS(nandfsdev->nd_syncer == NULL); + MPASS(nandfsdev->nd_cleaner == NULL); + MPASS(nandfsdev->nd_free_base == NULL); + + /* Unmount our base */ + nandfs_unmount_base(nandfsdev); + + /* Remove from our device list */ + SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device); + + DROP_GIANT(); + g_topology_lock(); + g_vfs_close(nandfsdev->nd_gconsumer); + g_topology_unlock(); + PICKUP_GIANT(); + + DPRINTF(VOLUMES, ("closing device\n")); + + /* Clear our mount reference and release device node */ + vrele(nandfsdev->nd_devvp); + + dev_rel(nandfsdev->nd_devvp->v_rdev); + + /* Free our device info */ + cv_destroy(&nandfsdev->nd_sync_cv); + mtx_destroy(&nandfsdev->nd_sync_mtx); + cv_destroy(&nandfsdev->nd_clean_cv); + mtx_destroy(&nandfsdev->nd_clean_mtx); + mtx_destroy(&nandfsdev->nd_mutex); + lockdestroy(&nandfsdev->nd_seg_const); + free(nandfsdev, M_NANDFSMNT); +} + +static int +nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp, + struct nandfs_args *args) +{ + struct nandfsmount *nmp; + uint64_t last_cno; + + /* no double-mounting of the same checkpoint */ + STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { + if (nmp->nm_mount_args.cpno == args->cpno) + return (EBUSY); + } + + /* Allow readonly mounts without questioning here */ + if (mp->mnt_flag & MNT_RDONLY) + return (0); + + /* Read/write mount */ + STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { + /* Only one RW mount on this device! 
*/ + if ((nmp->nm_vfs_mountp->mnt_flag & MNT_RDONLY)==0) + return (EROFS); + /* RDONLY on last mountpoint is device busy */ + last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno; + if (nmp->nm_mount_args.cpno == last_cno) + return (EBUSY); + } + + /* OK for now */ + return (0); +} + +static int +nandfs_mount_device(struct vnode *devvp, struct mount *mp, + struct nandfs_args *args, struct nandfs_device **nandfsdev_p) +{ + struct nandfs_device *nandfsdev; + struct g_provider *pp; + struct g_consumer *cp; + struct cdev *dev; + uint32_t erasesize; + int error, size; + int ronly; + + DPRINTF(VOLUMES, ("Mounting NANDFS device\n")); + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + + /* Look up device in our nandfs_mountpoints */ + *nandfsdev_p = NULL; + SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device) + if (nandfsdev->nd_devvp == devvp) + break; + + if (nandfsdev) { + DPRINTF(VOLUMES, ("device already mounted\n")); + error = nandfs_check_mounts(nandfsdev, mp, args); + if (error) + return error; + nandfsdev->nd_refcnt++; + *nandfsdev_p = nandfsdev; + + if (!ronly) { + DROP_GIANT(); + g_topology_lock(); + error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + } + return (error); + } + + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + dev = devvp->v_rdev; + dev_ref(dev); + DROP_GIANT(); + g_topology_lock(); + error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 
0 : 1); + pp = g_dev_getprovider(dev); + g_topology_unlock(); + PICKUP_GIANT(); + VOP_UNLOCK(devvp, 0); + if (error) { + dev_rel(dev); + return (error); + } + + nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT, M_WAITOK | M_ZERO); + + /* Initialise */ + nandfsdev->nd_refcnt = 1; + nandfsdev->nd_devvp = devvp; + nandfsdev->nd_syncing = 0; + nandfsdev->nd_cleaning = 0; + nandfsdev->nd_gconsumer = cp; + cv_init(&nandfsdev->nd_sync_cv, "nandfssync"); + mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF); + cv_init(&nandfsdev->nd_clean_cv, "nandfsclean"); + mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF); + mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF); + lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT, + LK_CANRECURSE); + STAILQ_INIT(&nandfsdev->nd_mounts); + + nandfsdev->nd_devsize = pp->mediasize; + nandfsdev->nd_devblocksize = pp->sectorsize; + + size = sizeof(erasesize); + error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size, + &erasesize); + if (error) { + DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error)); + + if (error == ENOIOCTL || error == EOPNOTSUPP) { + /* + * We conclude that this is not NAND storage + */ + nandfsdev->nd_erasesize = NANDFS_DEF_ERASESIZE; + nandfsdev->nd_is_nand = 0; + } else { + DROP_GIANT(); + g_topology_lock(); + g_vfs_close(nandfsdev->nd_gconsumer); + g_topology_unlock(); + PICKUP_GIANT(); + dev_rel(dev); + free(nandfsdev, M_NANDFSMNT); + return (error); + } + } else { + nandfsdev->nd_erasesize = erasesize; + nandfsdev->nd_is_nand = 1; + } + + DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__, + nandfsdev->nd_erasesize)); + + /* Register nandfs_device in list */ + SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device); + + error = nandfs_mount_base(nandfsdev, mp, args); + if (error) { + /* Remove all our information */ + nandfs_unmount_device(nandfsdev); + return (EINVAL); + } + + nandfsdev->nd_maxfilesize = 
nandfs_get_maxfilesize(nandfsdev); + + *nandfsdev_p = nandfsdev; + DPRINTF(VOLUMES, ("NANDFS device mounted ok\n")); + + return (0); +} + +static int +nandfs_mount_checkpoint(struct nandfsmount *nmp) +{ + struct nandfs_cpfile_header *cphdr; + struct nandfs_checkpoint *cp; + struct nandfs_inode ifile_inode; + struct nandfs_node *cp_node; + struct buf *bp; + uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno; + uint32_t off, dlen; + int cp_per_block, error; + + cpno = nmp->nm_mount_args.cpno; + if (cpno == 0) + cpno = nmp->nm_nandfsdev->nd_super.s_last_cno; + + DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n", + __func__, cpno)); + + cp_node = nmp->nm_nandfsdev->nd_cp_node; + + VOP_LOCK(NTOV(cp_node), LK_SHARED); + /* Get cpfile header from 1st block of cp file */ + error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + return (error); + } + + cphdr = (struct nandfs_cpfile_header *) bp->b_data; + ncp = cphdr->ch_ncheckpoints; + nsn = cphdr->ch_nsnapshots; + + brelse(bp); + + DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n")); + DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp)); + DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn)); + + /* Read in our specified checkpoint */ + dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size; + cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen; + + fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1; + blocknr = fcpno / cp_per_block; + off = (fcpno % cp_per_block) * dlen; + error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + printf("mount_nandfs: couldn't read cp block %"PRIu64"\n", + fcpno); + return (EINVAL); + } + + /* Needs to be a valid checkpoint */ + cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off); + if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) { + printf("mount_nandfs: checkpoint marked invalid\n"); + brelse(bp); + 
VOP_UNLOCK(NTOV(cp_node), 0); + return (EINVAL); + } + + /* Is this really the checkpoint we want? */ + if (cp->cp_cno != cpno) { + printf("mount_nandfs: checkpoint file corrupt? " + "expected cpno %"PRIu64", found cpno %"PRIu64"\n", + cpno, cp->cp_cno); + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + return (EINVAL); + } + + /* Check if it's a snapshot ! */ + last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno; + if (cpno != last_cno) { + /* Only allow snapshots if not mounting on the last cp */ + if ((cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) { + printf( "mount_nandfs: checkpoint %"PRIu64" is not a " + "snapshot\n", cpno); + brelse(bp); + VOP_UNLOCK(NTOV(cp_node), 0); + return (EINVAL); + } + } + + ifile_inode = cp->cp_ifile_inode; + brelse(bp); + + /* Get ifile inode */ + error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO, + &ifile_inode, &nmp->nm_ifile_node); + if (error) { + printf("mount_nandfs: can't read ifile node\n"); + VOP_UNLOCK(NTOV(cp_node), 0); + return (EINVAL); + } + + NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node)); + VOP_UNLOCK(NTOV(cp_node), 0); + /* Get root node? 
*/ + + return (0); +} + +static void +free_nandfs_mountinfo(struct mount *mp) +{ + struct nandfsmount *nmp = VFSTONANDFS(mp); + + if (nmp == NULL) + return; + + free(nmp, M_NANDFSMNT); +} + +void +nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason) +{ + char *reasons[] = { + "umount", + "vfssync", + "bdflush", + "fforce", + "fsync", + "ro_upd" + }; + + DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason])); + mtx_lock(&nffsdev->nd_sync_mtx); + if (nffsdev->nd_syncing) + cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx); + if (reason == SYNCER_UMOUNT) + nffsdev->nd_syncer_exit = 1; + nffsdev->nd_syncing = 1; + wakeup(&nffsdev->nd_syncing); + cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx); + + mtx_unlock(&nffsdev->nd_sync_mtx); +} + +static void +nandfs_gc_finished(struct nandfs_device *nffsdev, int exit) +{ + int error; + + mtx_lock(&nffsdev->nd_sync_mtx); + nffsdev->nd_syncing = 0; + DPRINTF(SYNC, ("%s: cleaner finish\n", __func__)); + cv_broadcast(&nffsdev->nd_sync_cv); + mtx_unlock(&nffsdev->nd_sync_mtx); + if (!exit) { + error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-", + hz * nandfs_sync_interval); + DPRINTF(SYNC, ("%s: cleaner waked up: %d\n", + __func__, error)); + } +} + +static void +nandfs_syncer(struct nandfsmount *nmp) +{ + struct nandfs_device *nffsdev; + struct mount *mp; + int flags, error; + + mp = nmp->nm_vfs_mountp; + nffsdev = nmp->nm_nandfsdev; + tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval); + + while (!nffsdev->nd_syncer_exit) { + DPRINTF(SYNC, ("%s: syncer run\n", __func__)); + nffsdev->nd_syncing = 1; + + flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT)); + + error = nandfs_segment_constructor(nmp, flags); + if (error) + nandfs_error("%s: error:%d when creating segments\n", + __func__, error); + + nmp->nm_flags &= ~flags; + + nandfs_gc_finished(nffsdev, 0); + } + + MPASS(nffsdev->nd_cleaner == NULL); + error = nandfs_segment_constructor(nmp, + NANDFS_FORCE_SYNCER | NANDFS_UMOUNT); + 
if (error) + nandfs_error("%s: error:%d when creating segments\n", + __func__, error); + nandfs_gc_finished(nffsdev, 1); + nffsdev->nd_syncer = NULL; + MPASS(nffsdev->nd_free_base == NULL); + + DPRINTF(SYNC, ("%s: exiting\n", __func__)); + kthread_exit(); +} + +static int +start_syncer(struct nandfsmount *nmp) +{ + int error; + + MPASS(nmp->nm_nandfsdev->nd_syncer == NULL); + + DPRINTF(SYNC, ("%s: start syncer\n", __func__)); + + nmp->nm_nandfsdev->nd_syncer_exit = 0; + + error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL, + &nmp->nm_nandfsdev->nd_syncer, 0, 0, "nandfs_syncer"); + + if (error) + printf("nandfs: could not start syncer: %d\n", error); + + return (error); +} + +static int +stop_syncer(struct nandfsmount *nmp) +{ + + MPASS(nmp->nm_nandfsdev->nd_syncer != NULL); + + nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_UMOUNT); + + DPRINTF(SYNC, ("%s: stop syncer\n", __func__)); + return (0); +} + +/* + * Mount null layer + */ +static int +nandfs_mount(struct mount *mp) +{ + struct nandfsmount *nmp; + struct vnode *devvp; + struct nameidata nd; + struct vfsoptlist *opts; + struct thread *td; + char *from; + int error = 0, flags; + + DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp)); + + td = curthread; + opts = mp->mnt_optnew; + + if (vfs_filteropt(opts, nandfs_opts)) + return (EINVAL); + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) { + nmp = VFSTONANDFS(mp); + if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) { + return (error); + } + if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0)) { + vn_start_write(NULL, &mp, V_WAIT); + error = VFS_SYNC(mp, MNT_WAIT); + if (error) + return (error); + vn_finished_write(mp); + + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + + nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, + SYNCER_ROUPD); + error = vflush(mp, 0, flags, td); + if (error) + return (error); + + nandfs_stop_cleaner(nmp->nm_nandfsdev); + stop_syncer(nmp); + DROP_GIANT(); + 
g_topology_lock(); + g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_RDONLY; + MNT_IUNLOCK(mp); + nmp->nm_ronly = 1; + + } else if ((nmp->nm_ronly) && + !vfs_flagopt(opts, "ro", NULL, 0)) { + /* + * Don't allow read-write snapshots. + */ + if (nmp->nm_mount_args.cpno != 0) + return (EROFS); + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. + */ + devvp = nmp->nm_nandfsdev->nd_devvp; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ACCESS(devvp, VREAD | VWRITE, + td->td_ucred, td); + if (error) { + error = priv_check(td, PRIV_VFS_MOUNT_PERM); + if (error) { + VOP_UNLOCK(devvp, 0); + return (error); + } + } + + VOP_UNLOCK(devvp, 0); + DROP_GIANT(); + g_topology_lock(); + error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1, + 0); + g_topology_unlock(); + PICKUP_GIANT(); + if (error) + return (error); + + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_RDONLY; + MNT_IUNLOCK(mp); + error = start_syncer(nmp); + if (error == 0) + error = nandfs_start_cleaner(nmp->nm_nandfsdev); + if (error) { + DROP_GIANT(); + g_topology_lock(); + g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, + 0); + g_topology_unlock(); + PICKUP_GIANT(); + return (error); + } + + nmp->nm_ronly = 0; + } + return (0); + } + + from = vfs_getopts(opts, "from", &error); + if (error) + return (error); + + /* + * Find device node + */ + NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread); + error = namei(&nd); + if (error) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + + devvp = nd.ni_vp; + + if (!vn_isdisk(devvp, &error)) { + vput(devvp); + return (error); + } + + /* Check the access rights on the mount device */ + error = VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread); + if (error) + error = priv_check(curthread, PRIV_VFS_MOUNT_PERM); + if (error) { + vput(devvp); + return (error); + } + + vfs_getnewfsid(mp); + + error = 
nandfs_mountfs(devvp, mp); + if (error) + return (error); + vfs_mountedfrom(mp, from); + + return (0); +} + +static int +nandfs_mountfs(struct vnode *devvp, struct mount *mp) +{ + struct nandfsmount *nmp = NULL; + struct nandfs_args *args = NULL; + struct nandfs_device *nandfsdev; + char *from; + int error, ronly; + char *cpno; + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + + if (devvp->v_rdev->si_iosize_max != 0) + mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; + VOP_UNLOCK(devvp, 0); + + if (mp->mnt_iosize_max > MAXPHYS) + mp->mnt_iosize_max = MAXPHYS; + + from = vfs_getopts(mp->mnt_optnew, "from", &error); + if (error) + goto error; + + error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL); + if (error == ENOENT) + cpno = NULL; + else if (error) + goto error; + + args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args), + M_NANDFSMNT, M_WAITOK | M_ZERO); + + if (cpno != NULL) + args->cpno = strtoul(cpno, (char **)NULL, 10); + else + args->cpno = 0; + args->fspec = from; + + if (args->cpno != 0 && !ronly) { + error = EROFS; + goto error; + } + + printf("WARNING: NANDFS is considered to be a highly experimental " + "feature in FreeBSD.\n"); + + error = nandfs_mount_device(devvp, mp, args, &nandfsdev); + if (error) + goto error; + + nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount), + M_NANDFSMNT, M_WAITOK | M_ZERO); + + mp->mnt_data = nmp; + nmp->nm_vfs_mountp = mp; + nmp->nm_ronly = ronly; + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_MPSAFE; + MNT_IUNLOCK(mp); + nmp->nm_nandfsdev = nandfsdev; + /* Add our mountpoint */ + STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount); + + if (args->cpno > nandfsdev->nd_last_cno) { + printf("WARNING: supplied checkpoint number (%jd) is greater " + "than last known checkpoint on filesystem (%jd). 
Mounting" + " checkpoint %jd\n", (uintmax_t)args->cpno, + (uintmax_t)nandfsdev->nd_last_cno, + (uintmax_t)nandfsdev->nd_last_cno); + args->cpno = nandfsdev->nd_last_cno; + } + + /* Setting up other parameters */ + nmp->nm_mount_args = *args; + free(args, M_NANDFSMNT); + error = nandfs_mount_checkpoint(nmp); + if (error) { + nandfs_unmount(mp, MNT_FORCE); + goto unmounted; + } + + if (!ronly) { + error = start_syncer(nmp); + if (error == 0) + error = nandfs_start_cleaner(nmp->nm_nandfsdev); + if (error) + nandfs_unmount(mp, MNT_FORCE); + } + + return (0); + +error: + if (args != NULL) + free(args, M_NANDFSMNT); + + if (nmp != NULL) { + free(nmp, M_NANDFSMNT); + mp->mnt_data = NULL; + } +unmounted: + return (error); +} + +static int +nandfs_unmount(struct mount *mp, int mntflags) +{ + struct nandfs_device *nandfsdev; + struct nandfsmount *nmp; + int error; + int flags = 0; + + DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp)); + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + nmp = mp->mnt_data; + nandfsdev = nmp->nm_nandfsdev; + + error = vflush(mp, 0, flags | SKIPSYSTEM, curthread); + if (error) + return (error); + + if (!(nmp->nm_ronly)) { + nandfs_stop_cleaner(nandfsdev); + stop_syncer(nmp); + } + + if (nmp->nm_ifile_node) + NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node)); + + /* Remove our mount point */ + STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount); + + /* Unmount the device itself when we're the last one */ + nandfs_unmount_device(nandfsdev); + + free_nandfs_mountinfo(mp); + + /* + * Finally, throw away the null_mount structure + */ + mp->mnt_data = 0; + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_LOCAL; + MNT_IUNLOCK(mp); + + return (0); +} + +static int +nandfs_statfs(struct mount *mp, struct statfs *sbp) +{ + struct nandfsmount *nmp; + struct nandfs_device *nandfsdev; + struct nandfs_fsdata *fsdata; + struct nandfs_super_block *sb; + struct nandfs_block_group_desc *groups; + struct nandfs_node *ifile; + struct nandfs_mdt 
*mdt; + struct buf *bp; + int i, error; + uint32_t entries_per_group; + uint64_t files = 0; + + nmp = mp->mnt_data; + nandfsdev = nmp->nm_nandfsdev; + fsdata = &nandfsdev->nd_fsdata; + sb = &nandfsdev->nd_super; + ifile = nmp->nm_ifile_node; + mdt = &nandfsdev->nd_ifile_mdt; + entries_per_group = mdt->entries_per_group; + + VOP_LOCK(NTOV(ifile), LK_SHARED); + error = nandfs_bread(ifile, 0, NOCRED, 0, &bp); + if (error) { + brelse(bp); + VOP_UNLOCK(NTOV(ifile), 0); + return (error); + } + + groups = (struct nandfs_block_group_desc *)bp->b_data; + + for (i = 0; i < mdt->groups_per_desc_block; i++) + files += (entries_per_group - groups[i].bg_nfrees); + + brelse(bp); + VOP_UNLOCK(NTOV(ifile), 0); + + sbp->f_bsize = nandfsdev->nd_blocksize; + sbp->f_iosize = sbp->f_bsize; + sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments; + sbp->f_bfree = sb->s_free_blocks_count; + sbp->f_bavail = sbp->f_bfree; + sbp->f_files = files; + sbp->f_ffree = 0; + return (0); +} + +static int +nandfs_root(struct mount *mp, int flags, struct vnode **vpp) +{ + struct nandfsmount *nmp = VFSTONANDFS(mp); + struct nandfs_node *node; + int error; + + error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &node); + if (error) + return (error); + + KASSERT(NTOV(node)->v_vflag & VV_ROOT, + ("root_vp->v_vflag & VV_ROOT")); + + *vpp = NTOV(node); + + return (error); +} + +static int +nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) +{ + struct nandfsmount *nmp = VFSTONANDFS(mp); + struct nandfs_node *node; + int error; + + error = nandfs_get_node(nmp, ino, &node); + if (node) + *vpp = NTOV(node); + + return (error); +} + +static int +nandfs_sync(struct mount *mp, int waitfor) +{ + struct nandfsmount *nmp = VFSTONANDFS(mp); + + DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor)); + + /* + * XXX: A hack to be removed soon + */ + if (waitfor == MNT_LAZY) + return (0); + if (waitfor == MNT_SUSPEND) + return (0); + nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, 
SYNCER_VFS_SYNC); + return (0); +} + +static struct vfsops nandfs_vfsops = { + .vfs_init = nandfs_init, + .vfs_mount = nandfs_mount, + .vfs_root = nandfs_root, + .vfs_statfs = nandfs_statfs, + .vfs_uninit = nandfs_uninit, + .vfs_unmount = nandfs_unmount, + .vfs_vget = nandfs_vget, + .vfs_sync = nandfs_sync, +}; + +VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK); diff --git a/sys/fs/nandfs/nandfs_vnops.c b/sys/fs/nandfs/nandfs_vnops.c new file mode 100644 index 0000000..b226d30 --- /dev/null +++ b/sys/fs/nandfs/nandfs_vnops.c @@ -0,0 +1,2455 @@ +/*- + * Copyright (c) 2010-2012 Semihalf + * Copyright (c) 2008, 2009 Reinoud Zandijk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/lockf.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/bio.h> +#include <sys/fcntl.h> +#include <sys/dirent.h> +#include <sys/stat.h> +#include <sys/priv.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vnode_pager.h> + +#include <machine/_inttypes.h> + +#include <fs/nandfs/nandfs_mount.h> +#include <fs/nandfs/nandfs.h> +#include <fs/nandfs/nandfs_subr.h> + +extern uma_zone_t nandfs_node_zone; +static void nandfs_read_filebuf(struct nandfs_node *, struct buf *); +static void nandfs_itimes_locked(struct vnode *); +static int nandfs_truncate(struct vnode *, uint64_t); + +static vop_pathconf_t nandfs_pathconf; + +#define UPDATE_CLOSE 0 +#define UPDATE_WAIT 0 + +static int +nandfs_inactive(struct vop_inactive_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + int error = 0; + + DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node)); + + if (node == NULL) { + DPRINTF(NODE, ("%s: inactive NULL node\n", __func__)); + return (0); + } + + if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) { + nandfs_truncate(vp, 0); + error = nandfs_node_destroy(node); + if (error) + nandfs_error("%s: destroy node: %p\n", __func__, node); + node->nn_flags = 0; + vrecycle(vp); + } + + return (error); +} + +static int +nandfs_reclaim(struct vop_reclaim_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *nandfs_node = VTON(vp); + struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev; + uint64_t ino = nandfs_node->nn_ino; + + DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", 
__func__, vp, nandfs_node)); + + /* Invalidate all entries to a particular vnode. */ + cache_purge(vp); + + /* Destroy the vm object and flush associated pages. */ + vnode_destroy_vobject(vp); + + /* Remove from vfs hash if not system vnode */ + if (!NANDFS_SYS_NODE(nandfs_node->nn_ino)) + vfs_hash_remove(vp); + + /* Dispose all node knowledge */ + nandfs_dispose_node(&nandfs_node); + + if (!NANDFS_SYS_NODE(ino)) + NANDFS_WRITEUNLOCK(fsdev); + + return (0); +} + +static int +nandfs_read(struct vop_read_args *ap) +{ + register struct vnode *vp = ap->a_vp; + register struct nandfs_node *node = VTON(vp); + struct nandfs_device *nandfsdev = node->nn_nandfsdev; + struct uio *uio = ap->a_uio; + struct buf *bp; + uint64_t size; + uint32_t blocksize; + off_t bytesinfile; + ssize_t toread, off; + daddr_t lbn; + ssize_t resid; + int error = 0; + + if (uio->uio_resid == 0) + return (0); + + size = node->nn_inode.i_size; + if (uio->uio_offset >= size) + return (0); + + blocksize = nandfsdev->nd_blocksize; + bytesinfile = size - uio->uio_offset; + + resid = omin(uio->uio_resid, bytesinfile); + + while (resid) { + lbn = uio->uio_offset / blocksize; + off = uio->uio_offset & (blocksize - 1); + + toread = omin(resid, blocksize - off); + + DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n", + (uintmax_t)lbn, toread, blocksize)); + + error = nandfs_bread(node, lbn, NOCRED, 0, &bp); + if (error) { + brelse(bp); + break; + } + + error = uiomove(bp->b_data + off, toread, uio); + if (error) { + brelse(bp); + break; + } + + brelse(bp); + resid -= toread; + } + + return (error); +} + +static int +nandfs_write(struct vop_write_args *ap) +{ + struct nandfs_device *fsdev; + struct nandfs_node *node; + struct vnode *vp; + struct uio *uio; + struct buf *bp; + uint64_t file_size, vblk; + uint32_t blocksize; + ssize_t towrite, off; + daddr_t lbn; + ssize_t resid; + int error, ioflag, modified; + + vp = ap->a_vp; + uio = ap->a_uio; + ioflag = ap->a_ioflag; + node = VTON(vp); + fsdev = 
node->nn_nandfsdev; + + if (nandfs_fs_full(fsdev)) + return (ENOSPC); + + DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n", + uio->uio_resid, (uintmax_t)uio->uio_offset)); + + if (uio->uio_offset < 0) + return (EINVAL); + if (uio->uio_resid == 0) + return (0); + + blocksize = fsdev->nd_blocksize; + file_size = node->nn_inode.i_size; + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = file_size; + break; + case VDIR: + return (EISDIR); + case VLNK: + break; + default: + panic("%s: bad file type vp: %p", __func__, vp); + } + + /* If explicitly asked to append, uio_offset can be wrong? */ + if (ioflag & IO_APPEND) + uio->uio_offset = file_size; + + resid = uio->uio_resid; + modified = error = 0; + + while (uio->uio_resid) { + lbn = uio->uio_offset / blocksize; + off = uio->uio_offset & (blocksize - 1); + + towrite = omin(uio->uio_resid, blocksize - off); + + DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n", + __func__, (uintmax_t)lbn, towrite, blocksize)); + + error = nandfs_bmap_lookup(node, lbn, &vblk); + if (error) + break; + + DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) " + "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize, + vblk)); + + if (vblk != 0) + error = nandfs_bread(node, lbn, NOCRED, 0, &bp); + else + error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp); + + DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__, + vp, bp, (uintmax_t)lbn)); + if (error) { + if (bp) + brelse(bp); + break; + } + + error = uiomove((char *)bp->b_data + off, (int)towrite, uio); + if (error) + break; + + error = nandfs_dirty_buf(bp, 0); + if (error) + break; + + modified++; + } + + /* XXX proper handling when only part of file was properly written */ + if (modified) { + if (resid > uio->uio_resid && ap->a_cred && + ap->a_cred->cr_uid != 0) + node->nn_inode.i_mode &= ~(ISUID | ISGID); + + if (file_size < uio->uio_offset + uio->uio_resid) { + node->nn_inode.i_size = uio->uio_offset + + uio->uio_resid; + 
node->nn_flags |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(vp, uio->uio_offset + + uio->uio_resid); + nandfs_itimes(vp); + } + } + + DPRINTF(WRITE, ("%s: return:%d\n", __func__, error)); + + return (error); +} + +static int +nandfs_lookup(struct vop_cachedlookup_args *ap) +{ + struct vnode *dvp, **vpp; + struct componentname *cnp; + struct ucred *cred; + struct thread *td; + struct nandfs_node *dir_node, *node; + struct nandfsmount *nmp; + uint64_t ino, off; + const char *name; + int namelen, nameiop, islastcn, mounted_ro; + int error, found; + + DPRINTF(VNCALL, ("%s\n", __func__)); + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + *vpp = NULL; + + cnp = ap->a_cnp; + cred = cnp->cn_cred; + td = cnp->cn_thread; + + dir_node = VTON(dvp); + nmp = dir_node->nn_nmp; + + /* Simplify/clarification flags */ + nameiop = cnp->cn_nameiop; + islastcn = cnp->cn_flags & ISLASTCN; + mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY; + + /* + * If requesting a modify on the last path element on a read-only + * filingsystem, reject lookup; + */ + if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME)) + return (EROFS); + + if (dir_node->nn_inode.i_links_count == 0) + return (ENOENT); + + /* + * Obviously, the file is not (anymore) in the namecache, we have to + * search for it. There are three basic cases: '.', '..' and others. + * + * Following the guidelines of VOP_LOOKUP manpage and tmpfs. + */ + error = 0; + if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { + DPRINTF(LOOKUP, ("\tlookup '.'\n")); + /* Special case 1 '.' */ + VREF(dvp); + *vpp = dvp; + /* Done */ + } else if (cnp->cn_flags & ISDOTDOT) { + /* Special case 2 '..' 
*/ + DPRINTF(LOOKUP, ("\tlookup '..'\n")); + + /* Get our node */ + name = ".."; + namelen = 2; + error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino, + &found, &off); + if (error) + goto out; + if (!found) + error = ENOENT; + + /* First unlock parent */ + VOP_UNLOCK(dvp, 0); + + if (error == 0) { + DPRINTF(LOOKUP, ("\tfound '..'\n")); + /* Try to create/reuse the node */ + error = nandfs_get_node(nmp, ino, &node); + + if (!error) { + DPRINTF(LOOKUP, + ("\tnode retrieved/created OK\n")); + *vpp = NTOV(node); + } + } + + /* Try to relock parent */ + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + } else { + DPRINTF(LOOKUP, ("\tlookup file\n")); + /* All other files */ + /* Look up filename in the directory returning its inode */ + name = cnp->cn_nameptr; + namelen = cnp->cn_namelen; + error = nandfs_lookup_name_in_dir(dvp, name, namelen, + &ino, &found, &off); + if (error) + goto out; + if (!found) { + DPRINTF(LOOKUP, ("\tNOT found\n")); + /* + * UGH, didn't find name. If we're creating or + * renaming on the last name this is OK and we ought + * to return EJUSTRETURN if its allowed to be created. 
+ */ + error = ENOENT; + if ((nameiop == CREATE || nameiop == RENAME) && + islastcn) { + error = VOP_ACCESS(dvp, VWRITE, cred, + td); + if (!error) { + /* keep the component name */ + cnp->cn_flags |= SAVENAME; + error = EJUSTRETURN; + } + } + /* Done */ + } else { + if (ino == NANDFS_WHT_INO) + cnp->cn_flags |= ISWHITEOUT; + + if ((cnp->cn_flags & ISWHITEOUT) && + (nameiop == LOOKUP)) + return (ENOENT); + + if ((nameiop == DELETE) && islastcn) { + if ((cnp->cn_flags & ISWHITEOUT) && + (cnp->cn_flags & DOWHITEOUT)) { + cnp->cn_flags |= SAVENAME; + dir_node->nn_diroff = off; + return (EJUSTRETURN); + } + + error = VOP_ACCESS(dvp, VWRITE, cred, + cnp->cn_thread); + if (error) + return (error); + + /* Try to create/reuse the node */ + error = nandfs_get_node(nmp, ino, &node); + if (!error) { + *vpp = NTOV(node); + node->nn_diroff = off; + } + + if ((dir_node->nn_inode.i_mode & ISVTX) && + cred->cr_uid != 0 && + cred->cr_uid != dir_node->nn_inode.i_uid && + node->nn_inode.i_uid != cred->cr_uid) { + vput(*vpp); + *vpp = NULL; + return (EPERM); + } + } else if ((nameiop == RENAME) && islastcn) { + error = VOP_ACCESS(dvp, VWRITE, cred, + cnp->cn_thread); + if (error) + return (error); + + /* Try to create/reuse the node */ + error = nandfs_get_node(nmp, ino, &node); + if (!error) { + *vpp = NTOV(node); + node->nn_diroff = off; + } + } else { + /* Try to create/reuse the node */ + error = nandfs_get_node(nmp, ino, &node); + if (!error) { + *vpp = NTOV(node); + node->nn_diroff = off; + } + } + } + } + +out: + /* + * Store result in the cache if requested. If we are creating a file, + * the file might not be found and thus putting it into the namecache + * might be seen as negative caching. 
+ */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(dvp, *vpp, cnp); + + return (error); + +} + +static int +nandfs_getattr(struct vop_getattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct nandfs_node *node = VTON(vp); + struct nandfs_inode *inode = &node->nn_inode; + + DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp)); + nandfs_itimes(vp); + + /* Basic info */ + VATTR_NULL(vap); + vap->va_atime.tv_sec = inode->i_mtime; + vap->va_atime.tv_nsec = inode->i_mtime_nsec; + vap->va_mtime.tv_sec = inode->i_mtime; + vap->va_mtime.tv_nsec = inode->i_mtime_nsec; + vap->va_ctime.tv_sec = inode->i_ctime; + vap->va_ctime.tv_nsec = inode->i_ctime_nsec; + vap->va_type = IFTOVT(inode->i_mode); + vap->va_mode = inode->i_mode & ~S_IFMT; + vap->va_nlink = inode->i_links_count; + vap->va_uid = inode->i_uid; + vap->va_gid = inode->i_gid; + vap->va_rdev = inode->i_special; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + vap->va_fileid = node->nn_ino; + vap->va_size = inode->i_size; + vap->va_blocksize = node->nn_nandfsdev->nd_blocksize; + vap->va_gen = 0; + vap->va_flags = inode->i_flags; + vap->va_bytes = inode->i_blocks * vap->va_blocksize; + vap->va_filerev = 0; + vap->va_vaflags = 0; + + return (0); +} + +static int +nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks) +{ + struct nandfs_device *nffsdev; + struct bufobj *bo; + struct buf *bp, *nbp; + + bo = &vp->v_bufobj; + nffsdev = VTON(vp)->nn_nandfsdev; + + ASSERT_VOP_LOCKED(vp, "nandfs_truncate"); +restart: + BO_LOCK(bo); +restart_locked: + TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { + if (bp->b_lblkno < nblks) + continue; + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) + goto restart_locked; + + bremfree(bp); + bp->b_flags |= (B_INVAL | B_RELBUF); + bp->b_flags &= ~(B_ASYNC | B_MANAGED); + BO_UNLOCK(bo); + brelse(bp); + BO_LOCK(bo); + } + + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { + if (bp->b_lblkno < nblks) + continue; 
+ if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_MTX(bo)) == ENOLCK) + goto restart; + bp->b_flags |= (B_INVAL | B_RELBUF); + bp->b_flags &= ~(B_ASYNC | B_MANAGED); + brelse(bp); + nandfs_dirty_bufs_decrement(nffsdev); + BO_LOCK(bo); + } + + BO_UNLOCK(bo); + + return (0); +} + +static int +nandfs_truncate(struct vnode *vp, uint64_t newsize) +{ + struct nandfs_device *nffsdev; + struct nandfs_node *node; + struct nandfs_inode *inode; + struct buf *bp = NULL; + uint64_t oblks, nblks, vblk, size, rest; + int error; + + node = VTON(vp); + nffsdev = node->nn_nandfsdev; + inode = &node->nn_inode; + + /* Calculate end of file */ + size = inode->i_size; + + if (newsize == size) { + node->nn_flags |= IN_CHANGE | IN_UPDATE; + nandfs_itimes(vp); + return (0); + } + + if (newsize > size) { + inode->i_size = newsize; + vnode_pager_setsize(vp, newsize); + node->nn_flags |= IN_CHANGE | IN_UPDATE; + nandfs_itimes(vp); + return (0); + } + + nblks = howmany(newsize, nffsdev->nd_blocksize); + oblks = howmany(size, nffsdev->nd_blocksize); + rest = newsize % nffsdev->nd_blocksize; + + if (rest) { + error = nandfs_bmap_lookup(node, nblks - 1, &vblk); + if (error) + return (error); + + if (vblk != 0) + error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp); + else + error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp); + + if (error) { + if (bp) + brelse(bp); + return (error); + } + + bzero((char *)bp->b_data + rest, + (u_int)(nffsdev->nd_blocksize - rest)); + error = nandfs_dirty_buf(bp, 0); + if (error) + return (error); + } + + DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks, + nblks)); + + error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1); + if (error) { + if (bp) + nandfs_undirty_buf(bp); + return (error); + } + + error = nandfs_vtruncbuf(vp, nblks); + if (error) { + if (bp) + nandfs_undirty_buf(bp); + return (error); + } + + inode->i_size = newsize; + vnode_pager_setsize(vp, newsize); + node->nn_flags |= IN_CHANGE | 
IN_UPDATE; + nandfs_itimes(vp); + + return (error); +} + +static void +nandfs_itimes_locked(struct vnode *vp) +{ + struct nandfs_node *node; + struct nandfs_inode *inode; + struct timespec ts; + + ASSERT_VI_LOCKED(vp, __func__); + + node = VTON(vp); + inode = &node->nn_inode; + + if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) + return; + + if (((vp->v_mount->mnt_kern_flag & + (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || + (node->nn_flags & (IN_CHANGE | IN_UPDATE))) + node->nn_flags |= IN_MODIFIED; + + vfs_timestamp(&ts); + if (node->nn_flags & IN_UPDATE) { + inode->i_mtime = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; + } + if (node->nn_flags & IN_CHANGE) { + inode->i_ctime = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; + } + + node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); +} + +void +nandfs_itimes(struct vnode *vp) +{ + + VI_LOCK(vp); + nandfs_itimes_locked(vp); + VI_UNLOCK(vp); +} + +static int +nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) +{ + struct nandfs_node *node = VTON(vp); + struct nandfs_inode *inode = &node->nn_inode; + uint16_t nmode; + int error = 0; + + DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, + mode, cred, td)); + /* + * To modify the permissions on a file, must possess VADMIN + * for that file. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + + /* + * Privileged processes may set the sticky bit on non-directories, + * as well as set the setgid bit on a file with a group that the + * process is not a member of. Both of these are allowed in + * jail(8). + */ + if (vp->v_type != VDIR && (mode & S_ISTXT)) { + if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) + return (EFTYPE); + } + if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) { + error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); + if (error) + return (error); + } + + /* + * Deny setting setuid if we are not the file owner. 
+ */ + if ((mode & ISUID) && inode->i_uid != cred->cr_uid) { + error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0); + if (error) + return (error); + } + + nmode = inode->i_mode; + nmode &= ~ALLPERMS; + nmode |= (mode & ALLPERMS); + inode->i_mode = nmode; + node->nn_flags |= IN_CHANGE; + + DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode)); + + return (error); +} + +static int +nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, + struct thread *td) +{ + struct nandfs_node *node = VTON(vp); + struct nandfs_inode *inode = &node->nn_inode; + uid_t ouid; + gid_t ogid; + int error = 0; + + if (uid == (uid_t)VNOVAL) + uid = inode->i_uid; + if (gid == (gid_t)VNOVAL) + gid = inode->i_gid; + /* + * To modify the ownership of a file, must possess VADMIN for that + * file. + */ + if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) + return (error); + /* + * To change the owner of a file, or change the group of a file to a + * group of which we are not a member, the caller must have + * privilege. 
+ */ + if (((uid != inode->i_uid && uid != cred->cr_uid) || + (gid != inode->i_gid && !groupmember(gid, cred))) && + (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) + return (error); + ogid = inode->i_gid; + ouid = inode->i_uid; + + inode->i_gid = gid; + inode->i_uid = uid; + + node->nn_flags |= IN_CHANGE; + if ((inode->i_mode & (ISUID | ISGID)) && + (ouid != uid || ogid != gid)) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) { + inode->i_mode &= ~(ISUID | ISGID); + } + } + DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, + cred, td)); + return (0); +} + +static int +nandfs_setattr(struct vop_setattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + struct nandfs_inode *inode = &node->nn_inode; + struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct thread *td = curthread; + uint32_t flags; + int error = 0; + + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { + DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__)); + return (EINVAL); + } + + if (vap->va_flags != VNOVAL) { + DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp, + td, vap->va_flags)); + + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + /* + * Callers may only modify the file flags on objects they + * have VADMIN rights for. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + /* + * Unprivileged processes are not permitted to unset system + * flags, or modify flags if any system flags are set. + * Privileged non-jail processes may not modify system flags + * if securelevel > 0 and any existing system flags are set. 
+ * Privileged jail processes behave like privileged non-jail + * processes if the security.jail.chflags_allowed sysctl is + * is non-zero; otherwise, they behave like unprivileged + * processes. + */ + + flags = inode->i_flags; + if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { + if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { + error = securelevel_gt(cred, 0); + if (error) + return (error); + } + /* Snapshot flag cannot be set or cleared */ + if (((vap->va_flags & SF_SNAPSHOT) != 0 && + (flags & SF_SNAPSHOT) == 0) || + ((vap->va_flags & SF_SNAPSHOT) == 0 && + (flags & SF_SNAPSHOT) != 0)) + return (EPERM); + + inode->i_flags = vap->va_flags; + } else { + if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || + (vap->va_flags & UF_SETTABLE) != vap->va_flags) + return (EPERM); + + flags &= SF_SETTABLE; + flags |= (vap->va_flags & UF_SETTABLE); + inode->i_flags = flags; + } + node->nn_flags |= IN_CHANGE; + if (vap->va_flags & (IMMUTABLE | APPEND)) + return (0); + } + if (inode->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + + if (vap->va_size != (u_quad_t)VNOVAL) { + DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td, + (uintmax_t)vap->va_size)); + + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((inode->i_flags & SF_SNAPSHOT) != 0) + return (EPERM); + break; + default: + return (0); + } + + if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize) + return (EFBIG); + + KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type)); + nandfs_truncate(vp, vap->va_size); + node->nn_flags |= IN_CHANGE; + + return (0); + } + + if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__, + vp, td, vap->va_uid, vap->va_gid)); + error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td); + if (error) + return (error); + } + 
+ if (vap->va_mode != (mode_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td, + vap->va_mode)); + + error = nandfs_chmod(vp, (int)vap->va_mode, cred, td); + if (error) + return (error); + } + if (vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || + vap->va_birthtime.tv_sec != VNOVAL) { + DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n", + __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec, + (uintmax_t)vap->va_mtime.tv_sec, + (uintmax_t)vap->va_birthtime.tv_sec)); + + if (vap->va_atime.tv_sec != VNOVAL) + node->nn_flags |= IN_ACCESS; + if (vap->va_mtime.tv_sec != VNOVAL) + node->nn_flags |= IN_CHANGE | IN_UPDATE; + if (vap->va_birthtime.tv_sec != VNOVAL) + node->nn_flags |= IN_MODIFIED; + nandfs_itimes(vp); + return (0); + } + + return (0); +} + +static int +nandfs_open(struct vop_open_args *ap) +{ + struct nandfs_node *node = VTON(ap->a_vp); + uint64_t filesize; + + DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode)); + + if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) + return (EOPNOTSUPP); + + if ((node->nn_inode.i_flags & APPEND) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + + filesize = node->nn_inode.i_size; + vnode_create_vobject(ap->a_vp, filesize, ap->a_td); + + return (0); +} + +static int +nandfs_close(struct vop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + + DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node)); + + mtx_lock(&vp->v_interlock); + if (vp->v_usecount > 1) + nandfs_itimes_locked(vp); + mtx_unlock(&vp->v_interlock); + + return (0); +} + +static int +nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode) +{ + + /* Check if we are allowed to write */ + switch (vap->va_type) { + case VDIR: + case VLNK: + case VREG: + /* + * Normal nodes: check if we're on a read-only mounted + * filingsystem and bomb out if 
we're trying to write. + */ + if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) + return (EROFS); + break; + case VBLK: + case VCHR: + case VSOCK: + case VFIFO: + /* + * Special nodes: even on read-only mounted filingsystems + * these are allowed to be written to if permissions allow. + */ + break; + default: + /* No idea what this is */ + return (EINVAL); + } + + /* Noone may write immutable files */ + if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE)) + return (EPERM); + + return (0); +} + +static int +nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode, + struct ucred *cred) +{ + + return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode, + cred, NULL)); +} + +static int +nandfs_advlock(struct vop_advlock_args *ap) +{ + struct nandfs_node *nvp; + quad_t size; + + nvp = VTON(ap->a_vp); + size = nvp->nn_inode.i_size; + return (lf_advlock(ap, &(nvp->nn_lockf), size)); +} + +static int +nandfs_access(struct vop_access_args *ap) +{ + struct vnode *vp = ap->a_vp; + accmode_t accmode = ap->a_accmode; + struct ucred *cred = ap->a_cred; + struct vattr vap; + int error; + + DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode)); + + error = VOP_GETATTR(vp, &vap, NULL); + if (error) + return (error); + + error = nandfs_check_possible(vp, &vap, accmode); + if (error) { + return (error); + } + + error = nandfs_check_permitted(vp, &vap, accmode, cred); + + return (error); +} + +static int +nandfs_print(struct vop_print_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *nvp = VTON(vp); + + printf("\tvp=%p, nandfs_node=%p\n", vp, nvp); + printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino); + printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS); + + return (0); +} + +static void +nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp) +{ + struct nandfs_device *nandfsdev = node->nn_nandfsdev; + struct buf *nbp; + nandfs_daddr_t vblk, pblk; + nandfs_lbn_t from; + uint32_t 
blocksize; + int error = 0; + int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE; + + /* + * Translate all the block sectors into a series of buffers to read + * asynchronously from the nandfs device. Note that this lookup may + * induce readin's too. + */ + + blocksize = nandfsdev->nd_blocksize; + if (bp->b_bcount / blocksize != 1) + panic("invalid b_count in bp %p\n", bp); + + from = bp->b_blkno; + + DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx" + " count %#lx\n", (uintmax_t)node->nn_ino, from, + bp->b_bcount)); + + /* Get virtual block numbers for the vnode's buffer span */ + error = nandfs_bmap_lookup(node, from, &vblk); + if (error) { + bp->b_error = EINVAL; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return; + } + + /* Translate virtual block numbers to physical block numbers */ + error = nandfs_vtop(node, vblk, &pblk); + if (error) { + bp->b_error = EINVAL; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return; + } + + /* Issue translated blocks */ + bp->b_resid = bp->b_bcount; + + /* Note virtual block 0 marks not mapped */ + if (vblk == 0) { + vfs_bio_clrbuf(bp); + bufdone(bp); + return; + } + + nbp = bp; + nbp->b_blkno = pblk * blk2dev; + bp->b_iooffset = dbtob(nbp->b_blkno); + MPASS(bp->b_iooffset >= 0); + BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp); + nandfs_vblk_set(bp, vblk); + DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> " + "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino, + (uintmax_t)(from), (uintmax_t)vblk, + (uintmax_t)pblk, nbp)); +} + +static void +nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp) +{ + struct nandfs_device *nandfsdev = node->nn_nandfsdev; + + bp->b_iooffset = dbtob(bp->b_blkno); + MPASS(bp->b_iooffset >= 0); + BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp); +} + +static int +nandfs_strategy(struct vop_strategy_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct buf *bp = ap->a_bp; + struct nandfs_node *node = VTON(vp); + + + /* check if we ought to be here */ + KASSERT((vp->v_type != VBLK 
&& vp->v_type != VCHR), + ("nandfs_strategy on type %d", vp->v_type)); + + /* Translate if needed and pass on */ + if (bp->b_iocmd == BIO_READ) { + nandfs_read_filebuf(node, bp); + return (0); + } + + /* Send to segment collector */ + nandfs_write_filebuf(node, bp); + return (0); +} + +static int +nandfs_readdir(struct vop_readdir_args *ap) +{ + struct uio *uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + struct nandfs_dir_entry *ndirent; + struct dirent dirent; + struct buf *bp; + uint64_t file_size, diroffset, transoffset, blkoff; + uint64_t blocknr; + uint32_t blocksize = node->nn_nandfsdev->nd_blocksize; + uint8_t *pos, name_len; + int error; + + DPRINTF(READDIR, ("nandfs_readdir called\n")); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + file_size = node->nn_inode.i_size; + DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n", + (uintmax_t)file_size, uio->uio_resid )); + + /* We are called just as long as we keep on pushing data in */ + error = 0; + if ((uio->uio_offset < file_size) && + (uio->uio_resid >= sizeof(struct dirent))) { + diroffset = uio->uio_offset; + transoffset = diroffset; + + blocknr = diroffset / blocksize; + blkoff = diroffset % blocksize; + error = nandfs_bread(node, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (EIO); + } + while (diroffset < file_size) { + DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n", + diroffset)); + if (blkoff >= blocksize) { + blkoff = 0; blocknr++; + brelse(bp); + error = nandfs_bread(node, blocknr, NOCRED, 0, + &bp); + if (error) { + brelse(bp); + return (EIO); + } + } + + /* Read in one dirent */ + pos = (uint8_t *)bp->b_data + blkoff; + ndirent = (struct nandfs_dir_entry *)pos; + + name_len = ndirent->name_len; + memset(&dirent, 0, sizeof(struct dirent)); + dirent.d_fileno = ndirent->inode; + if (dirent.d_fileno) { + dirent.d_type = ndirent->file_type; + dirent.d_namlen = name_len; + strncpy(dirent.d_name, ndirent->name, name_len); + 
dirent.d_reclen = GENERIC_DIRSIZ(&dirent); + DPRINTF(READDIR, ("copying `%*.*s`\n", name_len, + name_len, dirent.d_name)); + } + + /* + * If there isn't enough space in the uio to return a + * whole dirent, break off read + */ + if (uio->uio_resid < GENERIC_DIRSIZ(&dirent)) + break; + + /* Transfer */ + if (dirent.d_fileno) + uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio); + + /* Advance */ + diroffset += ndirent->rec_len; + blkoff += ndirent->rec_len; + + /* Remember the last entry we transfered */ + transoffset = diroffset; + } + brelse(bp); + + /* Pass on last transfered offset */ + uio->uio_offset = transoffset; + } + + if (ap->a_eofflag) + *ap->a_eofflag = (uio->uio_offset >= file_size); + + return (error); +} + +static int +nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred) +{ + struct nandfs_node *dnode = VTON(dvp); + struct nandfs_dir_entry *dirent; + uint64_t file_size = dnode->nn_inode.i_size; + uint64_t blockcount = dnode->nn_inode.i_blocks; + uint64_t blocknr; + uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize; + uint32_t limit; + uint32_t off; + uint8_t *pos; + struct buf *bp; + int error; + + DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp, + (uintmax_t)parentino, cred)); + + KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp)); + + blocknr = 0; + while (blocknr < blockcount) { + error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp); + if (error) { + brelse(bp); + return (0); + } + + pos = (uint8_t *)bp->b_data; + off = 0; + + if (blocknr == (blockcount - 1)) + limit = file_size % blocksize; + else + limit = blocksize; + + while (off < limit) { + dirent = (struct nandfs_dir_entry *)(pos + off); + off += dirent->rec_len; + + if (dirent->inode == 0) + continue; + + switch (dirent->name_len) { + case 0: + break; + case 1: + if (dirent->name[0] != '.') + goto notempty; + + KASSERT(dirent->inode == dnode->nn_ino, + (".'s inode does not match dir")); + break; + case 2: + if (dirent->name[0] 
!= '.' && + dirent->name[1] != '.') + goto notempty; + + KASSERT(dirent->inode == parentino, + ("..'s inode does not match parent")); + break; + default: + goto notempty; + } + } + + brelse(bp); + blocknr++; + } + + return (1); +notempty: + brelse(bp); + return (0); +} + +static int +nandfs_link(struct vop_link_args *ap) +{ + struct vnode *tdvp = ap->a_tdvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + struct nandfs_node *node = VTON(vp); + struct nandfs_inode *inode = &node->nn_inode; + int error; + + if (tdvp->v_mount != vp->v_mount) + return (EXDEV); + + if (inode->i_links_count >= LINK_MAX) + return (EMLINK); + + if (inode->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + + /* Update link count */ + inode->i_links_count++; + + /* Add dir entry */ + error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr, + cnp->cn_namelen, IFTODT(inode->i_mode)); + if (error) { + inode->i_links_count--; + } + + node->nn_flags |= IN_CHANGE; + nandfs_itimes(vp); + DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n", + __func__, tdvp, vp, cnp)); + + return (0); +} + +static int +nandfs_create(struct vop_create_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + struct nandfs_node *dir_node = VTON(dvp); + struct nandfsmount *nmp = dir_node->nn_nmp; + struct nandfs_node *node; + int error; + + DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp)); + + if (nandfs_fs_full(dir_node->nn_nandfsdev)) + return (ENOSPC); + + /* Create new vnode/inode */ + error = nandfs_node_create(nmp, &node, mode); + if (error) + return (error); + node->nn_inode.i_gid = dir_node->nn_inode.i_gid; + node->nn_inode.i_uid = cnp->cn_cred->cr_uid; + + /* Add new dir entry */ + error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, + cnp->cn_namelen, IFTODT(mode)); + if (error) { + if (nandfs_node_destroy(node)) { + nandfs_error("%s: error 
destroying node %p\n", + __func__, node); + } + return (error); + } + *vpp = NTOV(node); + + DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node, + (uintmax_t)node->nn_ino)); + return (0); +} + +static int +nandfs_remove(struct vop_remove_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct nandfs_node *node = VTON(vp); + struct nandfs_node *dnode = VTON(dvp); + struct componentname *cnp = ap->a_cnp; + + DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n", + __func__, dvp, vp, node, (uintmax_t)node->nn_ino, + node->nn_inode.i_links_count)); + + if (vp->v_type == VDIR) + return (EISDIR); + + /* Files marked as immutable or append-only cannot be deleted. */ + if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) || + (dnode->nn_inode.i_flags & APPEND)) + return (EPERM); + + nandfs_remove_dirent(dvp, node, cnp); + node->nn_inode.i_links_count--; + node->nn_flags |= IN_CHANGE; + + return (0); +} + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always vput before returning. + */ +static int +nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest, + struct ucred *cred) +{ + struct vnode *vp; + int error, rootino; + struct nandfs_dir_entry dirent; + + vp = NTOV(dest); + if (src->nn_ino == dest->nn_ino) { + error = EEXIST; + goto out; + } + rootino = NANDFS_ROOT_INO; + error = 0; + if (dest->nn_ino == rootino) + goto out; + + for (;;) { + if (vp->v_type != VDIR) { + error = ENOTDIR; + break; + } + + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent, + NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, + NULL, NULL); + if (error != 0) + break; + if (dirent.name_len != 2 || + dirent.name[0] != '.' 
|| + dirent.name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirent.inode == src->nn_ino) { + error = EINVAL; + break; + } + if (dirent.inode == rootino) + break; + vput(vp); + if ((error = VFS_VGET(vp->v_mount, dirent.inode, + LK_EXCLUSIVE, &vp)) != 0) { + vp = NULL; + break; + } + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. not a directory\n"); + if (vp != NULL) + vput(vp); + return (error); +} + +static int +nandfs_rename(struct vop_rename_args *ap) +{ + struct vnode *tvp = ap->a_tvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + int doingdirectory = 0, oldparent = 0, newparent = 0; + int error = 0; + + struct nandfs_node *fdnode, *fnode, *fnode1; + struct nandfs_node *tdnode = VTON(tdvp); + struct nandfs_node *tnode; + + uint32_t tdflags, fflags, fdflags; + uint16_t mode; + + DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp, + fvp, tdvp, tvp)); + + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; +abortit: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + return (error); + } + + tdflags = tdnode->nn_inode.i_flags; + if (tvp && + ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || + (tdflags & APPEND))) { + error = EPERM; + goto abortit; + } + + /* + * Renaming a file to itself has no effect. The upper layers should + * not call us in that case. Temporarily just warn if they do. 
+ */ + if (fvp == tvp) { + printf("nandfs_rename: fvp == tvp (can't happen)\n"); + error = 0; + goto abortit; + } + + if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) + goto abortit; + + fdnode = VTON(fdvp); + fnode = VTON(fvp); + + if (fnode->nn_inode.i_links_count >= LINK_MAX) { + VOP_UNLOCK(fvp, 0); + error = EMLINK; + goto abortit; + } + + fflags = fnode->nn_inode.i_flags; + fdflags = fdnode->nn_inode.i_flags; + + if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) || + (fdflags & APPEND)) { + VOP_UNLOCK(fvp, 0); + error = EPERM; + goto abortit; + } + + mode = fnode->nn_inode.i_mode; + if ((mode & S_IFMT) == S_IFDIR) { + /* + * Avoid ".", "..", and aliases of "." for obvious reasons. + */ + + if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || + (fdvp == fvp) || + ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) || + (fnode->nn_flags & IN_RENAME)) { + VOP_UNLOCK(fvp, 0); + error = EINVAL; + goto abortit; + } + fnode->nn_flags |= IN_RENAME; + doingdirectory = 1; + DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__, + tdvp)); + oldparent = fdnode->nn_ino; + } + + vrele(fdvp); + + tnode = NULL; + if (tvp) + tnode = VTON(tvp); + + /* + * Bump link count on fvp while we are moving stuff around. If we + * crash before completing the work, the link count may be wrong + * but correctable. 
+ */ + fnode->nn_inode.i_links_count++; + + /* Check for in path moving XXX */ + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); + VOP_UNLOCK(fvp, 0); + if (oldparent != tdnode->nn_ino) + newparent = tdnode->nn_ino; + if (doingdirectory && newparent) { + if (error) /* write access check above */ + goto bad; + if (tnode != NULL) + vput(tvp); + + error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred); + if (error) + goto out; + + VREF(tdvp); + error = relookup(tdvp, &tvp, tcnp); + if (error) + goto out; + vrele(tdvp); + tdnode = VTON(tdvp); + tnode = NULL; + if (tvp) + tnode = VTON(tvp); + } + + /* + * If the target doesn't exist, link the target to the source and + * unlink the source. Otherwise, rewrite the target directory to + * reference the source and remove the original entry. + */ + + if (tvp == NULL) { + /* + * Account for ".." in new directory. + */ + if (doingdirectory && fdvp != tdvp) + tdnode->nn_inode.i_links_count++; + + DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp)); + /* + * Add name in new directory. + */ + error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr, + tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode)); + if (error) { + if (doingdirectory && fdvp != tdvp) + tdnode->nn_inode.i_links_count--; + goto bad; + } + + vput(tdvp); + } else { + /* + * If the parent directory is "sticky", then the user must + * own the parent directory, or the destination of the rename, + * otherwise the destination may not be changed (except by + * root). This implements append-only directories. + */ + if ((tdnode->nn_inode.i_mode & S_ISTXT) && + tcnp->cn_cred->cr_uid != 0 && + tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid && + tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) { + error = EPERM; + goto bad; + } + /* + * Target must be empty if a directory and have no links + * to it. Also, ensure source and target are compatible + * (both directories, or both not directories). 
+ */ + mode = tnode->nn_inode.i_mode; + if ((mode & S_IFMT) == S_IFDIR) { + if (!nandfs_dirempty(tvp, tdnode->nn_ino, + tcnp->cn_cred)) { + error = ENOTEMPTY; + goto bad; + } + if (!doingdirectory) { + error = ENOTDIR; + goto bad; + } + /* + * Update name cache since directory is going away. + */ + cache_purge(tdvp); + } else if (doingdirectory) { + error = EISDIR; + goto bad; + } + + DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp)); + /* + * Change name tcnp in tdvp to point at fvp. + */ + error = nandfs_update_dirent(tdvp, fnode, tnode); + if (error) + goto bad; + + if (doingdirectory && !newparent) + tdnode->nn_inode.i_links_count--; + + vput(tdvp); + + tnode->nn_inode.i_links_count--; + vput(tvp); + tnode = NULL; + } + + /* + * Unlink the source. + */ + fcnp->cn_flags &= ~MODMASK; + fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; + VREF(fdvp); + error = relookup(fdvp, &fvp, fcnp); + if (error == 0) + vrele(fdvp); + if (fvp != NULL) { + fnode1 = VTON(fvp); + fdnode = VTON(fdvp); + } else { + /* + * From name has disappeared. + */ + if (doingdirectory) + panic("nandfs_rename: lost dir entry"); + vrele(ap->a_fvp); + return (0); + } + + DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode)); + + /* + * Ensure that the directory entry still exists and has not + * changed while the new name has been entered. If the source is + * a file then the entry may have been unlinked or renamed. In + * either case there is no further work to be done. If the source + * is a directory then it cannot have been rmdir'ed; its link + * count of three would cause a rmdir to fail with ENOTEMPTY. + * The IN_RENAME flag ensures that it cannot be moved by another + * rename. + */ + if (fnode != fnode1) { + if (doingdirectory) + panic("nandfs: lost dir entry"); + } else { + /* + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. 
+ */ + if (doingdirectory && newparent) { + DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n", + __func__, (uintmax_t) oldparent, + (uintmax_t) newparent)); + error = nandfs_update_parent_dir(fvp, newparent); + if (!error) { + fdnode->nn_inode.i_links_count--; + fdnode->nn_flags |= IN_CHANGE; + } + } + error = nandfs_remove_dirent(fdvp, fnode, fcnp); + if (!error) { + fnode->nn_inode.i_links_count--; + fnode->nn_flags |= IN_CHANGE; + } + fnode->nn_flags &= ~IN_RENAME; + } + if (fdnode) + vput(fdvp); + if (fnode) + vput(fvp); + vrele(ap->a_fvp); + return (error); + +bad: + DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error)); + if (tnode) + vput(NTOV(tnode)); + vput(NTOV(tdnode)); +out: + if (doingdirectory) + fnode->nn_flags &= ~IN_RENAME; + if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { + fnode->nn_inode.i_links_count--; + fnode->nn_flags |= IN_CHANGE; + fnode->nn_flags &= ~IN_RENAME; + vput(fvp); + } else + vrele(fvp); + return (error); +} + +static int +nandfs_mkdir(struct vop_mkdir_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct nandfs_node *dir_node = VTON(dvp); + struct nandfs_inode *dir_inode = &dir_node->nn_inode; + struct nandfs_node *node; + struct nandfsmount *nmp = dir_node->nn_nmp; + uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + int error; + + DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp)); + + if (nandfs_fs_full(dir_node->nn_nandfsdev)) + return (ENOSPC); + + if (dir_inode->i_links_count >= LINK_MAX) + return (EMLINK); + + error = nandfs_node_create(nmp, &node, mode); + if (error) + return (error); + + node->nn_inode.i_gid = dir_node->nn_inode.i_gid; + node->nn_inode.i_uid = cnp->cn_cred->cr_uid; + + *vpp = NTOV(node); + + error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, + cnp->cn_namelen, IFTODT(mode)); + if (error) { + vput(*vpp); + return (error); + } + + dir_node->nn_inode.i_links_count++; + dir_node->nn_flags |= IN_CHANGE; + + error = 
nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino); + if (error) { + vput(NTOV(node)); + return (error); + } + + DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node, + (uintmax_t)node->nn_ino)); + return (0); +} + +static int +nandfs_mknod(struct vop_mknod_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct vattr *vap = ap->a_vap; + uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode); + struct componentname *cnp = ap->a_cnp; + struct nandfs_node *dir_node = VTON(dvp); + struct nandfsmount *nmp = dir_node->nn_nmp; + struct nandfs_node *node; + int error; + + if (nandfs_fs_full(dir_node->nn_nandfsdev)) + return (ENOSPC); + + error = nandfs_node_create(nmp, &node, mode); + if (error) + return (error); + node->nn_inode.i_gid = dir_node->nn_inode.i_gid; + node->nn_inode.i_uid = cnp->cn_cred->cr_uid; + if (vap->va_rdev != VNOVAL) + node->nn_inode.i_special = vap->va_rdev; + + *vpp = NTOV(node); + + if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, + cnp->cn_namelen, IFTODT(mode))) { + vput(*vpp); + return (ENOTDIR); + } + + node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + + return (0); +} + +static int +nandfs_symlink(struct vop_symlink_args *ap) +{ + struct vnode **vpp = ap->a_vpp; + struct vnode *dvp = ap->a_dvp; + uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + struct componentname *cnp = ap->a_cnp; + struct nandfs_node *dir_node = VTON(dvp); + struct nandfsmount *nmp = dir_node->nn_nmp; + struct nandfs_node *node; + int len, error; + + if (nandfs_fs_full(dir_node->nn_nandfsdev)) + return (ENOSPC); + + error = nandfs_node_create(nmp, &node, S_IFLNK | mode); + if (error) + return (error); + node->nn_inode.i_gid = dir_node->nn_inode.i_gid; + node->nn_inode.i_uid = cnp->cn_cred->cr_uid; + + *vpp = NTOV(node); + + if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, + cnp->cn_namelen, IFTODT(mode))) { + vput(*vpp); + return (ENOTDIR); + } + + + len = 
strlen(ap->a_target); + error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, + cnp->cn_cred, NOCRED, NULL, NULL); + if (error) + vput(*vpp); + + return (error); +} + +static int +nandfs_readlink(struct vop_readlink_args *ap) +{ + struct vnode *vp = ap->a_vp; + + return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); +} + +static int +nandfs_rmdir(struct vop_rmdir_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct nandfs_node *node, *dnode; + uint32_t dflag, flag; + int error = 0; + + node = VTON(vp); + dnode = VTON(dvp); + + /* Files marked as immutable or append-only cannot be deleted. */ + if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) || + (dnode->nn_inode.i_flags & APPEND)) + return (EPERM); + + DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__, + dvp, vp, node, (uintmax_t)node->nn_ino)); + + if (node->nn_inode.i_links_count < 2) + return (EINVAL); + + if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred)) + return (ENOTEMPTY); + + /* Files marked as immutable or append-only cannot be deleted. 
*/ + dflag = dnode->nn_inode.i_flags; + flag = node->nn_inode.i_flags; + if ((dflag & APPEND) || + (flag & (NOUNLINK | IMMUTABLE | APPEND))) { + return (EPERM); + } + + if (vp->v_mountedhere != 0) + return (EINVAL); + + nandfs_remove_dirent(dvp, node, cnp); + dnode->nn_inode.i_links_count -= 1; + dnode->nn_flags |= IN_CHANGE; + + cache_purge(dvp); + + error = nandfs_truncate(vp, (uint64_t)0); + if (error) + return (error); + + node->nn_inode.i_links_count -= 2; + node->nn_flags |= IN_CHANGE; + + cache_purge(vp); + + return (error); +} + +static int +nandfs_fsync(struct vop_fsync_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + int locked; + + DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp, + node, (uintmax_t)node->nn_ino)); + + /* + * Start syncing vnode only if inode was modified or + * there are some dirty buffers + */ + if (VTON(vp)->nn_flags & IN_MODIFIED || + vp->v_bufobj.bo_dirty.bv_cnt) { + locked = VOP_ISLOCKED(vp); + VOP_UNLOCK(vp, 0); + nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC); + VOP_LOCK(vp, locked | LK_RETRY); + } + + return (0); +} + +static int +nandfs_bmap(struct vop_bmap_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *nnode = VTON(vp); + struct nandfs_device *nandfsdev = nnode->nn_nandfsdev; + nandfs_daddr_t l2vmap, v2pmap; + int error; + int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE; + + DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp, + nnode, (uintmax_t)nnode->nn_ino)); + + if (ap->a_bop != NULL) + *ap->a_bop = &nandfsdev->nd_devvp->v_bufobj; + if (ap->a_bnp == NULL) + return (0); + if (ap->a_runp != NULL) + *ap->a_runp = 0; + if (ap->a_runb != NULL) + *ap->a_runb = 0; + + /* + * Translate all the block sectors into a series of buffers to read + * asynchronously from the nandfs device. Note that this lookup may + * induce readin's too. 
+ */ + + /* Get virtual block numbers for the vnode's buffer span */ + error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap); + if (error) + return (-1); + + /* Translate virtual block numbers to physical block numbers */ + error = nandfs_vtop(nnode, l2vmap, &v2pmap); + if (error) + return (-1); + + /* Note virtual block 0 marks not mapped */ + if (l2vmap == 0) + *ap->a_bnp = -1; + else + *ap->a_bnp = v2pmap * blk2dev; /* in DEV_BSIZE */ + + DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n", + __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn, + (uintmax_t)*ap->a_bnp )); + + return (0); +} + +static void +nandfs_force_syncer(struct nandfsmount *nmp) +{ + + nmp->nm_flags |= NANDFS_FORCE_SYNCER; + nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE); +} + +static int +nandfs_ioctl(struct vop_ioctl_args *ap) +{ + struct vnode *vp = ap->a_vp; + u_long command = ap->a_command; + caddr_t data = ap->a_data; + struct nandfs_node *node = VTON(vp); + struct nandfs_device *nandfsdev = node->nn_nandfsdev; + struct nandfsmount *nmp = node->nn_nmp; + uint64_t *tab, *cno; + struct nandfs_seg_stat *nss; + struct nandfs_cpmode *ncpm; + struct nandfs_argv *nargv; + struct nandfs_cpstat *ncp; + int error; + + DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command)); + + error = priv_check(ap->a_td, PRIV_VFS_MOUNT); + if (error) + return (error); + + if (nmp->nm_ronly) { + switch (command) { + case NANDFS_IOCTL_GET_FSINFO: + case NANDFS_IOCTL_GET_SUSTAT: + case NANDFS_IOCTL_GET_CPINFO: + case NANDFS_IOCTL_GET_CPSTAT: + case NANDFS_IOCTL_GET_SUINFO: + case NANDFS_IOCTL_GET_VINFO: + case NANDFS_IOCTL_GET_BDESCS: + break; + default: + return (EROFS); + } + } + + switch (command) { + case NANDFS_IOCTL_GET_FSINFO: + error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data); + break; + case NANDFS_IOCTL_GET_SUSTAT: + nss = (struct nandfs_seg_stat *)data; + error = nandfs_get_seg_stat(nandfsdev, nss); + break; + case NANDFS_IOCTL_CHANGE_CPMODE: 
+ ncpm = (struct nandfs_cpmode *)data; + error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm); + nandfs_force_syncer(nmp); + break; + case NANDFS_IOCTL_GET_CPINFO: + nargv = (struct nandfs_argv *)data; + error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv); + break; + case NANDFS_IOCTL_DELETE_CP: + tab = (uint64_t *)data; + error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]); + nandfs_force_syncer(nmp); + break; + case NANDFS_IOCTL_GET_CPSTAT: + ncp = (struct nandfs_cpstat *)data; + error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp); + break; + case NANDFS_IOCTL_GET_SUINFO: + nargv = (struct nandfs_argv *)data; + error = nandfs_get_segment_info_ioctl(nandfsdev, nargv); + break; + case NANDFS_IOCTL_GET_VINFO: + nargv = (struct nandfs_argv *)data; + error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv); + break; + case NANDFS_IOCTL_GET_BDESCS: + nargv = (struct nandfs_argv *)data; + error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv); + break; + case NANDFS_IOCTL_SYNC: + cno = (uint64_t *)data; + nandfs_force_syncer(nmp); + *cno = nandfsdev->nd_last_cno; + error = 0; + break; + case NANDFS_IOCTL_MAKE_SNAP: + cno = (uint64_t *)data; + error = nandfs_make_snap(nandfsdev, cno); + nandfs_force_syncer(nmp); + break; + case NANDFS_IOCTL_DELETE_SNAP: + cno = (uint64_t *)data; + error = nandfs_delete_snap(nandfsdev, *cno); + nandfs_force_syncer(nmp); + break; + default: + error = ENOTTY; + break; + } + + return (error); +} + +/* + * Whiteout vnode call + */ +static int +nandfs_whiteout(struct vop_whiteout_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + int error = 0; + + switch (ap->a_flags) { + case LOOKUP: + return (0); + case CREATE: + /* Create a new directory whiteout */ +#ifdef INVARIANTS + if ((cnp->cn_flags & SAVENAME) == 0) + panic("ufs_whiteout: missing name"); +#endif + error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr, + cnp->cn_namelen, DT_WHT); + break; + + case DELETE: + /* 
Remove an existing directory whiteout */ + cnp->cn_flags &= ~DOWHITEOUT; + error = nandfs_remove_dirent(dvp, NULL, cnp); + break; + default: + panic("nandf_whiteout: unknown op: %d", ap->a_flags); + } + + return (error); +} + +static int +nandfs_pathconf(struct vop_pathconf_args *ap) +{ + int error; + + error = 0; + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + break; + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + break; + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + break; + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; + case _PC_NO_TRUNC: + *ap->a_retval = 1; + break; + case _PC_ACL_EXTENDED: + *ap->a_retval = 0; + break; + case _PC_ALLOC_SIZE_MIN: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; + break; + case _PC_FILESIZEBITS: + *ap->a_retval = 64; + break; + case _PC_REC_INCR_XFER_SIZE: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; + break; + case _PC_REC_MAX_XFER_SIZE: + *ap->a_retval = -1; /* means ``unlimited'' */ + break; + case _PC_REC_MIN_XFER_SIZE: + *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; + break; + default: + error = EINVAL; + break; + } + return (error); +} + +static int +nandfs_vnlock1(struct vop_lock1_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + int error, vi_locked; + + /* + * XXX can vnode go away while we are sleeping? 
+ */ + vi_locked = mtx_owned(&vp->v_interlock); + if (vi_locked) + VI_UNLOCK(vp); + error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev, + ap->a_flags & LK_NOWAIT); + if (vi_locked && !error) + VI_LOCK(vp); + if (error) + return (error); + + error = vop_stdlock(ap); + if (error) { + NANDFS_WRITEUNLOCK(node->nn_nandfsdev); + return (error); + } + + return (0); +} + +static int +nandfs_vnunlock(struct vop_unlock_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + int error; + + error = vop_stdunlock(ap); + if (error) + return (error); + + NANDFS_WRITEUNLOCK(node->nn_nandfsdev); + + return (0); +} + +/* + * Global vfs data structures + */ +struct vop_vector nandfs_vnodeops = { + .vop_default = &default_vnodeops, + .vop_access = nandfs_access, + .vop_advlock = nandfs_advlock, + .vop_bmap = nandfs_bmap, + .vop_close = nandfs_close, + .vop_create = nandfs_create, + .vop_fsync = nandfs_fsync, + .vop_getattr = nandfs_getattr, + .vop_inactive = nandfs_inactive, + .vop_cachedlookup = nandfs_lookup, + .vop_ioctl = nandfs_ioctl, + .vop_link = nandfs_link, + .vop_lookup = vfs_cache_lookup, + .vop_mkdir = nandfs_mkdir, + .vop_mknod = nandfs_mknod, + .vop_open = nandfs_open, + .vop_pathconf = nandfs_pathconf, + .vop_print = nandfs_print, + .vop_read = nandfs_read, + .vop_readdir = nandfs_readdir, + .vop_readlink = nandfs_readlink, + .vop_reclaim = nandfs_reclaim, + .vop_remove = nandfs_remove, + .vop_rename = nandfs_rename, + .vop_rmdir = nandfs_rmdir, + .vop_whiteout = nandfs_whiteout, + .vop_write = nandfs_write, + .vop_setattr = nandfs_setattr, + .vop_strategy = nandfs_strategy, + .vop_symlink = nandfs_symlink, + .vop_lock1 = nandfs_vnlock1, + .vop_unlock = nandfs_vnunlock, +}; + +struct vop_vector nandfs_system_vnodeops = { + .vop_default = &default_vnodeops, + .vop_close = nandfs_close, + .vop_inactive = nandfs_inactive, + .vop_reclaim = nandfs_reclaim, + .vop_strategy = nandfs_strategy, + .vop_fsync = nandfs_fsync, + .vop_bmap = nandfs_bmap, + 
.vop_access = VOP_PANIC, + .vop_advlock = VOP_PANIC, + .vop_create = VOP_PANIC, + .vop_getattr = VOP_PANIC, + .vop_cachedlookup = VOP_PANIC, + .vop_ioctl = VOP_PANIC, + .vop_link = VOP_PANIC, + .vop_lookup = VOP_PANIC, + .vop_mkdir = VOP_PANIC, + .vop_mknod = VOP_PANIC, + .vop_open = VOP_PANIC, + .vop_pathconf = VOP_PANIC, + .vop_print = VOP_PANIC, + .vop_read = VOP_PANIC, + .vop_readdir = VOP_PANIC, + .vop_readlink = VOP_PANIC, + .vop_remove = VOP_PANIC, + .vop_rename = VOP_PANIC, + .vop_rmdir = VOP_PANIC, + .vop_whiteout = VOP_PANIC, + .vop_write = VOP_PANIC, + .vop_setattr = VOP_PANIC, + .vop_symlink = VOP_PANIC, +}; + +static int +nandfsfifo_close(struct vop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct nandfs_node *node = VTON(vp); + + DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node)); + + mtx_lock(&vp->v_interlock); + if (vp->v_usecount > 1) + nandfs_itimes_locked(vp); + mtx_unlock(&vp->v_interlock); + + return (fifo_specops.vop_close(ap)); +} + +struct vop_vector nandfs_fifoops = { + .vop_default = &fifo_specops, + .vop_fsync = VOP_PANIC, + .vop_access = nandfs_access, + .vop_close = nandfsfifo_close, + .vop_getattr = nandfs_getattr, + .vop_inactive = nandfs_inactive, + .vop_print = nandfs_print, + .vop_read = VOP_PANIC, + .vop_reclaim = nandfs_reclaim, + .vop_setattr = nandfs_setattr, + .vop_write = VOP_PANIC, + .vop_lock1 = nandfs_vnlock1, + .vop_unlock = nandfs_vnunlock, +}; + +int +nandfs_vinit(struct vnode *vp, uint64_t ino) +{ + struct nandfs_node *node; + + ASSERT_VOP_LOCKED(vp, __func__); + + node = VTON(vp); + + /* Check if we're fetching the root */ + if (ino == NANDFS_ROOT_INO) + vp->v_vflag |= VV_ROOT; + + if (ino != NANDFS_GC_INO) + vp->v_type = IFTOVT(node->nn_inode.i_mode); + else + vp->v_type = VREG; + + if (vp->v_type == VFIFO) + vp->v_op = &nandfs_fifoops; + + return (0); +} |