summaryrefslogtreecommitdiffstats
path: root/sys/fs/nandfs
diff options
context:
space:
mode:
authorgber <gber@FreeBSD.org>2012-05-17 10:11:18 +0000
committergber <gber@FreeBSD.org>2012-05-17 10:11:18 +0000
commit6f7c7353004e2ff9709b326a4008ce8ea63d9270 (patch)
treea325137a898341311de8641f7212e28b7d87950e /sys/fs/nandfs
parent661b9d94414ea6d11d5b7960aef1f172975ce52b (diff)
downloadFreeBSD-src-6f7c7353004e2ff9709b326a4008ce8ea63d9270.zip
FreeBSD-src-6f7c7353004e2ff9709b326a4008ce8ea63d9270.tar.gz
Import work done under project/nand (@235533) into head.
The NAND Flash environment consists of several distinct components:
- NAND framework (drivers harness for NAND controllers and NAND chips)
- NAND simulator (NANDsim)
- NAND file system (NAND FS)
- Companion tools and utilities
- Documentation (manual pages)

This work is still experimental. Please use with caution.

Obtained from: Semihalf
Supported by: FreeBSD Foundation, Juniper Networks
Diffstat (limited to 'sys/fs/nandfs')
-rw-r--r--sys/fs/nandfs/bmap.c621
-rw-r--r--sys/fs/nandfs/bmap.h40
-rw-r--r--sys/fs/nandfs/nandfs.h310
-rw-r--r--sys/fs/nandfs/nandfs_alloc.c364
-rw-r--r--sys/fs/nandfs/nandfs_bmap.c230
-rw-r--r--sys/fs/nandfs/nandfs_buffer.c83
-rw-r--r--sys/fs/nandfs/nandfs_cleaner.c621
-rw-r--r--sys/fs/nandfs/nandfs_cpfile.c776
-rw-r--r--sys/fs/nandfs/nandfs_dat.c344
-rw-r--r--sys/fs/nandfs/nandfs_dir.c314
-rw-r--r--sys/fs/nandfs/nandfs_fs.h565
-rw-r--r--sys/fs/nandfs/nandfs_ifile.c213
-rw-r--r--sys/fs/nandfs/nandfs_mount.h50
-rw-r--r--sys/fs/nandfs/nandfs_segment.c1329
-rw-r--r--sys/fs/nandfs/nandfs_subr.c1120
-rw-r--r--sys/fs/nandfs/nandfs_subr.h238
-rw-r--r--sys/fs/nandfs/nandfs_sufile.c569
-rw-r--r--sys/fs/nandfs/nandfs_vfsops.c1590
-rw-r--r--sys/fs/nandfs/nandfs_vnops.c2455
19 files changed, 11832 insertions, 0 deletions
diff --git a/sys/fs/nandfs/bmap.c b/sys/fs/nandfs/bmap.c
new file mode 100644
index 0000000..84e4a9e
--- /dev/null
+++ b/sys/fs/nandfs/bmap.c
@@ -0,0 +1,621 @@
+/*-
+ * Copyright (c) 2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/ktr.h>
+#include <sys/kdb.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+#include "bmap.h"
+
+static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t,
+ struct nandfs_indir *, int *);
+
+/*
+ * Translate logical block lblk of node into the block address stored in
+ * the inode's block map and return it through *vblk.
+ *
+ * Direct blocks are answered straight from i_db[].  Otherwise the chain of
+ * indirect blocks computed by bmap_getlbns() is followed, reading one
+ * indirect block per step.  A zero address met on the way is reported as
+ * *vblk = 0; when lblk names one of the indirect blocks on the chain, the
+ * address of that indirect block is reported.
+ *
+ * Returns 0 on success or the error from bmap_getlbns() /
+ * nandfs_bread_meta().
+ */
+int
+bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk)
+{
+	struct nandfs_indir path[NIADDR + 1];
+	struct nandfs_inode *inode;
+	struct buf *bp;
+	nandfs_daddr_t cur;
+	int depth, err, i;
+
+	DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk));
+	inode = &node->nn_inode;
+
+	err = bmap_getlbns(node, lblk, path, &depth);
+	if (err != 0)
+		return (err);
+
+	/* No indirection: the address lives in the inode itself. */
+	if (depth == 0) {
+		*vblk = inode->i_db[lblk];
+		return (0);
+	}
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__,
+	    node, lblk, path[0].in_off));
+	cur = inode->i_ib[path[0].in_off];
+
+	/* path[0] indexes i_ib[]; the remaining entries index indirect blocks. */
+	bp = NULL;
+	for (i = 1; i < depth; i++) {
+		if (cur == 0) {
+			DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with "
+			    "vblk 0\n", __func__, node, lblk));
+			*vblk = 0;
+			return (0);
+		}
+		if (path[i].in_lbn == lblk) {
+			DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx "
+			    "returning address of indirect block (%jx)\n",
+			    __func__, node, lblk, path[i].in_lbn, cur));
+			*vblk = cur;
+			return (0);
+		}
+
+		DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block "
+		    "ap->in_lbn=%jx\n", __func__, node, lblk, path[i].in_lbn));
+
+		err = nandfs_bread_meta(node, path[i].in_lbn, NOCRED, 0, &bp);
+		if (err != 0) {
+			brelse(bp);
+			return (err);
+		}
+
+		cur = ((nandfs_daddr_t *)bp->b_data)[path[i].in_off];
+		brelse(bp);
+	}
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__,
+	    node, lblk, cur));
+	*vblk = cur;
+
+	return (0);
+}
+
+/*
+ * Mark dirty every indirect block on the path leading to logical block
+ * lblk of node.  Each block is read with nandfs_bread_meta() and handed to
+ * nandfs_dirty_buf_meta() together with the caller's force flag.
+ *
+ * Returns 0 when lblk is a direct block (nothing to do) or when all blocks
+ * were processed, otherwise the first error encountered.
+ */
+int
+bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force)
+{
+	struct nandfs_indir path[NIADDR+1];
+#ifdef DEBUG
+	nandfs_daddr_t daddr;
+#endif
+	struct buf *bp;
+	int depth, err, i;
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk));
+
+	err = bmap_getlbns(node, lblk, path, &depth);
+	if (err != 0)
+		return (err);
+
+	/* A direct block has no indirect blocks to dirty. */
+	if (depth == 0)
+		return (0);
+
+	DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node));
+
+	/* path[0] refers to the inode's i_ib[] slot, so start at path[1]. */
+	bp = NULL;
+	for (i = 1; i < depth; i++) {
+		err = nandfs_bread_meta(node, path[i].in_lbn, NOCRED, 0, &bp);
+		if (err != 0) {
+			brelse(bp);
+			return (err);
+		}
+
+#ifdef DEBUG
+		daddr = ((nandfs_daddr_t *)bp->b_data)[path[i].in_off];
+		MPASS(daddr != 0 || node->nn_ino == 3);
+#endif
+
+		err = nandfs_dirty_buf_meta(bp, force);
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * Record vblk as the address of logical block lblk in the block map of
+ * node.
+ *
+ * - Direct block (bmap_getlbns() reports 0 levels): stored in i_db[lblk].
+ * - Top-level indirect block itself (1 level): stored in i_ib[].
+ * - Otherwise the chain of indirect blocks is walked; a block whose parent
+ *   entry is 0 is created with nandfs_bcreate_meta(), an existing one is
+ *   read with nandfs_bread_meta().  vblk is then written into the last
+ *   block of the chain, which is passed to nandfs_dirty_buf_meta().
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
+    nandfs_daddr_t vblk)
+{
+	struct nandfs_inode *ip;
+	struct nandfs_indir a[NIADDR+1], *ap;
+	struct buf *bp;
+	nandfs_daddr_t daddr;
+	int error;
+	int num, i;
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk,
+	    vblk));
+
+	ip = &node->nn_inode;
+
+	ap = a;
+
+	error = bmap_getlbns(node, lblk, ap, &num);
+	if (error)
+		return (error);
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__,
+	    node, lblk, vblk, num));
+
+	if (num == 0) {
+		DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__,
+		    node, lblk));
+		ip->i_db[lblk] = vblk;
+		return (0);
+	}
+
+	DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n",
+	    __func__, node, lblk, ap->in_off));
+
+	if (num == 1) {
+		DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting "
+		    "%jx as vblk for indirect block %d\n", __func__, node,
+		    lblk, vblk, ap->in_off));
+		ip->i_ib[ap->in_off] = vblk;
+		return (0);
+	}
+
+	/*
+	 * Walk a[1] .. a[num - 1].  Only the buffer of the last level is
+	 * kept; the one from the previous iteration is released first.
+	 */
+	bp = NULL;
+	daddr = ip->i_ib[a[0].in_off];
+	for (i = 1; i < num; i++) {
+		if (bp)
+			brelse(bp);
+		if (daddr == 0) {
+			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create "
+			    "block %jx %d\n", __func__, node, lblk, vblk,
+			    a[i].in_lbn, a[i].in_off));
+			error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED,
+			    0, &bp);
+			if (error)
+				return (error);
+		} else {
+			/* Print lblk for "lblk=", as the create branch does. */
+			DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read "
+			    "block %jx %d\n", __func__, node, lblk, vblk,
+			    a[i].in_lbn, a[i].in_off));
+			error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0,
+			    &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+		}
+		daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off];
+	}
+	i--;
+
+	DPRINTF(BMAP,
+	    ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at "
+	    "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off,
+	    daddr));
+
+	if (!bp) {
+		nandfs_error("%s: cannot find indirect block\n", __func__);
+		/*
+		 * Report a real errno: -1 is ERESTART inside the kernel and
+		 * must not be used as a generic failure code.
+		 */
+		return (EIO);
+	}
+	((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk;
+
+	error = nandfs_dirty_buf_meta(bp, 0);
+	if (error) {
+		nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp);
+		return (error);
+	}
+	DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__,
+	    node, lblk, vblk));
+
+	return (error);
+}
+
+CTASSERT(NIADDR <= 3);
+#define SINGLE 0 /* index of single indirect block */
+#define DOUBLE 1 /* index of double indirect block */
+#define TRIPLE 2 /* index of triple indirect block */
+
+/*
+ * Return MNINDIR(fsdev) multiplied by itself `level' times, i.e. 1 for
+ * level <= 0, MNINDIR(fsdev) for level 1, and so on.
+ */
+static __inline nandfs_lbn_t
+lbn_offset(struct nandfs_device *fsdev, int level)
+{
+	nandfs_lbn_t span;
+
+	span = 1;
+	while (level > 0) {
+		span *= MNINDIR(fsdev);
+		level--;
+	}
+	return (span);
+}
+
+/*
+ * Starting from 1, add nip[k].in_off * lbn_offset(fsdev, level - k) for
+ * each k in 0 .. level and return the sum.  With level < SINGLE no entry
+ * is examined and the result is 1.
+ */
+static nandfs_lbn_t
+blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip)
+{
+	nandfs_lbn_t total;
+
+	total = 1;
+	while (level >= SINGLE) {
+		MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev));
+		total += nip->in_off * lbn_offset(fsdev, level);
+		level--;
+		nip++;
+	}
+
+	return (total);
+}
+
+static int /* Walk one indirect block from entry ap->in_off down to 0, releasing mappings until *left is 0; returns 0 or an error from a nandfs_* helper. */
+bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left, /* left: in/out count of blocks still to remove */
+ int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp, /* cleaned: set to 1 when the loop index ran past entry 0; fp: path array substituted for ap after the first entry */
+ nandfs_daddr_t *copy) /* copy: scratch array; each recursion level uses its own MNINDIR(fsdev)-entry slice */
+{
+ struct buf *bp;
+ nandfs_lbn_t i, lbn, nlbn, factor, tosub;
+ struct nandfs_device *fsdev;
+ int error, lcleaned, modified;
+
+ DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__,
+ node, level, *left));
+
+ fsdev = node->nn_nandfsdev;
+
+ MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev));
+
+ factor = lbn_offset(fsdev, level);
+ lbn = ap->in_lbn;
+
+ error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ bcopy(bp->b_data, copy, fsdev->nd_blocksize); /* edit a private copy so the buffer is not held across the loop */
+ bqrelse(bp);
+
+ modified = 0; /* counts entries of copy[] that were zeroed */
+
+ i = ap->in_off;
+
+ if (ap != fp)
+ ap++; /* step to the path entry of the next lower level */
+ for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--,
+ nlbn += factor) {
+ lcleaned = 0;
+
+ DPRINTF(BMAP,
+ ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n",
+ __func__, node, i, nlbn, *left, ap, copy[i]));
+
+ if (copy[i] == 0) { /* unmapped entry: only account for the blocks it spans */
+ tosub = blocks_inside(fsdev, level - 1, ap);
+ if (tosub > *left)
+ tosub = 0; /* NOTE(review): *left stays unchanged here, whereas bmap_truncate_mapping() sets left to 0 in the same situation - confirm intent */
+
+ *left -= tosub;
+ } else {
+ if (level > SINGLE) {
+ if (ap == fp)
+ ap->in_lbn = nlbn;
+
+ error = bmap_truncate_indirect(node, level - 1,
+ left, &lcleaned, ap, fp,
+ copy + MNINDIR(fsdev)); /* child level works in the next slice of copy */
+ if (error)
+ return (error);
+ } else {
+ error = nandfs_bdestroy(node, copy[i]); /* lowest level: entry maps a block, destroy it */
+ if (error)
+ return (error);
+ lcleaned = 1;
+ *left -= 1;
+ }
+ }
+
+ if (lcleaned) {
+ if (level > SINGLE) {
+ error = nandfs_vblock_end(fsdev, copy[i]); /* child indirect block was fully cleaned */
+ if (error)
+ return (error);
+ }
+ copy[i] = 0;
+ modified++;
+ }
+
+ ap = fp; /* every entry after the first uses the fp path array */
+ }
+
+ if (i == -1)
+ *cleaned = 1; /* loop index was decremented past entry 0 */
+
+ error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); /* fetch the block again to write the edited copy back */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ if (modified)
+ bcopy(copy, bp->b_data, fsdev->nd_blocksize);
+
+ error = nandfs_dirty_buf_meta(bp, 0); /* NOTE(review): called even when modified == 0 */
+ if (error)
+ return (error);
+
+ return (error);
+}
+
+/*
+ * Remove up to `todo' block mappings from node, working backwards from
+ * logical block lastblk.
+ *
+ * The indirect levels reported by bmap_getlbns() for lastblk are handled
+ * first, highest level first, through bmap_truncate_indirect(); an
+ * indirect tree that is reported as fully cleaned has its i_ib[] slot
+ * ended with nandfs_vblock_end() and zeroed.  Whatever is still left is
+ * then taken from the direct blocks in i_db[].
+ *
+ * Returns 0 on success or the first error encountered.  The temporary
+ * copy buffer is released on every exit path.
+ */
+int
+bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk,
+    nandfs_lbn_t todo)
+{
+	struct nandfs_inode *ip;
+	/*
+	 * bmap_getlbns() fills at most NIADDR + 1 entries; one extra slot
+	 * keeps the DEBUG sentinel written at a[num] inside the array.
+	 */
+	struct nandfs_indir a[NIADDR + 2], f[NIADDR], *ap;
+	nandfs_daddr_t indir_lbn[NIADDR];
+	nandfs_daddr_t *copy;
+	int error, level;
+	nandfs_lbn_t left, tosub;
+	struct nandfs_device *fsdev;
+	int cleaned, i;
+	int num;
+
+	DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__,
+	    node, lastblk, todo));
+
+	ip = &node->nn_inode;
+	fsdev = node->nn_nandfsdev;
+
+	ap = a;
+
+	error = bmap_getlbns(node, lastblk, ap, &num);
+	if (error)
+		return (error);
+
+	/* Logical block numbers of the top-level indirect blocks. */
+	indir_lbn[SINGLE] = -NDADDR;
+	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1;
+	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev)
+	    * MNINDIR(fsdev) - 1;
+
+	/* f[] is the path used once the level containing lastblk is done. */
+	for (i = 0; i < NIADDR; i++) {
+		f[i].in_off = MNINDIR(fsdev) - 1;
+		f[i].in_lbn = 0xdeadbeef;
+	}
+
+	left = todo;
+
+#ifdef DEBUG
+	a[num].in_off = -1;
+#endif
+
+	ap++;
+	num -= 2;
+
+	if (num < 0)
+		goto direct;
+
+	copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1),
+	    M_NANDFSTEMP, M_WAITOK);
+
+	for (level = num; level >= SINGLE && left > 0; level--) {
+		cleaned = 0;
+
+		if (ip->i_ib[level] == 0) {
+			tosub = blocks_inside(fsdev, level, ap);
+			if (tosub > left)
+				left = 0;
+			else
+				left -= tosub;
+		} else {
+			if (ap == f)
+				ap->in_lbn = indir_lbn[level];
+			error = bmap_truncate_indirect(node, level, &left,
+			    &cleaned, ap, f, copy);
+			if (error) {
+				nandfs_error("%s: error %d when truncate "
+				    "at level %d\n", __func__, error, level);
+				/* Do not leak the scratch buffer on failure. */
+				free(copy, M_NANDFSTEMP);
+				return (error);
+			}
+		}
+
+		if (cleaned) {
+			/*
+			 * NOTE(review): the return value is ignored here,
+			 * while bmap_truncate_indirect() checks it.
+			 */
+			nandfs_vblock_end(fsdev, ip->i_ib[level]);
+			ip->i_ib[level] = 0;
+		}
+
+		ap = f;
+	}
+
+	free(copy, M_NANDFSTEMP);
+
+direct:
+	if (num < 0)
+		i = lastblk;
+	else
+		i = NDADDR - 1;
+
+	for (; i >= 0 && left > 0; i--) {
+		if (ip->i_db[i] != 0) {
+			error = nandfs_bdestroy(node, ip->i_db[i]);
+			if (error) {
+				nandfs_error("%s: cannot destroy "
+				    "block %jx, error %d\n", __func__,
+				    (uintmax_t)ip->i_db[i], error);
+				return (error);
+			}
+			ip->i_db[i] = 0;
+		}
+
+		left--;
+	}
+
+	KASSERT(left == 0,
+	    ("truncated wrong number of blocks (%jd should be 0)",
+	    (intmax_t)left));
+
+	return (error);
+}
+
+/*
+ * Compute the value the file system uses as its maximum file size: NDADDR
+ * plus, for every indirection level, blocks_inside() evaluated with all
+ * offsets set to MNINDIR(fsdev) - 1, multiplied by nd_blocksize.
+ */
+nandfs_lbn_t
+get_maxfilesize(struct nandfs_device *fsdev)
+{
+	struct nandfs_indir last[NIADDR];
+	nandfs_lbn_t nblocks;
+	int level;
+
+	nblocks = NDADDR;
+	for (level = 0; level < NIADDR; level++) {
+		/* Entries 0 .. level are all initialised before being read. */
+		last[level].in_off = MNINDIR(fsdev) - 1;
+		nblocks += blocks_inside(fsdev, level, last);
+	}
+
+	nblocks *= fsdev->nd_blocksize;
+	return (nblocks);
+}
+
+/*
+ * This is ufs_getlbns with minor modifications.
+ *
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block. The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * Note, the logical block number of the inode single/double/triple indirect
+ * block appears twice in the array, once with the offset into the i_ib and
+ * once with the offset into the page itself.
+ *
+ * node: file whose block map is described; bn: logical block number (a
+ * negative value names an indirect block); ap: output array, at most
+ * NIADDR + 1 entries are written; nump: if not NULL, receives the number
+ * of entries written (0 for a direct block).
+ *
+ * Returns 0 on success, or EFBIG when bn needs more than NIADDR levels of
+ * indirection.
+ */
+static int
+bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn,
+    struct nandfs_indir *ap, int *nump)
+{
+	nandfs_daddr_t blockcnt;
+	nandfs_lbn_t metalbn, realbn;
+	struct nandfs_device *fsdev;
+	int i, numlevels, off;
+
+	fsdev = node->nn_nandfsdev;
+
+	DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__,
+	    node, bn, MNINDIR(fsdev)));
+
+	/* nump is optional, as in the final store below. */
+	if (nump != NULL)
+		*nump = 0;
+	numlevels = 0;
+	realbn = bn;
+
+	if (bn < 0)
+		bn = -bn;
+
+	/* The first NDADDR blocks are direct blocks. */
+	if (bn < NDADDR)
+		return (0);
+
+	/*
+	 * Determine the number of levels of indirection.  After this loop
+	 * is done, blockcnt indicates the number of data blocks possible
+	 * at the previous level of indirection, and NIADDR - i is the number
+	 * of levels of indirection needed to locate the requested block.
+	 */
+	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+		DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__,
+		    blockcnt, i, bn));
+		if (i == 0)
+			return (EFBIG);
+		blockcnt *= MNINDIR(fsdev);
+		if (bn < blockcnt)
+			break;
+	}
+
+	/* Calculate the address of the first meta-block. */
+	if (realbn >= 0)
+		metalbn = -(realbn - bn + NIADDR - i);
+	else
+		metalbn = -(-realbn - bn + NIADDR - i);
+
+	/*
+	 * At each iteration, off is the offset into the bap array which is
+	 * an array of disk addresses at the current level of indirection.
+	 * The logical block number and the offset in that block are stored
+	 * into the argument array.
+	 */
+	ap->in_lbn = metalbn;
+	ap->in_off = off = NIADDR - i;
+
+	DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__,
+	    metalbn, off));
+
+	ap++;
+	for (++numlevels; i <= NIADDR; i++) {
+		/* If searching for a meta-data block, quit when found. */
+		if (metalbn == realbn)
+			break;
+
+		blockcnt /= MNINDIR(fsdev);
+		off = (bn / blockcnt) % MNINDIR(fsdev);
+
+		++numlevels;
+		ap->in_lbn = metalbn;
+		ap->in_off = off;
+
+		DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__,
+		    ap->in_lbn, ap->in_off));
+		++ap;
+
+		metalbn -= -1 + off * blockcnt;
+	}
+	if (nump != NULL)
+		*nump = numlevels;
+
+	DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels));
+
+	return (0);
+}
diff --git a/sys/fs/nandfs/bmap.h b/sys/fs/nandfs/bmap.h
new file mode 100644
index 0000000..c27c61c
--- /dev/null
+++ b/sys/fs/nandfs/bmap.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2012 Semihalf
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BMAP_H
+#define _BMAP_H
+
+#include "nandfs_fs.h"
+
+int bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *);
+int bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t);
+int bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, nandfs_lbn_t);
+int bmap_dirty_meta(struct nandfs_node *, nandfs_lbn_t, int);
+
+nandfs_lbn_t get_maxfilesize(struct nandfs_device *);
+
+#endif /* _BMAP_H */
diff --git a/sys/fs/nandfs/nandfs.h b/sys/fs/nandfs/nandfs.h
new file mode 100644
index 0000000..beb4e16
--- /dev/null
+++ b/sys/fs/nandfs/nandfs.h
@@ -0,0 +1,310 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FS_NANDFS_NANDFS_H_
+#define _FS_NANDFS_NANDFS_H_
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <sys/mutex.h>
+
+#include <sys/disk.h>
+#include <sys/kthread.h>
+#include "nandfs_fs.h"
+
+MALLOC_DECLARE(M_NANDFSTEMP);
+
+/* Debug categories */
+#define NANDFS_DEBUG_VOLUMES 0x000001
+#define NANDFS_DEBUG_BLOCK 0x000004
+#define NANDFS_DEBUG_LOCKING 0x000008
+#define NANDFS_DEBUG_NODE 0x000010
+#define NANDFS_DEBUG_LOOKUP 0x000020
+#define NANDFS_DEBUG_READDIR 0x000040
+#define NANDFS_DEBUG_TRANSLATE 0x000080
+#define NANDFS_DEBUG_STRATEGY 0x000100
+#define NANDFS_DEBUG_READ 0x000200
+#define NANDFS_DEBUG_WRITE 0x000400
+#define NANDFS_DEBUG_IFILE 0x000800
+#define NANDFS_DEBUG_ATTR 0x001000
+#define NANDFS_DEBUG_EXTATTR 0x002000
+#define NANDFS_DEBUG_ALLOC 0x004000
+#define NANDFS_DEBUG_CPFILE 0x008000
+#define NANDFS_DEBUG_DIRHASH 0x010000
+#define NANDFS_DEBUG_NOTIMPL 0x020000
+#define NANDFS_DEBUG_SHEDULE 0x040000
+#define NANDFS_DEBUG_SEG 0x080000
+#define NANDFS_DEBUG_SYNC 0x100000
+#define NANDFS_DEBUG_PARANOIA 0x200000
+#define NANDFS_DEBUG_VNCALL 0x400000
+#define NANDFS_DEBUG_BUF 0x1000000
+#define NANDFS_DEBUG_BMAP 0x2000000
+#define NANDFS_DEBUG_DAT 0x4000000
+#define NANDFS_DEBUG_GENERIC 0x8000000
+#define NANDFS_DEBUG_CLEAN 0x10000000
+
+extern int nandfs_verbose;
+
+#define DPRINTF(name, arg) { \
+ if (nandfs_verbose & NANDFS_DEBUG_##name) {\
+ printf arg;\
+ };\
+ }
+#define DPRINTFIF(name, cond, arg) { \
+ if (nandfs_verbose & NANDFS_DEBUG_##name) { \
+ if (cond) printf arg;\
+ };\
+ }
+
+#define VFSTONANDFS(mp) ((struct nandfsmount *)((mp)->mnt_data))
+#define VTON(vp) ((struct nandfs_node *)(vp)->v_data)
+#define NTOV(xp) ((xp)->nn_vnode)
+
+int nandfs_init(struct vfsconf *);
+int nandfs_uninit(struct vfsconf *);
+
+extern struct vop_vector nandfs_vnodeops;
+extern struct vop_vector nandfs_system_vnodeops;
+
+struct nandfs_node;
+
+/* Structure and derivatives */
+struct nandfs_mdt { /* layout counts; struct nandfs_device embeds one as nd_dat_mdt and one as nd_ifile_mdt */
+ uint32_t entries_per_block; /* NOTE(review): unit meanings below are inferred from the field names - confirm against the code that fills them */
+ uint32_t entries_per_group;
+ uint32_t blocks_per_group;
+ uint32_t groups_per_desc_block; /* desc is super group */
+ uint32_t blocks_per_desc_block; /* desc is super group */
+};
+
+struct nandfs_segment { /* bookkeeping for one segment: two buffer queues plus position and size counters */
+ LIST_ENTRY(nandfs_segment) seg_link; /* list linkage; struct nandfs_seginfo declares a LIST_HEAD of this type */
+
+ struct nandfs_device *fsdev; /* back pointer to the device */
+
+ TAILQ_HEAD(, buf) segsum; /* queue of struct buf; presumably segment summary buffers - confirm */
+ TAILQ_HEAD(, buf) data; /* queue of struct buf; presumably data buffers - confirm */
+
+ uint64_t seg_num;
+ uint64_t seg_next;
+ uint64_t start_block;
+ uint32_t num_blocks;
+
+ uint32_t nblocks;
+ uint32_t nbinfos;
+ uint32_t segsum_blocks;
+ uint32_t segsum_bytes;
+ uint32_t bytes_left;
+ char *current_off; /* NOTE(review): looks like a write cursor into a summary buffer - confirm */
+};
+
+struct nandfs_seginfo { /* container for a list of struct nandfs_segment; struct nandfs_device points at one via nd_seginfo */
+ LIST_HEAD( ,nandfs_segment) seg_list; /* head of the segment list */
+ struct nandfs_segment *curseg; /* presumably the segment currently being filled - confirm */
+ struct nandfs_device *fsdev; /* back pointer to the device */
+ uint32_t blocks;
+ uint8_t reiterate;
+};
+
+#define NANDFS_FSSTOR_FAILED 1
+struct nandfs_fsarea { /* element type of nandfs_device.nd_fsarea[NANDFS_NFSAREAS] */
+ int offset;
+ int flags; /* presumably holds NANDFS_FSSTOR_FAILED - confirm */
+ int last_used;
+};
+
+extern int nandfs_cleaner_enable;
+extern int nandfs_cleaner_interval;
+extern int nandfs_cleaner_segments;
+
+struct nandfs_device { /* per-device state; nodes reach it through nandfs_node.nn_nandfsdev */
+ struct vnode *nd_devvp;
+ struct g_consumer *nd_gconsumer;
+
+ struct thread *nd_syncer;
+ struct thread *nd_cleaner;
+ int nd_syncer_exit;
+ int nd_cleaner_exit;
+
+ int nd_is_nand;
+
+ struct nandfs_fsarea nd_fsarea[NANDFS_NFSAREAS];
+ int nd_last_fsarea;
+
+ STAILQ_HEAD(nandfs_mnts, nandfsmount) nd_mounts; /* tail queue of struct nandfsmount */
+ SLIST_ENTRY(nandfs_device) nd_next_device; /* list linkage; presumably for the nandfs_devices list declared in this header - confirm */
+
+ /* FS structures */
+ struct nandfs_fsdata nd_fsdata;
+ struct nandfs_super_block nd_super;
+ struct nandfs_segment_summary nd_last_segsum;
+ struct nandfs_super_root nd_super_root;
+ struct nandfs_node *nd_dat_node;
+ struct nandfs_node *nd_cp_node;
+ struct nandfs_node *nd_su_node;
+ struct nandfs_node *nd_gc_node;
+
+ struct nandfs_mdt nd_dat_mdt;
+ struct nandfs_mdt nd_ifile_mdt;
+
+ struct timespec nd_ts;
+
+ /* Synchronization */
+ struct mtx nd_mutex;
+ struct mtx nd_sync_mtx;
+ struct cv nd_sync_cv;
+ struct mtx nd_clean_mtx;
+ struct cv nd_clean_cv;
+ struct lock nd_seg_const; /* lockmgr lock taken LK_SHARED by nandfs_writelockflags() */
+
+ struct nandfs_seginfo *nd_seginfo;
+
+ /* FS geometry */
+ uint64_t nd_devsize;
+ uint64_t nd_maxfilesize; /* presumably the result of get_maxfilesize() - confirm */
+ uint32_t nd_blocksize; /* block size in bytes; bmap.c uses it as the length when copying a whole block */
+ uint32_t nd_erasesize;
+
+ uint32_t nd_devblocksize;
+
+ /* Segment usage */
+ uint64_t nd_clean_segs;
+ uint64_t *nd_free_base;
+ uint64_t nd_free_count;
+ uint64_t nd_dirty_bufs;
+
+ /* Running values */
+ uint64_t nd_seg_sequence;
+ uint64_t nd_seg_num;
+ uint64_t nd_next_seg_num;
+ uint64_t nd_last_pseg;
+ uint64_t nd_last_cno;
+ uint64_t nd_last_ino;
+ uint64_t nd_fakevblk;
+
+ int nd_mount_state;
+ int nd_refcnt;
+ int nd_syncing;
+ int nd_cleaning;
+};
+
+extern SLIST_HEAD(_nandfs_devices, nandfs_device) nandfs_devices;
+
+#define NANDFS_FORCE_SYNCER 0x1
+#define NANDFS_UMOUNT 0x2
+
+#define SYNCER_UMOUNT 0x0
+#define SYNCER_VFS_SYNC 0x1
+#define SYNCER_BDFLUSH 0x2
+#define SYNCER_FFORCE 0x3
+#define SYNCER_FSYNC 0x4
+#define SYNCER_ROUPD 0x5
+
+/*
+ * Take fsdev->nd_seg_const in shared mode, OR-ing the caller's flags into
+ * the lockmgr() request.  When lockstatus() already reports LK_EXCLUSIVE
+ * for the lock nothing is acquired and 0 is returned; otherwise the
+ * lockmgr() result is returned.
+ */
+static __inline int
+nandfs_writelockflags(struct nandfs_device *fsdev, int flags)
+{
+
+	if (lockstatus(&fsdev->nd_seg_const) == LK_EXCLUSIVE)
+		return (0);
+
+	return (lockmgr(&fsdev->nd_seg_const, flags | LK_SHARED, NULL));
+}
+
+/*
+ * Counterpart of nandfs_writelockflags(): release fsdev->nd_seg_const,
+ * unless lockstatus() reports LK_EXCLUSIVE, in which case the lock is
+ * left untouched.
+ */
+static __inline void
+nandfs_writeunlock(struct nandfs_device *fsdev)
+{
+
+	if (lockstatus(&fsdev->nd_seg_const) == LK_EXCLUSIVE)
+		return;
+
+	lockmgr(&(fsdev)->nd_seg_const, LK_RELEASE, NULL);
+}
+
+#define NANDFS_WRITELOCKFLAGS(fsdev, flags) nandfs_writelockflags(fsdev, flags)
+
+#define NANDFS_WRITELOCK(fsdev) NANDFS_WRITELOCKFLAGS(fsdev, 0)
+
+#define NANDFS_WRITEUNLOCK(fsdev) nandfs_writeunlock(fsdev)
+
+#define NANDFS_WRITEASSERT(fsdev) lockmgr_assert(&(fsdev)->nd_seg_const, KA_LOCKED)
+
+/* Specific mountpoint; head or a checkpoint/snapshot */
+struct nandfsmount { /* per-mount state; VFSTONANDFS() casts mount->mnt_data to this type */
+ STAILQ_ENTRY(nandfsmount) nm_next_mount; /* tail-queue linkage; struct nandfs_device declares the matching head nd_mounts */
+
+ struct mount *nm_vfs_mountp;
+ struct nandfs_device *nm_nandfsdev; /* device this mount belongs to */
+ struct nandfs_args nm_mount_args;
+ struct nandfs_node *nm_ifile_node;
+
+ uint8_t nm_flags;
+ int8_t nm_ronly; /* presumably non-zero for a read-only mount - confirm */
+};
+
+struct nandfs_node { /* in-core node; VTON() casts vnode->v_data to this type */
+ struct vnode *nn_vnode; /* the vnode returned by NTOV() */
+ struct nandfsmount *nn_nmp;
+ struct nandfs_device *nn_nandfsdev; /* device; bmap.c takes block size and MNINDIR() from it */
+ struct lockf *nn_lockf;
+
+ uint64_t nn_ino; /* inode number */
+ struct nandfs_inode nn_inode; /* inode contents; bmap.c reads and updates its i_db[] and i_ib[] arrays */
+
+ uint64_t nn_diroff;
+ uint32_t nn_flags; /* presumably the IN_* bits defined below in this header - confirm */
+};
+
+#define IN_ACCESS 0x0001 /* Inode access time update request */
+#define IN_CHANGE 0x0002 /* Inode change time update request */
+#define IN_UPDATE 0x0004 /* Inode was written to; update mtime*/
+#define IN_MODIFIED 0x0008 /* node has been modified */
+#define IN_RENAME 0x0010 /* node is being renamed. */
+
+/* File permissions. */
+#define IEXEC 0000100 /* Executable. */
+#define IWRITE 0000200 /* Writeable. */
+#define IREAD 0000400 /* Readable. */
+#define ISVTX 0001000 /* Sticky bit. */
+#define ISGID 0002000 /* Set-gid. */
+#define ISUID 0004000 /* Set-uid. */
+
+#define PRINT_NODE_FLAGS \
+ "\10\1IN_ACCESS\2IN_CHANGE\3IN_UPDATE\4IN_MODIFIED\5IN_RENAME"
+
+#define NANDFS_GATHER(x) ((x)->b_flags |= B_00800000)
+#define NANDFS_UNGATHER(x) ((x)->b_flags &= ~B_00800000)
+#define NANDFS_ISGATHERED(x) ((x)->b_flags & B_00800000)
+
+#endif /* !_FS_NANDFS_NANDFS_H_ */
diff --git a/sys/fs/nandfs/nandfs_alloc.c b/sys/fs/nandfs/nandfs_alloc.c
new file mode 100644
index 0000000..3417266
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_alloc.c
@@ -0,0 +1,364 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Store in '*desc_block' the block number of descriptor block 'desc'.
+ * Consecutive descriptor blocks are mdt->blocks_per_desc_block apart.
+ */
+static void
+nandfs_get_desc_block_nr(struct nandfs_mdt *mdt, uint64_t desc,
+    uint64_t *desc_block)
+{
+	uint64_t stride;
+
+	stride = mdt->blocks_per_desc_block;
+	*desc_block = stride * desc;
+}
+
+/*
+ * Store in '*group_block' the number of the first block of group 'group',
+ * where 'group' is counted from the start of the file.
+ */
+static void
+nandfs_get_group_block_nr(struct nandfs_mdt *mdt, uint64_t group,
+    uint64_t *group_block)
+{
+	uint64_t desc_base, slot;
+
+	/* First block of the descriptor block that owns this group. */
+	desc_base = (group / mdt->groups_per_desc_block) *
+	    mdt->blocks_per_desc_block;
+	/* Position of the group among the groups of that descriptor. */
+	slot = group % mdt->groups_per_desc_block;
+
+	/* Skip the descriptor block itself (+1), then 'slot' whole groups. */
+	*group_block = desc_base + 1 + slot * mdt->blocks_per_group;
+}
+
+/*
+ * Initialise a freshly created descriptor block: every group descriptor in
+ * 'block_data' gets its free counter set to mdt->entries_per_group.
+ */
+static void
+init_desc_block(struct nandfs_mdt *mdt, uint8_t *block_data)
+{
+	struct nandfs_block_group_desc *cur, *end;
+
+	cur = (struct nandfs_block_group_desc *)block_data;
+	end = cur + mdt->groups_per_desc_block;
+	while (cur < end) {
+		cur->bg_nfrees = mdt->entries_per_group;
+		cur++;
+	}
+}
+
+/*
+ * Search the allocation bitmaps of 'node' for a free entry, starting at the
+ * group that contains req->entrynum and wrapping around to descriptor 0.
+ * Descriptor and bitmap blocks beyond the current block count of the file
+ * are created on the fly.
+ *
+ * On success 0 is returned, req->entrynum holds the number of the free
+ * entry, and req->bp_desc / req->bp_bitmap hold the descriptor and bitmap
+ * buffers (still held; the caller disposes of them).  On failure an errno
+ * value is returned (ENOENT when nothing is free) and no buffer stays held.
+ */
+int
+nandfs_find_free_entry(struct nandfs_mdt *mdt, struct nandfs_node *node,
+    struct nandfs_alloc_request *req)
+{
+	nandfs_daddr_t desc, group, maxgroup, maxdesc, pos = 0;
+	nandfs_daddr_t start_group, start_desc;
+	nandfs_daddr_t desc_block, group_block;
+	nandfs_daddr_t file_blocks;
+	struct nandfs_block_group_desc *descriptors;
+	struct buf *bp, *bp2;
+	uint32_t *mask, i, mcount, msize;
+	int error;
+
+	file_blocks = node->nn_inode.i_blocks;
+	maxgroup = 0x100000000ull / mdt->entries_per_group;
+	maxdesc = maxgroup / mdt->groups_per_desc_block;
+	start_group = req->entrynum / mdt->entries_per_group;
+	start_desc = start_group / mdt->groups_per_desc_block;
+	/*
+	 * 'group' below indexes the descriptors of ONE descriptor block, so
+	 * reduce the starting group to an index within its descriptor block.
+	 */
+	start_group %= mdt->groups_per_desc_block;
+
+	msize = (sizeof(uint32_t) * __CHAR_BIT);
+	mcount = mdt->entries_per_group / msize;
+
+	bp = bp2 = NULL;
+restart:
+	for (desc = start_desc; desc < maxdesc; desc++) {
+		nandfs_get_desc_block_nr(mdt, desc, &desc_block);
+
+		if (bp != NULL) {
+			brelse(bp);
+			bp = NULL;
+		}
+		if (desc_block < file_blocks) {
+			error = nandfs_bread(node, desc_block, NOCRED, 0, &bp);
+			if (error)
+				goto fail;
+		} else {
+			error = nandfs_bcreate(node, desc_block, NOCRED, 0,
+			    &bp);
+			if (error)
+				goto fail;
+			file_blocks++;
+			init_desc_block(mdt, bp->b_data);
+		}
+
+		descriptors = (struct nandfs_block_group_desc *) bp->b_data;
+		for (group = start_group; group < mdt->groups_per_desc_block;
+		    group++) {
+			if (!(descriptors[group].bg_nfrees > 0))
+				continue;
+
+			/* The helper expects a file-wide group number. */
+			nandfs_get_group_block_nr(mdt,
+			    desc * mdt->groups_per_desc_block + group,
+			    &group_block);
+
+			if (bp2 != NULL) {
+				brelse(bp2);
+				bp2 = NULL;
+			}
+			if (group_block < file_blocks) {
+				error = nandfs_bread(node, group_block,
+				    NOCRED, 0, &bp2);
+				if (error)
+					goto fail;
+			} else {
+				error = nandfs_bcreate(node, group_block,
+				    NOCRED, 0, &bp2);
+				if (error)
+					goto fail;
+				file_blocks++;
+			}
+
+			mask = (uint32_t *)bp2->b_data;
+			for (i = 0; i < mcount; i++) {
+				/* Word completely allocated, try the next. */
+				if (mask[i] == UINT32_MAX)
+					continue;
+
+				/*
+				 * entry = bit in word + words before it +
+				 * groups before it in this descriptor block +
+				 * all entries of preceding descriptor blocks.
+				 */
+				pos = ffs(~mask[i]) - 1;
+				pos += (msize * i);
+				pos += (group * mdt->entries_per_group);
+				pos += desc * mdt->groups_per_desc_block *
+				    mdt->entries_per_group;
+				goto found;
+			}
+		}
+		start_group = 0;
+	}
+
+	/* Nothing after the starting point: scan the part before it once. */
+	if (start_desc != 0) {
+		maxdesc = start_desc;
+		start_desc = 0;
+		start_group = 0;
+		req->entrynum = 0;
+		goto restart;
+	}
+
+	error = ENOENT;
+fail:
+	/* Do not leave any buffer held on the way out. */
+	if (bp2 != NULL)
+		brelse(bp2);
+	if (bp != NULL)
+		brelse(bp);
+	return (error);
+
+found:
+	req->entrynum = pos;
+	req->bp_desc = bp;
+	req->bp_bitmap = bp2;
+	DPRINTF(ALLOC, ("%s: desc: %p bitmap: %p entry: %#jx\n",
+	    __func__, req->bp_desc, req->bp_bitmap, (uintmax_t)pos));
+
+	return (0);
+}
+
+/*
+ * Read the descriptor, bitmap and entry blocks that cover req->entrynum
+ * into req->bp_desc, req->bp_bitmap and req->bp_entry.
+ * Returns 0 on success.  On a read error the buffers obtained so far,
+ * including the failing one, are released and the error is returned.
+ */
+int
+nandfs_find_entry(struct nandfs_mdt* mdt, struct nandfs_node *nnode,
+    struct nandfs_alloc_request *req)
+{
+	uint64_t dblock, bblock, eblock;
+	uint32_t offset;
+	int error, stage;
+
+	nandfs_mdt_trans_blk(mdt, req->entrynum, &dblock, &bblock, &eblock,
+	    &offset);
+
+	/* 'stage' counts how many of the three reads were attempted. */
+	stage = 1;
+	error = nandfs_bread(nnode, dblock, NOCRED, 0, &req->bp_desc);
+	if (error != 0)
+		goto fail;
+
+	stage = 2;
+	error = nandfs_bread(nnode, bblock, NOCRED, 0, &req->bp_bitmap);
+	if (error != 0)
+		goto fail;
+
+	stage = 3;
+	error = nandfs_bread(nnode, eblock, NOCRED, 0, &req->bp_entry);
+	if (error != 0)
+		goto fail;
+
+	DPRINTF(ALLOC,
+	    ("%s: desc_buf: %p bitmap_buf %p entry_buf %p offset %x\n",
+	    __func__, req->bp_desc, req->bp_bitmap, req->bp_entry, offset));
+
+	return (0);
+
+fail:
+	brelse(req->bp_desc);
+	if (stage >= 2)
+		brelse(req->bp_bitmap);
+	if (stage >= 3)
+		brelse(req->bp_entry);
+	return (error);
+}
+
+/*
+ * Split an entry number into its position inside one descriptor block:
+ * '*group' is the group index within the descriptor block, '*bitmap_idx'
+ * the 32-bit word of the group bitmap and '*bitmap_off' the bit in it.
+ */
+static __inline void
+nandfs_calc_idx_entry(struct nandfs_mdt* mdt, uint32_t entrynum,
+    uint64_t *group, uint64_t *bitmap_idx, uint64_t *bitmap_off)
+{
+	const uint32_t word_bits = sizeof(uint32_t) * __CHAR_BIT;
+	uint32_t in_desc, in_group;
+
+	/* Entry number relative to the start of its descriptor block. */
+	in_desc = entrynum %
+	    (mdt->entries_per_group * mdt->groups_per_desc_block);
+	*group = in_desc / mdt->entries_per_group;
+
+	/* Entry number relative to the start of its group. */
+	in_group = in_desc % mdt->entries_per_group;
+	*bitmap_idx = in_group / word_bits;
+	*bitmap_off = in_group % word_bits;
+}
+
+/*
+ * Mark entry req->entrynum as free: increment the free counter of its group
+ * in the descriptor block and clear its bit in the group bitmap.  The
+ * buffers in 'req' (bp_desc, bp_bitmap, bp_entry) are expected to be set up
+ * by the caller.
+ * Returns 0 on success, or -1 when the descriptor buffer could not be
+ * dirtied; the bitmap and entry buffers are released here in that case.
+ */
+int
+nandfs_free_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
+{
+	struct nandfs_block_group_desc *descriptors;
+	uint64_t bitmap_idx, bitmap_off;
+	uint64_t group;
+	uint32_t *mask, maskrw, bit;
+
+	nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
+	    &bitmap_off);
+
+	DPRINTF(ALLOC, ("nandfs_free_entry: req->entrynum=%jx bitmap_idx=%jx"
+	    " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
+	    (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
+
+	/* Update counter of free entries for group */
+	descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
+	descriptors[group].bg_nfrees++;
+
+	/*
+	 * Clear bit to indicate that entry is free.  The mask is built from
+	 * an unsigned one: bitmap_off may be 31 and shifting a signed 1 into
+	 * the sign bit is undefined.
+	 */
+	bit = (uint32_t)1 << bitmap_off;
+	mask = (uint32_t *)req->bp_bitmap->b_data;
+	maskrw = mask[bitmap_idx];
+	KASSERT(maskrw & bit, ("freeing unallocated vblock"));
+	maskrw &= ~bit;
+	mask[bitmap_idx] = maskrw;
+
+	/* Make descriptor, bitmap and entry buffer dirty */
+	if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
+		nandfs_dirty_buf(req->bp_bitmap, 1);
+		nandfs_dirty_buf(req->bp_entry, 1);
+	} else {
+		brelse(req->bp_bitmap);
+		brelse(req->bp_entry);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Mark entry req->entrynum as allocated: decrement the free counter of its
+ * group in the descriptor block and set its bit in the group bitmap.  The
+ * buffers in 'req' (bp_desc, bp_bitmap, bp_entry) are expected to be set up
+ * by the caller.
+ * Returns 0 on success, or -1 when the descriptor buffer could not be
+ * dirtied; the bitmap and entry buffers are released here in that case.
+ */
+int
+nandfs_alloc_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req)
+{
+	struct nandfs_block_group_desc *descriptors;
+	uint64_t bitmap_idx, bitmap_off;
+	uint64_t group;
+	uint32_t *mask, maskrw;
+
+	nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx,
+	    &bitmap_off);
+
+	DPRINTF(ALLOC, ("nandfs_alloc_entry: req->entrynum=%jx bitmap_idx=%jx"
+	    " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum,
+	    (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group));
+
+	/* Update counter of free entries for group */
+	descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data;
+	descriptors[group].bg_nfrees--;
+
+	/*
+	 * Set bit to indicate that entry is taken.  Shift an unsigned one:
+	 * bitmap_off may be 31 and "1 << 31" on a signed int is undefined.
+	 */
+	mask = (uint32_t *)req->bp_bitmap->b_data;
+	maskrw = mask[bitmap_idx];
+	maskrw |= (uint32_t)1 << bitmap_off;
+	mask[bitmap_idx] = maskrw;
+
+	/* Make descriptor, bitmap and entry buffer dirty */
+	if (nandfs_dirty_buf(req->bp_desc, 0) == 0) {
+		nandfs_dirty_buf(req->bp_bitmap, 1);
+		nandfs_dirty_buf(req->bp_entry, 1);
+	} else {
+		brelse(req->bp_bitmap);
+		brelse(req->bp_entry);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Give up an allocation request: release the descriptor, bitmap and entry
+ * buffers of 'req', in that order, without dirtying them.
+ */
+void
+nandfs_abort_entry(struct nandfs_alloc_request *req)
+{
+	struct buf *held[] = { req->bp_desc, req->bp_bitmap, req->bp_entry };
+	unsigned int n;
+
+	for (n = 0; n < sizeof(held) / sizeof(held[0]); n++)
+		brelse(held[n]);
+}
+
+/*
+ * Obtain the buffer of the block that stores entry req->entrynum and hand
+ * it back in req->bp_entry; '*entry' receives the value computed by
+ * nandfs_mdt_trans() for that entry.  A block inside the current block
+ * count of the file is read; one beyond it is created when 'create' is
+ * non-zero and rejected with E2BIG otherwise.
+ * Returns 0 on success or an errno value; no buffer stays held on failure.
+ */
+int
+nandfs_get_entry_block(struct nandfs_mdt *mdt, struct nandfs_node *node,
+    struct nandfs_alloc_request *req, uint32_t *entry, int create)
+{
+	struct buf *bp = NULL;
+	nandfs_lbn_t blocknr;
+	int error;
+
+	/* Find buffer number for given entry */
+	nandfs_mdt_trans(mdt, req->entrynum, &blocknr, entry);
+	DPRINTF(ALLOC, ("%s: ino %#jx entrynum:%#jx block:%#jx entry:%x\n",
+	    __func__, (uintmax_t)node->nn_ino, (uintmax_t)req->entrynum,
+	    (uintmax_t)blocknr, *entry));
+
+	if (blocknr < node->nn_inode.i_blocks) {
+		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
+	} else if (!create) {
+		error = E2BIG;
+	} else {
+		error = nandfs_bcreate(node, blocknr, NOCRED, 0, &bp);
+	}
+
+	if (error == 0) {
+		MPASS(nandfs_vblk_get(bp) != 0 ||
+		    node->nn_ino == NANDFS_DAT_INO);
+
+		req->bp_entry = bp;
+		return (0);
+	}
+
+	DPRINTF(ALLOC, ("%s: ino %#jx block %#jx entry %x error %d\n",
+	    __func__, (uintmax_t)node->nn_ino, (uintmax_t)blocknr,
+	    *entry, error));
+	/* The helpers may or may not have handed a buffer back. */
+	if (bp != NULL)
+		brelse(bp);
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_bmap.c b/sys/fs/nandfs/nandfs_bmap.c
new file mode 100644
index 0000000..9f800b8
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_bmap.c
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/ktr.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+#include "bmap.h"
+
+/* Return the value of get_maxfilesize() for 'fsdev'. */
+nandfs_lbn_t
+nandfs_get_maxfilesize(struct nandfs_device *fsdev)
+{
+	nandfs_lbn_t limit;
+
+	limit = get_maxfilesize(fsdev);
+	return (limit);
+}
+
+/*
+ * Translate logical block 'lblk' of 'node' and store the result in '*vblk'.
+ * For the GC inode a non-negative logical block number is returned
+ * unchanged; everything else goes through bmap_lookup().
+ * Returns 0 or the error of bmap_lookup(), which is also logged.
+ */
+int
+nandfs_bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk,
+    nandfs_daddr_t *vblk)
+{
+	int error;
+
+	if (node->nn_ino != NANDFS_GC_INO || lblk < 0) {
+		error = bmap_lookup(node, lblk, vblk);
+	} else {
+		*vblk = lblk;
+		error = 0;
+	}
+
+	DPRINTF(TRANSLATE, ("%s: error %d ino %#jx lblocknr %#jx -> %#jx\n",
+	    __func__, error, (uintmax_t)node->nn_ino, (uintmax_t)lblk,
+	    (uintmax_t)*vblk));
+
+	if (error != 0)
+		nandfs_error("%s: returned %d", __func__, error);
+
+	return (error);
+}
+
+/*
+ * Map logical block 'lblk' of 'node' for buffer 'bp'.  For every inode but
+ * the DAT a new virtual block is allocated; the DAT uses virtual block 0.
+ * The buffer is tagged NANDFS_VBLK_ASSIGNED and gets the block number
+ * before the mapping is inserted.
+ * Returns 0 on success or an errno value.  A virtual block allocated here
+ * is given back when inserting the mapping fails.
+ */
+int
+nandfs_bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk,
+    struct buf *bp)
+{
+	struct nandfs_device *fsdev;
+	nandfs_daddr_t vblk;
+	int error;
+
+	fsdev = node->nn_nandfsdev;
+
+	vblk = 0;
+	if (node->nn_ino != NANDFS_DAT_INO) {
+		error = nandfs_vblock_alloc(fsdev, &vblk);
+		if (error)
+			return (error);
+	}
+
+	nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED);
+	nandfs_vblk_set(bp, vblk);
+
+	error = bmap_insert_block(node, lblk, vblk);
+	if (error) {
+		/*
+		 * Only undo an allocation that really happened: for the
+		 * DAT inode nothing was allocated and vblk is still 0.
+		 */
+		if (node->nn_ino != NANDFS_DAT_INO)
+			nandfs_vblock_free(fsdev, vblk);
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Call bmap_dirty_meta() for the logical block of 'bp' in 'node', passing
+ * 'force' through.  A failure is logged and returned.
+ */
+int
+nandfs_bmap_dirty_blocks(struct nandfs_node *node, struct buf *bp, int force)
+{
+	int error;
+
+	error = bmap_dirty_meta(node, bp->b_lblkno, force);
+	if (error == 0)
+		return (0);
+
+	nandfs_error("%s: cannot dirty buffer %p\n",
+	    __func__, bp);
+	return (error);
+}
+
+/*
+ * Point logical block 'lblk' of 'node' at block 'blknr' by means of
+ * bmap_insert_block() and return its result.
+ */
+static int
+nandfs_bmap_update_mapping(struct nandfs_node *node, nandfs_lbn_t lblk,
+    nandfs_daddr_t blknr)
+{
+
+	DPRINTF(BMAP,
+	    ("%s: node: %p ino: %#jx lblk: %#jx vblk: %#jx\n",
+	    __func__, node, (uintmax_t)node->nn_ino, (uintmax_t)lblk,
+	    (uintmax_t)blknr));
+
+	return (bmap_insert_block(node, lblk, blknr));
+}
+
+/*
+ * Record 'blknr' as the block of buffer 'bp' and update the mapping of the
+ * buffer's logical block in 'node' accordingly.
+ * Returns 0 on success; a mapping failure is logged and returned.
+ */
+int
+nandfs_bmap_update_block(struct nandfs_node *node, struct buf *bp,
+    nandfs_lbn_t blknr)
+{
+	nandfs_lbn_t lblk = bp->b_lblkno;
+	int error;
+
+	nandfs_vblk_set(bp, blknr);
+
+	DPRINTF(BMAP, ("%s: node: %p ino: %#jx bp: %p lblk: %#jx blk: %#jx\n",
+	    __func__, node, (uintmax_t)node->nn_ino, bp,
+	    (uintmax_t)lblk, (uintmax_t)blknr));
+
+	error = nandfs_bmap_update_mapping(node, lblk, blknr);
+	if (error != 0)
+		nandfs_error("%s: cannot update lblk:%jx to blk:%jx for "
+		    "node:%p, error:%d\n", __func__, (uintmax_t)lblk,
+		    (uintmax_t)blknr, node, error);
+
+	return (error);
+}
+
+/*
+ * Give buffer 'bp' of 'node' a new virtual block and end the life of the
+ * previous one, 'oldblk'.  Nothing is done for the DAT inode itself.  A
+ * buffer flagged NANDFS_VBLK_ASSIGNED only has the flag cleared, because
+ * its virtual block was assigned already.
+ * Returns 0 on success or an errno value.
+ */
+int
+nandfs_bmap_update_dat(struct nandfs_node *node, nandfs_daddr_t oldblk,
+    struct buf *bp)
+{
+	struct nandfs_device *fsdev;
+	nandfs_daddr_t vblk = 0;
+	int error;
+
+	if (node->nn_ino == NANDFS_DAT_INO)
+		return (0);
+
+	if (nandfs_buf_check(bp, NANDFS_VBLK_ASSIGNED)) {
+		nandfs_buf_clear(bp, NANDFS_VBLK_ASSIGNED);
+		return (0);
+	}
+
+	fsdev = node->nn_nandfsdev;
+
+	/* First alloc new virtual block.... */
+	error = nandfs_vblock_alloc(fsdev, &vblk);
+	if (error)
+		return (error);
+
+	error = nandfs_bmap_update_block(node, bp, vblk);
+	if (error) {
+		/*
+		 * Give the fresh virtual block back instead of leaking it,
+		 * as nandfs_bmap_insert_block() does on the same failure.
+		 */
+		nandfs_vblock_free(fsdev, vblk);
+		return (error);
+	}
+
+	/* Then we can end up with old one */
+	nandfs_vblock_end(fsdev, oldblk);
+
+	DPRINTF(BMAP,
+	    ("%s: ino %#jx block %#jx: update vblk %#jx to %#jx\n",
+	    __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno,
+	    (uintmax_t)oldblk, (uintmax_t)vblk));
+	return (error);
+}
+
+/*
+ * Shrink the block mapping of 'node': 'oblk' and the difference
+ * 'oblk - nblk' are handed to bmap_truncate_mapping().
+ * Returns the result of bmap_truncate_mapping().
+ */
+int
+nandfs_bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t oblk,
+    nandfs_lbn_t nblk)
+{
+	nandfs_lbn_t todo;
+
+	todo = oblk - nblk;
+
+	/* %jx takes uintmax_t, so cast like the rest of the file does. */
+	DPRINTF(BMAP, ("%s: node %p oblk %jx nblk %jx truncate by %jx\n",
+	    __func__, node, (uintmax_t)oblk, (uintmax_t)nblk,
+	    (uintmax_t)todo));
+
+	return (bmap_truncate_mapping(node, oblk, todo));
+}
diff --git a/sys/fs/nandfs/nandfs_buffer.c b/sys/fs/nandfs/nandfs_buffer.c
new file mode 100644
index 0000000..b0d72668
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_buffer.c
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/buf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/bio.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * geteblk() wrapper that never returns NULL.
+ *
+ * XXX
+ * geteblk() may be called with GB_NOWAIT_BD, in which case it can return
+ * NULL.  The callers cannot cope with that, so panic instead.
+ */
+struct buf *
+nandfs_geteblk(int size, int flags)
+{
+	struct buf *bp = geteblk(size, flags);
+
+	if (bp != NULL)
+		return (bp);
+
+	panic("geteblk returned NULL");
+}
+
+/* Add one to fsdev->nd_dirty_bufs while holding fsdev->nd_mutex. */
+void
+nandfs_dirty_bufs_increment(struct nandfs_device *fsdev)
+{
+
+	mtx_lock(&fsdev->nd_mutex);
+	KASSERT(fsdev->nd_dirty_bufs >= 0, ("negative nd_dirty_bufs"));
+	fsdev->nd_dirty_bufs += 1;
+	mtx_unlock(&fsdev->nd_mutex);
+}
+
+/*
+ * Subtract one from fsdev->nd_dirty_bufs while holding fsdev->nd_mutex.
+ * The counter must be positive on entry.
+ */
+void
+nandfs_dirty_bufs_decrement(struct nandfs_device *fsdev)
+{
+
+	mtx_lock(&fsdev->nd_mutex);
+	KASSERT(fsdev->nd_dirty_bufs > 0,
+	    ("decrementing not-positive nd_dirty_bufs"));
+	fsdev->nd_dirty_bufs -= 1;
+	mtx_unlock(&fsdev->nd_mutex);
+}
diff --git a/sys/fs/nandfs/nandfs_cleaner.c b/sys/fs/nandfs/nandfs_cleaner.c
new file mode 100644
index 0000000..9257c10
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_cleaner.c
@@ -0,0 +1,621 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/buf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/bio.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+#define NANDFS_CLEANER_KILL 1
+
+static void nandfs_cleaner(struct nandfs_device *);
+static int nandfs_cleaner_clean_segments(struct nandfs_device *,
+ struct nandfs_vinfo *, uint32_t, struct nandfs_period *, uint32_t,
+ struct nandfs_bdesc *, uint32_t, uint64_t *, uint32_t);
+
+static int
+nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+ uint64_t nmembs);
+
+/*
+ * Wake the cleaner thread if it is not marked as cleaning, then block on
+ * nd_clean_cv until the cleaner broadcasts.  With NANDFS_CLEANER_KILL as
+ * 'reason' the cleaner is additionally told to exit.
+ */
+static void
+nandfs_wakeup_wait_cleaner(struct nandfs_device *fsdev, int reason)
+{
+
+	mtx_lock(&fsdev->nd_clean_mtx);
+
+	if (reason == NANDFS_CLEANER_KILL)
+		fsdev->nd_cleaner_exit = 1;
+
+	/* The cleaner sleeps on &nd_cleaning between runs. */
+	if (!fsdev->nd_cleaning) {
+		fsdev->nd_cleaning = 1;
+		wakeup(&fsdev->nd_cleaning);
+	}
+
+	cv_wait(&fsdev->nd_clean_cv, &fsdev->nd_clean_mtx);
+
+	mtx_unlock(&fsdev->nd_clean_mtx);
+}
+
+/*
+ * Create the "nandfs_cleaner" kernel thread for 'fsdev'; the thread handle
+ * is stored in fsdev->nd_cleaner.  No cleaner may be running yet.
+ * Returns 0 or the error of kthread_add(), which is also printed.
+ */
+int
+nandfs_start_cleaner(struct nandfs_device *fsdev)
+{
+	int error;
+
+	MPASS(fsdev->nd_cleaner == NULL);
+
+	fsdev->nd_cleaner_exit = 0;
+
+	error = kthread_add((void(*)(void *))nandfs_cleaner, fsdev, NULL,
+	    &fsdev->nd_cleaner, 0, 0, "nandfs_cleaner");
+	if (error == 0)
+		return (0);
+
+	printf("nandfs: could not start cleaner: %d\n", error);
+	return (error);
+}
+
+/*
+ * Ask the cleaner thread of 'fsdev' to exit, wait for its broadcast and
+ * forget the thread handle.  A cleaner must be running.  Always returns 0.
+ */
+int
+nandfs_stop_cleaner(struct nandfs_device *fsdev)
+{
+
+	MPASS(fsdev->nd_cleaner != NULL);
+
+	nandfs_wakeup_wait_cleaner(fsdev, NANDFS_CLEANER_KILL);
+	fsdev->nd_cleaner = NULL;
+	DPRINTF(CLEAN, ("cleaner stopped\n"));
+
+	return (0);
+}
+
+/*
+ * End-of-run bookkeeping of the cleaner thread: clear nd_cleaning, sleep
+ * on &nd_cleaning for up to nandfs_cleaner_interval seconds unless an exit
+ * was requested, then broadcast nd_clean_cv.
+ * Returns 1 when the cleaner has to exit, 0 when it should run again.
+ */
+static int
+nandfs_cleaner_finished(struct nandfs_device *fsdev)
+{
+	int leaving;
+
+	mtx_lock(&fsdev->nd_clean_mtx);
+	fsdev->nd_cleaning = 0;
+	if (fsdev->nd_cleaner_exit == 0) {
+		DPRINTF(CLEAN, ("%s: sleep\n", __func__));
+		msleep(&fsdev->nd_cleaning, &fsdev->nd_clean_mtx, PRIBIO, "-",
+		    hz * nandfs_cleaner_interval);
+	}
+	/* Sample the flag only after the sleep: it may have been set. */
+	leaving = fsdev->nd_cleaner_exit;
+	cv_broadcast(&fsdev->nd_clean_cv);
+	mtx_unlock(&fsdev->nd_clean_mtx);
+
+	if (!leaving)
+		return (0);
+
+	DPRINTF(CLEAN, ("%s: no longer active\n", __func__));
+	return (1);
+}
+
+/*
+ * Debug helper: print number, last modification, the active/dirty/error
+ * flags ('a', 'd', 'e' or '-') and block count of 'nsegs' segment usage
+ * records.
+ */
+static void
+print_suinfo(struct nandfs_suinfo *suinfo, int nsegs)
+{
+	int i;
+
+	for (i = 0; i < nsegs; i++) {
+		/* %jx and %jd take (u)intmax_t, hence the casts. */
+		DPRINTF(CLEAN, ("%jx %jd %c%c%c %10u\n",
+		    (uintmax_t)suinfo[i].nsi_num,
+		    (intmax_t)suinfo[i].nsi_lastmod,
+		    (suinfo[i].nsi_flags & NANDFS_SEGMENT_USAGE_ACTIVE) ?
+		    'a' : '-',
+		    (suinfo[i].nsi_flags & NANDFS_SEGMENT_USAGE_DIRTY) ?
+		    'd' : '-',
+		    (suinfo[i].nsi_flags & NANDFS_SEGMENT_USAGE_ERROR) ?
+		    'e' : '-',
+		    suinfo[i].nsi_blocks));
+	}
+}
+
+/*
+ * Decide whether the virtual block described by 'vinfo' is still needed.
+ * It is when its lifetime [nvi_start, nvi_end] reaches the last checkpoint
+ * of the device (nd_last_cno) or contains the number of one of the 'ncps'
+ * checkpoints in 'cp'.  The binary search requires 'cp' to be sorted by
+ * ascending nci_cno.
+ * Returns 1 for a live block and 0 for a dead one.
+ */
+static int
+nandfs_cleaner_vblock_is_alive(struct nandfs_device *fsdev,
+    struct nandfs_vinfo *vinfo, struct nandfs_cpinfo *cp, uint32_t ncps)
+{
+	int64_t idx, min, max;
+
+	if (vinfo->nvi_end >= fsdev->nd_last_cno)
+		return (1);
+
+	if (ncps == 0)
+		return (0);
+
+	/* Lifetime entirely before the first or after the last entry. */
+	if (vinfo->nvi_end < cp[0].nci_cno ||
+	    vinfo->nvi_start > cp[ncps - 1].nci_cno)
+		return (0);
+
+	min = 0;
+	max = ncps - 1;
+	while (min <= max) {
+		idx = min + (max - min) / 2;
+		if (vinfo->nvi_start == cp[idx].nci_cno)
+			return (1);
+		if (vinfo->nvi_start < cp[idx].nci_cno)
+			max = idx - 1;
+		else
+			min = idx + 1;
+	}
+
+	/*
+	 * 'min' now indexes the first entry newer than nvi_start; the block
+	 * is alive only if it lived long enough to be part of that one.
+	 * The last probed index must not be used here: it may be an entry
+	 * older than nvi_start, which would always compare as alive.
+	 */
+	if (min >= (int64_t)ncps)
+		return (0);
+
+	return (vinfo->nvi_end >= cp[min].nci_cno);
+}
+
+/*
+ * Fill in nvi_alive for each of the 'nmembs' records in 'vinfo' with the
+ * verdict of nandfs_cleaner_vblock_is_alive().
+ */
+static void
+nandfs_cleaner_vinfo_mark_alive(struct nandfs_device *fsdev,
+    struct nandfs_vinfo *vinfo, uint32_t nmembs, struct nandfs_cpinfo *cp,
+    uint32_t ncps)
+{
+	struct nandfs_vinfo *vi, *end;
+
+	end = vinfo + nmembs;
+	for (vi = vinfo; vi < end; vi++) {
+		vi->nvi_alive =
+		    nandfs_cleaner_vblock_is_alive(fsdev, vi, cp, ncps);
+	}
+}
+
+/*
+ * A block descriptor is alive when the block number recorded for it
+ * (bd_oblocknr) equals its current block number (bd_blocknr).
+ * Returns non-zero for a live descriptor.
+ */
+static int
+nandfs_cleaner_bdesc_is_alive(struct nandfs_device *fsdev,
+    struct nandfs_bdesc *bdesc)
+{
+	int alive;
+
+	alive = bdesc->bd_oblocknr == bdesc->bd_blocknr;
+	/*
+	 * Sanity check on the distance between the two block numbers.  It
+	 * is computed without abs(): abs() takes an int and would truncate
+	 * the difference of two wide block numbers.
+	 */
+	if (!alive)
+		MPASS((bdesc->bd_oblocknr > bdesc->bd_blocknr ?
+		    bdesc->bd_oblocknr - bdesc->bd_blocknr :
+		    bdesc->bd_blocknr - bdesc->bd_oblocknr) > 2);
+
+	return (alive);
+}
+
+/*
+ * Fill in bd_alive for each of the 'nmembs' descriptors in 'bdesc' with
+ * the verdict of nandfs_cleaner_bdesc_is_alive().
+ */
+static void
+nandfs_cleaner_bdesc_mark_alive(struct nandfs_device *fsdev,
+    struct nandfs_bdesc *bdesc, uint32_t nmembs)
+{
+	struct nandfs_bdesc *bd, *end;
+
+	end = bdesc + nmembs;
+	for (bd = bdesc; bd < end; bd++)
+		bd->bd_alive = nandfs_cleaner_bdesc_is_alive(fsdev, bd);
+}
+
+/*
+ * Walk the block infos of one partial segment that starts at block 'blk'.
+ * Blocks of the DAT inode append a block descriptor at '*bdpp', all other
+ * blocks append a vinfo record at '*vipp'; both cursors are advanced past
+ * what was written.
+ */
+static void
+nandfs_cleaner_iterate_psegment(struct nandfs_device *fsdev,
+    struct nandfs_segment_summary *segsum, union nandfs_binfo *binfo,
+    nandfs_daddr_t blk, struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp)
+{
+	struct nandfs_bdesc *bd;
+	struct nandfs_vinfo *vi;
+	int i;
+
+	DPRINTF(CLEAN, ("%s nbinfos %x\n", __func__, segsum->ss_nbinfos));
+	for (i = 0; i < segsum->ss_nbinfos; i++) {
+		if (binfo[i].bi_v.bi_ino != NANDFS_DAT_INO) {
+			vi = *vipp;
+			vi->nvi_ino = binfo[i].bi_v.bi_ino;
+			vi->nvi_vblocknr = binfo[i].bi_v.bi_vblocknr;
+			*vipp = vi + 1;
+			continue;
+		}
+
+		bd = *bdpp;
+		bd->bd_oblocknr = blk + segsum->ss_nblocks -
+		    segsum->ss_nbinfos + i;
+		/*
+		 * XXX Hack
+		 */
+		if (segsum->ss_flags & NANDFS_SS_SR)
+			bd->bd_oblocknr--;
+		bd->bd_level = binfo[i].bi_dat.bi_level;
+		bd->bd_offset = binfo[i].bi_dat.bi_blkoff;
+		*bdpp = bd + 1;
+	}
+}
+
+/*
+ * Walk every partial segment of segment 'segno' and collect its blocks via
+ * nandfs_cleaner_iterate_psegment() into '*vipp' and '*bdpp'.
+ *
+ * '*select' is set to 1 when the whole segment was walked.  It stays 0 when
+ * a summary cannot be read (the read error is returned) or when a summary
+ * is invalid (0 is returned, so the caller merely skips the segment).
+ */
+static int
+nandfs_cleaner_iterate_segment(struct nandfs_device *fsdev, uint64_t segno,
+    struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp, int *select)
+{
+	struct nandfs_segment_summary *segsum;
+	union nandfs_binfo *binfo;
+	struct buf *bp;
+	uint32_t nblocks;
+	nandfs_daddr_t curr, start, end;
+	int error;
+
+	nandfs_get_segment_range(fsdev, segno, &start, &end);
+
+	/* %jx takes uintmax_t, hence the casts here and below. */
+	DPRINTF(CLEAN, ("%s: segno %jx start %jx end %jx\n", __func__,
+	    (uintmax_t)segno, (uintmax_t)start, (uintmax_t)end));
+
+	*select = 0;
+
+	/* Each summary tells how many blocks its partial segment spans. */
+	for (curr = start; curr < end; curr += nblocks) {
+		error = nandfs_dev_bread(fsdev, curr, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			nandfs_error("%s: couldn't load segment summary of %jx: %d\n",
+			    __func__, (uintmax_t)segno, error);
+			return (error);
+		}
+
+		segsum = (struct nandfs_segment_summary *)bp->b_data;
+		binfo = (union nandfs_binfo *)(bp->b_data + segsum->ss_bytes);
+
+		if (!nandfs_segsum_valid(segsum)) {
+			brelse(bp);
+			nandfs_error("nandfs: invalid summary of segment %jx\n",
+			    (uintmax_t)segno);
+			/* Not a failure: the segment is just not selected. */
+			return (0);
+		}
+
+		DPRINTF(CLEAN, ("%s: %jx magic %x bytes %x nblocks %x nbinfos "
+		    "%x\n", __func__, (uintmax_t)segno, segsum->ss_magic,
+		    segsum->ss_bytes, segsum->ss_nblocks, segsum->ss_nbinfos));
+
+		nandfs_cleaner_iterate_psegment(fsdev, segsum, binfo, curr,
+		    vipp, bdpp);
+		nblocks = segsum->ss_nblocks;
+		brelse(bp);
+	}
+
+	*select = 1;
+
+	return (0);
+}
+
+/*
+ * Pick up to 'nsegs' segments to clean, starting the search at segment
+ * '*rseg' and wrapping to segment 0 when nothing is found behind it.  Only
+ * segments that are dirty and neither active, in error nor already being
+ * garbage collected are requested from the segment usage file.
+ *
+ * The chosen segment numbers are appended at '*segpp', which is advanced,
+ * and '*rseg' is moved behind the last chosen segment.  When no segment
+ * qualifies, '*segpp' and '*rseg' are left as they are.
+ * Returns 0 or the error of nandfs_get_segment_info_filter().
+ */
+static int
+nandfs_cleaner_choose_segment(struct nandfs_device *fsdev, uint64_t **segpp,
+    uint64_t nsegs, uint64_t *rseg)
+{
+	struct nandfs_suinfo *suinfo;
+	uint64_t i, ssegs;
+	int error;
+
+	suinfo = malloc(sizeof(*suinfo) * nsegs, M_NANDFSTEMP,
+	    M_ZERO | M_WAITOK);
+
+	if (*rseg >= fsdev->nd_fsdata.f_nsegments)
+		*rseg = 0;
+
+retry:
+	error = nandfs_get_segment_info_filter(fsdev, suinfo, nsegs, *rseg,
+	    &ssegs, NANDFS_SEGMENT_USAGE_DIRTY,
+	    NANDFS_SEGMENT_USAGE_ACTIVE | NANDFS_SEGMENT_USAGE_ERROR |
+	    NANDFS_SEGMENT_USAGE_GC);
+	if (error) {
+		nandfs_error("%s:%d", __FILE__, __LINE__);
+		goto out;
+	}
+
+	if (ssegs == 0 && *rseg != 0) {
+		*rseg = 0;
+		goto retry;
+	}
+
+	/*
+	 * Nothing to clean at all.  Leave before suinfo[ssegs - 1] is
+	 * touched: with ssegs == 0 that would read in front of the array.
+	 */
+	if (ssegs == 0)
+		goto out;
+
+	print_suinfo(suinfo, ssegs);
+
+	for (i = 0; i < ssegs; i++) {
+		(**segpp) = suinfo[i].nsi_num;
+		(*segpp)++;
+	}
+
+	*rseg = suinfo[ssegs - 1].nsi_num + 1;
+out:
+	free(suinfo, M_NANDFSTEMP);
+
+	return (error);
+}
+
+/*
+ * One run of the cleaner:
+ *  1. choose up to nandfs_cleaner_segments segments, starting at '*rseg';
+ *  2. collect the blocks of every chosen segment and mark it for GC;
+ *  3. fetch the snapshot list when snapshots exist;
+ *  4. under the write lock, find out which blocks are still alive and hand
+ *     everything to nandfs_cleaner_clean_segments().
+ * Returns 0 on success (also when there was nothing to do) or an errno.
+ */
+static int
+nandfs_cleaner_body(struct nandfs_device *fsdev, uint64_t *rseg)
+{
+	struct nandfs_vinfo *vinfo, *vip, *vipi;
+	struct nandfs_bdesc *bdesc, *bdp, *bdpi;
+	struct nandfs_cpstat cpstat;
+	struct nandfs_cpinfo *cpinfo = NULL;
+	uint64_t *segnums, *segp;
+	int select, selected;
+	int error = 0;
+	int nsegs;
+	int i;
+
+	nsegs = nandfs_cleaner_segments;
+
+	/* Worst case: every block of every chosen segment gets a record. */
+	vip = vinfo = malloc(sizeof(*vinfo) *
+	    fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP,
+	    M_ZERO | M_WAITOK);
+	bdp = bdesc = malloc(sizeof(*bdesc) *
+	    fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP,
+	    M_ZERO | M_WAITOK);
+	segp = segnums = malloc(sizeof(*segnums) * nsegs, M_NANDFSTEMP,
+	    M_WAITOK);
+
+	error = nandfs_cleaner_choose_segment(fsdev, &segp, nsegs, rseg);
+	if (error) {
+		nandfs_error("%s:%d", __FILE__, __LINE__);
+		goto out;
+	}
+
+	/* No candidate segments. */
+	if (segnums == segp)
+		goto out;
+
+	selected = 0;
+	for (i = 0; i < segp - segnums; i++) {
+		error = nandfs_cleaner_iterate_segment(fsdev, segnums[i], &vip,
+		    &bdp, &select);
+		if (error) {
+			/*
+			 * XXX deselect (see below)?
+			 */
+			goto out;
+		}
+		if (!select)
+			segnums[i] = NANDFS_NOSEGMENT;
+		else {
+			error = nandfs_markgc_segment(fsdev, segnums[i]);
+			if (error) {
+				nandfs_error("%s:%d\n", __FILE__, __LINE__);
+				goto out;
+			}
+			selected++;
+		}
+	}
+
+	if (selected == 0) {
+		MPASS(vinfo == vip);
+		MPASS(bdesc == bdp);
+		goto out;
+	}
+
+	error = nandfs_get_cpstat(fsdev->nd_cp_node, &cpstat);
+	if (error) {
+		nandfs_error("%s:%d\n", __FILE__, __LINE__);
+		goto out;
+	}
+
+	if (cpstat.ncp_nss != 0) {
+		cpinfo = malloc(sizeof(struct nandfs_cpinfo) * cpstat.ncp_nss,
+		    M_NANDFSTEMP, M_WAITOK);
+		error = nandfs_get_cpinfo(fsdev->nd_cp_node, 1, NANDFS_SNAPSHOT,
+		    cpinfo, cpstat.ncp_nss, NULL);
+		if (error) {
+			nandfs_error("%s:%d\n", __FILE__, __LINE__);
+			/*
+			 * The write lock is not taken yet, so leave through
+			 * the path that does not release it.
+			 */
+			goto out;
+		}
+	}
+
+	NANDFS_WRITELOCK(fsdev);
+	DPRINTF(CLEAN, ("%s: got lock\n", __func__));
+
+	error = nandfs_get_dat_vinfo(fsdev, vinfo, vip - vinfo);
+	if (error) {
+		nandfs_error("%s:%d\n", __FILE__, __LINE__);
+		goto out_locked;
+	}
+
+	nandfs_cleaner_vinfo_mark_alive(fsdev, vinfo, vip - vinfo, cpinfo,
+	    cpstat.ncp_nss);
+
+	error = nandfs_get_dat_bdescs(fsdev, bdesc, bdp - bdesc);
+	if (error) {
+		nandfs_error("%s:%d\n", __FILE__, __LINE__);
+		goto out_locked;
+	}
+
+	nandfs_cleaner_bdesc_mark_alive(fsdev, bdesc, bdp - bdesc);
+
+	DPRINTF(CLEAN, ("got:\n"));
+	for (vipi = vinfo; vipi < vip; vipi++) {
+		DPRINTF(CLEAN, ("v ino %jx vblocknr %jx start %jx end %jx "
+		    "alive %d\n", vipi->nvi_ino, vipi->nvi_vblocknr,
+		    vipi->nvi_start, vipi->nvi_end, vipi->nvi_alive));
+	}
+	for (bdpi = bdesc; bdpi < bdp; bdpi++) {
+		DPRINTF(CLEAN, ("b oblocknr %jx blocknr %jx offset %jx "
+		    "alive %d\n", bdpi->bd_oblocknr, bdpi->bd_blocknr,
+		    bdpi->bd_offset, bdpi->bd_alive));
+	}
+	DPRINTF(CLEAN, ("end list\n"));
+
+	error = nandfs_cleaner_clean_segments(fsdev, vinfo, vip - vinfo, NULL,
+	    0, bdesc, bdp - bdesc, segnums, segp - segnums);
+	if (error)
+		nandfs_error("%s:%d\n", __FILE__, __LINE__);
+
+out_locked:
+	NANDFS_WRITEUNLOCK(fsdev);
+out:
+	free(cpinfo, M_NANDFSTEMP);
+	free(segnums, M_NANDFSTEMP);
+	free(bdesc, M_NANDFSTEMP);
+	free(vinfo, M_NANDFSTEMP);
+
+	return (error);
+}
+
+/*
+ * Main loop of the cleaner kernel thread.  Each pass first goes through
+ * nandfs_cleaner_finished(), which sleeps and reports an exit request, and
+ * then runs nandfs_cleaner_body() unless cleaning is disabled or the system
+ * is rebooting.  The thread terminates itself when asked to exit.
+ */
+static void
+nandfs_cleaner(struct nandfs_device *fsdev)
+{
+	uint64_t checked_seg = 0;
+	int error;
+
+	for (;;) {
+		if (nandfs_cleaner_finished(fsdev))
+			break;
+
+		if (!nandfs_cleaner_enable || rebooting)
+			continue;
+
+		DPRINTF(CLEAN, ("%s: run started\n", __func__));
+
+		fsdev->nd_cleaning = 1;
+
+		/* checked_seg carries the search position between runs. */
+		error = nandfs_cleaner_body(fsdev, &checked_seg);
+
+		DPRINTF(CLEAN, ("%s: run finished error %d\n", __func__,
+		    error));
+	}
+
+	DPRINTF(CLEAN, ("%s: exiting\n", __func__));
+	kthread_exit();
+}
+
+/*
+ * Carry out the cleaning of a set of segments:
+ *  - every live vinfo block is read through the GC node, tagged with its
+ *    virtual block number and dirtied;
+ *  - the checkpoint ranges in 'pd' are deleted;
+ *  - the virtual blocks of dead vinfo records are freed;
+ *  - live DAT blocks are dirtied by nandfs_process_bdesc();
+ *  - the segment numbers are appended to the nd_free_base array.
+ * Returns 0 on success or the first error encountered.
+ */
+static int
+nandfs_cleaner_clean_segments(struct nandfs_device *nffsdev,
+    struct nandfs_vinfo *vinfo, uint32_t nvinfo,
+    struct nandfs_period *pd, uint32_t npd,
+    struct nandfs_bdesc *bdesc, uint32_t nbdesc,
+    uint64_t *segments, uint32_t nsegs)
+{
+	struct nandfs_node *gc;
+	struct buf *bp;
+	uint32_t i;
+	int error = 0;
+
+	gc = nffsdev->nd_gc_node;
+
+	DPRINTF(CLEAN, ("%s: enter\n", __func__));
+
+	VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
+	for (i = 0; i < nvinfo; i++) {
+		if (!vinfo[i].nvi_alive)
+			continue;
+		DPRINTF(CLEAN, ("%s: read vblknr:%#jx blk:%#jx\n",
+		    __func__, (uintmax_t)vinfo[i].nvi_vblocknr,
+		    (uintmax_t)vinfo[i].nvi_blocknr));
+		bp = NULL;
+		error = nandfs_bread(nffsdev->nd_gc_node, vinfo[i].nvi_blocknr,
+		    NULL, 0, &bp);
+		if (error) {
+			nandfs_error("%s:%d", __FILE__, __LINE__);
+			/*
+			 * Release the buffer of the failed read, as the
+			 * other nandfs_bread() callers in this file do.
+			 */
+			if (bp != NULL)
+				brelse(bp);
+			VOP_UNLOCK(NTOV(gc), 0);
+			goto out;
+		}
+		nandfs_vblk_set(bp, vinfo[i].nvi_vblocknr);
+		nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED);
+		nandfs_dirty_buf(bp, 1);
+	}
+	VOP_UNLOCK(NTOV(gc), 0);
+
+	/* Delete checkpoints */
+	for (i = 0; i < npd; i++) {
+		DPRINTF(CLEAN, ("delete checkpoint: %jx\n",
+		    (uintmax_t)pd[i].p_start));
+		error = nandfs_delete_cp(nffsdev->nd_cp_node, pd[i].p_start,
+		    pd[i].p_end);
+		if (error) {
+			nandfs_error("%s:%d", __FILE__, __LINE__);
+			goto out;
+		}
+	}
+
+	/* Update vblocks */
+	for (i = 0; i < nvinfo; i++) {
+		if (vinfo[i].nvi_alive)
+			continue;
+		DPRINTF(CLEAN, ("freeing vblknr: %jx\n",
+		    (uintmax_t)vinfo[i].nvi_vblocknr));
+		error = nandfs_vblock_free(nffsdev, vinfo[i].nvi_vblocknr);
+		if (error) {
+			nandfs_error("%s:%d", __FILE__, __LINE__);
+			goto out;
+		}
+	}
+
+	error = nandfs_process_bdesc(nffsdev, bdesc, nbdesc);
+	if (error) {
+		nandfs_error("%s:%d", __FILE__, __LINE__);
+		goto out;
+	}
+
+	/* Add segments to clean */
+	if (nffsdev->nd_free_count) {
+		/* M_WAITOK: the reallocation does not return NULL. */
+		nffsdev->nd_free_base = realloc(nffsdev->nd_free_base,
+		    (nffsdev->nd_free_count + nsegs) * sizeof(uint64_t),
+		    M_NANDFSTEMP, M_WAITOK | M_ZERO);
+		memcpy(&nffsdev->nd_free_base[nffsdev->nd_free_count], segments,
+		    nsegs * sizeof(uint64_t));
+		nffsdev->nd_free_count += nsegs;
+	} else {
+		nffsdev->nd_free_base = malloc(nsegs * sizeof(uint64_t),
+		    M_NANDFSTEMP, M_WAITOK|M_ZERO);
+		memcpy(nffsdev->nd_free_base, segments,
+		    nsegs * sizeof(uint64_t));
+		nffsdev->nd_free_count = nsegs;
+	}
+
+out:
+
+	DPRINTF(CLEAN, ("%s: exit error %d\n", __func__, error));
+
+	return (error);
+}
+
+/*
+ * Read and dirty every live DAT block named by the 'nmembs' descriptors in
+ * 'bd', with the DAT vnode locked exclusively for the duration.
+ * Descriptors with a non-zero level are read with nandfs_bread_meta() and
+ * additionally passed to nandfs_bmap_dirty_blocks(); level 0 descriptors
+ * are read with nandfs_bread().
+ * Returns 0 on success or the error of the failing read.
+ */
+static int
+nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+    uint64_t nmembs)
+{
+	struct nandfs_node *dat_node = nffsdev->nd_dat_node;
+	struct buf *bp;
+	uint64_t i;
+	int error;
+
+	VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);
+
+	for (i = 0; i < nmembs; i++) {
+		if (!bd[i].bd_alive)
+			continue;
+		DPRINTF(CLEAN, ("%s: idx %jx offset %jx\n",
+		    __func__, i, bd[i].bd_offset));
+		if (bd[i].bd_level == 0) {
+			error = nandfs_bread(dat_node, bd[i].bd_offset, NULL,
+			    0, &bp);
+			if (error) {
+				nandfs_error("%s: cannot read dat node\n",
+				    __func__);
+				goto fail;
+			}
+			nandfs_dirty_buf(bp, 1);
+		} else {
+			error = nandfs_bread_meta(dat_node, bd[i].bd_offset,
+			    NULL, 0, &bp);
+			if (error) {
+				nandfs_error("%s: cannot read dat node level:%d\n",
+				    __func__, bd[i].bd_level);
+				goto fail;
+			}
+			nandfs_dirty_buf_meta(bp, 1);
+			nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
+		}
+		DPRINTF(CLEAN, ("%s: bp: %p\n", __func__, bp));
+	}
+
+	VOP_UNLOCK(NTOV(dat_node), 0);
+
+	return (0);
+
+fail:
+	/* Shared error exit: drop the buffer, then the vnode lock. */
+	brelse(bp);
+	VOP_UNLOCK(NTOV(dat_node), 0);
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_cpfile.c b/sys/fs/nandfs/nandfs_cpfile.c
new file mode 100644
index 0000000..8814fc0
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_cpfile.c
@@ -0,0 +1,776 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+
+/*
+ * Return the size of one checkpoint entry as recorded in the f_checkpoint_size
+ * field of the device's fsdata.
+ */
+static int
+nandfs_checkpoint_size(struct nandfs_device *fsdev)
+{
+	int entry_size;
+
+	entry_size = fsdev->nd_fsdata.f_checkpoint_size;
+
+	return (entry_size);
+}
+
+/*
+ * Translate checkpoint number cn into its position in the checkpoint file.
+ *
+ * The file starts with a struct nandfs_cpfile_header, which occupies a whole
+ * number of checkpoint-sized slots; checkpoint 1 lives in the first slot
+ * after it.  On return *blk holds the file block containing the entry and
+ * *offset the byte offset of the entry inside that block.
+ *
+ * cn must not be zero (asserted).  Always returns 0.
+ */
+static int
+nandfs_checkpoint_blk_offset(struct nandfs_device *fsdev, uint64_t cn,
+    uint64_t *blk, uint64_t *offset)
+{
+	uint16_t entry_size, entries_per_blk;
+	uint64_t slot;
+
+	KASSERT(cn != 0, ("checkpoing cannot be zero"));
+
+	entry_size = fsdev->nd_fsdata.f_checkpoint_size;
+	entries_per_blk = fsdev->nd_blocksize / entry_size;
+
+	/* Slots consumed by the header, plus the zero-based entry index. */
+	slot = howmany(sizeof(struct nandfs_cpfile_header), entry_size) +
+	    (cn - 1);
+
+	*offset = (slot % entries_per_blk) * entry_size;
+	*blk = slot / entries_per_blk;
+
+	return (0);
+}
+
+/*
+ * Return how many whole checkpoint entries fit between byte 'offset' and the
+ * end of a block.  The cn and blk arguments are accepted but not used.
+ */
+static int
+nandfs_checkpoint_blk_remaining(struct nandfs_device *fsdev, uint64_t cn,
+    uint64_t blk, uint64_t offset)
+{
+	uint16_t entry_size, entries_left;
+	uint64_t bytes_left;
+
+	entry_size = fsdev->nd_fsdata.f_checkpoint_size;
+	bytes_left = fsdev->nd_blocksize - offset;
+
+	/* The count is deliberately kept in 16 bits, as before. */
+	entries_left = bytes_left / entry_size;
+
+	return (entries_left);
+}
+
+/*
+ * Prepare the checkpoint file for recording checkpoint cn.
+ *
+ * cn must be either the last checkpoint number known to the device or its
+ * direct successor.  The header block (block 0) is read and dirtied; when
+ * the entry for cn lives in another block, that block is read -- or created
+ * if it lies beyond the blocks counted in the inode -- and dirtied too.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+int
+nandfs_get_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node,
+    uint64_t cn)
+{
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1))
+		return (-1);
+
+	/* The header in block 0 is touched for every checkpoint. */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (-1);
+	}
+
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (-1);
+
+	nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset);
+
+	if (blk != 0) {
+		/*
+		 * The header buffer was passed to nandfs_dirty_buf() above
+		 * and nothing here releases it afterwards, so it is no
+		 * longer ours.  Forget the pointer: the "if (bp)" guard
+		 * below must only ever see a buffer obtained by the call
+		 * that just failed, never the stale header buffer.
+		 */
+		bp = NULL;
+
+		if (blk < cp_node->nn_inode.i_blocks)
+			error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		else
+			error = nandfs_bcreate(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			if (bp)
+				brelse(bp);
+			return (-1);
+		}
+
+		nandfs_dirty_buf(bp, 1);
+	}
+
+	DPRINTF(CPFILE, ("%s: cn:%#jx entry block:%#jx offset:%#jx\n",
+	    __func__, (uintmax_t)cn, (uintmax_t)blk, (uintmax_t)offset));
+
+	return (0);
+}
+
+/*
+ * Fill in the on-disk entry for checkpoint cn.
+ *
+ * cn must be the device's last checkpoint number or its direct successor;
+ * otherwise an error is logged and -1 is returned.  The header's checkpoint
+ * counter is incremented and the entry is initialised: flags cleared,
+ * checkpoints count set to 1, snapshot links zeroed, creation time taken
+ * from fsdev->nd_ts, cp_nblk_inc set to nblocks, cp_blocks_count set to 0,
+ * and the ifile inode copied from ifile_inode.
+ *
+ * Returns 0 on success, -1 for an invalid cn, or the read error.
+ *
+ * NOTE(review): the modified buffers are released with brelse() and are not
+ * dirtied here; presumably nandfs_get_checkpoint() has already marked them
+ * dirty -- confirm against the callers.
+ */
+int
+nandfs_set_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node,
+    uint64_t cn, struct nandfs_inode *ifile_inode, uint64_t nblocks)
+{
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct buf *bp;
+	uint64_t blk, offset;
+	int error;
+
+	if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) {
+		/* %jx takes uintmax_t, so cast like the DPRINTFs below. */
+		nandfs_error("%s: trying to set invalid chekpoint %jx - %jx\n",
+		    __func__, (uintmax_t)cn, (uintmax_t)fsdev->nd_last_cno);
+		return (-1);
+	}
+
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_ncheckpoints++;
+
+	nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset);
+
+	/* The entry shares block 0 with the header unless blk says otherwise. */
+	if (blk != 0) {
+		brelse(bp);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+	}
+
+	cnp = (struct nandfs_checkpoint *)((uint8_t *)bp->b_data + offset);
+	cnp->cp_flags = 0;
+	cnp->cp_checkpoints_count = 1;
+	memset(&cnp->cp_snapshot_list, 0, sizeof(struct nandfs_snapshot_list));
+	cnp->cp_cno = cn;
+	cnp->cp_create = fsdev->nd_ts.tv_sec;
+	cnp->cp_nblk_inc = nblocks;
+	cnp->cp_blocks_count = 0;
+	memcpy(&cnp->cp_ifile_inode, ifile_inode, sizeof(cnp->cp_ifile_inode));
+
+	DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx nblk:%#jx\n",
+	    __func__, (uintmax_t)cn, (uintmax_t)cnp->cp_create,
+	    (uintmax_t)nblocks));
+
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Tell whether checkpoint cno is the checkpoint of any mount registered on
+ * the device.  The mount list is scanned under nd_mutex.
+ *
+ * Returns 1 when a mount with a matching cpno is found, 0 otherwise.
+ */
+static int
+nandfs_cp_mounted(struct nandfs_device *nandfsdev, uint64_t cno)
+{
+	struct nandfsmount *nmp;
+	int found;
+
+	mtx_lock(&nandfsdev->nd_mutex);
+	/* No double-mounting of the same checkpoint */
+	STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) {
+		if (nmp->nm_mount_args.cpno == cno)
+			break;
+	}
+	/* The iterator is NULL only when the list was walked to its end. */
+	found = (nmp != NULL) ? 1 : 0;
+	mtx_unlock(&nandfsdev->nd_mutex);
+
+	return (found);
+}
+
+/*
+ * Turn checkpoint cno into a snapshot.
+ *
+ * Snapshots are chained through the ssl_prev/ssl_next fields of their
+ * checkpoint entries, with the list head stored in the checkpoint file
+ * header; a link value of 0 refers to the header.  The list is walked
+ * backwards from the header's ssl_prev until a checkpoint number not
+ * greater than cno is reached, and cno is linked in at that point.  Finally
+ * the header's snapshot counter is incremented.
+ *
+ * Returns 0 on success, ENOENT when cno is 0 or the checkpoint is marked
+ * invalid, EINVAL when it already is a snapshot, or the error of a failed
+ * read or dirty operation.
+ */
+static int
+nandfs_cp_set_snapshot(struct nandfs_node *cp_node, uint64_t cno)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_snapshot_list *list;
+	struct buf *bp;
+	uint64_t blk, prev_blk, offset;
+	uint64_t curr, prev;
+	int error;
+
+	/* Checkpoint 0 does not exist; the offset helper asserts on it. */
+	if (cno == 0)
+		return (ENOENT);
+
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* Get snapshot data */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		brelse(bp);
+		return (ENOENT);
+	}
+	if ((cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	brelse(bp);
+	/* Get list from header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	list = &cnh->ch_snapshot_list;
+	prev = list->ssl_prev;
+	brelse(bp);
+
+	/*
+	 * Walk the ssl_prev links until 'prev' no longer exceeds cno.  On
+	 * exit 'curr' is the snapshot that will follow cno (0: the header)
+	 * and 'prev' the one that will precede it (0: the header).
+	 */
+	curr = 0;
+	while (prev > cno) {
+		curr = prev;
+		nandfs_checkpoint_blk_offset(fsdev, prev, &prev_blk, &offset);
+		error = nandfs_bread(cp_node, prev_blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+		prev = list->ssl_prev;
+		brelse(bp);
+	}
+
+	/* Point the successor's ssl_prev at cno. */
+	if (curr == 0) {
+		/* This read used to go unchecked; fail like the others. */
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	} else {
+		nandfs_checkpoint_blk_offset(fsdev, curr, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	}
+
+	list->ssl_prev = cno;
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (error);
+
+	/* Update snapshot for cno */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	list = &cnp->cp_snapshot_list;
+	list->ssl_prev = prev;
+	list->ssl_next = curr;
+	cnp->cp_flags |= NANDFS_CHECKPOINT_SNAPSHOT;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Point the predecessor's ssl_next at cno. */
+	if (prev == 0) {
+		/* This read used to go unchecked; fail like the others. */
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	} else {
+		/* Update snapshot list for prev */
+		nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	}
+	list->ssl_next = cno;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_nsnapshots++;
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Turn snapshot cno back into a plain checkpoint.
+ *
+ * The entry is unlinked from the snapshot list: its predecessor's ssl_next
+ * and its successor's ssl_prev are pointed at each other (a link value of 0
+ * refers to the list head kept in the checkpoint file header).  The entry's
+ * own links are then zeroed, its SNAPSHOT flag is cleared and the header's
+ * snapshot counter is decremented.
+ *
+ * Returns 0 on success, ENOENT when cno is 0 or the checkpoint is marked
+ * invalid, EINVAL when it is not a snapshot, or the error of a failed read
+ * or dirty operation.
+ */
+static int
+nandfs_cp_clr_snapshot(struct nandfs_node *cp_node, uint64_t cno)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_snapshot_list *list;
+	struct buf *bp;
+	uint64_t blk, offset;
+	uint64_t next, prev;
+	int error;
+
+	/* Checkpoint 0 does not exist; the offset helper asserts on it. */
+	if (cno == 0)
+		return (ENOENT);
+
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* Get snapshot data */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		brelse(bp);
+		return (ENOENT);
+	}
+	if (!(cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	list = &cnp->cp_snapshot_list;
+	next = list->ssl_next;
+	prev = list->ssl_prev;
+	brelse(bp);
+
+	/* Get previous snapshot */
+	if (prev != 0) {
+		nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	} else {
+		/* This read used to go unchecked; fail like the others. */
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	}
+
+	list->ssl_next = next;
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (error);
+
+	/* Get next snapshot */
+	if (next != 0) {
+		nandfs_checkpoint_blk_offset(fsdev, next, &blk, &offset);
+		error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		list = &cnp->cp_snapshot_list;
+	} else {
+		/* This read used to go unchecked; fail like the others. */
+		error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		list = &cnh->ch_snapshot_list;
+	}
+	list->ssl_prev = prev;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update snapshot list for cno */
+	nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+	error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+	list = &cnp->cp_snapshot_list;
+	list->ssl_prev = 0;
+	list->ssl_next = 0;
+	/*
+	 * Clear only the SNAPSHOT bit.  This must be the bitwise complement:
+	 * a logical NOT of the non-zero flag constant yields 0 and would
+	 * wipe every flag of the checkpoint.
+	 */
+	cnp->cp_flags &= ~NANDFS_CHECKPOINT_SNAPSHOT;
+	nandfs_dirty_buf(bp, 1);
+
+	/* Update header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+	cnh = (struct nandfs_cpfile_header *) bp->b_data;
+	cnh->ch_nsnapshots--;
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Change the mode of the checkpoint described by ncpm.
+ *
+ * NANDFS_CHECKPOINT demotes a snapshot to a plain checkpoint and is refused
+ * with EBUSY while that checkpoint is mounted; NANDFS_SNAPSHOT promotes a
+ * checkpoint to a snapshot; any other mode yields EINVAL.  The work is done
+ * with the node's vnode exclusively locked.
+ *
+ * Returns 0 or an errno value.
+ */
+int
+nandfs_chng_cpmode(struct nandfs_node *node, struct nandfs_cpmode *ncpm)
+{
+	struct nandfs_device *fsdev;
+	uint64_t cno;
+	int mode, ret;
+
+	cno = ncpm->ncpm_cno;
+	mode = ncpm->ncpm_mode;
+	fsdev = node->nn_nandfsdev;
+
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	if (mode == NANDFS_CHECKPOINT) {
+		if (nandfs_cp_mounted(fsdev, cno))
+			ret = EBUSY;
+		else
+			ret = nandfs_cp_clr_snapshot(node, cno);
+	} else if (mode == NANDFS_SNAPSHOT) {
+		ret = nandfs_cp_set_snapshot(node, cno);
+	} else {
+		ret = EINVAL;
+	}
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (ret);
+}
+
+/*
+ * Copy the fields of on-disk checkpoint entry cnp into the cpinfo record nci.
+ * The padding field of nci is zeroed and nci_next is taken from the entry's
+ * snapshot list ssl_next link.
+ */
+static void
+nandfs_cpinfo_fill(struct nandfs_checkpoint *cnp, struct nandfs_cpinfo *nci)
+{
+
+	/* Identity and creation time. */
+	nci->nci_cno = cnp->cp_cno;
+	nci->nci_create = cnp->cp_create;
+
+	/* State bits; the pad is always zero. */
+	nci->nci_flags = cnp->cp_flags;
+	nci->nci_pad = 0;
+
+	/* Block accounting. */
+	nci->nci_nblk_inc = cnp->cp_nblk_inc;
+	nci->nci_blocks_count = cnp->cp_blocks_count;
+
+	/* Link to the next snapshot. */
+	nci->nci_next = cnp->cp_snapshot_list.ssl_next;
+
+	DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx\n",
+	    __func__, (uintmax_t)cnp->cp_cno,
+	    (uintmax_t)cnp->cp_create));
+}
+
+/*
+ * Collect information about consecutive checkpoints starting at cno.
+ *
+ * Entries are copied into nci, block by block, until mnmembs records have
+ * been produced or the device's last checkpoint number has been passed.
+ * *nmembs receives the number of records written.
+ *
+ * Returns ENOENT when cno is 0, EINVAL when mnmembs is 0, the error of a
+ * failed block read, or 0 on success.
+ */
+static int
+nandfs_get_cpinfo_cp(struct nandfs_node *node, uint64_t cno,
+    struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_checkpoint *entry;
+	struct buf *bp;
+	uint64_t blk, offset, last_cno;
+	uint16_t left;
+	int error;
+#ifdef INVARIANTS
+	uint64_t testblk, testoffset;
+#endif
+
+	if (cno == 0)
+		return (ENOENT);
+	if (mnmembs < 1)
+		return (EINVAL);
+
+	fsdev = node->nn_nandfsdev;
+	last_cno = fsdev->nd_last_cno;
+	DPRINTF(CPFILE, ("%s: cno:%#jx mnmembs: %#jx last:%#jx\n", __func__,
+	    (uintmax_t)cno, (uintmax_t)mnmembs,
+	    (uintmax_t)fsdev->nd_last_cno));
+
+	/*
+	 * One pass of the outer loop per checkpoint file block; the inner
+	 * loop consumes entries until the block, the request or the
+	 * checkpoints run out.  *nmembs doubles as the record counter.
+	 */
+	*nmembs = 0;
+	for (;;) {
+		nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+		left = nandfs_checkpoint_blk_remaining(fsdev, cno, blk,
+		    offset);
+		error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+
+		for (; left != 0 && cno <= last_cno && *nmembs < mnmembs;
+		    left--) {
+#ifdef INVARIANTS
+			nandfs_checkpoint_blk_offset(fsdev, cno, &testblk,
+			    &testoffset);
+			KASSERT(testblk == blk, ("testblk != blk"));
+			KASSERT(testoffset == offset, ("testoffset != offset"));
+#endif
+			DPRINTF(CPFILE, ("%s: cno %#jx\n", __func__,
+			    (uintmax_t)cno));
+
+			entry = (struct nandfs_checkpoint *)
+			    (bp->b_data + offset);
+			nandfs_cpinfo_fill(entry, nci);
+
+			/* Step to the next entry and the next output slot. */
+			offset += nandfs_checkpoint_size(fsdev);
+			nci++;
+			cno++;
+			(*nmembs)++;
+		}
+		brelse(bp);
+
+		if (cno > last_cno || *nmembs >= mnmembs)
+			break;
+	}
+
+	return (0);
+}
+
+/*
+ * Collect information about snapshots, following the snapshot list.
+ *
+ * Starting at checkpoint cno -- or, when cno is 1, at the first snapshot
+ * named by the header's ssl_next link -- up to mnmembs records are written
+ * to nci.  The walk stops early at an entry that is not a snapshot or is
+ * marked invalid, and at the end of the list (ssl_next of 0).  When nmembs
+ * is not NULL it receives the number of records written.
+ *
+ * Returns ENOENT when cno is 0 or all-ones, or when an entry block cannot
+ * be read; the read error when the header cannot be read; 0 otherwise.
+ */
+static int
+nandfs_get_cpinfo_sp(struct nandfs_node *node, uint64_t cno,
+    struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs)
+{
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_cpfile_header *cnh;
+	struct nandfs_device *fsdev;
+	struct buf *bp = NULL;
+	uint64_t curr = 0;
+	uint64_t blk, offset, curr_cno;
+	uint32_t flag, i;
+	int error;
+
+	if (cno == 0 || cno == ~(uint64_t)0)
+		return (ENOENT);
+
+	fsdev = node->nn_nandfsdev;
+	curr_cno = cno;
+
+	if (nmembs)
+		*nmembs = 0;
+	if (curr_cno == 1) {
+		/* Get list from header */
+		error = nandfs_bread(node, 0, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		cnh = (struct nandfs_cpfile_header *) bp->b_data;
+		curr_cno = cnh->ch_snapshot_list.ssl_next;
+		brelse(bp);
+		bp = NULL;
+
+		/* No snapshots */
+		if (curr_cno == 0)
+			return (0);
+	}
+
+	/* The index has the type of mnmembs to avoid a mixed-sign compare. */
+	for (i = 0; i < mnmembs; i++, nci++) {
+		nandfs_checkpoint_blk_offset(fsdev, curr_cno, &blk, &offset);
+		/* Keep the current block across entries that share it. */
+		if (i == 0 || curr != blk) {
+			if (bp)
+				brelse(bp);
+			error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+			if (error) {
+				brelse(bp);
+				return (ENOENT);
+			}
+			curr = blk;
+		}
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		flag = cnp->cp_flags;
+		if (!(flag & NANDFS_CHECKPOINT_SNAPSHOT) ||
+		    (flag & NANDFS_CHECKPOINT_INVALID))
+			break;
+
+		nci->nci_flags = flag;
+		nci->nci_pad = 0;
+		nci->nci_cno = cnp->cp_cno;
+		nci->nci_create = cnp->cp_create;
+		nci->nci_nblk_inc = cnp->cp_nblk_inc;
+		nci->nci_blocks_count = cnp->cp_blocks_count;
+		nci->nci_next = cnp->cp_snapshot_list.ssl_next;
+		if (nmembs)
+			(*nmembs)++;
+
+		curr_cno = nci->nci_next;
+		if (!curr_cno)
+			break;
+	}
+
+	/*
+	 * With mnmembs == 0 the loop never ran and no buffer is held;
+	 * releasing a NULL buffer must be avoided.
+	 */
+	if (bp != NULL)
+		brelse(bp);
+
+	return (0);
+}
+
+/*
+ * Fetch checkpoint (NANDFS_CHECKPOINT) or snapshot (NANDFS_SNAPSHOT)
+ * information starting at cno, as selected by flags.  Up to nmembs records
+ * are stored in nci and the number produced is reported through nnmembs.
+ * The node's vnode is exclusively locked around the lookup.
+ *
+ * Returns EINVAL for any other flags value, otherwise the result of the
+ * selected helper.
+ */
+int
+nandfs_get_cpinfo(struct nandfs_node *node, uint64_t cno, uint16_t flags,
+    struct nandfs_cpinfo *nci, uint32_t nmembs, uint32_t *nnmembs)
+{
+	int error;
+
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	if (flags == NANDFS_CHECKPOINT)
+		error = nandfs_get_cpinfo_cp(node, cno, nci, nmembs, nnmembs);
+	else if (flags == NANDFS_SNAPSHOT)
+		error = nandfs_get_cpinfo_sp(node, cno, nci, nmembs, nnmembs);
+	else
+		error = EINVAL;
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (error);
+}
+
+/*
+ * ioctl backend: gather checkpoint/snapshot info and copy it out.
+ *
+ * nargv supplies the starting checkpoint (nv_index), the mode (nv_flags),
+ * the destination address (nv_base) and the capacity (nv_nmembs, limited to
+ * NANDFS_CPINFO_MAX).  On success nv_nmembs is overwritten with the number
+ * of records produced and that many records are copied to nv_base.
+ *
+ * Returns EINVAL when too many records are requested, otherwise the lookup
+ * or copyout result.
+ */
+int
+nandfs_get_cpinfo_ioctl(struct nandfs_node *node, struct nandfs_argv *nargv)
+{
+	struct nandfs_cpinfo *nci;
+	uint64_t cno;
+	void *ubuf;
+	uint32_t found;
+	uint16_t flags;
+	int error;
+
+	cno = nargv->nv_index;
+	ubuf = (void *)((uintptr_t)nargv->nv_base);
+	flags = nargv->nv_flags;
+	found = 0;
+
+	if (nargv->nv_nmembs > NANDFS_CPINFO_MAX)
+		return (EINVAL);
+
+	/* Zeroed staging area sized for the requested capacity. */
+	nci = malloc(sizeof(struct nandfs_cpinfo) * nargv->nv_nmembs,
+	    M_NANDFSTEMP, M_WAITOK | M_ZERO);
+
+	error = nandfs_get_cpinfo(node, cno, flags, nci, nargv->nv_nmembs,
+	    &found);
+	if (error != 0)
+		goto done;
+
+	nargv->nv_nmembs = found;
+	error = copyout(nci, ubuf, sizeof(struct nandfs_cpinfo) * found);
+
+done:
+	free(nci, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Invalidate the checkpoints numbered start..end (inclusive).
+ *
+ * Each entry gets NANDFS_CHECKPOINT_INVALID set and its buffer dirtied.
+ * Checkpoint number 0 is skipped.  The walk stops -- still returning 0 --
+ * at the first entry that is a snapshot.  The node's vnode is exclusively
+ * locked for the duration and released on every exit path.
+ *
+ * Returns 0, or the error of a failed read or dirty operation.
+ */
+int
+nandfs_delete_cp(struct nandfs_node *node, uint64_t start, uint64_t end)
+{
+	struct nandfs_checkpoint *cnp;
+	struct nandfs_device *fsdev;
+	struct buf *bp;
+	uint64_t cno, blk, offset;
+	int error;
+
+	DPRINTF(CPFILE, ("%s: delete cno %jx-%jx\n", __func__,
+	    (uintmax_t)start, (uintmax_t)end));
+	VOP_LOCK(NTOV(node), LK_EXCLUSIVE);
+	fsdev = node->nn_nandfsdev;
+	for (cno = start; cno <= end; cno++) {
+		if (!cno)
+			continue;
+
+		nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset);
+		error = nandfs_bread(node, blk, NOCRED, 0, &bp);
+		if (error) {
+			VOP_UNLOCK(NTOV(node), 0);
+			brelse(bp);
+			return (error);
+		}
+
+		cnp = (struct nandfs_checkpoint *)(bp->b_data + offset);
+		if (cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) {
+			/* Snapshots are never deleted here; stop quietly. */
+			brelse(bp);
+			VOP_UNLOCK(NTOV(node), 0);
+			return (0);
+		}
+
+		cnp->cp_flags |= NANDFS_CHECKPOINT_INVALID;
+
+		error = nandfs_dirty_buf(bp, 0);
+		if (error) {
+			/* Do not leak the vnode lock on this path. */
+			VOP_UNLOCK(NTOV(node), 0);
+			return (error);
+		}
+	}
+	VOP_UNLOCK(NTOV(node), 0);
+
+	return (0);
+}
+
+/*
+ * Make a snapshot out of the device's last checkpoint.  The checkpoint
+ * number used is stored in *cno before the mode change is attempted.
+ *
+ * Returns the result of nandfs_chng_cpmode().
+ */
+int
+nandfs_make_snap(struct nandfs_device *fsdev, uint64_t *cno)
+{
+	struct nandfs_cpmode request;
+
+	request.ncpm_mode = NANDFS_SNAPSHOT;
+	request.ncpm_cno = fsdev->nd_last_cno;
+	*cno = request.ncpm_cno;
+
+	return (nandfs_chng_cpmode(fsdev->nd_cp_node, &request));
+}
+
+/*
+ * Delete snapshot cno by switching it back to plain checkpoint mode.
+ *
+ * Returns the result of nandfs_chng_cpmode().
+ */
+int
+nandfs_delete_snap(struct nandfs_device *fsdev, uint64_t cno)
+{
+	struct nandfs_cpmode request;
+
+	request.ncpm_mode = NANDFS_CHECKPOINT;
+	request.ncpm_cno = cno;
+
+	return (nandfs_chng_cpmode(fsdev->nd_cp_node, &request));
+}
+
+/*
+ * Report checkpoint file statistics in ncp: the device's last checkpoint
+ * number plus the checkpoint and snapshot counters stored in the checkpoint
+ * file header.  The header is read with the vnode exclusively locked.
+ *
+ * Returns 0 on success or the error of the header read.
+ */
+int
+nandfs_get_cpstat(struct nandfs_node *cp_node, struct nandfs_cpstat *ncp)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_cpfile_header *hdr;
+	struct buf *bp;
+	int error;
+
+	VOP_LOCK(NTOV(cp_node), LK_EXCLUSIVE);
+	fsdev = cp_node->nn_nandfsdev;
+
+	/* Get header */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error == 0) {
+		hdr = (struct nandfs_cpfile_header *) bp->b_data;
+		ncp->ncp_cno = fsdev->nd_last_cno;
+		ncp->ncp_ncps = hdr->ch_ncheckpoints;
+		ncp->ncp_nss = hdr->ch_nsnapshots;
+		DPRINTF(CPFILE, ("%s: cno:%#jx ncps:%#jx nss:%#jx\n",
+		    __func__, ncp->ncp_cno, ncp->ncp_ncps, ncp->ncp_nss));
+	}
+
+	/* The buffer is released and the lock dropped on both outcomes. */
+	brelse(bp);
+	VOP_UNLOCK(NTOV(cp_node), 0);
+
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_dat.c b/sys/fs/nandfs/nandfs_dat.c
new file mode 100644
index 0000000..799113d
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_dat.c
@@ -0,0 +1,344 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Allocate a new virtual block number in the DAT file.
+ *
+ * A free DAT entry is located, its block is read or created, and the entry
+ * is initialised with de_start set to the checkpoint after the device's
+ * last one, de_end set to UINTMAX_MAX and de_blocknr set to 0.  The
+ * allocation is then committed and the entry number stored in *vblock.
+ *
+ * The DAT vnode is locked here only when NANDFS_VOP_ISLOCKED() reports it
+ * unlocked, and is unlocked again on exit in that case.
+ *
+ * Returns 0 on success or the error of the step that failed; *vblock is
+ * written only on success.
+ */
+int
+nandfs_vblock_alloc(struct nandfs_device *nandfsdev, nandfs_daddr_t *vblock)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *slot;
+	uint64_t first_cno;
+	uint32_t entry;
+	int was_locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+	first_cno = nandfsdev->nd_last_cno + 1;
+
+	was_locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!was_locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	req.entrynum = 0;
+
+	/* Alloc vblock number */
+	error = nandfs_find_free_entry(mdt, dat, &req);
+	if (error) {
+		nandfs_error("%s: cannot find free vblk entry\n",
+		    __func__);
+		goto out;
+	}
+
+	/* Read/create buffer */
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 1);
+	if (error) {
+		nandfs_error("%s: cannot get free vblk entry\n",
+		    __func__);
+		nandfs_abort_entry(&req);
+		goto out;
+	}
+
+	/* Fill out vblock data */
+	slot = (struct nandfs_dat_entry *) req.bp_entry->b_data + entry;
+	slot->de_start = first_cno;
+	slot->de_end = UINTMAX_MAX;
+	slot->de_blocknr = 0;
+
+	/* Commit allocation */
+	error = nandfs_alloc_entry(mdt, &req);
+	if (error) {
+		nandfs_error("%s: cannot get free vblk entry\n",
+		    __func__);
+		goto out;
+	}
+
+	/* Return allocated vblock */
+	*vblock = req.entrynum;
+	DPRINTF(DAT, ("%s: allocated vblock %#jx\n",
+	    __func__, (uintmax_t)*vblock));
+
+out:
+	/* Undo only the locking done by this function. */
+	if (!was_locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+	return (error);
+}
+
+/*
+ * Record 'block' as the de_blocknr of DAT entry 'vblock' and dirty the
+ * entry's buffer with the force flag set.
+ *
+ * The DAT vnode is locked here only when NANDFS_VOP_ISLOCKED() reports it
+ * unlocked, and is unlocked again on exit in that case.
+ *
+ * Returns 0 on success, or the error from fetching the entry block or from
+ * dirtying the buffer.
+ */
+int
+nandfs_vblock_assign(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock,
+    nandfs_lbn_t block)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *slot;
+	uint32_t entry;
+	int was_locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+
+	was_locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!was_locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	req.entrynum = vblock;
+
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0);
+	if (error)
+		goto out;
+
+	slot = (struct nandfs_dat_entry *) req.bp_entry->b_data + entry;
+	slot->de_blocknr = block;
+
+	DPRINTF(DAT, ("%s: assing vblock %jx->%jx\n",
+	    __func__, (uintmax_t)vblock, (uintmax_t)block));
+
+	/*
+	 * It is mostly called from syncer() so
+	 * we want to force making buf dirty
+	 */
+	error = nandfs_dirty_buf(req.bp_entry, 1);
+
+out:
+	if (!was_locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+
+	return (error);
+}
+
+/*
+ * Close the lifetime of DAT entry 'vblock': its de_end is set to the
+ * device's last checkpoint number and the entry's buffer is dirtied with
+ * the force flag set.
+ *
+ * The DAT vnode is locked here only when NANDFS_VOP_ISLOCKED() reports it
+ * unlocked, and is unlocked again on exit in that case.
+ *
+ * Returns 0 on success, or the error from fetching the entry block or from
+ * dirtying the buffer.
+ */
+int
+nandfs_vblock_end(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *slot;
+	uint64_t last_cno;
+	uint32_t entry;
+	int was_locked, error;
+
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+	last_cno = nandfsdev->nd_last_cno;
+
+	was_locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
+	if (!was_locked)
+		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+	req.entrynum = vblock;
+
+	error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0);
+	if (error)
+		goto out;
+
+	slot = (struct nandfs_dat_entry *) req.bp_entry->b_data + entry;
+	slot->de_end = last_cno;
+	DPRINTF(DAT, ("%s: end vblock %#jx at checkpoint %#jx\n",
+	    __func__, (uintmax_t)vblock, (uintmax_t)last_cno));
+
+	/*
+	 * It is mostly called from syncer() so
+	 * we want to force making buf dirty
+	 */
+	error = nandfs_dirty_buf(req.bp_entry, 1);
+
+out:
+	if (!was_locked)
+		VOP_UNLOCK(NTOV(dat), 0);
+
+	return (error);
+}
+
+/*
+ * Release DAT entry 'vblock'.  The entry is looked up and, when found,
+ * handed to nandfs_free_entry().  Unlike its siblings this function always
+ * takes the DAT vnode lock itself.
+ *
+ * Returns 0 on success or the lookup error.
+ */
+int
+nandfs_vblock_free(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
+{
+	struct nandfs_alloc_request req;
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	int error;
+
+	mdt = &nandfsdev->nd_dat_mdt;
+	dat = nandfsdev->nd_dat_node;
+
+	VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+
+	req.entrynum = vblock;
+	error = nandfs_find_entry(mdt, dat, &req);
+	if (error == 0) {
+		DPRINTF(DAT, ("%s: vblk %#jx\n", __func__, (uintmax_t)vblock));
+		nandfs_free_entry(mdt, &req);
+	}
+
+	VOP_UNLOCK(NTOV(dat), 0);
+	return (error);
+}
+
+/*
+ * ioctl backend: look up DAT information for an array of virtual blocks.
+ *
+ * nv_nmembs records (at most NANDFS_VINFO_MAX) are copied in from nv_base,
+ * completed by nandfs_get_dat_vinfo() and copied back to the same address.
+ *
+ * Returns EINVAL when too many records are requested, otherwise the first
+ * error among copyin, lookup and copyout, or 0.
+ */
+int
+nandfs_get_dat_vinfo_ioctl(struct nandfs_device *nandfsdev, struct nandfs_argv *nargv)
+{
+	struct nandfs_vinfo *vinfo;
+	void *ubuf;
+	size_t size;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_VINFO_MAX)
+		return (EINVAL);
+
+	ubuf = (void *)(uintptr_t)nargv->nv_base;
+	size = sizeof(struct nandfs_vinfo) * nargv->nv_nmembs;
+	vinfo = malloc(size, M_NANDFSTEMP, M_WAITOK|M_ZERO);
+
+	/* Each stage runs only if everything before it succeeded. */
+	error = copyin(ubuf, vinfo, size);
+	if (error == 0)
+		error = nandfs_get_dat_vinfo(nandfsdev, vinfo,
+		    nargv->nv_nmembs);
+	if (error == 0)
+		error = copyout(vinfo, ubuf, size);
+
+	free(vinfo, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Fill in nvi_start, nvi_end and nvi_blocknr of each of the nmembs vinfo
+ * records from the DAT entry named by the record's nvi_vblocknr.  The DAT
+ * vnode is exclusively locked for the whole pass and each entry buffer is
+ * released as soon as it has been read.
+ *
+ * Returns 0, or the error of the first entry block that could not be
+ * fetched; records past that point are left untouched.
+ */
+int
+nandfs_get_dat_vinfo(struct nandfs_device *nandfsdev, struct nandfs_vinfo *vinfo,
+    uint32_t nmembs)
+{
+	struct nandfs_node *dat;
+	struct nandfs_mdt *mdt;
+	struct nandfs_alloc_request req;
+	struct nandfs_dat_entry *slot;
+	struct nandfs_vinfo *vi;
+	uint32_t n, idx;
+	int error;
+
+	error = 0;
+	dat = nandfsdev->nd_dat_node;
+	mdt = &nandfsdev->nd_dat_mdt;
+
+	DPRINTF(DAT, ("%s: nmembs %#x\n", __func__, nmembs));
+
+	VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+
+	for (n = 0; n < nmembs; n++) {
+		vi = &vinfo[n];
+		req.entrynum = vi->nvi_vblocknr;
+
+		error = nandfs_get_entry_block(mdt, dat, &req, &idx, 0);
+		if (error != 0)
+			break;
+
+		slot = (struct nandfs_dat_entry *) req.bp_entry->b_data + idx;
+		vi->nvi_start = slot->de_start;
+		vi->nvi_end = slot->de_end;
+		vi->nvi_blocknr = slot->de_blocknr;
+
+		DPRINTF(DAT, ("%s: vinfo: %jx[%jx-%jx]->%jx\n",
+		    __func__, vi->nvi_vblocknr, vi->nvi_start,
+		    vi->nvi_end, vi->nvi_blocknr));
+
+		brelse(req.bp_entry);
+	}
+
+	VOP_UNLOCK(NTOV(dat), 0);
+	return (error);
+}
+
+/*
+ * ioctl backend: resolve block descriptors against the DAT file.
+ *
+ * nv_nmembs descriptors are copied in from nv_base, completed by
+ * nandfs_get_dat_bdescs() and copied back to the same address.
+ *
+ * Returns EINVAL when the byte size of the request does not fit in size_t,
+ * otherwise the first error among copyin, lookup and copyout, or 0.
+ *
+ * NOTE(review): unlike the vinfo and cpinfo ioctls there is no upper limit
+ * on nv_nmembs here, so a caller can request a very large M_WAITOK
+ * allocation.  A dedicated limit constant should be applied once one is
+ * confirmed to exist for block descriptors.
+ */
+int
+nandfs_get_dat_bdescs_ioctl(struct nandfs_device *nffsdev,
+    struct nandfs_argv *nargv)
+{
+	struct nandfs_bdesc *bd;
+	size_t size;
+	int error;
+
+	/*
+	 * nv_nmembs comes from the caller.  If the multiplication wrapped,
+	 * the buffer would be smaller than the nv_nmembs descriptors the
+	 * lookup below walks, so verify that it round-trips.
+	 */
+	size = (size_t)nargv->nv_nmembs * sizeof(struct nandfs_bdesc);
+	if (size / sizeof(struct nandfs_bdesc) != nargv->nv_nmembs)
+		return (EINVAL);
+
+	bd = malloc(size, M_NANDFSTEMP, M_WAITOK);
+	error = copyin((void *)(uintptr_t)nargv->nv_base, bd, size);
+	if (error) {
+		free(bd, M_NANDFSTEMP);
+		return (error);
+	}
+
+	error = nandfs_get_dat_bdescs(nffsdev, bd, nargv->nv_nmembs);
+
+	if (error == 0)
+		error = copyout(bd, (void *)(uintptr_t)nargv->nv_base, size);
+
+	free(bd, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * For each of the nmembs block descriptors, look up bd_offset in the DAT
+ * file's block map and store the result in bd_blocknr.  The DAT vnode is
+ * exclusively locked for the whole pass.
+ *
+ * Returns 0, or the error of the first lookup that failed; descriptors past
+ * that point are left untouched.
+ */
+int
+nandfs_get_dat_bdescs(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
+    uint32_t nmembs)
+{
+	struct nandfs_node *dat;
+	struct nandfs_bdesc *desc;
+	uint64_t mapped;
+	uint32_t n;
+	int error;
+
+	error = 0;
+	dat = nffsdev->nd_dat_node;
+
+	VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+
+	for (n = 0; n < nmembs; n++) {
+		desc = &bd[n];
+		DPRINTF(CLEAN,
+		    ("%s: bd ino:%#jx oblk:%#jx blocknr:%#jx off:%#jx\n",
+		    __func__, (uintmax_t)desc->bd_ino,
+		    (uintmax_t)desc->bd_oblocknr, (uintmax_t)desc->bd_blocknr,
+		    (uintmax_t)desc->bd_offset));
+
+		error = nandfs_bmap_lookup(dat, desc->bd_offset, &mapped);
+		if (error != 0)
+			break;
+		desc->bd_blocknr = mapped;
+	}
+
+	VOP_UNLOCK(NTOV(dat), 0);
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_dir.c b/sys/fs/nandfs/nandfs_dir.c
new file mode 100644
index 0000000..e279510
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_dir.c
@@ -0,0 +1,314 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+/*
+ * Append a new entry (ino, name, type) to the directory dvp.
+ *
+ * Only the last block of the directory (logical block i_blocks - 1) is
+ * examined.  If the unused tail of its final record can hold the new entry,
+ * that record is trimmed to its minimal length and the new entry takes the
+ * remainder of the block; otherwise a new block is created at logical block
+ * i_blocks and the new entry spans all of it.
+ *
+ * On success the directory's i_size is updated, IN_CHANGE | IN_UPDATE are
+ * set on the node and the vnode pager size is adjusted.  Returns 0, or the
+ * error from nandfs_bread(), nandfs_bcreate() or nandfs_dirty_buf().
+ *
+ * NOTE(review): namelen is stored in the 8-bit name_len field without a
+ * range check here; presumably callers bound it by NANDFS_NAME_LEN.
+ */
+int
+nandfs_add_dirent(struct vnode *dvp, uint64_t ino, char *nameptr, long namelen,
+ uint8_t type)
+{
+ struct nandfs_node *dir_node = VTON(dvp);
+ struct nandfs_dir_entry *dirent, *pdirent;
+ uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+ uint64_t filesize = dir_node->nn_inode.i_size;
+ uint64_t inode_blks = dir_node->nn_inode.i_blocks;
+ uint32_t off, rest;
+ uint8_t *pos;
+ struct buf *bp;
+ int error;
+
+ pdirent = NULL;
+ bp = NULL;
+ /* A directory with no blocks yet skips straight to block creation. */
+ if (inode_blks) {
+ error = nandfs_bread(dir_node, inode_blks - 1, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ /*
+ * Walk the records of the last block by rec_len.  The loop leaves
+ * pdirent pointing at the final record, or NULL if a record with
+ * a zero rec_len was met (off then stays at that record).
+ */
+ pos = bp->b_data;
+ off = 0;
+ while (off < blocksize) {
+ pdirent = (struct nandfs_dir_entry *) (pos + off);
+ if (!pdirent->rec_len) {
+ pdirent = NULL;
+ break;
+ }
+ off += pdirent->rec_len;
+ }
+
+ /*
+ * rest is the slack available for the new entry: what the final
+ * record holds beyond its minimal length, or a whole block when
+ * no final record was found.
+ */
+ if (pdirent)
+ rest = pdirent->rec_len -
+ NANDFS_DIR_REC_LEN(pdirent->name_len);
+ else
+ rest = blocksize;
+
+ if (rest < NANDFS_DIR_REC_LEN(namelen)) {
+ /* Do not update pdirent as new block is created */
+ pdirent = NULL;
+ brelse(bp);
+ /* Set to NULL to create new */
+ bp = NULL;
+ /* The unusable slack of the old block still counts towards the size. */
+ filesize += rest;
+ }
+ }
+
+ /* If no bp found create new */
+ if (!bp) {
+ error = nandfs_bcreate(dir_node, inode_blks, NOCRED, 0, &bp);
+ if (error)
+ return (error);
+ off = 0;
+ pos = bp->b_data;
+ }
+
+ /* Modify pdirent if exists */
+ if (pdirent) {
+ DPRINTF(LOOKUP, ("modify pdirent %p\n", pdirent));
+ /* modify last de */
+ /*
+ * Step off back to the start of the final record, shrink that
+ * record to its minimal length, then advance off past it so it
+ * addresses the freed tail.
+ */
+ off -= pdirent->rec_len;
+ pdirent->rec_len =
+ NANDFS_DIR_REC_LEN(pdirent->name_len);
+ off += pdirent->rec_len;
+ }
+
+ /* Create new dirent */
+ dirent = (struct nandfs_dir_entry *) (pos + off);
+ /* The new record always extends to the end of the block. */
+ dirent->rec_len = blocksize - off;
+ dirent->inode = ino;
+ dirent->name_len = namelen;
+ /* Zero the padded name area first so the bytes after the name are 0. */
+ memset(dirent->name, 0, NANDFS_DIR_NAME_LEN(namelen));
+ memcpy(dirent->name, nameptr, namelen);
+ dirent->file_type = type;
+
+ /* The size grows by the entry's minimal length, not by its rec_len. */
+ filesize += NANDFS_DIR_REC_LEN(dirent->name_len);
+
+ DPRINTF(LOOKUP, ("create dir_entry '%.*s' at %p with size %x "
+ "new filesize: %jx\n",
+ (int)namelen, dirent->name, dirent, dirent->rec_len,
+ (uintmax_t)filesize));
+
+ error = nandfs_dirty_buf(bp, 0);
+ if (error)
+ return (error);
+
+ /* Publish the new size only after the buffer was marked dirty. */
+ dir_node->nn_inode.i_size = filesize;
+ dir_node->nn_flags |= IN_CHANGE | IN_UPDATE;
+ vnode_pager_setsize(dvp, filesize);
+
+ return (0);
+}
+
+/*
+ * Remove one entry from the directory dvp.
+ *
+ * When node is non-NULL the entry removed is the one recorded in the node
+ * (directory offset nn_diroff, inode number nn_ino).  When node is NULL the
+ * offset is taken from the directory node's own nn_diroff and the entry
+ * must carry NANDFS_WHT_INO.  cnp is not used.
+ *
+ * The entry is unlinked by clearing its inode number and folding its space
+ * into the rec_len of the closest preceding entry with a non-zero inode in
+ * the same block (or of the block's first entry when there is none).  If
+ * the removed entry was the last one in the file, i_size is shrunk.
+ *
+ * Returns 0 on success (also when the directory has size 0), ENOENT if no
+ * matching entry exists at the recorded offset, or the error from
+ * nandfs_bread() or nandfs_dirty_buf().
+ */
+int
+nandfs_remove_dirent(struct vnode *dvp, struct nandfs_node *node,
+    struct componentname *cnp)
+{
+	struct nandfs_node *dir_node;
+	struct nandfs_dir_entry *dirent, *pdirent;
+	struct buf *bp;
+	uint64_t filesize, blocknr, ino, offset;
+	uint32_t blocksize, limit, off;
+	uint16_t newsize;
+	uint8_t *pos;
+	int error, found;
+
+	dir_node = VTON(dvp);
+	filesize = dir_node->nn_inode.i_size;
+	if (!filesize)
+		return (0);
+
+	if (node) {
+		offset = node->nn_diroff;
+		ino = node->nn_ino;
+	} else {
+		offset = dir_node->nn_diroff;
+		ino = NANDFS_WHT_INO;
+	}
+
+	dirent = pdirent = NULL;
+	blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+	blocknr = offset / blocksize;
+
+	DPRINTF(LOOKUP, ("rm direntry dvp %p node %p ino %#jx at off %#jx\n",
+	    dvp, node, (uintmax_t)ino, (uintmax_t)offset));
+
+	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	/*
+	 * Walk the block record by record up to the in-block offset of the
+	 * target, remembering the last entry with a non-zero inode on the
+	 * way; that entry will absorb the removed one.
+	 */
+	pos = bp->b_data;
+	off = 0;
+	found = 0;
+	limit = offset % blocksize;
+	pdirent = (struct nandfs_dir_entry *) bp->b_data;
+	while (off <= limit) {
+		dirent = (struct nandfs_dir_entry *) (pos + off);
+
+		if ((off == limit) &&
+		    (dirent->inode == ino)) {
+			found = 1;
+			break;
+		}
+		/*
+		 * A zero record length would never advance the walk; stop
+		 * and report the entry as missing instead of spinning.
+		 */
+		if (dirent->rec_len == 0)
+			break;
+		if (dirent->inode != 0)
+			pdirent = dirent;
+		off += dirent->rec_len;
+	}
+
+	if (!found) {
+		nandfs_error("cannot find entry to remove");
+		brelse(bp);
+		/* error is still 0 from the successful read; do not return it. */
+		return (ENOENT);
+	}
+	DPRINTF(LOOKUP,
+	    ("rm dirent ino %#jx at %#x with size %#x\n",
+	    (uintmax_t)dirent->inode, off, dirent->rec_len));
+
+	/* Extend pdirent so that it covers everything up to the end of dirent. */
+	newsize = (uintptr_t)dirent - (uintptr_t)pdirent;
+	newsize += dirent->rec_len;
+	pdirent->rec_len = newsize;
+	dirent->inode = 0;
+	error = nandfs_dirty_buf(bp, 0);
+	if (error)
+		return (error);
+
+	dir_node->nn_flags |= IN_CHANGE | IN_UPDATE;
+	/* If last one modify filesize */
+	if ((offset + NANDFS_DIR_REC_LEN(dirent->name_len)) == filesize) {
+		filesize = blocknr * blocksize +
+		    ((uintptr_t)pdirent - (uintptr_t)pos) +
+		    NANDFS_DIR_REC_LEN(pdirent->name_len);
+		dir_node->nn_inode.i_size = filesize;
+	}
+
+	return (0);
+}
+
+/*
+ * Store newparent as the inode number of the very first entry in logical
+ * block 0 of the directory dvp.  nandfs_init_dir() adds ".." before ".",
+ * so for directories it set up that first entry is the parent link.
+ *
+ * Returns the error from nandfs_bread() (after releasing the buffer) or
+ * from nandfs_dirty_buf(); 0 on success.
+ */
+int
+nandfs_update_parent_dir(struct vnode *dvp, uint64_t newparent)
+{
+	struct nandfs_dir_entry *first;
+	struct buf *bp;
+	int error;
+
+	error = nandfs_bread(VTON(dvp), 0, NOCRED, 0, &bp);
+	if (error != 0) {
+		brelse(bp);
+		return (error);
+	}
+
+	first = (struct nandfs_dir_entry *)bp->b_data;
+	first->inode = newparent;
+
+	return (nandfs_dirty_buf(bp, 0));
+}
+
+/*
+ * Re-point an existing directory entry at another inode: the entry located
+ * at tnode's recorded directory offset (nn_diroff) in dvp gets fnode's
+ * inode number.  The entry is asserted to currently hold tnode's inode
+ * number.
+ *
+ * Returns 0 when the directory has size 0 (nothing is touched) or on
+ * success; otherwise the error from nandfs_bread() or nandfs_dirty_buf().
+ */
+int
+nandfs_update_dirent(struct vnode *dvp, struct nandfs_node *fnode,
+    struct nandfs_node *tnode)
+{
+	struct nandfs_node *dir;
+	struct nandfs_dir_entry *de;
+	struct buf *bp;
+	uint64_t lblk;
+	uint32_t bsize, boff;
+	int error;
+
+	dir = VTON(dvp);
+	if (dir->nn_inode.i_size == 0)
+		return (0);
+
+	DPRINTF(LOOKUP,
+	    ("chg direntry dvp %p ino %#jx to in %#jx at off %#jx\n",
+	    dvp, (uintmax_t)tnode->nn_ino, (uintmax_t)fnode->nn_ino,
+	    (uintmax_t)tnode->nn_diroff));
+
+	/* Split the directory offset into a block index and an in-block offset. */
+	bsize = dir->nn_nandfsdev->nd_blocksize;
+	lblk = tnode->nn_diroff / bsize;
+	boff = tnode->nn_diroff % bsize;
+
+	error = nandfs_bread(dir, lblk, NOCRED, 0, &bp);
+	if (error != 0) {
+		brelse(bp);
+		return (error);
+	}
+
+	de = (struct nandfs_dir_entry *)((uint8_t *)bp->b_data + boff);
+	KASSERT((de->inode == tnode->nn_ino),
+	    ("direntry mismatch"));
+	de->inode = fnode->nn_ino;
+
+	return (nandfs_dirty_buf(bp, 0));
+}
+
+/*
+ * Populate a freshly created directory dvp with its two initial entries:
+ * ".." (pointing at parent_ino) first, then "." (pointing at ino).
+ *
+ * Returns 0 on success, or the error reported by nandfs_add_dirent() for
+ * the first entry that could not be added.  The real error is propagated
+ * rather than -1, because -1 is ERESTART inside the FreeBSD kernel.
+ */
+int
+nandfs_init_dir(struct vnode *dvp, uint64_t ino, uint64_t parent_ino)
+{
+	int error;
+
+	error = nandfs_add_dirent(dvp, parent_ino, "..", 2, DT_DIR);
+	if (error == 0)
+		error = nandfs_add_dirent(dvp, ino, ".", 1, DT_DIR);
+
+	if (error != 0) {
+		/* %jd takes an intmax_t, so convert the 64-bit inode numbers. */
+		nandfs_error("%s: cannot initialize dir ino:%jd(pino:%jd)\n",
+		    __func__, (intmax_t)ino, (intmax_t)parent_ino);
+	}
+
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_fs.h b/sys/fs/nandfs/nandfs_fs.h
new file mode 100644
index 0000000..b72be40
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_fs.h
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Original definitions written by Koji Sato <koji@osrg.net>
+ * and Ryusuke Konishi <ryusuke@osrg.net>
+ * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NANDFS_FS_H
+#define _NANDFS_FS_H
+
+#include <sys/uuid.h>
+
+/* Number of nandfs_daddr_t block addresses that fit in one block of fsdev. */
+#define MNINDIR(fsdev) ((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t))
+
+/*
+ * Inode structure. There are a few dedicated inode numbers that are
+ * defined here first.
+ */
+#define NANDFS_WHT_INO 1 /* Whiteout ino */
+#define NANDFS_ROOT_INO 2 /* Root file inode */
+#define NANDFS_DAT_INO 3 /* DAT file */
+#define NANDFS_CPFILE_INO 4 /* checkpoint file */
+#define NANDFS_SUFILE_INO 5 /* segment usage file */
+#define NANDFS_IFILE_INO 6 /* ifile */
+#define NANDFS_GC_INO 7 /* Cleanerd node */
+#define NANDFS_ATIME_INO 8 /* Atime file (reserved) */
+#define NANDFS_XATTR_INO 9 /* Xattribute file (reserved) */
+#define NANDFS_SKETCH_INO 10 /* Sketch file (obsolete) */
+#define NANDFS_USER_INO 11 /* First user's file inode number */
+
+/* True when ino lies in the range NANDFS_DAT_INO..NANDFS_GC_INO, inclusive. */
+#define NANDFS_SYS_NODE(ino) \
+ (((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO))
+
+#define NDADDR 12 /* Direct addresses in inode. */
+#define NIADDR 3 /* Indirect addresses in inode. */
+
+/* Signed 64-bit block address; the type of i_db[], i_ib[] and i_special. */
+typedef int64_t nandfs_daddr_t;
+/* Signed 64-bit type; presumably a file-relative logical block number. */
+typedef int64_t nandfs_lbn_t;
+
+/*
+ * Inode record.  The number at the start of each field comment is the
+ * field's byte offset; i_pad brings the total to 256 bytes, which the
+ * CTASSERT following the structure enforces for kernel builds.
+ */
+struct nandfs_inode {
+ uint64_t i_blocks; /* 0: size in device blocks */
+ uint64_t i_size; /* 8: size in bytes */
+ uint64_t i_ctime; /* 16: creation time in seconds */
+ uint64_t i_mtime; /* 24: modification time in seconds part*/
+ uint32_t i_ctime_nsec; /* 32: creation time nanoseconds part */
+ uint32_t i_mtime_nsec; /* 36: modification time in nanoseconds */
+ uint32_t i_uid; /* 40: user id */
+ uint32_t i_gid; /* 44: group id */
+ uint16_t i_mode; /* 48: file mode */
+ uint16_t i_links_count; /* 50: number of references to the inode*/
+ uint32_t i_flags; /* 52: NANDFS_*_FL flags */
+ nandfs_daddr_t i_special; /* 56: special */
+ nandfs_daddr_t i_db[NDADDR]; /* 64: Direct disk blocks. */
+ nandfs_daddr_t i_ib[NIADDR]; /* 160: Indirect disk blocks. */
+ uint64_t i_xattr; /* 184: reserved for extended attributes*/
+ uint32_t i_generation; /* 192: file generation for NFS */
+ uint32_t i_pad[15]; /* 196: make it 64 bits aligned */
+};
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_inode) == 256);
+#endif
+
+/*
+ * Each checkpoint/snapshot has a super root.
+ *
+ * The super root holds the inodes of the three system files: `dat', `cp' and
+ * 'su' files. All other FS state is defined by those.
+ *
+ * It is CRC checksum'ed and time stamped.
+ */
+
+struct nandfs_super_root {
+ uint32_t sr_sum; /* check-sum */
+ uint16_t sr_bytes; /* byte count of this structure */
+ uint16_t sr_flags; /* reserved for flags */
+ uint64_t sr_nongc_ctime; /* timestamp, not for cleaner(?) */
+ /*
+  * The three inodes below are declared back to back in the order DAT,
+  * cpfile, sufile; NANDFS_SR_MDT_OFFSET() addresses them as index 0, 1
+  * and 2 counted from sr_dat.
+  */
+ struct nandfs_inode sr_dat; /* DAT, virt->phys translation inode */
+ struct nandfs_inode sr_cpfile; /* CP, checkpoints inode */
+ struct nandfs_inode sr_sufile; /* SU, segment usage inode */
+};
+
+/*
+ * Byte offset, from the start of the super root, of the i-th metadata file
+ * inode (0 = DAT, 1 = cpfile, 2 = sufile) when each inode occupies
+ * inode_size bytes.  offsetof() is used instead of casting a member address
+ * to uint32_t, which truncates a pointer on 64-bit platforms.
+ */
+#define NANDFS_SR_MDT_OFFSET(inode_size, i)				\
+	((uint32_t)offsetof(struct nandfs_super_root, sr_dat) +	\
+	(inode_size) * (i))
+
+#define NANDFS_SR_DAT_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 0)
+#define NANDFS_SR_CPFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 1)
+#define NANDFS_SR_SUFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 2)
+/* Size in bytes of the in-memory super root structure. */
+#define NANDFS_SR_BYTES (sizeof(struct nandfs_super_root))
+
+/*
+ * The superblock describes the basic structure and mount history. It also
+ * records some sizes of structures found on the disc for sanity checks.
+ *
+ * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and
+ * NANDFS_SB2_OFFSET_BYTES.
+ */
+
+/* File system states stored on media in superblock's sbp->s_state */
+#define NANDFS_VALID_FS 0x0001 /* cleanly unmounted and all is ok */
+#define NANDFS_ERROR_FS 0x0002 /* there were errors detected, fsck */
+#define NANDFS_RESIZE_FS 0x0004 /* resize required, XXX unknown flag*/
+/*
+ * Names for the three bits above, laid out like a printf(9) "%b"
+ * description string (leading base byte, then bit-number/name pairs).
+ */
+#define NANDFS_MOUNT_STATE_BITS "\20\1VALID_FS\2ERROR_FS\3RESIZE_FS"
+
+/*
+ * Brief description of control structures:
+ *
+ * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks.
+ * Simple round-robin policy is used in order to choose which block will
+ * contain new super block.
+ *
+ * Simple case with 2 blocks:
+ * 1: fsdata sblock1 [sblock3 [sblock5 ..]]
+ * 2: fsdata sblock2 [sblock4 [sblock6 ..]]
+ */
+/*
+ * File system data record.  The structure is packed and, for kernel builds,
+ * asserted to be exactly 512 bytes by the CTASSERT that follows it.
+ */
+struct nandfs_fsdata {
+ uint16_t f_magic; /* presumably NANDFS_FSDATA_MAGIC -- confirm */
+ uint16_t f_bytes;
+
+ uint32_t f_sum; /* checksum of fsdata */
+ uint32_t f_rev_level; /* major disk format revision */
+
+ uint64_t f_ctime; /* creation time (execution time
+ of newfs) */
+ /* Block size represented as: blocksize = 1 << (f_log_block_size + 10) */
+ uint32_t f_log_block_size;
+
+ uint16_t f_inode_size; /* size of an inode */
+ uint16_t f_dat_entry_size; /* size of a dat entry */
+ uint16_t f_checkpoint_size; /* size of a checkpoint */
+ uint16_t f_segment_usage_size; /* size of a segment usage */
+
+ uint16_t f_sbbytes; /* byte count of CRC calculation
+ for super blocks. s_reserved
+ is excluded! */
+
+ uint16_t f_errors; /* behaviour on detecting errors */
+
+ uint32_t f_erasesize;
+ uint64_t f_nsegments; /* number of segm. in filesystem */
+ nandfs_daddr_t f_first_data_block; /* 1st seg disk block number */
+ uint32_t f_blocks_per_segment; /* number of blocks per segment */
+ uint32_t f_r_segments_percentage; /* reserved segments percentage */
+
+ struct uuid f_uuid; /* 128-bit uuid for volume */
+ char f_volume_name[16]; /* volume name */
+ /* Padding up to 512 bytes; NANDFS_FSDATA_CRC_BYTES stops right before it. */
+ uint32_t f_pad[104];
+} __packed;
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_fsdata) == 512);
+#endif
+
+/*
+ * Super block record.  The structure is packed and, for kernel builds,
+ * asserted to be exactly 256 bytes by the CTASSERT that follows it.
+ */
+struct nandfs_super_block {
+ uint16_t s_magic; /* magic value for identification */
+
+ uint32_t s_sum; /* check sum of super block */
+
+ uint64_t s_last_cno; /* last checkpoint number */
+ uint64_t s_last_pseg; /* addr part. segm. written last */
+ uint64_t s_last_seq; /* seq.number of seg written last */
+ uint64_t s_free_blocks_count; /* free blocks count */
+
+ uint64_t s_mtime; /* mount time */
+ uint64_t s_wtime; /* write time */
+ uint16_t s_state; /* file system state */
+
+ char s_last_mounted[64]; /* directory where last mounted */
+
+ uint32_t s_c_interval; /* commit interval of segment */
+ uint32_t s_c_block_max; /* threshold of data amount for
+ the segment construction */
+ /* NANDFS_SB_BYTES is the offset of this field, so it is outside the CRC. */
+ uint32_t s_reserved[32]; /* padding to end of the block */
+} __packed;
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_super_block) == 256);
+#endif
+
+/* Magic numbers; presumably the values of f_magic and s_magic -- confirm. */
+#define NANDFS_FSDATA_MAGIC 0xf8da
+#define NANDFS_SUPER_MAGIC 0x8008
+
+#define NANDFS_NFSAREAS 4
+/* NANDFS_NFSAREAS times the erase size esize, in bytes. */
+#define NANDFS_DATA_OFFSET_BYTES(esize) (NANDFS_NFSAREAS * (esize))
+
+/* Size of struct nandfs_fsdata, i.e. the first byte after an fsdata record. */
+#define NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata))
+
+#define NANDFS_DEF_BLOCKSIZE 4096
+#define NANDFS_MIN_BLOCKSIZE 512
+
+/* 2 << 16 evaluates to 131072 bytes (128 KiB). */
+#define NANDFS_DEF_ERASESIZE (2 << 16)
+
+#define NANDFS_MIN_SEGSIZE NANDFS_DEF_ERASESIZE
+
+#define NANDFS_CURRENT_REV 9 /* current major revision */
+
+/* Bytes of fsdata covered by the CRC: everything before f_pad. */
+#define NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad)
+/* Bytes count of super_block for CRC-calculation */
+#define NANDFS_SB_BYTES offsetof(struct nandfs_super_block, s_reserved)
+
+/* Maximal count of links to a file */
+#define NANDFS_LINK_MAX 32000
+
+/*
+ * Structure of a directory entry.
+ *
+ * Note that they can't span blocks; the rec_len fills out.
+ */
+
+/* Capacity of the name[] array in struct nandfs_dir_entry. */
+#define NANDFS_NAME_LEN 255
+/*
+ * Directory record.  rec_len is the distance in bytes to the next record
+ * in the block; the directory code in nandfs_dir.c steps through a block
+ * by adding rec_len and marks a removed record by setting inode to 0.
+ */
+struct nandfs_dir_entry {
+ uint64_t inode; /* inode number */
+ uint16_t rec_len; /* directory entry length */
+ uint8_t name_len; /* name length */
+ uint8_t file_type;
+ char name[NANDFS_NAME_LEN]; /* file name */
+ char pad;
+};
+
+/*
+ * NANDFS_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 8
+ */
+#define NANDFS_DIR_PAD 8
+/* Mask of the low bits cleared when rounding up to NANDFS_DIR_PAD. */
+#define NANDFS_DIR_ROUND (NANDFS_DIR_PAD - 1)
+/* Byte offset of the name field, i.e. the size of the fixed record header. */
+#define NANDFS_DIR_NAME_OFFSET (offsetof(struct nandfs_dir_entry, name))
+/*
+ * Minimal record length for a name of name_len bytes: header plus name,
+ * rounded up to the next multiple of NANDFS_DIR_PAD.
+ */
+#define NANDFS_DIR_REC_LEN(name_len) \
+ (((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND) \
+ & ~NANDFS_DIR_ROUND)
+/* Bytes available to the name (name plus padding) in such a minimal record. */
+#define NANDFS_DIR_NAME_LEN(name_len) \
+ (NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET)
+
+/*
+ * NiLFS/NANDFS divides the disc into fixed length segments. Each segment is
+ * filled with one or more partial segments of variable lengths.
+ *
+ * Each partial segment has a segment summary header followed by updates of
+ * files and optionally a super root.
+ */
+
+/*
+ * Virtual to physical block translation information. For data blocks it maps
+ * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non
+ * datablocks it is the virtual block number assigned to an indirect block
+ * and has no bi_blkoff. The physical block number is the next
+ * available data block in the partial segment after all the binfo's.
+ */
+/*
+ * Three 64-bit fields; a CTASSERT further down requires this structure and
+ * struct nandfs_binfo_dat to have the same size.
+ */
+struct nandfs_binfo_v {
+ uint64_t bi_ino; /* file's inode */
+ uint64_t bi_vblocknr; /* assigned virtual block number */
+ uint64_t bi_blkoff; /* for file's logical block number */
+};
+
+/*
+ * DAT allocation. For data blocks just the logical block number that maps on
+ * the next available data block in the partial segment after the binfo's.
+ */
+struct nandfs_binfo_dat {
+ uint64_t bi_ino;
+ uint64_t bi_blkoff; /* DAT file's logical block number */
+ uint8_t bi_level; /* whether this is meta block */
+ /* Fills the structure out to the size of struct nandfs_binfo_v. */
+ uint8_t bi_pad[7];
+};
+
+#ifdef _KERNEL
+CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat));
+#endif
+
+/*
+ * Convenience union for both types of binfo's.  Both members are asserted
+ * to be the same size, so either view spans the whole union.
+ */
+union nandfs_binfo {
+ struct nandfs_binfo_v bi_v;
+ struct nandfs_binfo_dat bi_dat;
+};
+
+/*
+ * Indirect buffers path.
+ * NOTE(review): by the field names, in_lbn is the logical block number of
+ * an indirect block and in_off an index within it -- confirm against the
+ * bmap code, which is not visible here.
+ */
+struct nandfs_indir {
+ nandfs_daddr_t in_lbn;
+ int in_off;
+};
+
+/*
+ * The (partial) segment summary.  This is only the fixed-size header; the
+ * binfo structures counted by ss_nbinfos follow it as a stream.
+ */
+struct nandfs_segment_summary {
+ uint32_t ss_datasum; /* CRC of complete data block */
+ uint32_t ss_sumsum; /* CRC of segment summary only */
+ uint32_t ss_magic; /* magic to identify segment summary */
+ uint16_t ss_bytes; /* size of segment summary structure */
+ uint16_t ss_flags; /* NANDFS_SS_* flags */
+ uint64_t ss_seq; /* sequence number of this segm. sum */
+ uint64_t ss_create; /* creation timestamp in seconds */
+ uint64_t ss_next; /* blocknumber of next segment */
+ uint32_t ss_nblocks; /* number of blocks used by summary */
+ uint32_t ss_nbinfos; /* number of binfo structures */
+ uint32_t ss_sumbytes; /* total size of segment summary */
+ uint32_t ss_pad;
+ /* stream of binfo structures */
+};
+
+#define NANDFS_SEGSUM_MAGIC 0x8e680011 /* segment summary magic number */
+
+/* Segment summary flags */
+#define NANDFS_SS_LOGBGN 0x0001 /* begins a logical segment */
+#define NANDFS_SS_LOGEND 0x0002 /* ends a logical segment */
+#define NANDFS_SS_SR 0x0004 /* has super root */
+#define NANDFS_SS_SYNDT 0x0008 /* includes data only updates */
+#define NANDFS_SS_GC 0x0010 /* segment written for cleaner operation */
+/* Names for the five flag bits above, in printf(9) "%b" layout. */
+#define NANDFS_SS_FLAG_BITS "\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC"
+
+/* Segment summary constraints */
+#define NANDFS_SEG_MIN_BLOCKS 16 /* minimum number of blocks in a
+ full segment */
+#define NANDFS_PSEG_MIN_BLOCKS 2 /* minimum number of blocks in a
+ partial segment */
+#define NANDFS_MIN_NRSVSEGS 8 /* minimum number of reserved
+ segments */
+
+/*
+ * Structure of DAT/inode file.
+ *
+ * A DAT file is divided into groups. The maximum number of groups is the
+ * number of block group descriptors that fit into one block; this descriptor
+ * only gives the number of free entries in the associated group.
+ *
+ * Each group has a block sized bitmap indicating if an entry is taken or
+ * empty. Each bit stands for a DAT entry.
+ *
+ * The inode file has exactly the same format only the entries are inode
+ * entries.
+ */
+
+/* Per-group descriptor; it carries nothing but the group's free count. */
+struct nandfs_block_group_desc {
+ uint32_t bg_nfrees; /* num. free entries in block group */
+};
+
+/*
+ * DAT entry in a super root's DAT file.  The DAT lookup code in
+ * nandfs_dat.c indexes an array of these inside a DAT block and copies
+ * de_start, de_end and de_blocknr into a struct nandfs_vinfo.
+ */
+struct nandfs_dat_entry {
+ uint64_t de_blocknr; /* block number */
+ uint64_t de_start; /* valid from checkpoint */
+ uint64_t de_end; /* valid till checkpoint */
+ uint64_t de_rsv; /* reserved for future use */
+};
+
+/*
+ * Structure of CP file.
+ *
+ * A snapshot is just a checkpoint only it's protected against removal by the
+ * cleaner. The snapshots are kept on a double linked list of checkpoints.
+ */
+/* Link pair of the snapshot list; links are checkpoint numbers, not pointers. */
+struct nandfs_snapshot_list {
+ uint64_t ssl_next; /* checkpoint nr. forward */
+ uint64_t ssl_prev; /* checkpoint nr. back */
+};
+
+/*
+ * Checkpoint entry structure.  Each entry embeds the inode of the ifile
+ * (cp_ifile_inode) and the snapshot list links (cp_snapshot_list).
+ */
+struct nandfs_checkpoint {
+ uint32_t cp_flags; /* NANDFS_CHECKPOINT_* flags */
+ uint32_t cp_checkpoints_count; /* ZERO, not used anymore? */
+ struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots */
+ uint64_t cp_cno; /* checkpoint number */
+ uint64_t cp_create; /* creation timestamp */
+ uint64_t cp_nblk_inc; /* number of blocks incremented */
+ uint64_t cp_blocks_count; /* reserved (might be deleted) */
+ struct nandfs_inode cp_ifile_inode; /* inode file inode */
+};
+
+/* Checkpoint flags (bit values for cp_flags) */
+#define NANDFS_CHECKPOINT_SNAPSHOT 1
+#define NANDFS_CHECKPOINT_INVALID 2
+#define NANDFS_CHECKPOINT_SKETCH 4
+#define NANDFS_CHECKPOINT_MINOR 8
+/* Names for the four flag bits above, in printf(9) "%b" layout. */
+#define NANDFS_CHECKPOINT_BITS "\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR"
+
+/*
+ * Header of the checkpoint file: two counters plus the snapshot list links
+ * kept in the header itself.
+ */
+struct nandfs_cpfile_header {
+ uint64_t ch_ncheckpoints; /* number of checkpoints */
+ uint64_t ch_nsnapshots; /* number of snapshots */
+ struct nandfs_snapshot_list ch_snapshot_list; /* snapshot list */
+};
+
+/*
+ * Size of the cpfile header expressed in checkpoint-entry-sized slots,
+ * rounded up; by its name, the index of the first checkpoint entry.
+ */
+#define NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET \
+ ((sizeof(struct nandfs_cpfile_header) + \
+ sizeof(struct nandfs_checkpoint) - 1) / \
+ sizeof(struct nandfs_checkpoint))
+
+
+/* All-ones 32-bit value; by its name, a marker for "no segment". */
+#define NANDFS_NOSEGMENT 0xffffffff
+
+/*
+ * Structure of SU file.
+ *
+ * The segment usage file sums up how each of the segments are used. They are
+ * indexed by their segment number.
+ */
+
+/* Segment usage entry: one 16-byte record (8 + 4 + 4 bytes). */
+struct nandfs_segment_usage {
+ uint64_t su_lastmod; /* last modified timestamp */
+ uint32_t su_nblocks; /* number of blocks in segment */
+ uint32_t su_flags; /* NANDFS_SEGMENT_USAGE_* flags */
+};
+
+/* Segment usage flags (bit values for su_flags) */
+#define NANDFS_SEGMENT_USAGE_ACTIVE 1
+#define NANDFS_SEGMENT_USAGE_DIRTY 2
+#define NANDFS_SEGMENT_USAGE_ERROR 4
+#define NANDFS_SEGMENT_USAGE_GC 8
+/*
+ * Names for the flag bits above, in printf(9) "%b" layout.  Bit 4 (GC) is
+ * listed so that every defined flag has a name, as in NANDFS_SS_FLAG_BITS.
+ */
+#define NANDFS_SEGMENT_USAGE_BITS "\20\1ACTIVE\2DIRTY\3ERROR\4GC"
+
+/* Header of the segment usage file: three 64-bit counters, 24 bytes. */
+struct nandfs_sufile_header {
+ uint64_t sh_ncleansegs; /* number of segments marked clean */
+ uint64_t sh_ndirtysegs; /* number of segments marked dirty */
+ uint64_t sh_last_alloc; /* last allocated segment number */
+};
+
+/*
+ * Size of the sufile header expressed in segment-usage-entry-sized slots,
+ * rounded up; by its name, the index of the first segment usage entry.
+ */
+#define NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \
+ ((sizeof(struct nandfs_sufile_header) + \
+ sizeof(struct nandfs_segment_usage) - 1) / \
+ sizeof(struct nandfs_segment_usage))
+
+/*
+ * Payload of NANDFS_IOCTL_GET_SUSTAT.
+ * NOTE(review): the field meanings below are inferred from their names
+ * (segment counts, timestamps, a sequence number) -- confirm against the
+ * ioctl implementation.
+ */
+struct nandfs_seg_stat {
+ uint64_t nss_nsegs;
+ uint64_t nss_ncleansegs;
+ uint64_t nss_ndirtysegs;
+ uint64_t nss_ctime;
+ uint64_t nss_nongc_ctime;
+ uint64_t nss_prot_seq;
+};
+
+/*
+ * Two selector values, 0 and 1.
+ * NOTE(review): presumably the values carried in nandfs_cpmode.ncpm_mode
+ * -- confirm against the users of this enum.
+ */
+enum {
+ NANDFS_CHECKPOINT,
+ NANDFS_SNAPSHOT
+};
+
+/* NOTE(review): presumably the cap on cpinfo records per request -- confirm. */
+#define NANDFS_CPINFO_MAX 512
+
+/*
+ * Checkpoint information record.
+ * NOTE(review): the field names parallel struct nandfs_checkpoint
+ * (cp_flags, cp_cno, cp_create, cp_nblk_inc, cp_blocks_count), which
+ * suggests this is its exported form -- confirm against the cpfile code.
+ */
+struct nandfs_cpinfo {
+ uint32_t nci_flags;
+ uint32_t nci_pad;
+ uint64_t nci_cno;
+ uint64_t nci_create;
+ uint64_t nci_nblk_inc;
+ uint64_t nci_blocks_count;
+ uint64_t nci_next;
+};
+
+/* NOTE(review): presumably the cap on suinfo records per request -- confirm. */
+#define NANDFS_SEGMENTS_MAX 512
+
+/*
+ * Segment usage information record.
+ * NOTE(review): nsi_lastmod, nsi_blocks and nsi_flags parallel the fields
+ * of struct nandfs_segment_usage; nsi_num is presumably the segment
+ * number -- confirm against the sufile code.
+ */
+struct nandfs_suinfo {
+ uint64_t nsi_num;
+ uint64_t nsi_lastmod;
+ uint32_t nsi_blocks;
+ uint32_t nsi_flags;
+};
+
+/* NOTE(review): presumably the cap on vinfo records per request -- confirm. */
+#define NANDFS_VINFO_MAX 512
+
+/*
+ * Virtual block information record.  The DAT lookup code in nandfs_dat.c
+ * uses nvi_vblocknr as the DAT entry number and fills nvi_start, nvi_end
+ * and nvi_blocknr from that entry's de_start, de_end and de_blocknr.
+ */
+struct nandfs_vinfo {
+ uint64_t nvi_ino;
+ uint64_t nvi_vblocknr;
+ uint64_t nvi_start;
+ uint64_t nvi_end;
+ uint64_t nvi_blocknr;
+ int nvi_alive;
+};
+
+/*
+ * Argument of NANDFS_IOCTL_CHANGE_CPMODE.
+ * NOTE(review): presumably ncpm_cno names the checkpoint and ncpm_mode the
+ * requested mode -- confirm against the ioctl implementation.
+ */
+struct nandfs_cpmode {
+ uint64_t ncpm_cno;
+ uint32_t ncpm_mode;
+ uint32_t ncpm_pad;
+};
+
+/*
+ * Generic array argument used by several ioctls.  As used by
+ * nandfs_get_dat_bdescs_ioctl(), nv_base is the user-space address of the
+ * array (carried as a 64-bit integer) and nv_nmembs its element count.
+ */
+struct nandfs_argv {
+ uint64_t nv_base;
+ uint32_t nv_nmembs;
+ uint16_t nv_size; /* presumably the size of one element -- confirm */
+ uint16_t nv_flags;
+ uint64_t nv_index;
+};
+
+/*
+ * Payload of NANDFS_IOCTL_GET_CPSTAT.
+ * NOTE(review): by the names, a checkpoint number plus checkpoint and
+ * snapshot counts -- confirm against the ioctl implementation.
+ */
+struct nandfs_cpstat {
+ uint64_t ncp_cno;
+ uint64_t ncp_ncps;
+ uint64_t ncp_nss;
+};
+
+/*
+ * A start/end pair of 64-bit values; embedded in struct nandfs_vdesc as
+ * vd_period.  NOTE(review): the units (presumably checkpoint numbers) are
+ * not visible here -- confirm.
+ */
+struct nandfs_period {
+ uint64_t p_start;
+ uint64_t p_end;
+};
+
+/*
+ * Virtual block descriptor.
+ * NOTE(review): no user of this structure is visible here; the field
+ * meanings follow from their names only -- confirm against the cleaner.
+ */
+struct nandfs_vdesc {
+ uint64_t vd_ino;
+ uint64_t vd_cno;
+ uint64_t vd_vblocknr;
+ struct nandfs_period vd_period;
+ uint64_t vd_blocknr;
+ uint64_t vd_offset;
+ uint32_t vd_flags;
+ uint32_t vd_pad;
+};
+
+/*
+ * Block descriptor.  nandfs_get_dat_bdescs() looks bd_offset up in the DAT
+ * node's block map and writes the result into bd_blocknr; the remaining
+ * fields are only printed there.
+ */
+struct nandfs_bdesc {
+ uint64_t bd_ino;
+ uint64_t bd_oblocknr;
+ uint64_t bd_blocknr;
+ uint64_t bd_offset;
+ uint32_t bd_level;
+ uint32_t bd_alive;
+};
+
+/* Outside the kernel, supply MNAMELEN if nothing has defined it yet. */
+#ifndef _KERNEL
+#ifndef MNAMELEN
+#define MNAMELEN 88
+#endif
+#endif
+
+/*
+ * Payload of NANDFS_IOCTL_GET_FSINFO: a copy of the fsdata and super block
+ * structures plus a MNAMELEN-byte device name buffer.
+ */
+struct nandfs_fsinfo {
+ struct nandfs_fsdata fs_fsdata;
+ struct nandfs_super_block fs_super;
+ char fs_dev[MNAMELEN];
+};
+
+#define NANDFS_MAX_MOUNTS 65535
+
+/*
+ * Ioctl command codes.  All use group 'N' with numbers 100 through 111;
+ * the third macro argument is the type of the data passed with the ioctl.
+ */
+#define NANDFS_IOCTL_GET_SUSTAT _IOR('N', 100, struct nandfs_seg_stat)
+#define NANDFS_IOCTL_CHANGE_CPMODE _IOWR('N', 101, struct nandfs_cpmode)
+#define NANDFS_IOCTL_GET_CPINFO _IOWR('N', 102, struct nandfs_argv)
+#define NANDFS_IOCTL_DELETE_CP _IOWR('N', 103, uint64_t[2])
+#define NANDFS_IOCTL_GET_CPSTAT _IOR('N', 104, struct nandfs_cpstat)
+#define NANDFS_IOCTL_GET_SUINFO _IOWR('N', 105, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_VINFO _IOWR('N', 106, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_BDESCS _IOWR('N', 107, struct nandfs_argv)
+#define NANDFS_IOCTL_GET_FSINFO _IOR('N', 108, struct nandfs_fsinfo)
+#define NANDFS_IOCTL_MAKE_SNAP _IOWR('N', 109, uint64_t)
+#define NANDFS_IOCTL_DELETE_SNAP _IOWR('N', 110, uint64_t)
+#define NANDFS_IOCTL_SYNC _IOWR('N', 111, uint64_t)
+
+#endif /* _NANDFS_FS_H */
diff --git a/sys/fs/nandfs/nandfs_ifile.c b/sys/fs/nandfs/nandfs_ifile.c
new file mode 100644
index 0000000..7e4db87
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_ifile.c
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Allocate a new inode in the ifile and return an in-core node for it.
+ *
+ * With the ifile vnode exclusively locked, the allocation request is seeded
+ * with nd_last_ino + 1, a free entry is located, the entry's block is
+ * fetched, the on-disk inode is initialised with mode and the entry is
+ * marked allocated.  After the lock is dropped nd_last_ino is updated and
+ * the node is obtained through nandfs_get_node().
+ *
+ * Returns 0 and sets *node on success; otherwise returns the error of the
+ * step that failed.  *node is not examined when nandfs_get_node() fails.
+ */
+int
+nandfs_node_create(struct nandfsmount *nmp, struct nandfs_node **node,
+    uint16_t mode)
+{
+	struct nandfs_alloc_request req;
+	struct nandfs_device *nandfsdev;
+	struct nandfs_mdt *mdt;
+	struct nandfs_node *ifile;
+	struct nandfs_inode *inode;
+	struct vnode *vp;
+	uint32_t entry;
+	int error;
+
+	nandfsdev = nmp->nm_nandfsdev;
+	mdt = &nandfsdev->nd_ifile_mdt;
+	ifile = nmp->nm_ifile_node;
+	vp = NTOV(ifile);
+
+	VOP_LOCK(vp, LK_EXCLUSIVE);
+
+	/* Allocate new inode in ifile */
+	req.entrynum = nandfsdev->nd_last_ino + 1;
+	error = nandfs_find_free_entry(mdt, ifile, &req);
+	if (error)
+		goto unlock;
+
+	error = nandfs_get_entry_block(mdt, ifile, &req, &entry, 1);
+	if (error)
+		goto unlock;
+
+	/* Inode initialization */
+	inode = ((struct nandfs_inode *) req.bp_entry->b_data) + entry;
+	nandfs_inode_init(inode, mode);
+
+	error = nandfs_alloc_entry(mdt, &req);
+
+unlock:
+	/* Single unlock point for both the success and the failure paths. */
+	VOP_UNLOCK(vp, 0);
+	if (error)
+		return (error);
+
+	nandfsdev->nd_last_ino = req.entrynum;
+	error = nandfs_get_node(nmp, req.entrynum, node);
+	if (error)
+		return (error);
+
+	/* Only now is *node known to be valid and safe to dereference. */
+	DPRINTF(IFILE, ("%s: node: %p ino: %#jx\n",
+	    __func__, node, (uintmax_t)((*node)->nn_ino)));
+
+	return (0);
+}
+
+/*
+ * Release the ifile entry that belongs to node.
+ *
+ * With the ifile vnode exclusively locked, the entry for node->nn_ino is
+ * looked up, nandfs_inode_destroy() is applied to the node's in-core inode
+ * and the entry is freed.  Failures are logged through nandfs_error().
+ *
+ * Returns 0 on success, or the error from nandfs_find_entry() or
+ * nandfs_free_entry().
+ */
+int
+nandfs_node_destroy(struct nandfs_node *node)
+{
+	struct nandfs_alloc_request req;
+	struct nandfsmount *nmp;
+	struct nandfs_mdt *mdt;
+	struct nandfs_node *ifile;
+	struct vnode *vp;
+	int error;
+
+	nmp = node->nn_nmp;
+	req.entrynum = node->nn_ino;
+	mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
+	ifile = nmp->nm_ifile_node;
+	vp = NTOV(ifile);
+
+	DPRINTF(IFILE, ("%s: destroy node: %p ino: %#jx\n",
+	    __func__, node, (uintmax_t)node->nn_ino));
+	VOP_LOCK(vp, LK_EXCLUSIVE);
+
+	error = nandfs_find_entry(mdt, ifile, &req);
+	if (error) {
+		/* %jx takes a uintmax_t; cast as the DPRINTF calls here do. */
+		nandfs_error("%s: finding entry error:%d node %p(%jx)",
+		    __func__, error, node, (uintmax_t)node->nn_ino);
+		goto out;
+	}
+
+	nandfs_inode_destroy(&node->nn_inode);
+
+	error = nandfs_free_entry(mdt, &req);
+	if (error) {
+		nandfs_error("%s: freing entry error:%d node %p(%jx)",
+		    __func__, error, node, (uintmax_t)node->nn_ino);
+	}
+
+out:
+	/* Single unlock point for every path. */
+	VOP_UNLOCK(vp, 0);
+	if (error == 0) {
+		DPRINTF(IFILE, ("%s: freed node %p ino %#jx\n",
+		    __func__, node, (uintmax_t)node->nn_ino));
+	}
+	return (error);
+}
+
+/*
+ * Copy the node's in-core inode over its entry in the ifile and mark the
+ * buffer holding that entry dirty.
+ *
+ * The ifile vnode must already be locked by the caller (asserted).
+ * Returns the error from nandfs_get_entry_block(), which is also printed,
+ * or the result of nandfs_dirty_buf().
+ */
+int
+nandfs_node_update(struct nandfs_node *node)
+{
+	struct nandfs_alloc_request req;
+	struct nandfsmount *nmp = node->nn_nmp;
+	struct nandfs_node *ifile = nmp->nm_ifile_node;
+	struct nandfs_inode *slot;
+	uint32_t idx;
+	int error;
+
+	ASSERT_VOP_LOCKED(NTOV(ifile), __func__);
+
+	req.entrynum = node->nn_ino;
+
+	DPRINTF(IFILE, ("%s: node:%p ino:%#jx\n",
+	    __func__, &node->nn_inode, (uintmax_t)node->nn_ino));
+
+	error = nandfs_get_entry_block(&nmp->nm_nandfsdev->nd_ifile_mdt,
+	    ifile, &req, &idx, 0);
+	if (error != 0) {
+		printf("nandfs_get_entry_block returned with ERROR=%d\n",
+		    error);
+		return (error);
+	}
+
+	/* idx selects the inode within the block returned in req.bp_entry. */
+	slot = (struct nandfs_inode *)req.bp_entry->b_data + idx;
+	memcpy(slot, &node->nn_inode, sizeof(*slot));
+
+	return (nandfs_dirty_buf(req.bp_entry, 0));
+}
+
+/*
+ * Fetch the ifile block that holds inode number ino.
+ *
+ * On success *inode points at the entry inside the buffer's data and *bp
+ * is the buffer itself; the buffer is handed back without being released
+ * here.  The ifile vnode is locked exclusively only for the duration of
+ * the lookup.  On failure the error from nandfs_get_entry_block() is
+ * returned and neither output is written.
+ */
+int
+nandfs_get_node_entry(struct nandfsmount *nmp, struct nandfs_inode **inode,
+    uint64_t ino, struct buf **bp)
+{
+	struct nandfs_alloc_request req;
+	struct nandfs_node *ifile = nmp->nm_ifile_node;
+	struct nandfs_mdt *mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
+	struct vnode *vp = NTOV(ifile);
+	uint32_t slot;
+	int error;
+
+	req.entrynum = ino;
+
+	VOP_LOCK(vp, LK_EXCLUSIVE);
+	error = nandfs_get_entry_block(mdt, ifile, &req, &slot, 0);
+	if (error == 0) {
+		*inode = (struct nandfs_inode *)req.bp_entry->b_data + slot;
+		*bp = req.bp_entry;
+	}
+	VOP_UNLOCK(vp, 0);
+
+	return (error);
+}
+
diff --git a/sys/fs/nandfs/nandfs_mount.h b/sys/fs/nandfs/nandfs_mount.h
new file mode 100644
index 0000000..f733e22
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_mount.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the
+ * NetBSD Project. See http://www.NetBSD.org/ for
+ * information about NetBSD.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_mount.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FS_NANDFS_NANDFS_MOUNT_H_
+#define _FS_NANDFS_NANDFS_MOUNT_H_
+
+/*
+ * Arguments passed to mount a NANDFS file system.
+ */
+
+struct nandfs_args {
+ char *fspec; /* mount specifier */
+ int64_t cpno; /* checkpoint number */
+};
+
+#endif /* !_FS_NANDFS_NANDFS_MOUNT_H_ */
+
diff --git a/sys/fs/nandfs/nandfs_segment.c b/sys/fs/nandfs/nandfs_segment.c
new file mode 100644
index 0000000..836bead
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_segment.c
@@ -0,0 +1,1329 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/libkern.h>
+
+#include <ddb/ddb.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Allocate a fresh segment and advance the device's segment pointers.
+ *
+ * On success the segment previously reserved as "next" becomes the current
+ * one (nd_seg_num) and the newly allocated segment becomes the new "next"
+ * (nd_next_seg_num).  On failure both pointers are left untouched.
+ *
+ * Returns 0 on success or the error from nandfs_alloc_segment().
+ */
+static int
+nandfs_new_segment(struct nandfs_device *fsdev)
+{
+	uint64_t new;
+	int error;
+
+	/*
+	 * The debug trace below prints 'new' on the failure path as well;
+	 * give it a defined value in case the allocator fails without
+	 * storing a result.
+	 */
+	new = 0;
+
+	error = nandfs_alloc_segment(fsdev, &new);
+	if (!error) {
+		fsdev->nd_seg_num = fsdev->nd_next_seg_num;
+		fsdev->nd_next_seg_num = new;
+	}
+	DPRINTF(SYNC, ("%s: new segment %jx next %jx error %d\n",
+	    __func__, (uintmax_t)fsdev->nd_seg_num, (uintmax_t)new, error));
+	if (error)
+		nandfs_error("%s: cannot create segment error %d\n",
+		    __func__, error);
+
+	return (error);
+}
+
+/*
+ * Start a new partial segment and make it seginfo->curseg.
+ *
+ * When there is no current segment yet (first call for this seginfo) the
+ * new partial segment continues right after the last one written
+ * (nd_last_pseg plus the block count of the last segment summary); if that
+ * position no longer lies in the current full segment a new full segment
+ * is allocated and writing starts at its beginning.  Otherwise writing
+ * starts at the beginning of the already reserved next segment and a
+ * further segment is allocated as the new "next".
+ *
+ * A zeroed nandfs_segment is allocated, its first summary buffer is
+ * obtained from the device vnode and queued on seg->segsum, and the
+ * segment is linked into seginfo->seg_list.
+ *
+ * Returns 0 on success or the error from nandfs_touch_segment() /
+ * nandfs_new_segment().
+ */
+static int
+create_segment(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_segment *seg;
+ struct nandfs_device *fsdev;
+ struct nandfs_segment *prev;
+ struct buf *bp;
+ uint64_t start_block, curr;
+ uint32_t blks_per_seg, nblocks;
+ int error;
+
+ fsdev = seginfo->fsdev;
+ prev = seginfo->curseg;
+ blks_per_seg = fsdev->nd_fsdata.f_blocks_per_segment;
+ nblocks = fsdev->nd_last_segsum.ss_nblocks;
+
+ if (!prev) {
+ vfs_timestamp(&fsdev->nd_ts);
+ /* Touch current segment */
+ error = nandfs_touch_segment(fsdev, fsdev->nd_seg_num);
+ if (error) {
+ nandfs_error("%s: cannot preallocate segment %jx\n",
+ __func__, fsdev->nd_seg_num);
+ return (error);
+ }
+ error = nandfs_touch_segment(fsdev, 0);
+ if (error) {
+ nandfs_error("%s: cannot dirty block with segment 0\n",
+ __func__);
+ return (error);
+ }
+ /* Continue directly behind the last written partial segment. */
+ start_block = fsdev->nd_last_pseg + (uint64_t)nblocks;
+ /*
+ * XXX Hack
+ * Skip the last block of a segment: a partial segment starting
+ * there would have zero usable blocks (see num_blocks below).
+ */
+ if (blks_per_seg - (start_block % blks_per_seg) - 1 == 0)
+ start_block++;
+ curr = nandfs_get_segnum_of_block(fsdev, start_block);
+ /* Allocate new segment if last one is full */
+ if (fsdev->nd_seg_num != curr) {
+ error = nandfs_new_segment(fsdev);
+ if (error) {
+ nandfs_error("%s: cannot create new segment\n",
+ __func__);
+ return (error);
+ }
+ /*
+ * XXX Hack
+ * nd_seg_num now names the former "next" segment; restart at
+ * its first block.
+ */
+ nandfs_get_segment_range(fsdev, fsdev->nd_seg_num, &start_block, NULL);
+ }
+ } else {
+ /*
+ * The range is fetched before nandfs_new_segment() rotates the
+ * pointers, so start_block is the first block of the segment that
+ * becomes nd_seg_num below.
+ */
+ nandfs_get_segment_range(fsdev, fsdev->nd_next_seg_num,
+ &start_block, NULL);
+
+ /* Touch current segment and allocate and touch new one */
+ error = nandfs_new_segment(fsdev);
+ if (error) {
+ nandfs_error("%s: cannot create next segment\n",
+ __func__);
+ return (error);
+ }
+
+ /* Reiterate in case new buf is dirty */
+ seginfo->reiterate = 1;
+ }
+
+ /* Allocate and initialize nandfs_segment structure */
+ seg = malloc(sizeof(*seg), M_DEVBUF, M_WAITOK|M_ZERO);
+ TAILQ_INIT(&seg->segsum);
+ TAILQ_INIT(&seg->data);
+ seg->fsdev = fsdev;
+ seg->start_block = start_block;
+ /*
+ * Blocks left up to the end of the full segment, minus one.
+ * NOTE(review): the reason one block is held back is not visible
+ * here -- confirm.
+ */
+ seg->num_blocks = blks_per_seg - (start_block % blks_per_seg) - 1;
+ seg->seg_num = fsdev->nd_seg_num;
+ seg->seg_next = fsdev->nd_next_seg_num;
+ seg->segsum_blocks = 1;
+ /* The first summary block begins with the summary header itself. */
+ seg->bytes_left = fsdev->nd_blocksize -
+ sizeof(struct nandfs_segment_summary);
+ seg->segsum_bytes = sizeof(struct nandfs_segment_summary);
+
+ /* Allocate buffer for segment summary */
+ bp = getblk(fsdev->nd_devvp, nandfs_block_to_dblock(fsdev,
+ seg->start_block), fsdev->nd_blocksize, 0, 0, 0);
+ bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+ bp->b_flags |= B_MANAGED;
+
+ /* Add buffer to segment */
+ TAILQ_INSERT_TAIL(&seg->segsum, bp, b_cluster.cluster_entry);
+ /* Block-info records are appended right after the summary header. */
+ seg->current_off = bp->b_data + sizeof(struct nandfs_segment_summary);
+
+ DPRINTF(SYNC, ("%s: seg %p : initial settings: start %#jx size :%#x\n",
+ __func__, seg, (uintmax_t)seg->start_block, seg->num_blocks));
+ DPRINTF(SYNC, ("%s: seg->seg_num %#jx cno %#jx next %#jx\n", __func__,
+ (uintmax_t)seg->seg_num, (uintmax_t)(fsdev->nd_last_cno + 1),
+ (uintmax_t)seg->seg_next));
+
+ if (!prev)
+ LIST_INSERT_HEAD(&seginfo->seg_list, seg, seg_link);
+ else
+ LIST_INSERT_AFTER(prev, seg, seg_link);
+
+ seginfo->curseg = seg;
+
+ return (0);
+}
+
+/*
+ * Tear down every segment linked on seginfo->seg_list.
+ *
+ * Each segment's summary buffers are dequeued, have B_MANAGED cleared and
+ * are released with brelse(); the segment structure is then unlinked and
+ * freed.  Only the summary queue is drained here; the data queue is not
+ * touched.  Always returns 0.
+ */
+static int
+delete_segment(struct nandfs_seginfo *seginfo)
+{
+	struct nandfs_segment *cur;
+	struct buf *sumbp;
+
+	while ((cur = LIST_FIRST(&seginfo->seg_list)) != NULL) {
+		while ((sumbp = TAILQ_FIRST(&cur->segsum)) != NULL) {
+			TAILQ_REMOVE(&cur->segsum, sumbp,
+			    b_cluster.cluster_entry);
+			sumbp->b_flags &= ~B_MANAGED;
+			brelse(sumbp);
+		}
+
+		LIST_REMOVE(cur, seg_link);
+		free(cur, M_DEVBUF);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate an empty segment-construction context for 'fsdev'.
+ *
+ * The new context is returned through *seginfo and also recorded in
+ * fsdev->nd_seginfo.  The allocation sleeps (M_WAITOK).  Always returns 0.
+ */
+static int
+create_seginfo(struct nandfs_device *fsdev, struct nandfs_seginfo **seginfo)
+{
+	struct nandfs_seginfo *info;
+
+	/*
+	 * M_ZERO: fields not assigned below (e.g. 'reiterate', which
+	 * create_segment() only ever sets to 1) must start from a defined
+	 * value rather than whatever the allocator returned.
+	 */
+	info = malloc(sizeof(*info), M_DEVBUF, M_WAITOK | M_ZERO);
+
+	LIST_INIT(&info->seg_list);
+	info->fsdev = fsdev;
+	info->curseg = NULL;
+	info->blocks = 0;
+	*seginfo = info;
+	fsdev->nd_seginfo = info;
+	return (0);
+}
+
+/*
+ * Destroy a segment-construction context: release all of its segments,
+ * clear the device's back pointer and free the context.  Always returns 0.
+ */
+static int
+delete_seginfo(struct nandfs_seginfo *seginfo)
+{
+	struct nandfs_device *owner = seginfo->fsdev;
+
+	delete_segment(seginfo);
+
+	/* Drop the device's reference before the memory goes away. */
+	owner->nd_seginfo = NULL;
+	free(seginfo, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * Obtain a zeroed anonymous buffer for the super root and queue it as a
+ * data block of the current segment, creating a new segment first when
+ * there is none or the current one has no blocks left.
+ *
+ * On success the buffer is returned through *newbp and the segment and
+ * seginfo block counters are updated.  On failure the buffer is released
+ * and the error from create_segment() is returned; *newbp is not written.
+ */
+static int
+nandfs_create_superroot_block(struct nandfs_seginfo *seginfo,
+    struct buf **newbp)
+{
+	struct buf *bp;
+	int error;
+
+	/*
+	 * NOTE(review): the result is used without a NULL check; confirm
+	 * that nandfs_geteblk() cannot fail with GB_NOWAIT_BD.
+	 */
+	bp = nandfs_geteblk(seginfo->fsdev->nd_blocksize, GB_NOWAIT_BD);
+
+	bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+	bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+	bp->b_flags |= B_MANAGED;
+
+	if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+		error = create_segment(seginfo);
+		if (error) {
+			/*
+			 * Clear B_MANAGED before releasing, as
+			 * delete_segment() does, so the buffer is really
+			 * handed back to the buffer cache.
+			 */
+			bp->b_flags &= ~B_MANAGED;
+			brelse(bp);
+			nandfs_error("%s: no segment for superroot\n",
+			    __func__);
+			return (error);
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry);
+
+	seginfo->curseg->nblocks++;
+	seginfo->curseg->num_blocks--;
+	seginfo->blocks++;
+
+	*newbp = bp;
+	return (0);
+}
+
+/*
+ * Append a super root block to the segment under construction.
+ *
+ * A block is obtained via nandfs_create_superroot_block(), filled with the
+ * current DAT, cpfile and sufile inodes, and checksummed with crc32() over
+ * NANDFS_SR_BYTES minus the leading sr_sum field.  The device's dirty
+ * buffer count is incremented for the new block.
+ *
+ * Returns 0 on success or the error from nandfs_create_superroot_block().
+ */
+static int
+nandfs_add_superroot(struct nandfs_seginfo *seginfo)
+{
+ struct nandfs_device *fsdev;
+ struct nandfs_super_root *sr;
+ struct buf *bp = NULL;
+ uint64_t crc_skip;
+ uint32_t crc_calc;
+ int error;
+
+ fsdev = seginfo->fsdev;
+
+ error = nandfs_create_superroot_block(seginfo, &bp);
+ if (error) {
+ nandfs_error("%s: cannot add superroot\n", __func__);
+ return (error);
+ }
+
+ sr = (struct nandfs_super_root *)bp->b_data;
+ /* Fill in the super root header fields */
+ sr->sr_bytes = NANDFS_SR_BYTES;
+ sr->sr_flags = 0;
+ sr->sr_nongc_ctime = 0;
+
+ /* Snapshot the inodes of the three system files. */
+ memcpy(&sr->sr_dat, &fsdev->nd_dat_node->nn_inode,
+ sizeof(struct nandfs_inode));
+ memcpy(&sr->sr_cpfile, &fsdev->nd_cp_node->nn_inode,
+ sizeof(struct nandfs_inode));
+ memcpy(&sr->sr_sufile, &fsdev->nd_su_node->nn_inode,
+ sizeof(struct nandfs_inode));
+
+ /* Compute and save the super root CRC; sr_sum itself is skipped. */
+ crc_skip = sizeof(sr->sr_sum);
+ crc_calc = crc32((uint8_t *)sr + crc_skip, NANDFS_SR_BYTES - crc_skip);
+
+ sr->sr_sum = crc_calc;
+
+ /* Both were already set by nandfs_create_superroot_block(). */
+ bp->b_flags |= B_MANAGED;
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+
+ bp->b_flags &= ~B_INVAL;
+ nandfs_dirty_bufs_increment(fsdev);
+ DPRINTF(SYNC, ("%s: bp:%p\n", __func__, bp));
+
+ return (0);
+}
+
+/*
+ * Make room for more block-info records in the segment summary.
+ *
+ * If there is no current segment, or it has at most one block left, a new
+ * segment is created and its initial summary buffer is returned.  Otherwise
+ * an additional summary buffer is obtained for the block directly behind
+ * the existing summary blocks, zeroed, appended to the summary queue and
+ * made the current write position.
+ *
+ * Returns 0 and the buffer through *newbp, or the error from
+ * create_segment().
+ */
+static int
+nandfs_add_segsum_block(struct nandfs_seginfo *seginfo, struct buf **newbp)
+{
+ struct nandfs_device *fsdev;
+ nandfs_daddr_t blk;
+ struct buf *bp;
+ int error;
+
+ /* <= 1: a summary block alone, with no data block after it, is useless. */
+ if (!(seginfo->curseg) || seginfo->curseg->num_blocks <= 1) {
+ error = create_segment(seginfo);
+ if (error) {
+ nandfs_error("%s: error:%d when creating segment\n",
+ __func__, error);
+ return (error);
+ }
+ *newbp = TAILQ_FIRST(&seginfo->curseg->segsum);
+ return (0);
+ }
+
+ fsdev = seginfo->fsdev;
+ /* Device block right behind the summary blocks allocated so far. */
+ blk = nandfs_block_to_dblock(fsdev, seginfo->curseg->start_block +
+ seginfo->curseg->segsum_blocks);
+
+ bp = getblk(fsdev->nd_devvp, blk, fsdev->nd_blocksize, 0, 0, 0);
+
+ bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
+ bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
+ bp->b_flags |= B_MANAGED;
+
+ TAILQ_INSERT_TAIL(&seginfo->curseg->segsum, bp,
+ b_cluster.cluster_entry);
+ seginfo->curseg->num_blocks--;
+
+ seginfo->curseg->segsum_blocks++;
+ /* Follow-up summary blocks carry no header: the whole block is free. */
+ seginfo->curseg->bytes_left = seginfo->fsdev->nd_blocksize;
+ seginfo->curseg->current_off = bp->b_data;
+ seginfo->blocks++;
+
+ *newbp = bp;
+
+ DPRINTF(SYNC, ("%s: bp %p\n", __func__, bp));
+
+ return (0);
+}
+
+/*
+ * Queue data buffer 'bp' of 'node' on the current segment.
+ *
+ * A segment is created if none exists or the current one is full, and a
+ * further summary block is added when the current one cannot hold another
+ * block-info record.  A block-info record describing the buffer is written
+ * at the current summary offset, the buffer is appended to the segment's
+ * data queue and the segment and seginfo counters are updated.
+ *
+ * Returns 0 on success or the error from create_segment() /
+ * nandfs_add_segsum_block(); on failure the buffer has not been queued.
+ */
+static int
+nandfs_add_blocks(struct nandfs_seginfo *seginfo, struct nandfs_node *node,
+ struct buf *bp)
+{
+ union nandfs_binfo *binfo;
+ struct buf *seg_bp;
+ int error;
+
+ if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+ error = create_segment(seginfo);
+ if (error) {
+ nandfs_error("%s: error:%d when creating segment\n",
+ __func__, error);
+ return (error);
+ }
+ }
+
+ /* No room for one more record in the current summary block. */
+ if (seginfo->curseg->bytes_left < sizeof(union nandfs_binfo)) {
+ error = nandfs_add_segsum_block(seginfo, &seg_bp);
+ if (error) {
+ nandfs_error("%s: error:%d when adding segsum\n",
+ __func__, error);
+ return (error);
+ }
+ }
+ binfo = (union nandfs_binfo *)seginfo->curseg->current_off;
+
+ /* DAT blocks use the bi_dat record layout, everything else bi_v. */
+ if (node->nn_ino != NANDFS_DAT_INO) {
+ binfo->bi_v.bi_blkoff = bp->b_lblkno;
+ binfo->bi_v.bi_ino = node->nn_ino;
+ } else {
+ binfo->bi_dat.bi_blkoff = bp->b_lblkno;
+ binfo->bi_dat.bi_ino = node->nn_ino;
+ if (NANDFS_IS_INDIRECT(bp))
+ binfo->bi_dat.bi_level = 1;
+ else
+ binfo->bi_dat.bi_level = 0;
+ }
+ binfo++;
+
+ seginfo->curseg->bytes_left -= sizeof(union nandfs_binfo);
+ seginfo->curseg->segsum_bytes += sizeof(union nandfs_binfo);
+ seginfo->curseg->current_off = (char *)binfo;
+
+ TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry);
+
+ seginfo->curseg->nbinfos++;
+ seginfo->curseg->nblocks++;
+ seginfo->curseg->num_blocks--;
+ seginfo->blocks++;
+
+ DPRINTF(SYNC, ("%s: bp (%p) number %x (left %x)\n",
+ __func__, bp, seginfo->curseg->nblocks,
+ seginfo->curseg->num_blocks));
+ return (0);
+}
+
+/*
+ * Gather every not-yet-gathered dirty buffer of 'vp' into the segment
+ * under construction.
+ *
+ * For each such buffer the DAT mapping is updated, the buffer is marked as
+ * gathered and then queued with nandfs_add_blocks().  The vnode must be
+ * exclusively locked (asserted).  'hold' is currently unused.
+ *
+ * Returns 0 on success, or the first error from nandfs_bmap_update_dat()
+ * or nandfs_add_blocks().  A buffer that could not be queued has its
+ * gathered mark removed again so that a later pass will pick it up.
+ */
+static int
+nandfs_iterate_dirty_buf(struct vnode *vp, struct nandfs_seginfo *seginfo,
+    uint8_t hold)
+{
+	struct buf *bp, *tbd;
+	struct bufobj *bo;
+	struct nandfs_node *node;
+	int error;
+
+	node = VTON(vp);
+	bo = &vp->v_bufobj;
+
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
+	/* Iterate dirty data bufs */
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, tbd) {
+		DPRINTF(SYNC, ("%s: vp (%p): bp (%p) with lblkno %jx ino %jx "
+		    "add buf\n", __func__, vp, bp, (uintmax_t)bp->b_lblkno,
+		    (uintmax_t)node->nn_ino));
+
+		if (NANDFS_ISGATHERED(bp))
+			continue;
+
+		error = nandfs_bmap_update_dat(node, nandfs_vblk_get(bp), bp);
+		if (error)
+			return (error);
+
+		NANDFS_GATHER(bp);
+		error = nandfs_add_blocks(seginfo, node, bp);
+		if (error) {
+			/*
+			 * The buffer is on no segment queue; leaving it
+			 * marked would make every later pass skip it.
+			 */
+			NANDFS_UNGATHER(bp);
+			return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Gather the dirty buffers of a system file node, if it has any.
+ *
+ * Returns 0 when the node has no dirty buffers, otherwise the result of
+ * nandfs_iterate_dirty_buf() so that a failure to gather is visible to the
+ * caller instead of being silently dropped.
+ */
+static int
+nandfs_iterate_system_vnode(struct nandfs_node *node,
+    struct nandfs_seginfo *seginfo)
+{
+	struct vnode *vp;
+	int nblocks;
+	uint8_t hold = 0;
+
+	/* 'hold' is set for every system file except the ifile. */
+	if (node->nn_ino != NANDFS_IFILE_INO)
+		hold = 1;
+
+	vp = NTOV(node);
+
+	nblocks = vp->v_bufobj.bo_dirty.bv_cnt;
+	DPRINTF(SYNC, ("%s: vp (%p): nblocks %x ino %jx\n",
+	    __func__, vp, nblocks, (uintmax_t)node->nn_ino));
+
+	if (nblocks == 0)
+		return (0);
+
+	return (nandfs_iterate_dirty_buf(vp, seginfo, hold));
+}
+
+/*
+ * Walk the vnodes of mount 'mp' and gather their dirty buffers.
+ *
+ * The syncer vnode, vnodes that are currently locked and doomed vnodes are
+ * skipped.  Every other vnode is referenced and exclusively locked with
+ * vget().  A vnode that has dirty buffers is left locked and referenced
+ * after its buffers were gathered (the save/clean paths release it when
+ * called with 'unlock' set); a vnode without dirty buffers is released
+ * here.  The on-disk inode is refreshed when the node was flagged
+ * IN_MODIFIED or had dirty buffers.
+ *
+ * Returns 0, or the error from nandfs_iterate_dirty_buf() for the vnode
+ * that failed (that vnode is released before returning).
+ */
+static int
+nandfs_iterate_dirty_vnodes(struct mount *mp, struct nandfs_seginfo *seginfo)
+{
+	struct nandfs_node *nandfs_node;
+	struct vnode *vp, *mvp;
+	struct thread *td;
+	int error, lockreq, update, has_dirty;
+
+	td = curthread;
+	lockreq = LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY;
+
+	MNT_ILOCK(mp);
+
+	MNT_VNODE_FOREACH(vp, mp, mvp) {
+		update = 0;
+
+		if (mp->mnt_syncer == vp)
+			continue;
+		if (VOP_ISLOCKED(vp))
+			continue;
+
+		VI_LOCK(vp);
+		MNT_IUNLOCK(mp);
+		if (vp->v_iflag & VI_DOOMED) {
+			VI_UNLOCK(vp);
+			MNT_ILOCK(mp);
+			continue;
+		}
+
+		if ((error = vget(vp, lockreq, td)) != 0) {
+			MNT_ILOCK(mp);
+			continue;
+		}
+
+		/* The vnode may have been doomed while vget() slept. */
+		if (vp->v_iflag & VI_DOOMED) {
+			vput(vp);
+			MNT_ILOCK(mp);
+			continue;
+		}
+
+		nandfs_node = VTON(vp);
+		if (nandfs_node->nn_flags & IN_MODIFIED) {
+			nandfs_node->nn_flags &= ~(IN_MODIFIED);
+			update = 1;
+		}
+
+		has_dirty = (vp->v_bufobj.bo_dirty.bv_cnt != 0);
+		if (has_dirty) {
+			error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
+			if (error) {
+				nandfs_error("%s: cannot iterate vnode:%p "
+				    "err:%d\n", __func__, vp, error);
+				vput(vp);
+				/*
+				 * Leaving MNT_VNODE_FOREACH early: the
+				 * iteration marker must be released.
+				 */
+				MNT_VNODE_FOREACH_ABORT(mp, mvp);
+				return (error);
+			}
+			update = 1;
+		}
+
+		/*
+		 * Refresh the inode while the vnode is still locked and
+		 * referenced; only afterwards drop a vnode that has no
+		 * gathered buffers.
+		 */
+		if (update)
+			nandfs_node_update(nandfs_node);
+
+		if (!has_dirty)
+			vput(vp);
+
+		MNT_ILOCK(mp);
+	}
+
+	MNT_IUNLOCK(mp);
+
+	return (0);
+}
+
+/*
+ * Bind buffer 'bp' to physical block 'phys_blknr' and complete its
+ * block-info record.
+ *
+ * For ordinary files the buffer's virtual block number is assigned to the
+ * physical block and a bi_v record is written.  For the DAT file the block
+ * map is updated under the DAT vnode lock and a bi_dat record is written.
+ *
+ * Returns 0 on success or the error from nandfs_bmap_update_block().
+ */
+static int
+nandfs_update_phys_block(struct nandfs_device *fsdev, struct buf *bp,
+    uint64_t phys_blknr, union nandfs_binfo *binfo)
+{
+	struct nandfs_node *owner;
+	struct vnode *dat_vp;
+	uint64_t vblk;
+	int error;
+
+	owner = VTON(bp->b_vp);
+	vblk = nandfs_vblk_get(bp);
+
+	DPRINTF(BMAP, ("%s: ino %#jx lblk %#jx: vblk %#jx -> %#jx\n",
+	    __func__, (uintmax_t)owner->nn_ino, (uintmax_t)bp->b_lblkno,
+	    (uintmax_t)vblk, (uintmax_t)phys_blknr));
+
+	if (owner->nn_ino != NANDFS_DAT_INO) {
+		KASSERT((vblk != 0), ("vblk for bp %p is 0", bp));
+
+		nandfs_vblock_assign(fsdev, vblk, phys_blknr);
+		binfo->bi_v.bi_vblocknr = vblk;
+		binfo->bi_v.bi_blkoff = bp->b_lblkno;
+		binfo->bi_v.bi_ino = owner->nn_ino;
+		return (0);
+	}
+
+	/* DAT blocks are mapped directly, under the DAT vnode lock. */
+	dat_vp = NTOV(fsdev->nd_dat_node);
+	VOP_LOCK(dat_vp, LK_EXCLUSIVE);
+	error = nandfs_bmap_update_block(owner, bp, phys_blknr);
+	if (error)
+		nandfs_error("%s: error updating block:%jx for bp:%p\n",
+		    __func__, (uintmax_t)phys_blknr, bp);
+	VOP_UNLOCK(dat_vp, 0);
+	if (error)
+		return (error);
+
+	binfo->bi_dat.bi_blkoff = bp->b_lblkno;
+	binfo->bi_dat.bi_ino = owner->nn_ino;
+	binfo->bi_dat.bi_level = NANDFS_IS_INDIRECT(bp) ? 1 : 0;
+
+	return (0);
+}
+
+/* Offset just past one more block-info record starting at 'off'. */
+#define NBINFO(off) ((off) + sizeof(union nandfs_binfo))
+/*
+ * Assign consecutive physical block numbers to the data buffers of one
+ * segment and complete their block-info records.
+ *
+ * Data blocks are numbered starting right behind the segment's summary
+ * blocks.  The block-info records are walked in the same order in which
+ * nandfs_add_blocks() laid them out: after the summary header in the first
+ * summary buffer, then from offset 0 of each following summary buffer.
+ *
+ * Returns 0 on success or the first error from nandfs_update_phys_block().
+ */
+static int
+nandfs_segment_assign_pblk(struct nandfs_segment *nfsseg)
+{
+ struct nandfs_device *fsdev;
+ union nandfs_binfo *binfo;
+ struct buf *bp, *seg_bp;
+ uint64_t blocknr;
+ uint32_t curr_off, blocksize;
+ int error;
+
+ fsdev = nfsseg->fsdev;
+ blocksize = fsdev->nd_blocksize;
+
+ /* First data block follows the summary blocks. */
+ blocknr = nfsseg->start_block + nfsseg->segsum_blocks;
+ seg_bp = TAILQ_FIRST(&nfsseg->segsum);
+ DPRINTF(SYNC, ("%s: seg:%p segsum bp:%p data:%p\n",
+ __func__, nfsseg, seg_bp, seg_bp->b_data));
+
+ binfo = (union nandfs_binfo *)(seg_bp->b_data +
+ sizeof(struct nandfs_segment_summary));
+ curr_off = sizeof(struct nandfs_segment_summary);
+
+ TAILQ_FOREACH(bp, &nfsseg->data, b_cluster.cluster_entry) {
+ KASSERT((bp->b_vp), ("bp %p has not vp", bp));
+
+ DPRINTF(BMAP, ("\n\n%s: assign buf %p for ino %#jx next %p\n",
+ __func__, bp, (uintmax_t)VTON(bp->b_vp)->nn_ino,
+ TAILQ_NEXT(bp, b_cluster.cluster_entry)));
+
+ /* The next record would not fit: continue in the next summary block. */
+ if (NBINFO(curr_off) > blocksize) {
+ seg_bp = TAILQ_NEXT(seg_bp, b_cluster.cluster_entry);
+ binfo = (union nandfs_binfo *)seg_bp->b_data;
+ curr_off = 0;
+ DPRINTF(SYNC, ("%s: next segsum %p data %p\n",
+ __func__, seg_bp, seg_bp->b_data));
+ }
+
+ error = nandfs_update_phys_block(fsdev, bp, blocknr, binfo);
+ if (error) {
+ nandfs_error("%s: err:%d when updatinng phys block:%jx"
+ " for bp:%p and binfo:%p\n", __func__, error,
+ (uintmax_t)blocknr, bp, binfo);
+ return (error);
+ }
+ binfo++;
+ curr_off = NBINFO(curr_off);
+
+ blocknr++;
+ }
+
+ return (0);
+}
+
+/*
+ * Assign physical block numbers for every segment of 'seginfo', in list
+ * order.  Stops at and returns the first error; returns 0 otherwise.
+ */
+static int
+nandfs_seginfo_assign_pblk(struct nandfs_seginfo *seginfo)
+{
+	struct nandfs_segment *cur;
+	int error;
+
+	LIST_FOREACH(cur, &seginfo->seg_list, seg_link) {
+		error = nandfs_segment_assign_pblk(cur);
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Fill in the segment summary header of 'seg' and compute its checksum.
+ *
+ * The header lives at the start of the first summary buffer.  The log is
+ * always flagged as both begin and end; NANDFS_SS_SR is added when
+ * 'has_sr' is non-zero.  The device's segment sequence number is advanced.
+ * ss_sumsum is a CRC over seg->segsum_bytes of summary data, excluding the
+ * two leading checksum fields, walking successive summary buffers.
+ * ss_datasum is not written here.
+ *
+ * Returns a pointer to the header inside the first summary buffer.
+ */
+static struct nandfs_segment_summary *
+nandfs_fill_segsum(struct nandfs_segment *seg, int has_sr)
+{
+ struct nandfs_segment_summary *ss;
+ struct nandfs_device *fsdev;
+ struct buf *bp;
+ uint32_t rest, segsum_size, blocksize, crc_calc;
+ uint16_t flags;
+ uint8_t *crc_area, crc_skip;
+
+ DPRINTF(SYNC, ("%s: seg %#jx nblocks %#x sumbytes %#x\n",
+ __func__, (uintmax_t) seg->seg_num,
+ seg->nblocks + seg->segsum_blocks,
+ seg->segsum_bytes));
+
+ fsdev = seg->fsdev;
+
+ flags = NANDFS_SS_LOGBGN | NANDFS_SS_LOGEND;
+ if (has_sr)
+ flags |= NANDFS_SS_SR;
+
+ bp = TAILQ_FIRST(&seg->segsum);
+ ss = (struct nandfs_segment_summary *) bp->b_data;
+ ss->ss_magic = NANDFS_SEGSUM_MAGIC;
+ ss->ss_bytes = sizeof(struct nandfs_segment_summary);
+ ss->ss_flags = flags;
+ ss->ss_seq = ++(fsdev->nd_seg_sequence);
+ ss->ss_create = fsdev->nd_ts.tv_sec;
+ /* ss_next receives the first block of the following segment. */
+ nandfs_get_segment_range(fsdev, seg->seg_next, &ss->ss_next, NULL);
+ ss->ss_nblocks = seg->nblocks + seg->segsum_blocks;
+ ss->ss_nbinfos = seg->nbinfos;
+ ss->ss_sumbytes = seg->segsum_bytes;
+
+ /* The checksum fields themselves are excluded from the CRC. */
+ crc_skip = sizeof(ss->ss_datasum) + sizeof(ss->ss_sumsum);
+ blocksize = seg->fsdev->nd_blocksize;
+
+ /* segsum_size: bytes still to checksum; rest: bytes in this buffer. */
+ segsum_size = seg->segsum_bytes - crc_skip;
+ rest = min(seg->segsum_bytes, blocksize) - crc_skip;
+ crc_area = (uint8_t *)ss + crc_skip;
+ crc_calc = ~0U;
+ while (segsum_size > 0) {
+ crc_calc = crc32_raw(crc_area, rest, crc_calc);
+ segsum_size -= rest;
+ if (!segsum_size)
+ break;
+ /* Continue at the start of the next summary buffer. */
+ bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+ crc_area = (uint8_t *)bp->b_data;
+ rest = segsum_size <= blocksize ? segsum_size : blocksize;
+ }
+ /* Final inversion of the running CRC. */
+ ss->ss_sumsum = crc_calc ^ ~0U;
+
+ return (ss);
+
+}
+
+/*
+ * Write buffer 'bp' synchronously to file system block 'blocknr'.
+ *
+ * The buffer's device block number and I/O offset are set from 'blocknr'.
+ * Buffers that do not belong to the device vnode's buffer object are
+ * locked here first.  The gathered mark and the NANDFS buffer flags are
+ * cleared, B_ASYNC, B_INVAL and B_MANAGED are dropped, and the buffer is
+ * handed to bwrite().
+ *
+ * Returns 0 on success or the error from bwrite().
+ */
+static int
+nandfs_save_buf(struct buf *bp, uint64_t blocknr, struct nandfs_device *fsdev)
+{
+ struct bufobj *bo;
+ int error;
+
+ bo = &fsdev->nd_devvp->v_bufobj;
+
+ bp->b_blkno = nandfs_block_to_dblock(fsdev, blocknr);
+ bp->b_iooffset = dbtob(bp->b_blkno);
+
+ KASSERT(bp->b_bufobj != NULL, ("no bufobj for %p", bp));
+ if (bp->b_bufobj != bo) {
+ /*
+ * NOTE(review): the LK_NOWAIT result is not checked; only the
+ * KASSERT below notices a failed lock attempt.
+ */
+ BO_LOCK(bp->b_bufobj);
+ BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
+ BO_MTX(bp->b_bufobj));
+ KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer"));
+ }
+
+ DPRINTF(SYNC, ("%s: buf: %p offset %#jx blk %#jx size %#x\n",
+ __func__, bp, (uintmax_t)bp->b_offset, (uintmax_t)blocknr,
+ fsdev->nd_blocksize));
+
+ NANDFS_UNGATHER(bp);
+ nandfs_buf_clear(bp, 0xffffffff);
+ bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
+ error = bwrite(bp);
+ if (error) {
+ nandfs_error("%s: error:%d when writing buffer:%p\n",
+ __func__, error, bp);
+ return (error);
+ }
+ return (error);
+}
+
+/*
+ * Drop buffer 'bp' from segment construction without writing it: the
+ * gathered mark and NANDFS buffer flags are cleared, B_ASYNC, B_INVAL and
+ * B_MANAGED are dropped, and the buffer is passed to
+ * nandfs_undirty_buf_fsdev().
+ */
+static void
+nandfs_clean_buf(struct nandfs_device *fsdev, struct buf *bp)
+{
+
+ DPRINTF(SYNC, ("%s: buf: %p\n", __func__, bp));
+
+ NANDFS_UNGATHER(bp);
+ nandfs_buf_clear(bp, 0xffffffff);
+ bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
+ nandfs_undirty_buf_fsdev(fsdev, bp);
+}
+
+/*
+ * Discard all buffers queued on segment 'seg' without writing them.
+ *
+ * Runs with the device vnode locked exclusively.  Summary buffers are
+ * dequeued and cleaned.  Data buffers are dequeued and cleaned, and the
+ * device's dirty buffer count is decremented for each.  When 'unlock' is
+ * set, the owning vnode of a data buffer is released with vput() once the
+ * last consecutive buffer of that vnode has been processed, unless it is a
+ * system node.
+ */
+static void
+nandfs_clean_segblocks(struct nandfs_segment *seg, uint8_t unlock)
+{
+ struct nandfs_device *fsdev = seg->fsdev;
+ struct nandfs_segment *next_seg;
+ struct buf *bp, *tbp, *next_bp;
+ struct vnode *vp, *next_vp;
+
+ VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
+ TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
+ nandfs_clean_buf(fsdev, bp);
+ };
+
+ TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
+ TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);
+
+ /*
+ * Find the vnode of the buffer that follows this one (looking
+ * into the next segment if this was the last buffer here), so
+ * that the current vnode is released only after its last
+ * buffer.  Buffers without a vnode (vp == NULL) are never
+ * released this way.
+ */
+ vp = bp->b_vp;
+ next_vp = NULL;
+ next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+ if (!next_bp) {
+ next_seg = LIST_NEXT(seg, seg_link);
+ if (next_seg)
+ next_bp = TAILQ_FIRST(&next_seg->data);
+ }
+
+ if (next_bp)
+ next_vp = next_bp->b_vp;
+
+ nandfs_clean_buf(fsdev, bp);
+
+ if (unlock && vp != NULL && next_vp != vp &&
+ !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+ vput(vp);
+
+ nandfs_dirty_bufs_decrement(fsdev);
+ }
+
+ VOP_UNLOCK(fsdev->nd_devvp, 0);
+}
+
+/*
+ * Write all buffers queued on segment 'seg' to the device.
+ *
+ * Summary buffers are written first, starting at seg->start_block, followed
+ * by the data buffers directly behind them.  The device vnode is locked
+ * exclusively while writing.  When 'unlock' is set, the owning vnode of a
+ * data buffer is released with vput() once the last consecutive buffer of
+ * that vnode has been processed, unless it is a system node.
+ *
+ * On a write error the remaining buffers of the segment are discarded with
+ * nandfs_clean_segblocks() and the error is returned; otherwise 0.
+ */
+static int
+nandfs_save_segblocks(struct nandfs_segment *seg, uint8_t unlock)
+{
+	struct nandfs_device *fsdev = seg->fsdev;
+	struct nandfs_segment *next_seg;
+	struct buf *bp, *tbp, *next_bp;
+	struct vnode *vp, *next_vp;
+	uint64_t blocknr;
+	uint32_t i = 0;
+	int error = 0;
+
+	VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
+	TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
+		TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
+		blocknr = seg->start_block + i;
+		error = nandfs_save_buf(bp, blocknr, fsdev);
+		if (error) {
+			nandfs_error("%s: error saving buf: %p blocknr:%jx\n",
+			    __func__, bp, (uintmax_t)blocknr);
+			goto out;
+		}
+		i++;
+	}
+
+	i = 0;
+	TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
+		TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);
+
+		blocknr = seg->start_block + seg->segsum_blocks + i;
+		/*
+		 * Find the vnode of the buffer that follows this one
+		 * (looking into the next segment if this was the last
+		 * buffer here), so that the current vnode is released only
+		 * after its last buffer.
+		 */
+		vp = bp->b_vp;
+		next_vp = NULL;
+		next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
+		if (!next_bp) {
+			next_seg = LIST_NEXT(seg, seg_link);
+			if (next_seg)
+				next_bp = TAILQ_FIRST(&next_seg->data);
+		}
+
+		if (next_bp)
+			next_vp = next_bp->b_vp;
+
+		error = nandfs_save_buf(bp, blocknr, fsdev);
+		if (error) {
+			nandfs_error("%s: error saving buf: %p blknr: %jx\n",
+			    __func__, bp, (uintmax_t)blocknr);
+			if (unlock && vp != NULL && next_vp != vp &&
+			    !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+				vput(vp);
+			goto out;
+		}
+
+		if (unlock && vp != NULL && next_vp != vp &&
+		    !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+			vput(vp);
+
+		i++;
+		nandfs_dirty_bufs_decrement(fsdev);
+	}
+out:
+	/*
+	 * Release the device vnode before cleaning up:
+	 * nandfs_clean_segblocks() takes and drops that lock itself, so
+	 * calling it with the lock held would acquire it recursively.
+	 */
+	VOP_UNLOCK(fsdev->nd_devvp, 0);
+	if (error)
+		nandfs_clean_segblocks(seg, unlock);
+
+	return (error);
+}
+
+
+/*
+ * Discard the queued buffers of every segment in 'seginfo' without writing
+ * them; 'unlock' is passed through to nandfs_clean_segblocks().
+ */
+static void
+clean_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
+{
+	struct nandfs_segment *cur;
+
+	DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));
+
+	LIST_FOREACH(cur, &seginfo->seg_list, seg_link)
+		nandfs_clean_segblocks(cur, unlock);
+}
+
+/*
+ * Write out every segment of 'seginfo' in list order.
+ *
+ * Each segment gets its summary filled in and its blocks saved.  Only the
+ * last segment is flagged as carrying the super root; its summary is
+ * remembered in nd_last_segsum before the write and, once written, the
+ * checkpoint number is advanced and nd_last_pseg is set to its start.
+ *
+ * On the first write error all remaining buffers are discarded with
+ * clean_seginfo() and the error is returned; otherwise 0.
+ */
+static int
+save_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
+{
+	struct nandfs_segment_summary *sum;
+	struct nandfs_device *dev;
+	struct nandfs_segment *cur;
+	int error, is_last;
+
+	error = 0;
+	dev = seginfo->fsdev;
+
+	DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));
+
+	LIST_FOREACH(cur, &seginfo->seg_list, seg_link) {
+		is_last = (LIST_NEXT(cur, seg_link) == NULL);
+
+		sum = nandfs_fill_segsum(cur, is_last);
+		if (is_last)
+			dev->nd_last_segsum = *sum;
+
+		error = nandfs_save_segblocks(cur, unlock);
+		if (error) {
+			nandfs_error("%s: error:%d saving seg:%p\n",
+			    __func__, error, cur);
+			break;
+		}
+
+		if (is_last) {
+			dev->nd_last_cno++;
+			dev->nd_last_pseg = cur->start_block;
+		}
+	}
+
+	if (error)
+		clean_seginfo(seginfo, unlock);
+	return (error);
+}
+
+/*
+ * Invalidate the clean buffers of the GC node whose logical block numbers
+ * fall inside segment 'segno'.
+ *
+ * Each matching buffer is locked, removed from the free list, flagged
+ * B_INVAL | B_RELBUF and released with brelse().
+ */
+static void
+nandfs_invalidate_bufs(struct nandfs_device *fsdev, uint64_t segno)
+{
+ uint64_t start, end;
+ struct buf *bp, *tbd;
+ struct bufobj *bo;
+
+ nandfs_get_segment_range(fsdev, segno, &start, &end);
+
+ bo = &NTOV(fsdev->nd_gc_node)->v_bufobj;
+
+ BO_LOCK(bo);
+restart_locked_gc:
+ TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, tbd) {
+ if (!(bp->b_lblkno >= start && bp->b_lblkno <= end))
+ continue;
+
+ /*
+ * Buffer is busy: rescan from the head of the list.
+ * NOTE(review): this retries without sleeping while the bufobj
+ * lock stays held -- confirm the holder can make progress.
+ */
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ goto restart_locked_gc;
+
+ bremfree(bp);
+ bp->b_flags |= (B_INVAL | B_RELBUF);
+ bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+ /*
+ * brelse() is called with the bufobj lock dropped.
+ * NOTE(review): the iteration then continues with 'tbd', which
+ * was sampled before the lock was released -- confirm the list
+ * cannot change in between.
+ */
+ BO_UNLOCK(bo);
+ brelse(bp);
+ BO_LOCK(bo);
+ }
+ BO_UNLOCK(bo);
+}
+
+/*
+ * Process the segments that the cleaner marked to be freed.
+ *
+ * Every valid entry of fsdev->nd_free_base has its cached GC buffers
+ * invalidated and is then cleared.  If an entry is the segment that the
+ * superblock's s_last_pseg lies in, the superblock is rewritten first.
+ * Afterwards the list is freed and reset.
+ */
+static void
+nandfs_process_segments(struct nandfs_device *fsdev)
+{
+	uint64_t sb_segment;
+	int idx;
+
+	/* Nothing queued by the cleaner. */
+	if (fsdev->nd_free_base == NULL)
+		return;
+
+	sb_segment = nandfs_get_segnum_of_block(fsdev,
+	    fsdev->nd_super.s_last_pseg);
+
+	for (idx = 0; idx < fsdev->nd_free_count; idx++) {
+		if (fsdev->nd_free_base[idx] == NANDFS_NOSEGMENT)
+			continue;
+
+		/* Update superblock if clearing segment point by it */
+		if (fsdev->nd_free_base[idx] == sb_segment) {
+			nandfs_write_superblock(fsdev);
+			sb_segment = nandfs_get_segnum_of_block(fsdev,
+			    fsdev->nd_super.s_last_pseg);
+		}
+
+		nandfs_invalidate_bufs(fsdev, fsdev->nd_free_base[idx]);
+		nandfs_clear_segment(fsdev, fsdev->nd_free_base[idx]);
+	}
+
+	free(fsdev->nd_free_base, M_NANDFSTEMP);
+	fsdev->nd_free_base = NULL;
+	fsdev->nd_free_count = 0;
+}
+
+/*
+ * Collect and write the dirty buffers of a single file.
+ *
+ * The caller holds the vnode lock of 'vp' (asserted) and the segment
+ * constructor lock, which is upgraded to exclusive here and downgraded
+ * again before returning.  The file's dirty buffers are gathered, its
+ * inode is refreshed in the ifile, and if any blocks were gathered a new
+ * checkpoint is created, physical block numbers are assigned and the
+ * segments are written.  When a checkpoint was written and the checkpoint
+ * number is a multiple of nandfs_cps_between_sblocks, the superblock is
+ * written as well.
+ *
+ * Returns 0 on success.  On any failure the gathered buffers are
+ * discarded, all locks taken here are released and the error is returned.
+ */
+int
+nandfs_sync_file(struct vnode *vp)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_node *nandfs_node;
+	struct nandfsmount *nmp;
+	struct nandfs_node *dat, *su, *ifile, *cp;
+	struct nandfs_seginfo *seginfo = NULL;
+	struct nandfs_segment *seg;
+	struct vnode *held;	/* system vnode locked besides su, if any */
+	int update, error;
+	int cno_changed;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	DPRINTF(SYNC, ("%s: START\n", __func__));
+
+	error = 0;
+	held = NULL;
+	nmp = VFSTONANDFS(vp->v_mount);
+	fsdev = nmp->nm_nandfsdev;
+
+	dat = fsdev->nd_dat_node;
+	su = fsdev->nd_su_node;
+	cp = fsdev->nd_cp_node;
+	ifile = nmp->nm_ifile_node;
+
+	NANDFS_WRITEASSERT(fsdev);
+	if (lockmgr(&fsdev->nd_seg_const, LK_UPGRADE, NULL) != 0) {
+		DPRINTF(SYNC, ("%s: lost shared lock\n", __func__));
+		if (lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL) != 0)
+			panic("couldn't lock exclusive");
+	}
+	DPRINTF(SYNC, ("%s: got lock\n", __func__));
+
+	VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
+	create_seginfo(fsdev, &seginfo);
+
+	update = 0;
+
+	nandfs_node = VTON(vp);
+	if (nandfs_node->nn_flags & IN_MODIFIED) {
+		nandfs_node->nn_flags &= ~(IN_MODIFIED);
+		update = 1;
+	}
+
+	if (vp->v_bufobj.bo_dirty.bv_cnt) {
+		error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
+		if (error) {
+			nandfs_error("%s: err:%d iterating dirty bufs vp:%p",
+			    __func__, error, vp);
+			goto fail;
+		}
+		update = 1;
+	}
+
+	if (update) {
+		held = NTOV(ifile);
+		VOP_LOCK(held, LK_EXCLUSIVE);
+		error = nandfs_node_update(nandfs_node);
+		if (error) {
+			nandfs_error("%s: err:%d updating vp:%p",
+			    __func__, error, vp);
+			goto fail;
+		}
+		VOP_UNLOCK(held, 0);
+		held = NULL;
+	}
+
+	cno_changed = 0;
+	if (seginfo->blocks) {
+		held = NTOV(cp);
+		VOP_LOCK(held, LK_EXCLUSIVE);
+		cno_changed = 1;
+		/* Create new checkpoint */
+		error = nandfs_get_checkpoint(fsdev, cp,
+		    fsdev->nd_last_cno + 1);
+		if (error) {
+			nandfs_error("%s: err:%d getting cp:%jx",
+			    __func__, error,
+			    (uintmax_t)(fsdev->nd_last_cno + 1));
+			goto fail;
+		}
+
+		/*
+		 * Reiterate all blocks and assign physical block number.
+		 * A failure here must stop the sync: the block-info
+		 * records would otherwise be written incomplete.
+		 */
+		error = nandfs_seginfo_assign_pblk(seginfo);
+		if (error) {
+			nandfs_error("%s: err:%d assigning physical blocks",
+			    __func__, error);
+			goto fail;
+		}
+
+		/* Fill checkpoint data */
+		error = nandfs_set_checkpoint(fsdev, cp,
+		    fsdev->nd_last_cno + 1, &ifile->nn_inode,
+		    seginfo->blocks);
+		if (error) {
+			nandfs_error("%s: err:%d setting cp:%jx",
+			    __func__, error,
+			    (uintmax_t)(fsdev->nd_last_cno + 1));
+			goto fail;
+		}
+
+		VOP_UNLOCK(held, 0);
+		held = NULL;
+
+		LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
+			nandfs_update_segment(fsdev, seg->seg_num,
+			    seg->nblocks + seg->segsum_blocks);
+
+		held = NTOV(dat);
+		VOP_LOCK(held, LK_EXCLUSIVE);
+		error = save_seginfo(seginfo, 0);
+		if (error) {
+			nandfs_error("%s: err:%d updating seg",
+			    __func__, error);
+			goto fail;
+		}
+		VOP_UNLOCK(held, 0);
+		held = NULL;
+	}
+
+	VOP_UNLOCK(NTOV(su), 0);
+
+	delete_seginfo(seginfo);
+	lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+
+	if (cno_changed && !error) {
+		if (nandfs_cps_between_sblocks != 0 &&
+		    fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0)
+			nandfs_write_superblock(fsdev);
+	}
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	DPRINTF(SYNC, ("%s: END error %d\n", __func__, error));
+	return (error);
+
+fail:
+	/*
+	 * Common unwind: discard what was gathered, then release the locks
+	 * in the reverse order of acquisition.
+	 */
+	clean_seginfo(seginfo, 0);
+	delete_seginfo(seginfo);
+	if (held != NULL)
+		VOP_UNLOCK(held, 0);
+	VOP_UNLOCK(NTOV(su), 0);
+	lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
+	return (error);
+}
+
+/*
+ * Build and write out a new set of segments for the whole file system.
+ *
+ * Holds fsdev->nd_seg_const exclusively for the duration.  Dirty blocks of
+ * the GC node, the mount's dirty vnodes, the ifile and the segment usage
+ * file are gathered into a fresh seginfo.  When anything was gathered, or
+ * `flags' is non-zero, checkpoint nd_last_cno + 1 is created, the system
+ * files are gathered as well, a super root is added and the seginfo is
+ * saved.  With NANDFS_UMOUNT in `flags' the whole pass is repeated while
+ * any of the system vnodes still has dirty buffers.
+ *
+ * Returns 0 on success, or the error of the first failing step after the
+ * seginfo has been cleaned up and all locks taken here were released.
+ */
+int
+nandfs_segment_constructor(struct nandfsmount *nmp, int flags)
+{
+ struct nandfs_device *fsdev;
+ struct nandfs_seginfo *seginfo = NULL;
+ struct nandfs_segment *seg;
+ struct nandfs_node *dat, *su, *ifile, *cp, *gc;
+ int cno_changed, error;
+
+ DPRINTF(SYNC, ("%s: START\n", __func__));
+ fsdev = nmp->nm_nandfsdev;
+
+ lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
+ DPRINTF(SYNC, ("%s: git lock\n", __func__));
+ /* Re-entered from the unmount rerun below; nd_seg_const is still held. */
+again:
+ create_seginfo(fsdev, &seginfo);
+
+ dat = fsdev->nd_dat_node;
+ su = fsdev->nd_su_node;
+ cp = fsdev->nd_cp_node;
+ gc = fsdev->nd_gc_node;
+ ifile = nmp->nm_ifile_node;
+
+ /* Lock order used here: su, ifile, gc, cp (dat is taken separately). */
+ VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
+ VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE);
+ VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
+ VOP_LOCK(NTOV(cp), LK_EXCLUSIVE);
+
+ nandfs_iterate_system_vnode(gc, seginfo);
+ nandfs_iterate_dirty_vnodes(nmp->nm_vfs_mountp, seginfo);
+ nandfs_iterate_system_vnode(ifile, seginfo);
+ nandfs_iterate_system_vnode(su, seginfo);
+
+ cno_changed = 0;
+ /* Any non-zero `flags' forces a checkpoint even with nothing gathered. */
+ if (seginfo->blocks || flags) {
+ cno_changed = 1;
+ /* Create new checkpoint */
+ error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ goto error_locks;
+ }
+
+ /* Collect blocks from system files */
+ nandfs_iterate_system_vnode(cp, seginfo);
+ nandfs_iterate_system_vnode(su, seginfo);
+ VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+ nandfs_iterate_system_vnode(dat, seginfo);
+ VOP_UNLOCK(NTOV(dat), 0);
+ /* Repeat gathering su and dat until seginfo->reiterate stays clear. */
+reiterate:
+ seginfo->reiterate = 0;
+ nandfs_iterate_system_vnode(su, seginfo);
+ VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+ nandfs_iterate_system_vnode(dat, seginfo);
+ VOP_UNLOCK(NTOV(dat), 0);
+ if (seginfo->reiterate)
+ goto reiterate;
+ /* No current segment, or an empty one: create it and gather again. */
+ if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
+ error = create_segment(seginfo);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ goto error_locks;
+ }
+ goto reiterate;
+ }
+
+ /* Reiterate all blocks and assign physical block number */
+ nandfs_seginfo_assign_pblk(seginfo);
+
+ /* Fill superroot */
+ error = nandfs_add_superroot(seginfo);
+ if (error) {
+ clean_seginfo(seginfo, 0);
+ delete_seginfo(seginfo);
+ goto error_locks;
+ }
+ KASSERT(!(seginfo->reiterate), ("reiteration after superroot"));
+
+ /* Fill checkpoint data */
+ /* NOTE(review): the return value is not checked here - confirm. */
+ nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1,
+ &ifile->nn_inode, seginfo->blocks);
+
+ LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
+ nandfs_update_segment(fsdev, seg->seg_num,
+ seg->nblocks + seg->segsum_blocks);
+
+ VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
+ error = save_seginfo(seginfo, 1);
+ if (error) {
+ clean_seginfo(seginfo, 1);
+ delete_seginfo(seginfo);
+ goto error_dat;
+ }
+ VOP_UNLOCK(NTOV(dat), 0);
+ }
+
+ VOP_UNLOCK(NTOV(cp), 0);
+ VOP_UNLOCK(NTOV(gc), 0);
+ VOP_UNLOCK(NTOV(ifile), 0);
+
+ /* Called with only the su vnode lock (and nd_seg_const) still held. */
+ nandfs_process_segments(fsdev);
+
+ VOP_UNLOCK(NTOV(su), 0);
+
+ delete_seginfo(seginfo);
+
+ /*
+ * XXX: a hack, will go away soon
+ */
+ if ((NTOV(dat)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+ NTOV(cp)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+ NTOV(gc)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+ NTOV(ifile)->v_bufobj.bo_dirty.bv_cnt != 0 ||
+ NTOV(su)->v_bufobj.bo_dirty.bv_cnt != 0) &&
+ (flags & NANDFS_UMOUNT)) {
+ DPRINTF(SYNC, ("%s: RERUN\n", __func__));
+ goto again;
+ }
+
+ MPASS(fsdev->nd_free_base == NULL);
+
+ lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+ /*
+ * Write the super block every nandfs_cps_between_sblocks checkpoints
+ * (when that knob is non-zero) and always on unmount.
+ */
+ if (cno_changed) {
+ if ((nandfs_cps_between_sblocks != 0 &&
+ fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) ||
+ flags & NANDFS_UMOUNT)
+ nandfs_write_superblock(fsdev);
+ }
+
+ DPRINTF(SYNC, ("%s: END\n", __func__));
+ return (0);
+ /* Error exits: error_dat additionally drops the dat vnode lock. */
+error_dat:
+ VOP_UNLOCK(NTOV(dat), 0);
+error_locks:
+ VOP_UNLOCK(NTOV(cp), 0);
+ VOP_UNLOCK(NTOV(gc), 0);
+ VOP_UNLOCK(NTOV(ifile), 0);
+ VOP_UNLOCK(NTOV(su), 0);
+ lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+ return (error);
+}
+
+#ifdef DDB
+/*
+ * Show details about the given NANDFS mount point.
+ */
+/*
+ * DDB "show nandfs" command: prints the mount names, basic device
+ * parameters, the system node pointers and, when a seginfo is attached to
+ * the device, every segment with its summary and data buffers.
+ * `have_addr' and `addr' are presumably supplied by DB_SHOW_COMMAND - they
+ * are not declared in this block.
+ */
+DB_SHOW_COMMAND(nandfs, db_show_nandfs)
+{
+ struct mount *mp;
+ struct nandfs_device *nffsdev;
+ struct nandfs_segment *seg;
+ struct nandfsmount *nmp;
+ struct buf *bp;
+ struct vnode *vp;
+
+ if (!have_addr) {
+ db_printf("\nUsage: show nandfs <mount_addr>\n");
+ return;
+ }
+
+ /* The address is trusted to be a struct mount; it is not validated. */
+ mp = (struct mount *)addr;
+ db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname,
+ mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename);
+
+
+ nmp = (struct nandfsmount *)(mp->mnt_data);
+ nffsdev = nmp->nm_nandfsdev;
+ db_printf("dev vnode:%p\n", nffsdev->nd_devvp);
+ db_printf("blocksize:%jx last cno:%jx last pseg:%jx seg num:%jx\n",
+ (uintmax_t)nffsdev->nd_blocksize, (uintmax_t)nffsdev->nd_last_cno,
+ (uintmax_t)nffsdev->nd_last_pseg, (uintmax_t)nffsdev->nd_seg_num);
+ db_printf("system nodes: dat:%p cp:%p su:%p ifile:%p gc:%p\n",
+ nffsdev->nd_dat_node, nffsdev->nd_cp_node, nffsdev->nd_su_node,
+ nmp->nm_ifile_node, nffsdev->nd_gc_node);
+
+ /* Dump the seginfo attached to the device, if there is one. */
+ if (nffsdev->nd_seginfo != NULL) {
+ LIST_FOREACH(seg, &nffsdev->nd_seginfo->seg_list, seg_link) {
+ db_printf("seg: %p\n", seg);
+ TAILQ_FOREACH(bp, &seg->segsum,
+ b_cluster.cluster_entry)
+ db_printf("segbp %p\n", bp);
+ TAILQ_FOREACH(bp, &seg->data,
+ b_cluster.cluster_entry) {
+ vp = bp->b_vp;
+ /* A buffer without a vnode is printed with ino 0. */
+ db_printf("bp:%p bp->b_vp:%p ino:%jx\n", bp, vp,
+ (uintmax_t)(vp ? VTON(vp)->nn_ino : 0));
+ }
+ }
+ }
+}
+#endif
diff --git a/sys/fs/nandfs/nandfs_subr.c b/sys/fs/nandfs/nandfs_subr.c
new file mode 100644
index 0000000..b485422
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_subr.c
@@ -0,0 +1,1120 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/signalvar.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/lockf.h>
+#include <sys/libkern.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <machine/_inttypes.h>
+#include "nandfs_mount.h"
+#include "nandfs.h"
+#include "nandfs_subr.h"
+
+/* malloc(9) types used by NANDFS for mount data and temporary buffers. */
+MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount");
+MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp");
+
+uma_zone_t nandfs_node_zone;
+
+void nandfs_bdflush(struct bufobj *bo, struct buf *bp);
+int nandfs_bufsync(struct bufobj *bo, int waitfor);
+
+/*
+ * Buffer object operations installed on every NANDFS vnode's bufobj by
+ * nandfs_get_node_raw().  Write and strategy use the generic routines;
+ * sync and bdflush are the NANDFS-specific hooks defined below.
+ */
+struct buf_ops buf_ops_nandfs = {
+ .bop_name = "buf_ops_nandfs",
+ .bop_write = bufwrite,
+ .bop_strategy = bufstrategy,
+ .bop_sync = nandfs_bufsync,
+ .bop_bdflush = nandfs_bdflush,
+};
+
+/*
+ * bop_sync hook: flush the vnode behind `bo' through nandfs_sync_file().
+ * The vnode must be locked by the caller; `waitfor' is not used.
+ * Returns the result of nandfs_sync_file().
+ */
+int
+nandfs_bufsync(struct bufobj *bo, int waitfor)
+{
+	struct vnode *vp = bo->__bo_vnode;
+	int rv;
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	rv = nandfs_sync_file(vp);
+	if (rv != 0) {
+		nandfs_warning("%s: cannot flush buffers err:%d\n",
+		    __func__, rv);
+	}
+
+	return (rv);
+}
+
+/*
+ * bop_bdflush hook: once the buffer object holds more dirty buffers than
+ * 80% of dirtybufthresh, sync the file owning `bp'.  System nodes and
+ * indirect blocks are never flushed from here.  A failed sync is only
+ * reported with a warning.
+ */
+void
+nandfs_bdflush(struct bufobj *bo, struct buf *bp)
+{
+	struct vnode *vp;
+	int error;
+
+	if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10))
+		return;
+
+	vp = bp->b_vp;
+	if (NANDFS_SYS_NODE(VTON(vp)->nn_ino))
+		return;
+
+	if (NANDFS_IS_INDIRECT(bp))
+		return;
+
+	error = nandfs_sync_file(vp);
+	if (error)
+		nandfs_warning("%s: cannot flush buffers err:%d\n",
+		    __func__, error);
+}
+
+/*
+ * File system initialisation: create the UMA zone that backs
+ * struct nandfs_node allocations.  `vfsp' is not used.  Always returns 0.
+ */
+int
+nandfs_init(struct vfsconf *vfsp)
+{
+	uma_zone_t zone;
+
+	zone = uma_zcreate("nandfs node zone", sizeof(struct nandfs_node),
+	    NULL, NULL, NULL, NULL, 0, 0);
+	nandfs_node_zone = zone;
+
+	return (0);
+}
+
+/*
+ * File system teardown: destroy the node zone created by nandfs_init().
+ * `vfsp' is not used.  Always returns 0.
+ */
+int
+nandfs_uninit(struct vfsconf *vfsp)
+{
+	uma_zone_t zone = nandfs_node_zone;
+
+	uma_zdestroy(zone);
+
+	return (0);
+}
+
+/* Basic calculators */
+/*
+ * Return the number of the segment that contains block `blocknr'.
+ * Segment numbers are counted from the segment holding the first data
+ * block, so `blocknr' must not lie below f_first_data_block.
+ */
+uint64_t
+nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev,
+    nandfs_daddr_t blocknr)
+{
+	uint64_t bps, first_seg, segnum;
+
+	MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block);
+
+	bps = nandfsdev->nd_fsdata.f_blocks_per_segment;
+	first_seg = nandfsdev->nd_fsdata.f_first_data_block / bps;
+	segnum = blocknr / bps - first_seg;
+
+	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
+	    blocknr, segnum));
+
+	return (segnum);
+}
+
+/*
+ * Compute the block range of segment `segnum'.  The first block is stored
+ * in *seg_start; the last block (inclusive) is stored in *seg_end unless
+ * `seg_end' is NULL.
+ */
+void
+nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
+    uint64_t *seg_start, uint64_t *seg_end)
+{
+	uint64_t bps, first;
+
+	bps = nandfsdev->nd_fsdata.f_blocks_per_segment;
+	first = nandfsdev->nd_fsdata.f_first_data_block + bps * segnum;
+
+	*seg_start = first;
+	if (seg_end == NULL)
+		return;
+	*seg_end = first + bps - 1;
+}
+
+/*
+ * Fill in the layout constants of a metadata file whose entries are
+ * `entry_size' bytes long, based on the device block size:
+ *  - a group covers blocksize * 8 entries,
+ *  - a group occupies its entry blocks plus one extra block,
+ *  - a descriptor block covers as many groups as descriptors fit in one
+ *    block, and accounts for one extra block itself.
+ */
+void
+nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
+    struct nandfs_mdt *mdt, int entry_size)
+{
+	uint32_t blocksize;
+
+	blocksize = nandfsdev->nd_blocksize;
+
+	/* Per-block and per-group entry counts */
+	mdt->entries_per_group = blocksize * 8;
+	mdt->entries_per_block = blocksize / entry_size;
+
+	/* Entry blocks rounded up, plus one more block per group */
+	mdt->blocks_per_group =
+	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
+
+	/* Descriptor block geometry */
+	mdt->groups_per_desc_block =
+	    blocksize / sizeof(struct nandfs_block_group_desc);
+	mdt->blocks_per_desc_block =
+	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
+}
+
+/*
+ * Read file system block `blocknr' straight from the device vnode.
+ * The block number is scaled from file system blocks to DEV_BSIZE units.
+ * `cred' and `flags' are accepted but not used; the read is always issued
+ * with NOCRED.  Returns the bread() result; the buffer is handed back
+ * through `bpp'.
+ */
+int
+nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
+    struct ucred *cred, int flags, struct buf **bpp)
+{
+	int blk2dev, error;
+
+	blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+
+	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
+	    blocknr * blk2dev, nandfsdev->nd_devvp));
+
+	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
+	    nandfsdev->nd_blocksize, NOCRED, bpp);
+	if (error != 0) {
+		nandfs_error("%s: cannot read from device - blk:%jx\n",
+		    __func__, blocknr);
+	}
+
+	return (error);
+}
+
+/* Read on a node */
+/*
+ * Read logical block `blocknr' of `node' through the buffer cache.
+ * If the returned buffer carries no virtual block number yet, was found
+ * in the cache (B_CACHE) and does not belong to the DAT file, the virtual
+ * block number is looked up in the node's bmap and recorded in the buffer.
+ * `flags' is not used.  Returns the bread() result.
+ */
+int
+nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
+ struct ucred *cred, int flags, struct buf **bpp)
+{
+ nandfs_daddr_t vblk;
+ int error;
+
+ DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+ blocknr));
+
+ error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+ cred, bpp);
+
+ KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
+ NTOV(node), blocknr, error));
+
+ /* NOTE(review): *bpp is dereferenced even when error != 0 - confirm. */
+ if (!nandfs_vblk_get(*bpp) &&
+ ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
+ /* The result of the bmap lookup is not checked here. */
+ nandfs_bmap_lookup(node, blocknr, &vblk);
+ nandfs_vblk_set(*bpp, vblk);
+ }
+ return (error);
+}
+
+/*
+ * Read logical block `blocknr' of `node' through the buffer cache.
+ * The body performs the same steps as nandfs_bread(): read the block and,
+ * for a cached buffer of a non-DAT node without a recorded virtual block
+ * number, look that number up in the bmap and store it in the buffer.
+ * `flags' is not used.  Returns the bread() result.
+ */
+int
+nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
+ struct ucred *cred, int flags, struct buf **bpp)
+{
+ nandfs_daddr_t vblk;
+ int error;
+
+ DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+ blocknr));
+
+ error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+ cred, bpp);
+
+ KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
+ NTOV(node), blocknr, error));
+
+ /* NOTE(review): *bpp is dereferenced even when error != 0 - confirm. */
+ if (!nandfs_vblk_get(*bpp) &&
+ ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
+ /* The result of the bmap lookup is not checked here. */
+ nandfs_bmap_lookup(node, blocknr, &vblk);
+ nandfs_vblk_set(*bpp, vblk);
+ }
+
+ return (error);
+}
+
+/*
+ * Release virtual block `vblk' of `node': end the block through
+ * nandfs_vblock_end() and, on success, decrement the inode's block count.
+ * For non-system nodes the write lock on the device is asserted first.
+ * Returns 0 or the error from nandfs_vblock_end().
+ */
+int
+nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
+{
+	int rv;
+
+	if (!NANDFS_SYS_NODE(node->nn_ino))
+		NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+	rv = nandfs_vblock_end(node->nn_nandfsdev, vblk);
+	if (rv == 0) {
+		node->nn_inode.i_blocks--;
+		return (0);
+	}
+
+	nandfs_error("%s: ending vblk: %jx failed\n",
+	    __func__, (uintmax_t)vblk);
+	return (rv);
+}
+
+/*
+ * Create a new, zeroed buffer for logical block `blocknr' of `node' and
+ * insert the block into the node's bmap.  On success the inode's block
+ * count is incremented and the buffer is returned through `bpp'.
+ * The node's vnode must be locked; for non-system nodes the device write
+ * lock is asserted.  `cred' and `flags' are not used.
+ * Returns 0 on success, the bmap insertion error (buffer released), or -1
+ * when no buffer could be obtained.
+ */
+int
+nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
+ struct ucred *cred, int flags, struct buf **bpp)
+{
+ int error;
+
+ ASSERT_VOP_LOCKED(NTOV(node), __func__);
+ if (!NANDFS_SYS_NODE(node->nn_ino))
+ NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+ DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+ blocknr));
+
+ *bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+ 0, 0, 0);
+
+ KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
+ NTOV(node), blocknr));
+
+ if (*bpp) {
+ vfs_bio_clrbuf(*bpp);
+ (*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
+ error = nandfs_bmap_insert_block(node, blocknr, *bpp);
+ if (error) {
+ nandfs_warning("%s: failed bmap insert node:%p"
+ " blk:%jx\n", __func__, node, blocknr);
+ brelse(*bpp);
+ return (error);
+ }
+ node->nn_inode.i_blocks++;
+
+ return (0);
+ }
+
+ /* Only reachable when getblk() returned NULL. */
+ return (-1);
+}
+
+/*
+ * Create a new, zeroed metadata buffer for logical block `blocknr' of
+ * `node', give it a virtual block number and insert it into the bmap.
+ * For every node except the DAT file the number comes from
+ * nandfs_vblock_alloc(); for the DAT file the device's nd_fakevblk
+ * counter is used instead.  The buffer is flagged NANDFS_VBLK_ASSIGNED.
+ * The node's vnode must be locked and the device write lock is asserted.
+ * `cred' and `flags' are not used.
+ * Returns 0, or the allocation error (buffer released).
+ */
+int
+nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
+ struct ucred *cred, int flags, struct buf **bpp)
+{
+ struct nandfs_device *fsdev;
+ nandfs_daddr_t vblk;
+ int error;
+
+ ASSERT_VOP_LOCKED(NTOV(node), __func__);
+ NANDFS_WRITEASSERT(node->nn_nandfsdev);
+
+ DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
+ blocknr));
+
+ fsdev = node->nn_nandfsdev;
+
+ *bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
+ 0, 0, 0);
+
+ KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__,
+ NTOV(node), blocknr));
+
+ /* NOTE(review): *bpp is used without a NULL check, unlike nandfs_bcreate(). */
+ memset((*bpp)->b_data, 0, fsdev->nd_blocksize);
+
+ vfs_bio_clrbuf(*bpp);
+ (*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
+
+ nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);
+
+ if (node->nn_ino != NANDFS_DAT_INO) {
+ error = nandfs_vblock_alloc(fsdev, &vblk);
+ if (error) {
+ nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
+ brelse(*bpp);
+ return (error);
+ }
+ } else
+ vblk = fsdev->nd_fakevblk++;
+
+ nandfs_vblk_set(*bpp, vblk);
+
+ /* NOTE(review): the bmap insertion result is ignored here - confirm. */
+ nandfs_bmap_insert_block(node, blocknr, *bpp);
+ return (0);
+}
+
+/* Translate index to a file block number and an entry */
+/*
+ * Map entry `index' of a metadata file to the file block that holds it
+ * (*blocknr) and to its slot inside that block (*entry_in_block).
+ *
+ * The block number is built from: one leading block, the blocks of all
+ * preceding descriptor blocks, the blocks of the preceding groups in the
+ * same descriptor block, one more block, and finally the entry block
+ * offset inside the group.
+ */
+void
+nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
+    nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
+{
+	uint64_t grp, in_grp, dblk, in_dblk, eblk, lbn;
+
+	/* Split the index into group and offset within the group */
+	grp = index / mdt->entries_per_group;
+	in_grp = index % mdt->entries_per_group;
+
+	/* Split the group into descriptor block and offset within it */
+	dblk = grp / mdt->groups_per_desc_block;
+	in_dblk = grp % mdt->groups_per_desc_block;
+
+	/* Entry block within the group */
+	eblk = in_grp / mdt->entries_per_block;
+
+	lbn = 1 + dblk * mdt->blocks_per_desc_block;
+	lbn += in_dblk * mdt->blocks_per_group;
+	lbn += 1 + eblk;
+
+	*blocknr = lbn;
+	*entry_in_block = in_grp % mdt->entries_per_block;
+}
+
+/*
+ * Like nandfs_mdt_trans(), but also report the intermediate blocks used
+ * on the way to the entry: *desc receives the first block of the
+ * descriptor block area and *bitmap the block that immediately precedes
+ * the group's entry blocks.
+ */
+void
+nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index,
+    uint64_t *desc, uint64_t *bitmap, nandfs_lbn_t *blocknr,
+    uint32_t *entry_in_block)
+{
+	uint64_t grp, in_grp, dblk, in_dblk, eblk, lbn;
+
+	/* Split the index into group and offset within the group */
+	grp = index / mdt->entries_per_group;
+	in_grp = index % mdt->entries_per_group;
+
+	/* Split the group into descriptor block and offset within it */
+	dblk = grp / mdt->groups_per_desc_block;
+	in_dblk = grp % mdt->groups_per_desc_block;
+
+	/* Entry block within the group */
+	eblk = in_grp / mdt->entries_per_block;
+
+	/* Start of the descriptor block area */
+	*desc = dblk * mdt->blocks_per_desc_block;
+
+	/* First block of the group */
+	lbn = 1 + dblk * mdt->blocks_per_desc_block;
+	lbn += in_dblk * mdt->blocks_per_group;
+	*bitmap = lbn;
+
+	/* Block holding the entry */
+	lbn += 1 + eblk;
+	*blocknr = lbn;
+	*entry_in_block = in_grp % mdt->entries_per_block;
+
+	DPRINTF(ALLOC,
+	    ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n",
+	    __func__, (uintmax_t)*desc, (uintmax_t)*bitmap,
+	    (uintmax_t)*blocknr, *entry_in_block));
+}
+
+/*
+ * Translate virtual block number `vblocknr' of `node' into a physical
+ * block number by reading the matching entry of the DAT file.
+ *
+ * For the DAT and GC nodes the number is returned unchanged.  A virtual
+ * block number of 0 is not translated and *pblocknr is left untouched.
+ * The DAT vnode is locked shared for the lookup unless it is already
+ * locked on entry; a lock that was already held is never dropped here.
+ *
+ * Returns 0 on success or the error from reading the DAT block.
+ */
+int
+nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr,
+    nandfs_daddr_t *pblocknr)
+{
+	struct nandfs_node *dat_node;
+	struct nandfs_dat_entry *entry;
+	struct buf *bp;
+	nandfs_lbn_t ldatblknr;
+	uint32_t entry_in_block;
+	int locked, error;
+
+	if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) {
+		*pblocknr = vblocknr;
+		return (0);
+	}
+
+	/* only translate valid vblocknrs */
+	if (vblocknr == 0)
+		return (0);
+
+	dat_node = node->nn_nandfsdev->nd_dat_node;
+	nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr,
+	    &entry_in_block);
+
+	locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node));
+	if (!locked)
+		VOP_LOCK(NTOV(dat_node), LK_SHARED);
+	error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp);
+	if (error) {
+		DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n",
+		    (uintmax_t)ldatblknr));
+		brelse(bp);
+		/* Drop the DAT lock only if it was taken above. */
+		if (!locked)
+			VOP_UNLOCK(NTOV(dat_node), 0);
+		return (error);
+	}
+
+	/* Get our translation */
+	entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block;
+	DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n",
+	    entry, bp->b_data, entry_in_block));
+	DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n",
+	    (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr,
+	    (uintmax_t)entry->de_start, (uintmax_t)entry->de_end));
+
+	*pblocknr = entry->de_blocknr;
+	brelse(bp);
+	if (!locked)
+		VOP_UNLOCK(NTOV(dat_node), 0);
+
+	MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block ||
+	    *pblocknr == 0);
+
+	return (0);
+}
+
+/*
+ * Return 1 when the segment summary carries the NANDFS segment summary
+ * magic number, 0 otherwise.
+ */
+int
+nandfs_segsum_valid(struct nandfs_segment_summary *segsum)
+{
+
+	if (segsum->ss_magic != NANDFS_SEGSUM_MAGIC)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Read the segment summary stored at device block `blocknr' into
+ * `segsum'.  Returns the read error, EINVAL when the summary's magic
+ * number does not match, or 0 on success.
+ */
+int
+nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr,
+    struct nandfs_segment_summary *segsum)
+{
+	struct buf *bp;
+	int error;
+
+	DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n",
+	    (uintmax_t)blocknr));
+
+	error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp);
+	if (error)
+		return (error);
+
+	/* Copy out of the buffer so it can be released right away. */
+	memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary));
+	brelse(bp);
+
+	if (!nandfs_segsum_valid(segsum)) {
+		DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__,
+		    (uintmax_t)blocknr));
+		return (EINVAL);
+	}
+
+	return (error);
+}
+
+/*
+ * Load the super root belonging to the partial segment that starts at
+ * block `pseg' and is described by `segsum'.
+ *
+ * Returns ENOENT when the summary does not flag a super root, the read
+ * error when the block cannot be read, EINVAL on a CRC mismatch, and 0
+ * after storing the super root in nandfsdev->nd_super_root.
+ */
+static int
+nandfs_load_super_root(struct nandfs_device *nandfsdev,
+ struct nandfs_segment_summary *segsum, uint64_t pseg)
+{
+ struct nandfs_super_root super_root;
+ struct buf *bp;
+ uint64_t blocknr;
+ uint32_t super_root_crc, comp_crc;
+ int off, error;
+
+ /* Check if there is a superroot */
+ if ((segsum->ss_flags & NANDFS_SS_SR) == 0) {
+ DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__,
+ pseg));
+ return (ENOENT);
+ }
+
+ /* Get our super root, located at the end of the pseg */
+ blocknr = pseg + segsum->ss_nblocks - 1;
+ DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr));
+
+ error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp);
+ if (error)
+ return (error);
+
+ memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root));
+ brelse(bp);
+
+ /* Check super root CRC */
+ /* The checksum covers NANDFS_SR_BYTES minus the sr_sum field itself. */
+ super_root_crc = super_root.sr_sum;
+ off = sizeof(super_root.sr_sum);
+ comp_crc = crc32((uint8_t *)&super_root + off,
+ NANDFS_SR_BYTES - off);
+
+ if (super_root_crc != comp_crc) {
+ DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n",
+ __func__, super_root_crc, comp_crc));
+ return (EINVAL);
+ }
+
+ nandfsdev->nd_super_root = super_root;
+ DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__));
+
+ return (0);
+}
+
+/*
+ * Search for the last super root recorded.
+ */
+/*
+ * Starting at the partial segment recorded in the super block, walk the
+ * chain of partial segments forward and remember the last one that holds
+ * a loadable super root.  The walk stops on the first read/validation
+ * error or when a summary's sequence number or creation time goes
+ * backwards.  Returns 0 when at least one super root was found or the
+ * walk ended without an error, otherwise the error that stopped it.
+ */
+int
+nandfs_search_super_root(struct nandfs_device *nandfsdev)
+{
+ struct nandfs_super_block *super;
+ struct nandfs_segment_summary segsum;
+ uint64_t seg_start, seg_end, cno, seq, create, pseg;
+ uint64_t segnum;
+ int error, found;
+
+ error = found = 0;
+
+ /* Search for last super root */
+ pseg = nandfsdev->nd_super.s_last_pseg;
+ segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
+
+ cno = nandfsdev->nd_super.s_last_cno;
+ create = seq = 0;
+ DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
+ (uintmax_t)pseg));
+
+ for (;;) {
+ error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
+ if (error)
+ break;
+
+ /* An older summary means we ran past the most recent log entry. */
+ if (segsum.ss_seq < seq || segsum.ss_create < create)
+ break;
+
+ /* Try to load super root */
+ if (segsum.ss_flags & NANDFS_SS_SR) {
+ error = nandfs_load_super_root(nandfsdev, &segsum, pseg);
+ if (error)
+ break; /* confused */
+ found = 1;
+
+ /* Record this pseg as the newest known-good state. */
+ super = &nandfsdev->nd_super;
+ nandfsdev->nd_last_segsum = segsum;
+ super->s_last_pseg = pseg;
+ super->s_last_cno = cno++;
+ super->s_last_seq = segsum.ss_seq;
+ super->s_state = NANDFS_VALID_FS;
+ seq = segsum.ss_seq;
+ create = segsum.ss_create;
+ } else {
+ seq = segsum.ss_seq;
+ create = segsum.ss_create;
+ }
+
+ /* Calculate next partial segment location */
+ pseg += segsum.ss_nblocks;
+ DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
+ (uintmax_t)pseg));
+
+ /* Did we reach the end of the segment? if so, go to the next */
+ nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
+ &seg_end);
+ if (pseg >= seg_end) {
+ pseg = segsum.ss_next;
+ DPRINTF(VOLUMES,
+ (" partial seg oor next is %jx[%jx - %jx]\n",
+ (uintmax_t)pseg, (uintmax_t)seg_start,
+ (uintmax_t)seg_end));
+ }
+ segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
+ }
+
+ /* An error after a super root was already found is not fatal. */
+ if (error && !found)
+ return (error);
+
+ return (0);
+}
+
+/*
+ * Allocate a new vnode together with a NANDFS node for inode number `ino'
+ * and link the two.
+ *
+ * With a mount (`nmp' != NULL) the vnode uses nandfs_vnodeops, the device
+ * write lock is taken and the vnode is inserted into the mount's vnode
+ * list; without one it is created as a system vnode using
+ * nandfs_system_vnodeops.  When `inode' is given, it is copied into the
+ * node.  The vnode lock is acquired exclusively before the node is set up.
+ *
+ * Returns 0 with the node in *nodep, or an error with *nodep == NULL.
+ */
+int
+nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
+ uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
+{
+ struct nandfs_node *node;
+ struct vnode *nvp;
+ struct mount *mp;
+ int error;
+
+ *nodep = NULL;
+
+ /* Associate with mountpoint if present */
+ if (nmp) {
+ mp = nmp->nm_vfs_mountp;
+ error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
+ if (error) {
+ return (error);
+ }
+ } else {
+ mp = NULL;
+ error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
+ &nvp);
+ if (error) {
+ return (error);
+ }
+ }
+
+ /* NOTE(review): no matching unlock is visible in this function. */
+ if (mp)
+ NANDFS_WRITELOCK(nandfsdev);
+
+ DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n",
+ __func__, (uintmax_t)ino, nvp));
+ /* Lock node */
+ lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
+
+ if (mp) {
+ error = insmntque(nvp, mp);
+ if (error != 0) {
+ *nodep = NULL;
+ return (error);
+ }
+ }
+
+ node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);
+
+ /* Crosslink */
+ node->nn_vnode = nvp;
+ nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
+ node->nn_nmp = nmp;
+ node->nn_nandfsdev = nandfsdev;
+ nvp->v_data = node;
+
+ /* Initialise NANDFS node */
+ node->nn_ino = ino;
+ if (inode != NULL)
+ node->nn_inode = *inode;
+
+ nandfs_vinit(nvp, ino);
+
+ /* Return node */
+ *nodep = node;
+ DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
+ __func__, (uintmax_t)ino, nvp, *nodep));
+
+ return (0);
+}
+
+/*
+ * Return the node for inode number `ino' of mount `nmp'.
+ *
+ * Inode numbers below NANDFS_ATIME_INO other than the root are rejected
+ * with ENOENT.  A vnode already present in the VFS hash is returned
+ * directly.  Otherwise the on-disk inode is copied out of the ifile, a
+ * fresh vnode/node pair is created and inserted into the hash.
+ *
+ * Returns 0 with the node in *nodep, or an error with *nodep == NULL.
+ */
+int
+nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
+ struct nandfs_node **nodep)
+{
+ struct nandfs_device *nandfsdev;
+ struct nandfs_inode inode, *entry;
+ struct vnode *nvp, *vpp;
+ struct thread *td;
+ struct buf *bp;
+ uint64_t ivblocknr;
+ uint32_t entry_in_block;
+ int error;
+
+ /* Look up node in hash table */
+ td = curthread;
+ *nodep = NULL;
+
+ if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
+ printf("nandfs_get_node: system ino %"PRIu64" not in mount "
+ "point!\n", ino);
+ return (ENOENT);
+ }
+
+ error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
+ NULL, NULL);
+ if (error)
+ return (error);
+
+ /* Hash hit: hand out the node attached to the existing vnode. */
+ if (nvp != NULL) {
+ *nodep = (struct nandfs_node *)nvp->v_data;
+ return (0);
+ }
+
+ /* Look up inode structure in the mountpoint's ifile */
+ nandfsdev = nmp->nm_nandfsdev;
+ nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
+ &entry_in_block);
+
+ VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
+ error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
+ if (error) {
+ brelse(bp);
+ VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
+ /* Any read failure is reported to the caller as ENOENT. */
+ return (ENOENT);
+ }
+
+ /* Get inode entry */
+ entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
+ memcpy(&inode, entry, sizeof(struct nandfs_inode));
+ brelse(bp);
+ VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
+
+ /* Get node */
+ error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode, nodep);
+ if (error) {
+ *nodep = NULL;
+ return (error);
+ }
+
+ nvp = (*nodep)->nn_vnode;
+ /*
+ * NOTE(review): `vpp' is never examined afterwards; confirm that
+ * vfs_hash_insert() cannot hand back a different, already hashed
+ * vnode for this inode here.
+ */
+ error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
+ if (error) {
+ *nodep = NULL;
+ return (error);
+ }
+
+ return (error);
+}
+
+/*
+ * Detach the node referenced by *nodep from its vnode, return it to the
+ * node zone and clear the caller's pointer.  A NULL *nodep is ignored.
+ */
+void
+nandfs_dispose_node(struct nandfs_node **nodep)
+{
+	struct nandfs_node *np;
+
+	/* Nothing to dispose of */
+	np = *nodep;
+	if (np == NULL)
+		return;
+
+	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));
+
+	/* Break the vnode -> node link before freeing the node */
+	NTOV(np)->v_data = NULL;
+	uma_zfree(nandfs_node_zone, np);
+
+	*nodep = NULL;
+}
+
+/*
+ * Search directory `dvp' for an entry called `name' (`namelen' bytes).
+ *
+ * On a match with a non-zero inode number, *found is set to 1, *ino
+ * receives the inode number and *off the byte offset of the entry within
+ * the directory.  Without a match *found stays 0.
+ *
+ * Returns ENOTDIR when `dvp' is not a directory, EIO when a directory
+ * block cannot be read or an entry has a zero record length (which would
+ * otherwise stall the walk forever), and 0 otherwise.
+ */
+int
+nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
+    uint64_t *ino, int *found, uint64_t *off)
+{
+	struct nandfs_node *dir_node = VTON(dvp);
+	struct nandfs_dir_entry *ndirent;
+	struct buf *bp;
+	uint64_t file_size, diroffset, blkoff;
+	uint64_t blocknr;
+	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
+	uint8_t *pos, name_len;
+	int error;
+
+	*found = 0;
+
+	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
+	if (dvp->v_type != VDIR) {
+		return (ENOTDIR);
+	}
+
+	/* Get directory filesize */
+	file_size = dir_node->nn_inode.i_size;
+
+	/* Walk the directory */
+	diroffset = 0;
+	blocknr = 0;
+	blkoff = 0;
+	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		return (EIO);
+	}
+
+	while (diroffset < file_size) {
+		/* Crossed into the next directory block: read it in */
+		if (blkoff >= blocksize) {
+			blkoff = 0; blocknr++;
+			brelse(bp);
+			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
+			    &bp);
+			if (error) {
+				brelse(bp);
+				return (EIO);
+			}
+		}
+
+		/* Read in one dirent */
+		pos = (uint8_t *) bp->b_data + blkoff;
+		ndirent = (struct nandfs_dir_entry *) pos;
+		name_len = ndirent->name_len;
+
+		if ((name_len == namelen) &&
+		    (strncmp(name, ndirent->name, name_len) == 0) &&
+		    (ndirent->inode != 0)) {
+			*ino = ndirent->inode;
+			*off = diroffset;
+			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
+			    name_len, ndirent->name, *ino));
+			*found = 1;
+			break;
+		}
+
+		/*
+		 * A zero record length would never advance the offsets and
+		 * keep us spinning on the same entry; treat it as a
+		 * corrupted directory.
+		 */
+		if (ndirent->rec_len == 0) {
+			error = EIO;
+			break;
+		}
+
+		/* Advance */
+		diroffset += ndirent->rec_len;
+		blkoff += ndirent->rec_len;
+	}
+	brelse(bp);
+
+	return (error);
+}
+
+/*
+ * Fill `fsinfo' with copies of the device's fsdata and super block and
+ * with the name the file system was mounted from.  Always returns 0.
+ */
+int
+nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
+{
+	struct nandfs_device *dev = nmp->nm_nandfsdev;
+	struct mount *mp = nmp->nm_vfs_mountp;
+
+	memcpy(&fsinfo->fs_fsdata, &dev->nd_fsdata, sizeof(dev->nd_fsdata));
+	memcpy(&fsinfo->fs_super, &dev->nd_super, sizeof(dev->nd_super));
+
+	/* snprintf() keeps the device name NUL-terminated */
+	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev),
+	    "%s", mp->mnt_stat.f_mntfromname);
+
+	return (0);
+}
+
+/*
+ * Initialise an on-disk inode for a newly created file of type/mode
+ * `mode': no blocks, zero size, ctime and mtime set to the current VFS
+ * timestamp, link count 2 for directories and 1 for everything else, and
+ * cleared flags, special field and block pointers.
+ */
+void
+nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
+{
+	struct timespec now;
+
+	vfs_timestamp(&now);
+
+	/* Size and type */
+	inode->i_blocks = 0;
+	inode->i_size = 0;
+
+	/* Timestamps */
+	inode->i_ctime = now.tv_sec;
+	inode->i_ctime_nsec = now.tv_nsec;
+	inode->i_mtime = now.tv_sec;
+	inode->i_mtime_nsec = now.tv_nsec;
+
+	inode->i_mode = mode;
+	inode->i_links_count = S_ISDIR(mode) ? 2 : 1;
+	inode->i_flags = 0;
+
+	/* No special data and no mapped blocks yet */
+	inode->i_special = 0;
+	memset(inode->i_db, 0, sizeof(inode->i_db));
+	memset(inode->i_ib, 0, sizeof(inode->i_ib));
+}
+
+/*
+ * Wipe an on-disk inode.  The inode must not own any blocks any more.
+ */
+void
+nandfs_inode_destroy(struct nandfs_inode *inode)
+{
+
+	MPASS(inode->i_blocks == 0);
+	memset(inode, 0, sizeof(*inode));
+}
+
+/*
+ * Report whether the file system should be treated as full.
+ *
+ * The available space is the block count of all clean segments but one.
+ * The file system counts as full when the dirty buffers plus a reserve of
+ * ten segments reach that space, or when no clean segment is left at all.
+ * Returns 1 when full, 0 otherwise.
+ */
+int
+nandfs_fs_full(struct nandfs_device *nffsdev)
+{
+	uint64_t space, bps;
+
+	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
+
+	/*
+	 * Without a clean segment "(nd_clean_segs - 1) * bps" would wrap
+	 * around to a huge value and make the file system look empty.
+	 */
+	if (nffsdev->nd_clean_segs < 1)
+		return (1);
+
+	space = (nffsdev->nd_clean_segs - 1) * bps;
+
+	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
+	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));
+
+	if (nffsdev->nd_dirty_bufs + (10 * bps) >= space)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Mark buffer `bp' dirty and account for it on the device.
+ *
+ * Buffers that are already gathered, or already managed and delayed-write,
+ * are simply released.  When the file system is full, the buffer belongs
+ * to a non-system node and `force' is not set, the buffer is released and
+ * ENOSPC is returned.  Otherwise the buffer becomes a managed delayed
+ * write, the dirty buffer counter is incremented and, with `dirty_meta'
+ * set and the node not being the GC node, the related bmap blocks are
+ * dirtied too.  Once the dirty buffers reach nandfs_max_dirty_segs
+ * segments worth of blocks, the syncer is woken up.
+ *
+ * Returns 0 or ENOSPC.
+ */
+static int
+_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
+{
+ struct nandfs_device *nffsdev;
+ struct nandfs_node *node;
+ uint64_t ino, bps;
+
+ if (NANDFS_ISGATHERED(bp)) {
+ bqrelse(bp);
+ return (0);
+ }
+ if ((bp->b_flags & (B_MANAGED | B_DELWRI)) == (B_MANAGED | B_DELWRI)) {
+ bqrelse(bp);
+ return (0);
+ }
+
+ node = VTON(bp->b_vp);
+ nffsdev = node->nn_nandfsdev;
+ DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
+ ino = node->nn_ino;
+
+ if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
+ brelse(bp);
+ return (ENOSPC);
+ }
+
+ bp->b_flags |= B_MANAGED;
+ bdwrite(bp);
+
+ nandfs_dirty_bufs_increment(nffsdev);
+
+ /* NOTE(review): bp is still dereferenced after bdwrite() - confirm. */
+ KASSERT((bp->b_vp), ("vp missing for bp"));
+ KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
+ ("bp vblk is 0"));
+
+ /*
+ * To maintain consistency of FS we need to force making
+ * meta buffers dirty, even if free space is low.
+ */
+ if (dirty_meta && ino != NANDFS_GC_INO)
+ nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
+
+ bps = nffsdev->nd_fsdata.f_blocks_per_segment;
+
+ /* Too many dirty buffers: kick the syncer unless it is already running. */
+ if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
+ mtx_lock(&nffsdev->nd_sync_mtx);
+ if (nffsdev->nd_syncing == 0) {
+ DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
+ nffsdev->nd_syncing = 1;
+ wakeup(&nffsdev->nd_syncing);
+ }
+ mtx_unlock(&nffsdev->nd_sync_mtx);
+ }
+
+ return (0);
+}
+
+/*
+ * Mark `bp' dirty and also dirty the bmap blocks related to it
+ * (dirty_meta = 1).  Returns the result of _nandfs_dirty_buf().
+ */
+int
+nandfs_dirty_buf(struct buf *bp, int force)
+{
+	int error;
+
+	error = _nandfs_dirty_buf(bp, 1, force);
+
+	return (error);
+}
+
+/*
+ * Mark `bp' dirty without touching the related bmap blocks
+ * (dirty_meta = 0).  Returns the result of _nandfs_dirty_buf().
+ */
+int
+nandfs_dirty_buf_meta(struct buf *bp, int force)
+{
+	int error;
+
+	error = _nandfs_dirty_buf(bp, 0, force);
+
+	return (error);
+}
+
+/*
+ * Undo the dirty state of `bp' and release it.  A delayed-write buffer
+ * loses its B_DELWRI and B_MANAGED flags and is removed from the device's
+ * dirty buffer count.  B_DEFERRED is cleared in every case.  The buffer
+ * must be held by the caller.
+ */
+void
+nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
+{
+
+	BUF_ASSERT_HELD(bp);
+
+	if ((bp->b_flags & B_DELWRI) != 0) {
+		bp->b_flags &= ~(B_DELWRI | B_MANAGED);
+		nandfs_dirty_bufs_decrement(nffsdev);
+	}
+
+	/* The deferred write flag is not needed once the buffer is written. */
+	bp->b_flags &= ~B_DEFERRED;
+
+	brelse(bp);
+}
+
+/*
+ * Undirty `bp' on the device that owns the buffer's vnode; see
+ * nandfs_undirty_buf_fsdev().
+ */
+void
+nandfs_undirty_buf(struct buf *bp)
+{
+	struct nandfs_device *fsdev;
+
+	fsdev = VTON(bp->b_vp)->nn_nandfsdev;
+	nandfs_undirty_buf_fsdev(fsdev, bp);
+}
+
+/*
+ * Record `blocknr' as the virtual block number of `bp'.  The value is
+ * stored in place of the buffer's b_fsprivate1 field.
+ */
+void
+nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
+{
+
+	*(nandfs_daddr_t *)(&bp->b_fsprivate1) = blocknr;
+}
+
+/*
+ * Return the virtual block number recorded in `bp' by nandfs_vblk_set().
+ */
+nandfs_daddr_t
+nandfs_vblk_get(struct buf *bp)
+{
+
+	return (*(nandfs_daddr_t *)(&bp->b_fsprivate1));
+}
+
+/*
+ * Set the NANDFS flag bits `bits' on `bp'.  The flags live in the
+ * buffer's b_fsprivate3 pointer, used as an integer.
+ */
+void
+nandfs_buf_set(struct buf *bp, uint32_t bits)
+{
+	uintptr_t cur = (uintptr_t)bp->b_fsprivate3;
+
+	bp->b_fsprivate3 = (void *)(cur | (uintptr_t)bits);
+}
+
+/*
+ * Clear the NANDFS flag bits `bits' on `bp' (kept in b_fsprivate3).
+ */
+void
+nandfs_buf_clear(struct buf *bp, uint32_t bits)
+{
+	uintptr_t cur = (uintptr_t)bp->b_fsprivate3;
+
+	bp->b_fsprivate3 = (void *)(cur & ~(uintptr_t)bits);
+}
+
+/*
+ * Return 1 when at least one of the NANDFS flag bits `bits' is set on
+ * `bp' (kept in b_fsprivate3), 0 otherwise.
+ */
+int
+nandfs_buf_check(struct buf *bp, uint32_t bits)
+{
+	uintptr_t cur = (uintptr_t)bp->b_fsprivate3;
+
+	return ((cur & bits) != 0);
+}
+
+/*
+ * Erase `size' bytes of the device starting at byte `offset'.
+ * `size' must be a multiple of the device erase size.
+ *
+ * On NAND devices the request is passed to g_delete_data().  On other
+ * devices the erase is emulated by filling the range with 0xff through
+ * the buffer cache, in chunks of at most MAXBSIZE bytes, so that ranges
+ * smaller than MAXBSIZE and ranges that are not a multiple of MAXBSIZE
+ * are covered completely.
+ *
+ * Returns 0 or the first error from g_delete_data(), bread() or bwrite().
+ */
+int
+nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
+{
+	struct buf *bp;
+	size_t chunk, done;
+	int error;
+
+	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
+	    __func__, (uintmax_t)offset, size));
+
+	MPASS(size % fsdev->nd_erasesize == 0);
+
+	if (fsdev->nd_is_nand) {
+		error = g_delete_data(fsdev->nd_gconsumer, offset, size);
+		return (error);
+	}
+
+	error = 0;
+	for (done = 0; done < size; done += chunk) {
+		/* Never hand more than MAXBSIZE to the buffer cache */
+		chunk = size - done;
+		if (chunk > MAXBSIZE)
+			chunk = MAXBSIZE;
+
+		error = bread(fsdev->nd_devvp, btodb(offset + (off_t)done),
+		    (int)chunk, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		memset(bp->b_data, 0xff, chunk);
+		error = bwrite(bp);
+		if (error) {
+			nandfs_error("%s: err:%d from bwrite\n",
+			    __func__, error);
+			return (error);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Return 1 when VOP_ISLOCKED() reports `vp' as locked exclusively or
+ * shared, 0 for any other lock state.
+ */
+int
+nandfs_vop_islocked(struct vnode *vp)
+{
+	int state;
+
+	state = VOP_ISLOCKED(vp);
+	if (state == LK_EXCLUSIVE)
+		return (1);
+	if (state == LK_SHARED)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Convert file system block number `block' to a device block number by
+ * scaling it to bytes with the file system block size and applying
+ * btodb().
+ */
+nandfs_daddr_t
+nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block)
+{
+
+	return (btodb(block * fsdev->nd_blocksize));
+}
diff --git a/sys/fs/nandfs/nandfs_subr.h b/sys/fs/nandfs/nandfs_subr.h
new file mode 100644
index 0000000..0bcda18
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_subr.h
@@ -0,0 +1,238 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_subr.h,v 1.1 2009/07/18 16:31:42 reinoud
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FS_NANDFS_NANDFS_SUBR_H_
+#define _FS_NANDFS_NANDFS_SUBR_H_
+
+struct nandfs_mdt;
+
+/*
+ * Request state passed to the entry allocation helpers declared in this
+ * header (nandfs_find_free_entry(), nandfs_find_entry(),
+ * nandfs_alloc_entry(), nandfs_abort_entry(), nandfs_free_entry(),
+ * nandfs_get_entry_block()).
+ *
+ * NOTE(review): the per-field notes below are inferred from the field
+ * names only; confirm against nandfs_alloc.c.
+ */
+struct nandfs_alloc_request
+{
+ uint64_t entrynum; /* presumably the number of the entry being handled */
+ struct buf *bp_desc; /* presumably the buffer with the group descriptor */
+ struct buf *bp_bitmap; /* presumably the buffer with the allocation bitmap */
+ struct buf *bp_entry; /* presumably the buffer with the entry itself */
+};
+
+/* Segment creation */
+void nandfs_wakeup_wait_sync(struct nandfs_device *, int);
+int nandfs_segment_constructor(struct nandfsmount *, int);
+int nandfs_sync_file(struct vnode *);
+
+/* Basic calculators */
+uint64_t nandfs_get_segnum_of_block(struct nandfs_device *, nandfs_daddr_t);
+void nandfs_get_segment_range(struct nandfs_device *, uint64_t, uint64_t *,
+ uint64_t *);
+void nandfs_calc_mdt_consts(struct nandfs_device *, struct nandfs_mdt *, int);
+
+/* Log reading / volume helpers */
+int nandfs_search_super_root(struct nandfs_device *);
+
+/* Reading */
+int nandfs_dev_bread(struct nandfs_device *, nandfs_daddr_t, struct ucred *,
+ int, struct buf **);
+int nandfs_bread(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bread_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bdestroy(struct nandfs_node *, nandfs_daddr_t);
+int nandfs_bcreate(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int,
+ struct buf **);
+int nandfs_bcreate_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *,
+ int, struct buf **);
+int nandfs_bread_create(struct nandfs_node *, nandfs_lbn_t, struct ucred *,
+ int, struct buf **);
+
+/* vtop operations */
+int nandfs_vtop(struct nandfs_node *, nandfs_daddr_t, nandfs_daddr_t *);
+
+/* Node action implementers */
+int nandfs_vinit(struct vnode *, uint64_t);
+int nandfs_get_node(struct nandfsmount *, uint64_t, struct nandfs_node **);
+int nandfs_get_node_raw(struct nandfs_device *, struct nandfsmount *, uint64_t,
+ struct nandfs_inode *, struct nandfs_node **);
+void nandfs_dispose_node(struct nandfs_node **);
+
+void nandfs_itimes(struct vnode *);
+int nandfs_lookup_name_in_dir(struct vnode *, const char *, int, uint64_t *,
+ int *, uint64_t *);
+int nandfs_create_node(struct vnode *, struct vnode **, struct vattr *,
+ struct componentname *);
+void nandfs_delete_node(struct nandfs_node *);
+
+int nandfs_chsize(struct vnode *, u_quad_t, struct ucred *);
+int nandfs_dir_detach(struct nandfsmount *, struct nandfs_node *,
+ struct nandfs_node *, struct componentname *);
+int nandfs_dir_attach(struct nandfsmount *, struct nandfs_node *,
+ struct nandfs_node *, struct vattr *, struct componentname *);
+
+int nandfs_dirty_buf(struct buf *, int);
+int nandfs_dirty_buf_meta(struct buf *, int);
+int nandfs_fs_full(struct nandfs_device *);
+void nandfs_undirty_buf_fsdev(struct nandfs_device *, struct buf *);
+void nandfs_undirty_buf(struct buf *);
+
+void nandfs_clear_buf(struct buf *);
+void nandfs_buf_set(struct buf *, uint32_t);
+void nandfs_buf_clear(struct buf *, uint32_t);
+int nandfs_buf_check(struct buf *, uint32_t);
+
+int nandfs_find_free_entry(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *);
+int nandfs_find_entry(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *);
+int nandfs_alloc_entry(struct nandfs_mdt *, struct nandfs_alloc_request *);
+void nandfs_abort_entry(struct nandfs_alloc_request *);
+int nandfs_free_entry(struct nandfs_mdt *, struct nandfs_alloc_request *);
+int nandfs_get_entry_block(struct nandfs_mdt *, struct nandfs_node *,
+ struct nandfs_alloc_request *, uint32_t *, int);
+
+/* Inode management */
+int nandfs_node_create(struct nandfsmount *, struct nandfs_node **, uint16_t);
+int nandfs_node_destroy(struct nandfs_node *);
+int nandfs_node_update(struct nandfs_node *);
+int nandfs_get_node_entry(struct nandfsmount *, struct nandfs_inode **,
+ uint64_t, struct buf **);
+void nandfs_mdt_trans_blk(struct nandfs_mdt *, uint64_t, uint64_t *,
+ uint64_t *, nandfs_lbn_t *, uint32_t *);
+
+/* vblock management */
+void nandfs_mdt_trans(struct nandfs_mdt *, uint64_t, nandfs_lbn_t *, uint32_t *);
+int nandfs_vblock_alloc(struct nandfs_device *, nandfs_daddr_t *);
+int nandfs_vblock_end(struct nandfs_device *, nandfs_daddr_t);
+int nandfs_vblock_assign(struct nandfs_device *, nandfs_daddr_t,
+ nandfs_lbn_t);
+int nandfs_vblock_free(struct nandfs_device *, nandfs_daddr_t);
+
+/* Checkpoint management */
+int nandfs_get_checkpoint(struct nandfs_device *, struct nandfs_node *,
+ uint64_t);
+int nandfs_set_checkpoint(struct nandfs_device *, struct nandfs_node *,
+ uint64_t, struct nandfs_inode *, uint64_t);
+
+/* Segment management */
+int nandfs_alloc_segment(struct nandfs_device *, uint64_t *);
+int nandfs_update_segment(struct nandfs_device *, uint64_t, uint32_t);
+int nandfs_free_segment(struct nandfs_device *, uint64_t);
+int nandfs_clear_segment(struct nandfs_device *, uint64_t);
+int nandfs_touch_segment(struct nandfs_device *, uint64_t);
+int nandfs_markgc_segment(struct nandfs_device *, uint64_t);
+
+int nandfs_bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, struct buf *);
+int nandfs_bmap_update_block(struct nandfs_node *, struct buf *, nandfs_lbn_t);
+int nandfs_bmap_update_dat(struct nandfs_node *, nandfs_daddr_t, struct buf *);
+int nandfs_bmap_dirty_blocks(struct nandfs_node *, struct buf *, int);
+int nandfs_bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t,
+ nandfs_lbn_t);
+int nandfs_bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *);
+
+/* dirent */
+int nandfs_add_dirent(struct vnode *, uint64_t, char *, long, uint8_t);
+int nandfs_remove_dirent(struct vnode *, struct nandfs_node *,
+ struct componentname *);
+int nandfs_update_dirent(struct vnode *, struct nandfs_node *,
+ struct nandfs_node *);
+int nandfs_init_dir(struct vnode *, uint64_t, uint64_t);
+int nandfs_update_parent_dir(struct vnode *, uint64_t);
+
+void nandfs_vblk_set(struct buf *, nandfs_daddr_t);
+nandfs_daddr_t nandfs_vblk_get(struct buf *);
+
+void nandfs_inode_init(struct nandfs_inode *, uint16_t);
+void nandfs_inode_destroy(struct nandfs_inode *);
+
+/* ioctl */
+int nandfs_get_seg_stat(struct nandfs_device *, struct nandfs_seg_stat *);
+int nandfs_chng_cpmode(struct nandfs_node *, struct nandfs_cpmode *);
+int nandfs_get_cpinfo_ioctl(struct nandfs_node *, struct nandfs_argv *);
+int nandfs_delete_cp(struct nandfs_node *, uint64_t start, uint64_t);
+int nandfs_make_snap(struct nandfs_device *, uint64_t *);
+int nandfs_delete_snap(struct nandfs_device *, uint64_t);
+int nandfs_get_cpstat(struct nandfs_node *, struct nandfs_cpstat *);
+int nandfs_get_segment_info_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_dat_vinfo_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_dat_bdescs_ioctl(struct nandfs_device *, struct nandfs_argv *);
+int nandfs_get_fsinfo(struct nandfsmount *, struct nandfs_fsinfo *);
+
+int nandfs_get_cpinfo(struct nandfs_node *, uint64_t, uint16_t,
+ struct nandfs_cpinfo *, uint32_t, uint32_t *);
+
+nandfs_lbn_t nandfs_get_maxfilesize(struct nandfs_device *);
+
+int nandfs_write_superblock(struct nandfs_device *);
+
+extern int nandfs_sync_interval;
+extern int nandfs_max_dirty_segs;
+extern int nandfs_cps_between_sblocks;
+
+struct buf *nandfs_geteblk(int, int);
+
+void nandfs_dirty_bufs_increment(struct nandfs_device *);
+void nandfs_dirty_bufs_decrement(struct nandfs_device *);
+
+int nandfs_start_cleaner(struct nandfs_device *);
+int nandfs_stop_cleaner(struct nandfs_device *);
+
+int nandfs_segsum_valid(struct nandfs_segment_summary *);
+int nandfs_load_segsum(struct nandfs_device *, nandfs_daddr_t,
+ struct nandfs_segment_summary *);
+int nandfs_get_segment_info(struct nandfs_device *, struct nandfs_suinfo *,
+ uint32_t, uint64_t);
+int nandfs_get_segment_info_filter(struct nandfs_device *,
+ struct nandfs_suinfo *, uint32_t, uint64_t, uint64_t *, uint32_t, uint32_t);
+int nandfs_get_dat_vinfo(struct nandfs_device *, struct nandfs_vinfo *,
+ uint32_t);
+int nandfs_get_dat_bdescs(struct nandfs_device *, struct nandfs_bdesc *,
+ uint32_t);
+
+#define NANDFS_VBLK_ASSIGNED 1
+
+#define NANDFS_IS_INDIRECT(bp) ((bp)->b_lblkno < 0)
+
+int nandfs_erase(struct nandfs_device *, off_t, size_t);
+
+#define NANDFS_VOP_ISLOCKED(vp) nandfs_vop_islocked((vp))
+int nandfs_vop_islocked(struct vnode *vp);
+
+nandfs_daddr_t nandfs_block_to_dblock(struct nandfs_device *, nandfs_lbn_t);
+
+/*
+ * Diagnostics policy for nandfs_error()/nandfs_warning():
+ *  - DEBUG_MODE: nandfs_error() is panic(), nandfs_warning() is printf();
+ *  - TEST_MODE:  both are printf();
+ *  - otherwise:  both expand to nothing.
+ *
+ * NOTE(review): DEBUG_MODE is defined unconditionally right here, so the
+ * TEST_MODE and silent variants are never selected and every
+ * nandfs_error() call panics; confirm this is intended.
+ */
+#define DEBUG_MODE
+#if defined(DEBUG_MODE)
+#define nandfs_error panic
+#define nandfs_warning printf
+#elif defined(TEST_MODE)
+#define nandfs_error printf
+#define nandfs_warning printf
+#else
+#define nandfs_error(...)
+#define nandfs_warning(...)
+#endif
+
+#endif /* !_FS_NANDFS_NANDFS_SUBR_H_ */
diff --git a/sys/fs/nandfs/nandfs_sufile.c b/sys/fs/nandfs/nandfs_sufile.c
new file mode 100644
index 0000000..d4f4326
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_sufile.c
@@ -0,0 +1,569 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+/*
+ * Segment usage entry located `offset' bytes into the data area of
+ * buffer `bp'.  Both arguments are parenthesized so that compound
+ * expressions can be passed safely.
+ */
+#define SU_USAGE_OFF(bp, offset) \
+	((struct nandfs_segment_usage *)((bp)->b_data + (offset)))
+
+/*
+ * Locate the usage entry of segment `seg': entries of
+ * f_segment_usage_size bytes each follow the sufile header, whose size
+ * is rounded up to a multiple of the entry size.  The resulting byte
+ * position is split into a block number (*blk) and a byte offset inside
+ * that block (*offset).  Always returns 0.
+ */
+static int
+nandfs_seg_usage_blk_offset(struct nandfs_device *fsdev, uint64_t seg,
+    uint64_t *blk, uint64_t *offset)
+{
+	uint64_t bytepos;
+	uint16_t entry_size;
+
+	entry_size = fsdev->nd_fsdata.f_segment_usage_size;
+
+	bytepos = roundup(sizeof(struct nandfs_sufile_header), entry_size);
+	bytepos += (seg * entry_size);
+
+	*offset = bytepos % fsdev->nd_blocksize;
+	*blk = bytepos / fsdev->nd_blocksize;
+
+	return (0);
+}
+
+/*
+ * Allocate a new segment.
+ *
+ * Scans the segment usage entries, starting right after sh_last_alloc
+ * and wrapping around at f_nsegments, for the first entry whose su_flags
+ * is zero.  That entry gets su_flags = 1, the header counters and the
+ * cached free block/clean segment counts are updated, and the touched
+ * buffers are passed to nandfs_dirty_buf().
+ *
+ * The caller must hold the sufile vnode lock (asserted).
+ *
+ * Returns 0 and stores the segment number in *seg on success, 1 when no
+ * free entry exists, or the error reported by the bmap/buffer helpers.
+ * The header buffer is released on every failure path taken before the
+ * buffers are handed to nandfs_dirty_buf().
+ */
+int
+nandfs_alloc_segment(struct nandfs_device *fsdev, uint64_t *seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_sufile_header *su_header;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp_header, *bp;
+	uint64_t blk, vblk, offset, i, rest, nsegments, scanned;
+	uint16_t seg_size;
+	int error, found;
+
+	seg_size = fsdev->nd_fsdata.f_segment_usage_size;
+	nsegments = fsdev->nd_fsdata.f_nsegments;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	/* Read header buffer */
+	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
+	if (error) {
+		brelse(bp_header);
+		return (error);
+	}
+
+	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
+
+	/* Start right after the last allocated segment */
+	i = su_header->sh_last_alloc + 1;
+
+	found = 0;
+	scanned = 0;
+	bp = NULL;
+	while (!found) {
+		nandfs_seg_usage_blk_offset(fsdev, i, &blk, &offset);
+		if (blk != 0) {
+			/*
+			 * Forget the buffer of the previous pass (already
+			 * released, or the header itself) so the error path
+			 * below can never release a stale pointer.
+			 */
+			bp = NULL;
+			error = nandfs_bmap_lookup(su_node, blk, &vblk);
+			if (error) {
+				nandfs_error("%s: cannot find vblk for blk "
+				    "blk:%jx\n", __func__, (uintmax_t)blk);
+				brelse(bp_header);
+				return (error);
+			}
+			if (vblk)
+				error = nandfs_bread(su_node, blk, NOCRED, 0,
+				    &bp);
+			else
+				error = nandfs_bcreate(su_node, blk, NOCRED, 0,
+				    &bp);
+			if (error) {
+				nandfs_error("%s: cannot create/read "
+				    "vblk:%jx\n", __func__, (uintmax_t)vblk);
+				if (bp)
+					brelse(bp);
+				brelse(bp_header);
+				return (error);
+			}
+
+			su_usage = SU_USAGE_OFF(bp, offset);
+		} else {
+			su_usage = SU_USAGE_OFF(bp_header, offset);
+			bp = bp_header;
+		}
+
+		rest = (fsdev->nd_blocksize - offset) / seg_size;
+		/* Go through all su usage in block */
+		while (rest) {
+			/* Past the last entry: restart from the beginning */
+			if (i == nsegments)
+				break;
+
+			if (!su_usage->su_flags) {
+				/*
+				 * NOTE(review): literal 1, presumably one of
+				 * the NANDFS_SEGMENT_USAGE_* flags - confirm.
+				 */
+				su_usage->su_flags = 1;
+				found = 1;
+				break;
+			}
+			su_usage++;
+			i++;
+			scanned++;
+
+			/*
+			 * Give up once the scan is back at its starting
+			 * point.  The entry counter additionally bounds the
+			 * scan when `i' can never become equal to
+			 * sh_last_alloc again (e.g. sh_last_alloc == 0,
+			 * since `i' is reset to 0 without being compared).
+			 */
+			if (i == su_header->sh_last_alloc ||
+			    scanned >= nsegments) {
+				DPRINTF(SEG, ("%s: cannot allocate segment \n",
+				    __func__));
+				brelse(bp_header);
+				if (blk != 0)
+					brelse(bp);
+				return (1);
+			}
+			rest--;
+		}
+		if (!found) {
+			/* Otherwise move on to another block */
+			if (blk != 0)
+				brelse(bp);
+			/*
+			 * Block and offset are recomputed from `i' at the
+			 * top of the loop, so only the wrap-around needs to
+			 * be handled here.
+			 */
+			if (i == nsegments)
+				i = 0;
+		}
+	}
+
+	/* The loop above is only left with a free entry claimed. */
+	*seg = i;
+	su_header->sh_last_alloc = i;
+	su_header->sh_ncleansegs--;
+	su_header->sh_ndirtysegs++;
+
+	fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
+	    fsdev->nd_fsdata.f_blocks_per_segment;
+	fsdev->nd_clean_segs--;
+
+	/*
+	 * It is mostly called from syncer() so we want to force
+	 * making buf dirty.
+	 */
+	error = nandfs_dirty_buf(bp_header, 1);
+	if (error) {
+		if (bp && bp != bp_header)
+			brelse(bp);
+		return (error);
+	}
+	if (bp && bp != bp_header)
+		nandfs_dirty_buf(bp, 1);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)i));
+
+	return (0);
+}
+
+/*
+ * Read the sufile block that holds the usage entry of segment `seg' and
+ * pass it to nandfs_dirty_buf() with the force flag set; the entry
+ * itself is not modified here.
+ *
+ * The caller must hold the sufile vnode lock (asserted).  Returns 0, or
+ * the nandfs_bread() error after releasing the buffer.
+ */
+int
+nandfs_touch_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *sufile;
+	struct buf *bp;
+	uint64_t blkno, blkoff;
+	int error;
+
+	sufile = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(sufile), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blkno, &blkoff);
+
+	error = nandfs_bread(sufile, blkno, NOCRED, 0, &bp);
+	if (error != 0) {
+		brelse(bp);
+		nandfs_error("%s: cannot preallocate new segment\n", __func__);
+		return (error);
+	}
+
+	nandfs_dirty_buf(bp, 1);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+	return (0);
+}
+
+/*
+ * Add `nblks' to the block count of segment `seg', stamp its su_lastmod
+ * with fsdev->nd_ts.tv_sec and set its flags to
+ * NANDFS_SEGMENT_USAGE_DIRTY.  The modified buffer is passed to
+ * nandfs_dirty_buf() with the force flag set.
+ *
+ * The caller must hold the sufile vnode lock (asserted).  Returns 0, or
+ * the nandfs_bread() error.
+ */
+int
+nandfs_update_segment(struct nandfs_device *fsdev, uint64_t seg, uint32_t nblks)
+{
+	struct nandfs_segment_usage *entry;
+	struct nandfs_node *sufile;
+	struct buf *bp;
+	uint64_t blkno, blkoff;
+	int error;
+
+	sufile = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(sufile), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blkno, &blkoff);
+
+	error = nandfs_bread(sufile, blkno, NOCRED, 0, &bp);
+	if (error != 0) {
+		nandfs_error("%s: read block:%jx to update\n",
+		    __func__, blkno);
+		brelse(bp);
+		return (error);
+	}
+
+	entry = SU_USAGE_OFF(bp, blkoff);
+	entry->su_lastmod = fsdev->nd_ts.tv_sec;
+	entry->su_flags = NANDFS_SEGMENT_USAGE_DIRTY;
+	entry->su_nblocks += nblks;
+
+	DPRINTF(SEG, ("%s: seg:%#jx inc:%#x cur:%#x\n", __func__,
+	    (uintmax_t)seg, nblks, entry->su_nblocks));
+
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Make segment `seg' free: reset its usage entry (su_nblocks and
+ * su_flags to 0, su_lastmod to fsdev->nd_ts.tv_sec), move one segment
+ * from the dirty to the clean counter in the sufile header and refresh
+ * the cached free block / clean segment counts in `fsdev'.
+ *
+ * All reads of the header are finished before the buffers are handed to
+ * nandfs_dirty_buf(), the same order nandfs_alloc_segment() uses.
+ *
+ * The caller must hold the sufile vnode lock (asserted).  Returns 0, or
+ * the nandfs_bread() error after releasing the buffers.
+ */
+int
+nandfs_free_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_node *su_node;
+	struct nandfs_sufile_header *su_header;
+	struct nandfs_segment_usage *su_usage;
+	struct buf *bp_header, *bp;
+	uint64_t blk, offset;
+	int error;
+
+	su_node = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);
+
+	/* Read su header */
+	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
+	if (error) {
+		brelse(bp_header);
+		return (error);
+	}
+
+	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
+
+	/* Read su usage block if other than su header block */
+	if (blk != 0) {
+		error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			brelse(bp_header);
+			return (error);
+		}
+	} else
+		bp = bp_header;
+
+	/* Reset su usage data */
+	su_usage = SU_USAGE_OFF(bp, offset);
+	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
+	su_usage->su_nblocks = 0;
+	su_usage->su_flags = 0;
+
+	/* Update clean/dirty counter in header */
+	su_header->sh_ncleansegs++;
+	su_header->sh_ndirtysegs--;
+
+	/*
+	 * Update free block count while the header buffer is still ours;
+	 * it is not touched again once passed to nandfs_dirty_buf().
+	 */
+	fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
+	    fsdev->nd_fsdata.f_blocks_per_segment;
+	fsdev->nd_clean_segs++;
+
+	/*
+	 * Make buffers dirty, called by cleaner
+	 * so force dirty even if not much space is left
+	 * on device
+	 */
+	nandfs_dirty_buf(bp_header, 1);
+	if (bp != bp_header)
+		nandfs_dirty_buf(bp, 1);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	return (0);
+}
+
+/*
+ * Flag segment `seg' as bad: its su_flags become
+ * NANDFS_SEGMENT_USAGE_ERROR and su_lastmod is set to
+ * fsdev->nd_ts.tv_sec.  The modified buffer is passed to
+ * nandfs_dirty_buf() with the force flag set.
+ *
+ * The caller must hold the sufile vnode lock (asserted).  Returns 0, or
+ * the nandfs_bread() error.
+ */
+static int
+nandfs_bad_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_segment_usage *entry;
+	struct nandfs_node *sufile;
+	struct buf *bp;
+	uint64_t blkno, blkoff;
+	int error;
+
+	sufile = fsdev->nd_su_node;
+	ASSERT_VOP_LOCKED(NTOV(sufile), __func__);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blkno, &blkoff);
+
+	error = nandfs_bread(sufile, blkno, NOCRED, 0, &bp);
+	if (error != 0) {
+		brelse(bp);
+		return (error);
+	}
+
+	entry = SU_USAGE_OFF(bp, blkoff);
+	entry->su_lastmod = fsdev->nd_ts.tv_sec;
+	entry->su_flags = NANDFS_SEGMENT_USAGE_ERROR;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	nandfs_dirty_buf(bp, 1);
+
+	return (0);
+}
+
+/*
+ * Set NANDFS_SEGMENT_USAGE_GC in the usage entry of segment `seg'
+ * (asserting it was not set before).  The sufile vnode is locked
+ * exclusively for the duration of the update.
+ *
+ * NOTE(review): the buffer is released with brelse() and never passed
+ * to nandfs_dirty_buf(), unlike the other updaters in this file;
+ * confirm the flag is meant to live only in the cached buffer.
+ *
+ * Returns 0, or the nandfs_bread() error.
+ */
+int
+nandfs_markgc_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	struct nandfs_segment_usage *entry;
+	struct nandfs_node *sufile;
+	struct buf *bp;
+	uint64_t blkno, blkoff;
+	int error;
+
+	sufile = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(sufile), LK_EXCLUSIVE);
+
+	nandfs_seg_usage_blk_offset(fsdev, seg, &blkno, &blkoff);
+
+	error = nandfs_bread(sufile, blkno, NOCRED, 0, &bp);
+	if (error == 0) {
+		entry = SU_USAGE_OFF(bp, blkoff);
+		MPASS((entry->su_flags & NANDFS_SEGMENT_USAGE_GC) == 0);
+		entry->su_flags |= NANDFS_SEGMENT_USAGE_GC;
+	}
+
+	/* Both outcomes release the buffer and then drop the lock. */
+	brelse(bp);
+	VOP_UNLOCK(NTOV(sufile), 0);
+
+	if (error != 0)
+		return (error);
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	return (0);
+}
+
+/*
+ * Erase the on-device range of segment `seg' and mark the segment free.
+ *
+ * When the erase fails the segment is marked bad instead and is NOT
+ * passed to nandfs_free_segment(): freeing resets su_flags to 0, which
+ * would undo the bad marking just made.
+ *
+ * Returns the result of nandfs_free_segment() after a successful erase,
+ * or the result of nandfs_bad_segment() after a failed one.
+ */
+int
+nandfs_clear_segment(struct nandfs_device *fsdev, uint64_t seg)
+{
+	uint64_t offset, segsize;
+	uint32_t bps, bsize;
+
+	bps = fsdev->nd_fsdata.f_blocks_per_segment;
+	bsize = fsdev->nd_blocksize;
+	/* Widen before multiplying so the product is not cut to 32 bits. */
+	segsize = (uint64_t)bsize * bps;
+	nandfs_get_segment_range(fsdev, seg, &offset, NULL);
+	offset *= bsize;
+
+	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));
+
+	/* Erase it and mark it bad when fail */
+	if (nandfs_erase(fsdev, offset, segsize) != 0)
+		return (nandfs_bad_segment(fsdev, seg));
+
+	/* Mark it free */
+	return (nandfs_free_segment(fsdev, seg));
+}
+
+/*
+ * Fill `nss' with segment statistics: the total from the fsdata, the
+ * clean/dirty counters from the sufile header, nss_ctime = 0, and the
+ * device's nd_ts.tv_sec and nd_seg_sequence.
+ *
+ * Takes the device write lock and a shared sufile vnode lock while
+ * reading.  Returns 0 on success and -1 when the header cannot be read.
+ */
+int
+nandfs_get_seg_stat(struct nandfs_device *nandfsdev,
+    struct nandfs_seg_stat *nss)
+{
+	struct nandfs_sufile_header *hdr;
+	struct nandfs_node *sufile;
+	struct buf *bp;
+	int rv;
+
+	sufile = nandfsdev->nd_su_node;
+	rv = 0;
+
+	NANDFS_WRITELOCK(nandfsdev);
+	VOP_LOCK(NTOV(sufile), LK_SHARED);
+
+	if (nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp) != 0) {
+		rv = -1;
+	} else {
+		hdr = (struct nandfs_sufile_header *)bp->b_data;
+		nss->nss_nsegs = nandfsdev->nd_fsdata.f_nsegments;
+		nss->nss_ncleansegs = hdr->sh_ncleansegs;
+		nss->nss_ndirtysegs = hdr->sh_ndirtysegs;
+		nss->nss_ctime = 0;
+		nss->nss_nongc_ctime = nandfsdev->nd_ts.tv_sec;
+		nss->nss_prot_seq = nandfsdev->nd_seg_sequence;
+	}
+
+	/* Common teardown: buffer first, then the locks in reverse order. */
+	brelse(bp);
+	VOP_UNLOCK(NTOV(sufile), 0);
+	NANDFS_WRITEUNLOCK(nandfsdev);
+
+	return (rv);
+}
+
+/*
+ * ioctl backend: collect up to nargv->nv_nmembs segment info records,
+ * starting at segment nargv->nv_index, into a zeroed temporary array and
+ * copy the whole array out to the address in nargv->nv_base.
+ *
+ * Returns EINVAL when more than NANDFS_SEGMENTS_MAX records are
+ * requested, otherwise the result of the lookup or of copyout().
+ */
+int
+nandfs_get_segment_info_ioctl(struct nandfs_device *fsdev,
+    struct nandfs_argv *nargv)
+{
+	struct nandfs_suinfo *records;
+	size_t nbytes;
+	int error;
+
+	if (nargv->nv_nmembs > NANDFS_SEGMENTS_MAX)
+		return (EINVAL);
+
+	nbytes = sizeof(struct nandfs_suinfo) * nargv->nv_nmembs;
+	records = malloc(nbytes, M_NANDFSTEMP, M_WAITOK | M_ZERO);
+
+	error = nandfs_get_segment_info(fsdev, records, nargv->nv_nmembs,
+	    nargv->nv_index);
+	if (error == 0) {
+		error = copyout(records, (void *)(uintptr_t)nargv->nv_base,
+		    nbytes);
+	}
+
+	free(records, M_NANDFSTEMP);
+	return (error);
+}
+
+/*
+ * Unfiltered variant of nandfs_get_segment_info_filter(): no segment
+ * counter is requested and both flag filters are 0.
+ */
+int
+nandfs_get_segment_info(struct nandfs_device *fsdev, struct nandfs_suinfo *nsi,
+    uint32_t nmembs, uint64_t segment)
+{
+	int error;
+
+	error = nandfs_get_segment_info_filter(fsdev, nsi, nmembs, segment,
+	    NULL, 0, 0);
+	return (error);
+}
+
+/*
+ * Collect information about segments, starting at `segment', into the
+ * array `nsi' until `nmembs' records are stored or f_nsegments is
+ * reached.
+ *
+ * The segments equal to nd_seg_num or nd_next_seg_num are reported with
+ * NANDFS_SEGMENT_USAGE_ACTIVE added to their flags.  A segment is
+ * skipped when `nfilter' is non-zero and shares a bit with its flags, or
+ * when `filter' is non-zero and shares no bit with its flags.
+ *
+ * When `nsegs' is not NULL it receives the number of records stored.
+ * Runs with nd_seg_const held exclusively and the sufile vnode locked
+ * shared.  Returns 0 or the nandfs_bread() error.
+ */
+int
+nandfs_get_segment_info_filter(struct nandfs_device *fsdev,
+    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment,
+    uint64_t *nsegs, uint32_t filter, uint32_t nfilter)
+{
+	struct nandfs_segment_usage *entry;
+	struct nandfs_node *sufile;
+	struct nandfs_suinfo *out;
+	struct buf *bp;
+	uint64_t cached_blk, blkno, blkoff, stored;
+	uint32_t flags;
+	int error;
+
+	error = 0;
+	cached_blk = ~(0);
+	out = nsi;
+	bp = NULL;
+
+	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
+	sufile = fsdev->nd_su_node;
+
+	VOP_LOCK(NTOV(sufile), LK_SHARED);
+
+	if (nsegs != NULL)
+		*nsegs = 0;
+
+	for (stored = 0; stored < nmembs &&
+	    segment != fsdev->nd_fsdata.f_nsegments; segment++) {
+		nandfs_seg_usage_blk_offset(fsdev, segment, &blkno,
+		    &blkoff);
+
+		/*
+		 * (Re)read the block when it differs from the cached one;
+		 * it is also re-read for as long as nothing has been
+		 * stored yet.
+		 */
+		if (stored == 0 || cached_blk != blkno) {
+			if (bp != NULL)
+				brelse(bp);
+			error = nandfs_bread(sufile, blkno, NOCRED,
+			    0, &bp);
+			if (error != 0)
+				break;
+			cached_blk = blkno;
+		}
+
+		entry = SU_USAGE_OFF(bp, blkoff);
+		flags = entry->su_flags;
+		if (segment == fsdev->nd_seg_num ||
+		    segment == fsdev->nd_next_seg_num)
+			flags |= NANDFS_SEGMENT_USAGE_ACTIVE;
+
+		/* Excluded by the negative or by the positive filter? */
+		if ((nfilter != 0 && (flags & nfilter) != 0) ||
+		    (filter != 0 && (flags & filter) == 0))
+			continue;
+
+		out->nsi_num = segment;
+		out->nsi_lastmod = entry->su_lastmod;
+		out->nsi_blocks = entry->su_nblocks;
+		out->nsi_flags = flags;
+		out++;
+		stored++;
+		if (nsegs != NULL)
+			(*nsegs)++;
+	}
+
+	if (bp != NULL)
+		brelse(bp);
+	VOP_UNLOCK(NTOV(sufile), 0);
+	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);
+
+	return (error);
+}
diff --git a/sys/fs/nandfs/nandfs_vfsops.c b/sys/fs/nandfs/nandfs_vfsops.c
new file mode 100644
index 0000000..ba53546
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_vfsops.c
@@ -0,0 +1,1590 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+
+#include <geom/geom.h>
+#include <geom/geom_vfs.h>
+
+#include <machine/_inttypes.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure");
+
+#define NANDFS_SET_SYSTEMFILE(vp) { \
+ (vp)->v_vflag |= VV_SYSTEM; \
+ vref(vp); \
+ vput(vp); }
+
+#define NANDFS_UNSET_SYSTEMFILE(vp) { \
+ VOP_LOCK(vp, LK_EXCLUSIVE); \
+ MPASS(vp->v_bufobj.bo_dirty.bv_cnt == 0); \
+ (vp)->v_vflag &= ~VV_SYSTEM; \
+ vgone(vp); \
+ vput(vp); }
+
+/* Globals */
+struct _nandfs_devices nandfs_devices;
+
+/* Parameters */
+int nandfs_verbose = 0;
+
+/*
+ * Fetch the "vfs.nandfs.verbose" tunable into nandfs_verbose.
+ * Registered below through SYSINIT at SI_SUB_VFS / SI_ORDER_ANY; the
+ * `arg' parameter is not used.
+ */
+static void
+nandfs_tunable_init(void *arg)
+{
+
+ TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose);
+}
+SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL);
+
+static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem");
+static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0,
+ "NANDFS mountpoints");
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, "");
+
+/*
+ * Run-time knobs exported under vfs.nandfs.  The backing variables are
+ * plain `int', so they are exported with SYSCTL_INT (as nandfs_verbose
+ * already is) rather than SYSCTL_UINT.
+ */
+#define NANDFS_CONSTR_INTERVAL 5
+int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW,
+    &nandfs_sync_interval, 0, "");
+
+#define NANDFS_MAX_DIRTY_SEGS 5
+int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW,
+    &nandfs_max_dirty_segs, 0, "");
+
+#define NANDFS_CPS_BETWEEN_SBLOCKS 5
+/* write superblock every 5 checkpoints */
+int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS;
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW,
+    &nandfs_cps_between_sblocks, 0, "");
+
+#define NANDFS_CLEANER_ENABLE 1
+int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE;
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW,
+    &nandfs_cleaner_enable, 0, "");
+
+#define NANDFS_CLEANER_INTERVAL 5
+int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL;
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW,
+    &nandfs_cleaner_interval, 0, "");
+
+#define NANDFS_CLEANER_SEGMENTS 5
+int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS;
+SYSCTL_INT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW,
+    &nandfs_cleaner_segments, 0, "");
+
+static int nandfs_mountfs(struct vnode *devvp, struct mount *mp);
+static vfs_mount_t nandfs_mount;
+static vfs_root_t nandfs_root;
+static vfs_statfs_t nandfs_statfs;
+static vfs_unmount_t nandfs_unmount;
+static vfs_vget_t nandfs_vget;
+static vfs_sync_t nandfs_sync;
+/*
+ * NULL-terminated list of option names.
+ * NOTE(review): presumably the mount options accepted by nandfs_mount();
+ * the consumer is outside this chunk - confirm.
+ */
+static const char *nandfs_opts[] = {
+ "snap", "from", "noatime", NULL
+};
+
+/*
+ * Create the system nodes of a device: DAT, cpfile and sufile from the
+ * inodes stored in the super root, plus the GC node (created with a NULL
+ * inode).  On success each vnode is flagged with NANDFS_SET_SYSTEMFILE().
+ *
+ * On the first failure all four node pointers are passed to
+ * nandfs_dispose_node() and the error is returned; otherwise returns 0.
+ */
+static int
+nandfs_create_system_nodes(struct nandfs_device *nandfsdev)
+{
+	int error;
+
+	/* Each step runs only when all previous ones succeeded. */
+	error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO,
+	    &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node);
+	if (error == 0)
+		error = nandfs_get_node_raw(nandfsdev, NULL,
+		    NANDFS_CPFILE_INO, &nandfsdev->nd_super_root.sr_cpfile,
+		    &nandfsdev->nd_cp_node);
+	if (error == 0)
+		error = nandfs_get_node_raw(nandfsdev, NULL,
+		    NANDFS_SUFILE_INO, &nandfsdev->nd_super_root.sr_sufile,
+		    &nandfsdev->nd_su_node);
+	if (error == 0)
+		error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO,
+		    NULL, &nandfsdev->nd_gc_node);
+
+	if (error != 0) {
+		nandfs_dispose_node(&nandfsdev->nd_gc_node);
+		nandfs_dispose_node(&nandfsdev->nd_dat_node);
+		nandfs_dispose_node(&nandfsdev->nd_cp_node);
+		nandfs_dispose_node(&nandfsdev->nd_su_node);
+		return (error);
+	}
+
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
+	NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
+
+	DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n",
+	    NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node),
+	    NTOV(nandfsdev->nd_su_node)));
+
+	return (0);
+}
+
+/*
+ * Release the system vnodes (GC, DAT, cpfile, sufile - in that order)
+ * of a device.  Does nothing for a NULL device or while nd_refcnt is
+ * still positive; node pointers that are NULL are skipped.
+ */
+static void
+nandfs_release_system_nodes(struct nandfs_device *nandfsdev)
+{
+
+	if (nandfsdev == NULL || nandfsdev->nd_refcnt > 0)
+		return;
+
+	if (nandfsdev->nd_gc_node != NULL) {
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
+	}
+	if (nandfsdev->nd_dat_node != NULL) {
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
+	}
+	if (nandfsdev->nd_cp_node != NULL) {
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
+	}
+	if (nandfsdev->nd_su_node != NULL) {
+		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
+	}
+}
+
+/*
+ * Validate an fsdata structure.  Returns 0 when f_magic is not
+ * NANDFS_FSDATA_MAGIC; otherwise returns non-zero iff the crc32 over
+ * f_bytes bytes of the structure, computed with f_sum zeroed, equals the
+ * stored f_sum.  f_sum is restored before returning.
+ */
+static int
+nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
+{
+	uint32_t stored, computed;
+
+	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
+		return (0);
+
+	/* The checksum field must read as zero while summing. */
+	stored = fsdata->f_sum;
+	fsdata->f_sum = 0;
+	computed = crc32((uint8_t *)fsdata, fsdata->f_bytes);
+	fsdata->f_sum = stored;
+
+	return (computed == stored);
+}
+
+/*
+ * Validate a super block.  Returns 0 when s_magic is not
+ * NANDFS_SUPER_MAGIC; otherwise returns non-zero iff the crc32 over
+ * fsdata->f_sbbytes bytes of the super block, computed with s_sum
+ * zeroed, equals the stored s_sum.  s_sum is restored before returning.
+ */
+static int
+nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
+    struct nandfs_super_block *super)
+{
+	uint32_t stored, computed;
+
+	if (super->s_magic != NANDFS_SUPER_MAGIC)
+		return (0);
+
+	/* The checksum field must read as zero while summing. */
+	stored = super->s_sum;
+	super->s_sum = 0;
+	computed = crc32((uint8_t *)super, fsdata->f_sbbytes);
+	super->s_sum = stored;
+
+	return (computed == stored);
+}
+
+/*
+ * Compute the crc32 over fsdata->f_sbbytes bytes of the super block with
+ * s_sum zeroed, and store the result in s_sum.
+ */
+static void
+nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata,
+    struct nandfs_super_block *super)
+{
+
+	super->s_sum = 0;
+	super->s_sum = crc32((uint8_t *)super, fsdata->f_sbbytes);
+}
+
+/*
+ * Return 1 when the first `size' bytes of `area' are all 0xff (also for
+ * size <= 0), 0 as soon as a different byte is seen.
+ */
+static int
+nandfs_is_empty(u_char *area, int size)
+{
+	int left;
+
+	for (left = size; left > 0; left--) {
+		if (*area++ != 0xff)
+			return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Number of struct nandfs_super_block sized slots that fit into one
+ * erase unit once NANDFS_SBLOCK_OFFSET_BYTES are subtracted from it.
+ */
+static __inline int
+nandfs_sblocks_in_esize(struct nandfs_device *fsdev)
+{
+
+ return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) /
+ sizeof(struct nandfs_super_block));
+}
+
+/*
+ * Total number of super block slots across all NANDFS_NFSAREAS fs areas.
+ */
+static __inline int
+nandfs_max_sblocks(struct nandfs_device *fsdev)
+{
+	int per_area;
+
+	per_area = nandfs_sblocks_in_esize(fsdev);
+
+	return (NANDFS_NFSAREAS * per_area);
+}
+
+/*
+ * Number of whole super blocks that fit in one device block
+ * (nd_devblocksize bytes).
+ */
+static __inline int
+nandfs_sblocks_in_block(struct nandfs_device *fsdev)
+{
+	const size_t sb_size = sizeof(struct nandfs_super_block);
+
+	return (fsdev->nd_devblocksize / sb_size);
+}
+
+/*
+ * Number of super block slots left in the first device block after
+ * subtracting the slots covered by NANDFS_SBLOCK_OFFSET_BYTES; never
+ * negative.
+ */
+static __inline int
+nandfs_sblocks_in_first_block(struct nandfs_device *fsdev)
+{
+	int n;
+
+	n = nandfs_sblocks_in_block(fsdev) -
+	    NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block);
+
+	return (n < 0 ? 0 : n);
+}
+
+/*
+ * Store the in-core super block (fsdev->nd_super) in the next free slot of
+ * the fs area described by fstp.
+ *
+ * Slots are used round-robin: after the last slot of the erase block the
+ * index wraps to 0, in which case the whole area is erased and the fsdata
+ * record is rewritten at its start before the super block is stored.
+ * On success fstp->last_used is updated to the slot just written.
+ *
+ * Returns 0 on success or the error from nandfs_erase()/bread()/bwrite().
+ */
+static int
+nandfs_write_superblock_at(struct nandfs_device *fsdev,
+ struct nandfs_fsarea *fstp)
+{
+ struct nandfs_super_block *super, *supert;
+ struct buf *bp;
+ int sb_per_sector, sbs_in_fsd, read_block;
+ int index, pos, error;
+ off_t offset;
+
+ DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n",
+ __func__, fstp->last_used, nandfs_sblocks_in_esize(fsdev)));
+ /* Pick the slot after the last used one, wrapping at the end. */
+ if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1)
+ index = 0;
+ else
+ index = fstp->last_used + 1;
+
+ super = &fsdev->nd_super;
+ supert = NULL;
+
+ sb_per_sector = nandfs_sblocks_in_block(fsdev);
+ /* Slots occupied by the fsdata record at the start of the area. */
+ sbs_in_fsd = sizeof(struct nandfs_fsdata) /
+ sizeof(struct nandfs_super_block);
+ index += sbs_in_fsd;
+ offset = fstp->offset;
+
+ DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx "
+ "s_last_seq %#jx wtime %jd index %d\n", __func__, offset,
+ super->s_last_pseg, super->s_last_cno, super->s_last_seq,
+ super->s_wtime, index));
+
+ /* Device block that contains slot `index'. */
+ /* NOTE(review): read_block is an int holding a value derived from off_t -- confirm it cannot truncate. */
+ read_block = btodb(offset + ((index / sb_per_sector) * sb_per_sector)
+ * sizeof(struct nandfs_super_block));
+
+ DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block));
+
+ /* Wrapped to the first slot: erase the area and rewrite fsdata. */
+ if (index == sbs_in_fsd) {
+ error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize);
+ if (error)
+ return (error);
+
+ error = bread(fsdev->nd_devvp, btodb(offset),
+ fsdev->nd_devblocksize, NOCRED, &bp);
+ if (error) {
+ printf("NANDFS: couldn't read initial data: %d\n",
+ error);
+ brelse(bp);
+ return (error);
+ }
+ memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata));
+ /*
+ * 0xff-out the rest. This bp could be cached, so potentially
+ * b_data contains stale super blocks.
+ *
+ * We don't mind cached bp since most of the time we just add
+ * super blocks to already 0xff-out b_data and don't need to
+ * perform actual read.
+ */
+ if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata))
+ memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff,
+ fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata));
+ error = bwrite(bp);
+ if (error) {
+ printf("NANDFS: cannot rewrite initial data at %jx\n",
+ offset);
+ return (error);
+ }
+ }
+
+ error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize,
+ NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ /* Position of the slot inside the device block just read. */
+ supert = (struct nandfs_super_block *)(bp->b_data);
+ pos = index % sb_per_sector;
+
+ DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos));
+ memcpy(&supert[pos], super, sizeof(struct nandfs_super_block));
+
+ /*
+ * See comment above in code that performs erase.
+ */
+ if (pos == 0)
+ memset(&supert[1], 0xff,
+ (sb_per_sector - 1) * sizeof(struct nandfs_super_block));
+
+ error = bwrite(bp);
+ if (error) {
+ printf("NANDFS: cannot update superblock at %jx\n", offset);
+ return (error);
+ }
+
+ DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__,
+ fstp->last_used, index - sbs_in_fsd));
+ fstp->last_used = index - sbs_in_fsd;
+
+ return (0);
+}
+
+/*
+ * Refresh the in-core super block from the device's running state
+ * (last partial segment, last checkpoint number, segment sequence, current
+ * time), recompute its checksum and write it to one fs area.
+ *
+ * Areas are tried in round-robin order starting at nd_last_fsarea; areas
+ * flagged NANDFS_FSSTOR_FAILED are skipped and an area whose write fails is
+ * flagged.  After a successful write nd_last_fsarea advances to the next
+ * area.
+ *
+ * Returns 0 on success.  If no area could be written, returns the last
+ * write error, or EIO when every area had already been marked failed.
+ */
+int
+nandfs_write_superblock(struct nandfs_device *fsdev)
+{
+	struct nandfs_super_block *super;
+	struct timespec ts;
+	int error;
+	int i, j;
+
+	vfs_timestamp(&ts);
+
+	super = &fsdev->nd_super;
+
+	super->s_last_pseg = fsdev->nd_last_pseg;
+	super->s_last_cno = fsdev->nd_last_cno;
+	super->s_last_seq = fsdev->nd_seg_sequence;
+	super->s_wtime = ts.tv_sec;
+
+	nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super);
+
+	error = 0;
+	/*
+	 * The parentheses around (j + 1) matter: `j + 1 % N' parses as
+	 * `j + (1 % N)' and would walk j past the end of nd_fsarea[].
+	 */
+	for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS;
+	    i++, j = (j + 1) % NANDFS_NFSAREAS) {
+		if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) {
+			DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j));
+			continue;
+		}
+		error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]);
+		if (error) {
+			printf("NANDFS: writing superblock at offset %d failed:"
+			    "%d\n", j * fsdev->nd_erasesize, error);
+			fsdev->nd_fsarea[j].flags |= NANDFS_FSSTOR_FAILED;
+		} else
+			break;
+	}
+
+	if (i == NANDFS_NFSAREAS) {
+		printf("NANDFS: superblock was not written\n");
+		/*
+		 * TODO: switch to read-only?
+		 */
+		/* Every area was skipped: still report a failure. */
+		return (error != 0 ? error : EIO);
+	}
+
+	fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS;
+
+	return (0);
+}
+
+/*
+ * Pick the first of the nfsds candidate fsdata records in fsdatat that
+ * passes nandfs_check_fsdata_crc().
+ *
+ * On success *fsdata points at the chosen record and 0 is returned;
+ * otherwise *fsdata is NULL and EINVAL is returned.
+ */
+static int
+nandfs_select_fsdata(struct nandfs_device *fsdev,
+    struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds)
+{
+	int i;
+
+	*fsdata = NULL;
+	for (i = 0; i < nfsds; i++) {
+		DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__,
+		    i, fsdatat[i].f_magic, fsdatat[i].f_sum));
+		if (nandfs_check_fsdata_crc(&fsdatat[i])) {
+			*fsdata = &fsdatat[i];
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Among the nsbs candidate super blocks in supert, choose the one that
+ * passes nandfs_check_superblock_crc() and has the greatest s_last_cno
+ * (the earliest such candidate wins a tie).
+ *
+ * On success *super points at the chosen block and 0 is returned;
+ * otherwise *super is NULL and EINVAL is returned.
+ */
+static int
+nandfs_select_sb(struct nandfs_device *fsdev,
+    struct nandfs_super_block *supert, struct nandfs_super_block **super,
+    int nsbs)
+{
+	struct nandfs_super_block *best, *cand;
+	int i;
+
+	best = NULL;
+	for (i = 0; i < nsbs; i++) {
+		cand = &supert[i];
+		if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata, cand))
+			continue;
+		DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x "
+		    "s_wtime %jd\n", __func__, i, cand->s_last_cno,
+		    cand->s_magic, cand->s_wtime));
+		if (best == NULL || cand->s_last_cno > best->s_last_cno)
+			best = cand;
+	}
+	*super = best;
+
+	return (best != NULL ? 0 : EINVAL);
+}
+
+/*
+ * Read one fs area (one erase block starting at fstp->offset) from the
+ * device.
+ *
+ * The leading fsdata record is copied to *fsdata and everything after it is
+ * copied to the array starting at `super'.  The area is read in chunks of
+ * at most MAXBSIZE bytes.  Afterwards fstp->last_used is set to the index
+ * of the last slot that is not entirely 0xff (-1 when all slots are empty).
+ *
+ * Returns 0 on success; on a read error the area is flagged
+ * NANDFS_FSSTOR_FAILED and the bread() error is returned.
+ */
+static int
+nandfs_read_structures_at(struct nandfs_device *fsdev,
+ struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata,
+ struct nandfs_super_block *super)
+{
+ struct nandfs_super_block *tsuper, *tsuperd;
+ struct buf *bp;
+ int error, read_size;
+ int i;
+ int offset;
+
+ /* NOTE(review): fstp->offset is kept in an int here but in an off_t by the write path -- confirm it cannot truncate. */
+ offset = fstp->offset;
+
+ /* Read the erase block in pieces no larger than MAXBSIZE. */
+ if (fsdev->nd_erasesize > MAXBSIZE)
+ read_size = MAXBSIZE;
+ else
+ read_size = fsdev->nd_erasesize;
+
+ error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp);
+ if (error) {
+ printf("couldn't read: %d\n", error);
+ brelse(bp);
+ fstp->flags |= NANDFS_FSSTOR_FAILED;
+ return (error);
+ }
+
+ tsuper = super;
+
+ /* First chunk: fsdata record followed by the first super blocks. */
+ memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata));
+ memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)),
+ read_size - sizeof(struct nandfs_fsdata));
+ brelse(bp);
+
+ tsuper += (read_size - sizeof(struct nandfs_fsdata)) /
+ sizeof(struct nandfs_super_block);
+
+ /* Remaining chunks hold super blocks only. */
+ for (i = 1; i < fsdev->nd_erasesize / read_size; i++) {
+ error = bread(fsdev->nd_devvp, btodb(offset + i * read_size),
+ read_size, NOCRED, &bp);
+ if (error) {
+ printf("couldn't read: %d\n", error);
+ brelse(bp);
+ fstp->flags |= NANDFS_FSSTOR_FAILED;
+ return (error);
+ }
+ memcpy(tsuper, bp->b_data, read_size);
+ tsuper += read_size / sizeof(struct nandfs_super_block);
+ brelse(bp);
+ }
+
+ /*
+ * Walk backwards from the last slot read, decrementing last_used for
+ * every trailing slot that is still all 0xff.
+ */
+ tsuper -= 1;
+ fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1;
+ for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) {
+ if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper)))
+ fstp->last_used--;
+ else
+ break;
+ }
+
+ DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used));
+
+ return (0);
+}
+
+/*
+ * Read the fsdata records and super blocks of every fs area, then copy the
+ * selected fsdata (nandfs_select_fsdata()) and super block
+ * (nandfs_select_sb()) into fsdev->nd_fsdata and fsdev->nd_super.
+ *
+ * Each area's offset is initialised here, and nd_last_fsarea is moved to an
+ * area whose last_used is lower than that of the current nd_last_fsarea.
+ *
+ * Returns 0 on success, or EINVAL when no super block slot was in use or no
+ * candidate passed validation.
+ */
+static int
+nandfs_read_structures(struct nandfs_device *fsdev)
+{
+	struct nandfs_fsdata *fsdata, *fsdatat;
+	struct nandfs_super_block *sblocks, *ssblock;
+	struct nandfs_fsarea *area;
+	int nsbs, nfsds, i;
+	int error = 0;
+	int nrsbs;
+
+	nfsds = NANDFS_NFSAREAS;
+	nsbs = nandfs_max_sblocks(fsdev);
+
+	/* Scratch arrays for every candidate record of every area. */
+	fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, M_NANDFSTEMP,
+	    M_WAITOK | M_ZERO);
+	sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP,
+	    M_WAITOK | M_ZERO);
+
+	nrsbs = 0;
+	for (i = 0; i < NANDFS_NFSAREAS; i++) {
+		area = &fsdev->nd_fsarea[i];
+		area->offset = i * fsdev->nd_erasesize;
+		error = nandfs_read_structures_at(fsdev, area, &fsdatat[i],
+		    sblocks + nrsbs);
+		if (error)
+			continue;
+		nrsbs += (area->last_used + 1);
+		if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used >
+		    area->last_used)
+			fsdev->nd_last_fsarea = i;
+	}
+
+	if (nrsbs == 0) {
+		printf("nandfs: no valid superblocks found\n");
+		error = EINVAL;
+	} else {
+		error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds);
+		if (error == 0) {
+			memcpy(&fsdev->nd_fsdata, fsdata,
+			    sizeof(struct nandfs_fsdata));
+			error = nandfs_select_sb(fsdev, sblocks, &ssblock,
+			    nsbs);
+			if (error == 0)
+				memcpy(&fsdev->nd_super, ssblock,
+				    sizeof(struct nandfs_super_block));
+		}
+	}
+
+	free(fsdatat, M_NANDFSTEMP);
+	free(sblocks, M_NANDFSTEMP);
+
+	if (error == 0)
+		DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd "
+		    "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime,
+		    fsdev->nd_super.s_last_pseg));
+
+	return (error);
+}
+
+/*
+ * Tear down the base state of a device: flush and invalidate the buffers of
+ * the device vnode, then release the system nodes.  A flush failure is only
+ * reported; teardown continues regardless.  A NULL device is ignored.
+ */
+static void
+nandfs_unmount_base(struct nandfs_device *nandfsdev)
+{
+	int error;
+
+	if (nandfsdev == NULL)
+		return;
+
+	/* Remove all our information */
+	error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0);
+	if (error != 0) {
+		/*
+		 * Nothing more can be done about a failed flush while
+		 * unmounting: report it and carry on.
+		 */
+		nandfs_error("%s(): error:%d when umounting FS\n",
+		    __func__, error);
+	}
+
+	/* Release the device's system nodes */
+	nandfs_release_system_nodes(nandfsdev);
+}
+
+/*
+ * Cache the number of clean segments reported by nandfs_get_seg_stat() in
+ * nandfsdev->nd_clean_segs.
+ */
+static void
+nandfs_get_ncleanseg(struct nandfs_device *nandfsdev)
+{
+	struct nandfs_seg_stat stat;
+
+	nandfs_get_seg_stat(nandfsdev, &stat);
+	nandfsdev->nd_clean_segs = stat.nss_ncleansegs;
+
+	DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n",
+	    (uintmax_t)nandfsdev->nd_clean_segs));
+}
+
+
+/*
+ * Bring up the per-device base state for a mount.
+ *
+ * Flushes stale buffers of the device vnode, reads and validates the
+ * on-media fsdata and super block, checks revision and erase size, derives
+ * the block size and MDT constants, locates the super root, loads the
+ * running segment/checkpoint values and creates the system nodes.  On
+ * success the clean segment count is cached as well.
+ *
+ * Returns 0 on success, EINVAL for an unsupported or damaged file system,
+ * or the error of the failing step.
+ */
+static int
+nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp,
+    struct nandfs_args *args)
+{
+	uint32_t log_blocksize;
+	int error;
+
+	/* Flush out any old buffers remaining from a previous use. */
+	if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0)))
+		return (error);
+
+	error = nandfs_read_structures(nandfsdev);
+	if (error) {
+		printf("nandfs: could not get valid filesystem structures\n");
+		return (error);
+	}
+
+	if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) {
+		printf("nandfs: unsupported file system revision: %d "
+		    "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level,
+		    NANDFS_CURRENT_REV);
+		return (EINVAL);
+	}
+
+	if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) {
+		printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n",
+		    nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize);
+		return (EINVAL);
+	}
+
+	/* Get our blocksize */
+	log_blocksize = nandfsdev->nd_fsdata.f_log_block_size;
+	nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10);
+	DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__,
+	    nandfsdev->nd_blocksize));
+
+	DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__,
+	    (uintmax_t)nandfsdev->nd_super.s_last_cno));
+
+	/* Calculate dat structure parameters */
+	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt,
+	    nandfsdev->nd_fsdata.f_dat_entry_size);
+	nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt,
+	    nandfsdev->nd_fsdata.f_inode_size);
+
+	/* Search for the super root and roll forward when needed */
+	if (nandfs_search_super_root(nandfsdev)) {
+		printf("Cannot find valid SuperRoot\n");
+		return (EINVAL);
+	}
+
+	nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state;
+	if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) {
+		printf("FS is seriously damaged, needs repairing\n");
+		printf("aborting mount\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * FS should be ok now. The superblock and the last segsum could be
+	 * updated from the repair so extract running values again.
+	 */
+	nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg;
+	nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq;
+	nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev,
+	    nandfsdev->nd_last_pseg);
+	nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev,
+	    nandfsdev->nd_last_segsum.ss_next);
+	nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create;
+	nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno;
+	nandfsdev->nd_fakevblk = 1;
+	nandfsdev->nd_last_ino = NANDFS_USER_INO;
+	DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n"
+	    "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx\n",
+	    __func__, (uintmax_t)nandfsdev->nd_last_pseg,
+	    (uintmax_t)nandfsdev->nd_last_cno,
+	    (uintmax_t)nandfsdev->nd_seg_sequence,
+	    (uintmax_t)nandfsdev->nd_seg_sequence,
+	    (uintmax_t)nandfsdev->nd_seg_num,
+	    (uintmax_t)nandfsdev->nd_next_seg_num));
+
+	DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n"));
+
+	/* Create system vnodes for DAT, CP and SEGSUM */
+	error = nandfs_create_system_nodes(nandfsdev);
+	if (error) {
+		/*
+		 * The system nodes are gone at this point, so do not go on
+		 * to query segment statistics through them.
+		 */
+		nandfs_unmount_base(nandfsdev);
+		return (error);
+	}
+
+	nandfs_get_ncleanseg(nandfsdev);
+
+	return (0);
+}
+
+/*
+ * Drop one reference on a nandfs device and, when it was the last one,
+ * tear the device down: unmount the base, unlink it from nandfs_devices,
+ * close the GEOM consumer, release the cdev and the device vnode, destroy
+ * the locks and free the structure.  A NULL device is ignored.
+ */
+static void
+nandfs_unmount_device(struct nandfs_device *nandfsdev)
+{
+
+	/* Is there anything? */
+	if (nandfsdev == NULL)
+		return;
+
+	/* Remove the device only if we're the last reference */
+	nandfsdev->nd_refcnt--;
+	if (nandfsdev->nd_refcnt >= 1)
+		return;
+
+	MPASS(nandfsdev->nd_syncer == NULL);
+	MPASS(nandfsdev->nd_cleaner == NULL);
+	MPASS(nandfsdev->nd_free_base == NULL);
+
+	/* Unmount our base */
+	nandfs_unmount_base(nandfsdev);
+
+	/* Remove from our device list */
+	SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device);
+
+	DROP_GIANT();
+	g_topology_lock();
+	g_vfs_close(nandfsdev->nd_gconsumer);
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	DPRINTF(VOLUMES, ("closing device\n"));
+
+	/*
+	 * Release the cdev while our vnode reference still keeps
+	 * nd_devvp->v_rdev valid; only then drop the vnode reference.
+	 */
+	dev_rel(nandfsdev->nd_devvp->v_rdev);
+
+	/* Clear our mount reference and release device node */
+	vrele(nandfsdev->nd_devvp);
+
+	/* Free our device info */
+	cv_destroy(&nandfsdev->nd_sync_cv);
+	mtx_destroy(&nandfsdev->nd_sync_mtx);
+	cv_destroy(&nandfsdev->nd_clean_cv);
+	mtx_destroy(&nandfsdev->nd_clean_mtx);
+	mtx_destroy(&nandfsdev->nd_mutex);
+	lockdestroy(&nandfsdev->nd_seg_const);
+	free(nandfsdev, M_NANDFSMNT);
+}
+
+/*
+ * Decide whether another mount of an already mounted device is allowed.
+ *
+ * Returns EBUSY when the requested checkpoint is already mounted, or when
+ * a read/write mount is requested while an existing mount's checkpoint
+ * number equals the last checkpoint; EROFS when a read/write mount is
+ * requested while another read/write mount exists; 0 otherwise.
+ */
+static int
+nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp,
+    struct nandfs_args *args)
+{
+	struct nandfsmount *other;
+	uint64_t last_cno;
+
+	/* The same checkpoint may be mounted only once. */
+	STAILQ_FOREACH(other, &nandfsdev->nd_mounts, nm_next_mount) {
+		if (other->nm_mount_args.cpno == args->cpno)
+			return (EBUSY);
+	}
+
+	/* Read-only mounts need no further checks. */
+	if ((mp->mnt_flag & MNT_RDONLY) != 0)
+		return (0);
+
+	/* A read/write mount must not clash with any existing mount. */
+	STAILQ_FOREACH(other, &nandfsdev->nd_mounts, nm_next_mount) {
+		/* Only one read/write mount per device. */
+		if ((other->nm_vfs_mountp->mnt_flag & MNT_RDONLY) == 0)
+			return (EROFS);
+		/* A read-only mount of the last checkpoint blocks it too. */
+		last_cno = other->nm_nandfsdev->nd_super.s_last_cno;
+		if (other->nm_mount_args.cpno == last_cno)
+			return (EBUSY);
+	}
+
+	return (0);
+}
+
+/*
+ * Attach to the nandfs device behind devvp for a new mount.
+ *
+ * If the device is already known, the mount is checked against the existing
+ * ones, write access is requested from GEOM for a read/write mount, and the
+ * device reference count is bumped.  Otherwise the device is opened through
+ * GEOM, a nandfs_device is allocated and initialised, the erase size is
+ * queried (falling back to NANDFS_DEF_ERASESIZE for non-NAND storage) and
+ * the base file system state is mounted.
+ *
+ * On success *nandfsdev_p points at the device and 0 is returned; on
+ * failure *nandfsdev_p is NULL and an errno value is returned.
+ */
+static int
+nandfs_mount_device(struct vnode *devvp, struct mount *mp,
+    struct nandfs_args *args, struct nandfs_device **nandfsdev_p)
+{
+	struct nandfs_device *nandfsdev;
+	struct g_provider *pp;
+	struct g_consumer *cp;
+	struct cdev *dev;
+	uint32_t erasesize;
+	int error, size;
+	int ronly;
+
+	DPRINTF(VOLUMES, ("Mounting NANDFS device\n"));
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+	/* Look up device in our nandfs_mountpoints */
+	*nandfsdev_p = NULL;
+	SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device)
+		if (nandfsdev->nd_devvp == devvp)
+			break;
+
+	if (nandfsdev) {
+		DPRINTF(VOLUMES, ("device already mounted\n"));
+		error = nandfs_check_mounts(nandfsdev, mp, args);
+		if (error)
+			return (error);
+
+		if (!ronly) {
+			DROP_GIANT();
+			g_topology_lock();
+			error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			if (error)
+				return (error);
+		}
+
+		/*
+		 * Take the reference only once nothing can fail any more,
+		 * so that a failed g_access() does not leak it.
+		 */
+		nandfsdev->nd_refcnt++;
+		*nandfsdev_p = nandfsdev;
+		return (0);
+	}
+
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+	dev = devvp->v_rdev;
+	dev_ref(dev);
+	DROP_GIANT();
+	g_topology_lock();
+	error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 0 : 1);
+	pp = g_dev_getprovider(dev);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	VOP_UNLOCK(devvp, 0);
+	if (error) {
+		dev_rel(dev);
+		return (error);
+	}
+
+	nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT,
+	    M_WAITOK | M_ZERO);
+
+	/* Initialise */
+	nandfsdev->nd_refcnt = 1;
+	nandfsdev->nd_devvp = devvp;
+	nandfsdev->nd_syncing = 0;
+	nandfsdev->nd_cleaning = 0;
+	nandfsdev->nd_gconsumer = cp;
+	cv_init(&nandfsdev->nd_sync_cv, "nandfssync");
+	mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF);
+	cv_init(&nandfsdev->nd_clean_cv, "nandfsclean");
+	mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF);
+	mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF);
+	lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT,
+	    LK_CANRECURSE);
+	STAILQ_INIT(&nandfsdev->nd_mounts);
+
+	nandfsdev->nd_devsize = pp->mediasize;
+	nandfsdev->nd_devblocksize = pp->sectorsize;
+
+	size = sizeof(erasesize);
+	error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size,
+	    &erasesize);
+	if (error) {
+		DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error));
+
+		if (error == ENOIOCTL || error == EOPNOTSUPP) {
+			/*
+			 * We conclude that this is not NAND storage
+			 */
+			nandfsdev->nd_erasesize = NANDFS_DEF_ERASESIZE;
+			nandfsdev->nd_is_nand = 0;
+		} else {
+			DROP_GIANT();
+			g_topology_lock();
+			g_vfs_close(nandfsdev->nd_gconsumer);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			dev_rel(dev);
+			free(nandfsdev, M_NANDFSMNT);
+			return (error);
+		}
+	} else {
+		nandfsdev->nd_erasesize = erasesize;
+		nandfsdev->nd_is_nand = 1;
+	}
+
+	DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__,
+	    nandfsdev->nd_erasesize));
+
+	/* Register nandfs_device in list */
+	SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device);
+
+	error = nandfs_mount_base(nandfsdev, mp, args);
+	if (error) {
+		/* Remove all our information */
+		nandfs_unmount_device(nandfsdev);
+		return (EINVAL);
+	}
+
+	nandfsdev->nd_maxfilesize = nandfs_get_maxfilesize(nandfsdev);
+
+	*nandfsdev_p = nandfsdev;
+	DPRINTF(VOLUMES, ("NANDFS device mounted ok\n"));
+
+	return (0);
+}
+
+/*
+ * Locate the checkpoint requested for this mount (or the last checkpoint
+ * when cpno is 0) in the checkpoint file, validate it and instantiate the
+ * ifile node from the inode stored in it.
+ *
+ * The checkpoint must not be flagged invalid, must carry the expected
+ * number and, unless it is the last checkpoint, must be a snapshot.
+ *
+ * Returns 0 on success; the nandfs_bread() error if the cpfile header
+ * cannot be read; EINVAL for every other failure.
+ */
+static int
+nandfs_mount_checkpoint(struct nandfsmount *nmp)
+{
+	struct nandfs_cpfile_header *cphdr;
+	struct nandfs_checkpoint *cp;
+	struct nandfs_inode ifile_inode;
+	struct nandfs_node *cp_node;
+	struct buf *bp;
+	uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno;
+	uint32_t off, dlen;
+	int cp_per_block, error;
+
+	cpno = nmp->nm_mount_args.cpno;
+	if (cpno == 0)
+		cpno = nmp->nm_nandfsdev->nd_super.s_last_cno;
+
+	DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n",
+	    __func__, cpno));
+
+	cp_node = nmp->nm_nandfsdev->nd_cp_node;
+
+	VOP_LOCK(NTOV(cp_node), LK_SHARED);
+
+	/* The cpfile header lives in the first block of the cp file. */
+	error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp);
+	if (error)
+		goto fail_brelse;
+
+	cphdr = (struct nandfs_cpfile_header *) bp->b_data;
+	ncp = cphdr->ch_ncheckpoints;
+	nsn = cphdr->ch_nsnapshots;
+
+	brelse(bp);
+
+	DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n"));
+	DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp));
+	DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn));
+
+	/* Work out which block holds the checkpoint and where in it. */
+	dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size;
+	cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen;
+
+	fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1;
+	blocknr = fcpno / cp_per_block;
+	off = (fcpno % cp_per_block) * dlen;
+	error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(cp_node), 0);
+		printf("mount_nandfs: couldn't read cp block %"PRIu64"\n",
+		    fcpno);
+		return (EINVAL);
+	}
+
+	cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off);
+	error = EINVAL;
+
+	/* Needs to be a valid checkpoint */
+	if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) {
+		printf("mount_nandfs: checkpoint marked invalid\n");
+		goto fail_brelse;
+	}
+
+	/* Is this really the checkpoint we want? */
+	if (cp->cp_cno != cpno) {
+		printf("mount_nandfs: checkpoint file corrupt? "
+		    "expected cpno %"PRIu64", found cpno %"PRIu64"\n",
+		    cpno, cp->cp_cno);
+		goto fail_brelse;
+	}
+
+	/* Anything but the last checkpoint has to be a snapshot. */
+	last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
+	if (cpno != last_cno &&
+	    (cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) {
+		printf( "mount_nandfs: checkpoint %"PRIu64" is not a "
+		    "snapshot\n", cpno);
+		goto fail_brelse;
+	}
+
+	ifile_inode = cp->cp_ifile_inode;
+	brelse(bp);
+
+	/* Get ifile inode */
+	error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO,
+	    &ifile_inode, &nmp->nm_ifile_node);
+	if (error) {
+		printf("mount_nandfs: can't read ifile node\n");
+		error = EINVAL;
+		goto fail_unlock;
+	}
+
+	NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
+	VOP_UNLOCK(NTOV(cp_node), 0);
+	/* Get root node? */
+
+	return (0);
+
+fail_brelse:
+	brelse(bp);
+fail_unlock:
+	VOP_UNLOCK(NTOV(cp_node), 0);
+	return (error);
+}
+
+/*
+ * Free the nandfsmount obtained from mp through VFSTONANDFS(), if any.
+ */
+static void
+free_nandfs_mountinfo(struct mount *mp)
+{
+	struct nandfsmount *nmp;
+
+	nmp = VFSTONANDFS(mp);
+	if (nmp != NULL)
+		free(nmp, M_NANDFSMNT);
+}
+
+/*
+ * Ask the syncer thread to run and wait for it.
+ *
+ * Under nd_sync_mtx: if a sync is already marked in progress, first wait on
+ * nd_sync_cv; then mark a sync as in progress, wake whoever sleeps on
+ * &nd_syncing and wait on nd_sync_cv again.  With reason == SYNCER_UMOUNT
+ * the syncer is additionally told to exit (nd_syncer_exit).
+ *
+ * `reason' is also used as an index into the name table for the debug
+ * message; it is not range-checked here.
+ */
+void
+nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason)
+{
+ /* Names for the debug message, indexed by `reason'. */
+ char *reasons[] = {
+ "umount",
+ "vfssync",
+ "bdflush",
+ "fforce",
+ "fsync",
+ "ro_upd"
+ };
+
+ DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason]));
+ mtx_lock(&nffsdev->nd_sync_mtx);
+ /* NOTE(review): a single `if', not a loop -- confirm a re-check of nd_syncing after wakeup is not needed. */
+ if (nffsdev->nd_syncing)
+ cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
+ if (reason == SYNCER_UMOUNT)
+ nffsdev->nd_syncer_exit = 1;
+ nffsdev->nd_syncing = 1;
+ /* &nd_syncing is the channel the syncer thread tsleep()s on. */
+ wakeup(&nffsdev->nd_syncing);
+ cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
+
+ mtx_unlock(&nffsdev->nd_sync_mtx);
+}
+
+/*
+ * Mark the current sync pass as finished and wake every thread waiting on
+ * nd_sync_cv.  Unless `exit' is set, then sleep on &nd_syncing for up to
+ * nandfs_sync_interval seconds or until woken.
+ */
+static void
+nandfs_gc_finished(struct nandfs_device *nffsdev, int exit)
+{
+	int error;
+
+	mtx_lock(&nffsdev->nd_sync_mtx);
+	nffsdev->nd_syncing = 0;
+	DPRINTF(SYNC, ("%s: cleaner finish\n", __func__));
+	cv_broadcast(&nffsdev->nd_sync_cv);
+	mtx_unlock(&nffsdev->nd_sync_mtx);
+
+	if (exit)
+		return;
+
+	error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-",
+	    hz * nandfs_sync_interval);
+	DPRINTF(SYNC, ("%s: cleaner waked up: %d\n",
+	    __func__, error));
+}
+
+/*
+ * Body of the syncer kernel thread.
+ *
+ * After an initial sleep it repeatedly runs the segment constructor with
+ * the pending NANDFS_FORCE_SYNCER/NANDFS_UMOUNT flags of the mount, clears
+ * those flags and calls nandfs_gc_finished(), until nd_syncer_exit is set.
+ * It then runs the segment constructor one last time with both flags set,
+ * clears nd_syncer and exits the thread.
+ */
+static void
+nandfs_syncer(struct nandfsmount *nmp)
+{
+ struct nandfs_device *nffsdev;
+ struct mount *mp;
+ int flags, error;
+
+ mp = nmp->nm_vfs_mountp;
+ nffsdev = nmp->nm_nandfsdev;
+ /* Wait for the first request or for the sync interval to elapse. */
+ tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval);
+
+ while (!nffsdev->nd_syncer_exit) {
+ DPRINTF(SYNC, ("%s: syncer run\n", __func__));
+ nffsdev->nd_syncing = 1;
+
+ flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT));
+
+ error = nandfs_segment_constructor(nmp, flags);
+ if (error)
+ nandfs_error("%s: error:%d when creating segments\n",
+ __func__, error);
+
+ /* Clear only the flags that were handed to this pass. */
+ nmp->nm_flags &= ~flags;
+
+ /* Signals the waiters, then sleeps until the next request or timeout. */
+ nandfs_gc_finished(nffsdev, 0);
+ }
+
+ /* Exit requested: one final forced pass before the thread goes away. */
+ MPASS(nffsdev->nd_cleaner == NULL);
+ error = nandfs_segment_constructor(nmp,
+ NANDFS_FORCE_SYNCER | NANDFS_UMOUNT);
+ if (error)
+ nandfs_error("%s: error:%d when creating segments\n",
+ __func__, error);
+ nandfs_gc_finished(nffsdev, 1);
+ nffsdev->nd_syncer = NULL;
+ MPASS(nffsdev->nd_free_base == NULL);
+
+ DPRINTF(SYNC, ("%s: exiting\n", __func__));
+ kthread_exit();
+}
+
+/*
+ * Create the syncer kernel thread for this mount.  The thread handle is
+ * stored in the device's nd_syncer and nd_syncer_exit is cleared first.
+ * Returns the kthread_add() result.
+ */
+static int
+start_syncer(struct nandfsmount *nmp)
+{
+	struct nandfs_device *nffsdev;
+	int error;
+
+	MPASS(nmp->nm_nandfsdev->nd_syncer == NULL);
+
+	DPRINTF(SYNC, ("%s: start syncer\n", __func__));
+
+	nffsdev = nmp->nm_nandfsdev;
+	nffsdev->nd_syncer_exit = 0;
+
+	error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL,
+	    &nffsdev->nd_syncer, 0, 0, "nandfs_syncer");
+	if (error != 0)
+		printf("nandfs: could not start syncer: %d\n", error);
+
+	return (error);
+}
+
+/*
+ * Ask the syncer thread of this mount's device to terminate by issuing a
+ * SYNCER_UMOUNT request and waiting for it.  Always returns 0.
+ */
+static int
+stop_syncer(struct nandfsmount *nmp)
+{
+	struct nandfs_device *nffsdev;
+
+	MPASS(nmp->nm_nandfsdev->nd_syncer != NULL);
+
+	nffsdev = nmp->nm_nandfsdev;
+	nandfs_wakeup_wait_sync(nffsdev, SYNCER_UMOUNT);
+
+	DPRINTF(SYNC, ("%s: stop syncer\n", __func__));
+	return (0);
+}
+
+/*
+ * VFS mount entry point.
+ *
+ * For MNT_UPDATE requests this handles the read/write -> read-only
+ * downgrade (sync, flush, stop cleaner and syncer, drop GEOM write access)
+ * and the read-only -> read/write upgrade (permission check, acquire GEOM
+ * write access, start syncer and cleaner); an "export" update returns
+ * immediately.  Otherwise the device named by the "from" option is looked
+ * up, checked and handed to nandfs_mountfs().
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+nandfs_mount(struct mount *mp)
+{
+	struct nandfsmount *nmp;
+	struct vnode *devvp;
+	struct nameidata nd;
+	struct vfsoptlist *opts;
+	struct thread *td;
+	char *from;
+	int error = 0, flags;
+
+	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
+
+	td = curthread;
+	opts = mp->mnt_optnew;
+
+	if (vfs_filteropt(opts, nandfs_opts))
+		return (EINVAL);
+
+	/*
+	 * Update of an existing mount: only ro <-> rw transitions are acted
+	 * upon.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONANDFS(mp);
+		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
+			return (error);
+		}
+		if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0)) {
+			vn_start_write(NULL, &mp, V_WAIT);
+			error = VFS_SYNC(mp, MNT_WAIT);
+			/*
+			 * Always pair vn_start_write() with
+			 * vn_finished_write(), also when the sync failed.
+			 */
+			vn_finished_write(mp);
+			if (error)
+				return (error);
+
+			flags = WRITECLOSE;
+			if (mp->mnt_flag & MNT_FORCE)
+				flags |= FORCECLOSE;
+
+			nandfs_wakeup_wait_sync(nmp->nm_nandfsdev,
+			    SYNCER_ROUPD);
+			error = vflush(mp, 0, flags, td);
+			if (error)
+				return (error);
+
+			nandfs_stop_cleaner(nmp->nm_nandfsdev);
+			stop_syncer(nmp);
+			DROP_GIANT();
+			g_topology_lock();
+			g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			MNT_ILOCK(mp);
+			mp->mnt_flag |= MNT_RDONLY;
+			MNT_IUNLOCK(mp);
+			nmp->nm_ronly = 1;
+
+		} else if ((nmp->nm_ronly) &&
+		    !vfs_flagopt(opts, "ro", NULL, 0)) {
+			/*
+			 * Don't allow read-write snapshots.
+			 */
+			if (nmp->nm_mount_args.cpno != 0)
+				return (EROFS);
+			/*
+			 * If upgrade to read-write by non-root, then verify
+			 * that user has necessary permissions on the device.
+			 */
+			devvp = nmp->nm_nandfsdev->nd_devvp;
+			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+			error = VOP_ACCESS(devvp, VREAD | VWRITE,
+			    td->td_ucred, td);
+			if (error) {
+				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
+				if (error) {
+					VOP_UNLOCK(devvp, 0);
+					return (error);
+				}
+			}
+
+			VOP_UNLOCK(devvp, 0);
+			DROP_GIANT();
+			g_topology_lock();
+			error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1,
+			    0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			if (error)
+				return (error);
+
+			MNT_ILOCK(mp);
+			mp->mnt_flag &= ~MNT_RDONLY;
+			MNT_IUNLOCK(mp);
+			error = start_syncer(nmp);
+			if (error == 0)
+				error = nandfs_start_cleaner(nmp->nm_nandfsdev);
+			if (error) {
+				/* Give the write access back on failure. */
+				DROP_GIANT();
+				g_topology_lock();
+				g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1,
+				    0);
+				g_topology_unlock();
+				PICKUP_GIANT();
+				return (error);
+			}
+
+			nmp->nm_ronly = 0;
+		}
+		return (0);
+	}
+
+	from = vfs_getopts(opts, "from", &error);
+	if (error)
+		return (error);
+
+	/*
+	 * Find device node
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+
+	devvp = nd.ni_vp;
+
+	if (!vn_isdisk(devvp, &error)) {
+		vput(devvp);
+		return (error);
+	}
+
+	/* Check the access rights on the mount device */
+	error = VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread);
+	if (error)
+		error = priv_check(curthread, PRIV_VFS_MOUNT_PERM);
+	if (error) {
+		vput(devvp);
+		return (error);
+	}
+
+	vfs_getnewfsid(mp);
+
+	error = nandfs_mountfs(devvp, mp);
+	if (error)
+		return (error);
+	vfs_mountedfrom(mp, from);
+
+	return (0);
+}
+
+/*
+ * Mount the file system on devvp at mp.
+ *
+ * Parses the "from" and "snap" options, attaches to the device, allocates
+ * and registers the nandfsmount, mounts the requested checkpoint and, for a
+ * read/write mount, starts the syncer and the cleaner.  devvp is unlocked
+ * here.  A checkpoint number above the last known one is replaced by the
+ * last known one (with a warning); mounting a non-zero checkpoint
+ * read/write is refused with EROFS.
+ *
+ * Returns 0 on success or an errno value.  If a step after device
+ * attachment fails, the mount is torn down again through nandfs_unmount().
+ */
+static int
+nandfs_mountfs(struct vnode *devvp, struct mount *mp)
+{
+	struct nandfsmount *nmp = NULL;
+	struct nandfs_args *args = NULL;
+	struct nandfs_device *nandfsdev;
+	char *from;
+	int error, ronly;
+	char *cpno;
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+
+	if (devvp->v_rdev->si_iosize_max != 0)
+		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+	VOP_UNLOCK(devvp, 0);
+
+	if (mp->mnt_iosize_max > MAXPHYS)
+		mp->mnt_iosize_max = MAXPHYS;
+
+	from = vfs_getopts(mp->mnt_optnew, "from", &error);
+	if (error)
+		goto error;
+
+	error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL);
+	if (error == ENOENT)
+		cpno = NULL;
+	else if (error)
+		goto error;
+
+	args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args),
+	    M_NANDFSMNT, M_WAITOK | M_ZERO);
+
+	if (cpno != NULL)
+		args->cpno = strtoul(cpno, (char **)NULL, 10);
+	else
+		args->cpno = 0;
+	args->fspec = from;
+
+	/* A specific checkpoint can only be mounted read-only. */
+	if (args->cpno != 0 && !ronly) {
+		error = EROFS;
+		goto error;
+	}
+
+	printf("WARNING: NANDFS is considered to be a highly experimental "
+	    "feature in FreeBSD.\n");
+
+	error = nandfs_mount_device(devvp, mp, args, &nandfsdev);
+	if (error)
+		goto error;
+
+	nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount),
+	    M_NANDFSMNT, M_WAITOK | M_ZERO);
+
+	mp->mnt_data = nmp;
+	nmp->nm_vfs_mountp = mp;
+	nmp->nm_ronly = ronly;
+	MNT_ILOCK(mp);
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_MPSAFE;
+	MNT_IUNLOCK(mp);
+	nmp->nm_nandfsdev = nandfsdev;
+	/* Add our mountpoint */
+	STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount);
+
+	if (args->cpno > nandfsdev->nd_last_cno) {
+		printf("WARNING: supplied checkpoint number (%jd) is greater "
+		    "than last known checkpoint on filesystem (%jd). Mounting"
+		    " checkpoint %jd\n", (uintmax_t)args->cpno,
+		    (uintmax_t)nandfsdev->nd_last_cno,
+		    (uintmax_t)nandfsdev->nd_last_cno);
+		args->cpno = nandfsdev->nd_last_cno;
+	}
+
+	/* Setting up other parameters */
+	nmp->nm_mount_args = *args;
+	free(args, M_NANDFSMNT);
+	error = nandfs_mount_checkpoint(nmp);
+	if (error) {
+		nandfs_unmount(mp, MNT_FORCE);
+		goto unmounted;
+	}
+
+	if (!ronly) {
+		error = start_syncer(nmp);
+		if (error == 0)
+			error = nandfs_start_cleaner(nmp->nm_nandfsdev);
+		if (error) {
+			/*
+			 * The mount has just been torn down again, so the
+			 * failure must be reported to the caller instead of
+			 * claiming success.
+			 */
+			nandfs_unmount(mp, MNT_FORCE);
+			goto unmounted;
+		}
+	}
+
+	return (0);
+
+error:
+	if (args != NULL)
+		free(args, M_NANDFSMNT);
+
+	if (nmp != NULL) {
+		free(nmp, M_NANDFSMNT);
+		mp->mnt_data = NULL;
+	}
+unmounted:
+	return (error);
+}
+
+/*
+ * VFS unmount entry point.
+ *
+ * Flushes the mount's vnodes (forcibly with MNT_FORCE, system vnodes
+ * excluded), stops the cleaner and the syncer of a read/write mount,
+ * unmarks the ifile node, unlinks the mount from its device, drops the
+ * device reference and frees the per-mount data.
+ *
+ * Returns 0 on success or the vflush() error.
+ */
+static int
+nandfs_unmount(struct mount *mp, int mntflags)
+{
+ struct nandfs_device *nandfsdev;
+ struct nandfsmount *nmp;
+ int error;
+ int flags = 0;
+
+ DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+
+ nmp = mp->mnt_data;
+ nandfsdev = nmp->nm_nandfsdev;
+
+ error = vflush(mp, 0, flags | SKIPSYSTEM, curthread);
+ if (error)
+ return (error);
+
+ /* Only read/write mounts run a cleaner and a syncer. */
+ if (!(nmp->nm_ronly)) {
+ nandfs_stop_cleaner(nandfsdev);
+ stop_syncer(nmp);
+ }
+
+ if (nmp->nm_ifile_node)
+ NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
+
+ /* Remove our mount point */
+ STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount);
+
+ /* Unmount the device itself when we're the last one */
+ nandfs_unmount_device(nandfsdev);
+
+ free_nandfs_mountinfo(mp);
+
+ /*
+ * Finally, clear the mount's private data pointer and the MNT_LOCAL
+ * flag.
+ */
+ mp->mnt_data = 0;
+ MNT_ILOCK(mp);
+ mp->mnt_flag &= ~MNT_LOCAL;
+ MNT_IUNLOCK(mp);
+
+ return (0);
+}
+
+/*
+ * VFS statfs entry point.
+ *
+ * Fills in block size, total/free/available block counts and the number of
+ * files.  The file count is the sum, over the group descriptors in block 0
+ * of the ifile, of entries_per_group minus each group's free count.
+ * f_ffree is reported as 0.
+ *
+ * Returns 0 on success or the nandfs_bread() error.
+ */
+static int
+nandfs_statfs(struct mount *mp, struct statfs *sbp)
+{
+	struct nandfsmount *nmp;
+	struct nandfs_device *nandfsdev;
+	struct nandfs_fsdata *fsdata;
+	struct nandfs_super_block *sb;
+	struct nandfs_block_group_desc *groups;
+	struct nandfs_node *ifile;
+	struct nandfs_mdt *mdt;
+	struct buf *bp;
+	uint64_t files;
+	uint32_t entries_per_group;
+	int error, i;
+
+	nmp = mp->mnt_data;
+	nandfsdev = nmp->nm_nandfsdev;
+	ifile = nmp->nm_ifile_node;
+	mdt = &nandfsdev->nd_ifile_mdt;
+	entries_per_group = mdt->entries_per_group;
+
+	VOP_LOCK(NTOV(ifile), LK_SHARED);
+	error = nandfs_bread(ifile, 0, NOCRED, 0, &bp);
+	if (error) {
+		brelse(bp);
+		VOP_UNLOCK(NTOV(ifile), 0);
+		return (error);
+	}
+
+	/* Count the entries in use in each group of the descriptor block. */
+	groups = (struct nandfs_block_group_desc *)bp->b_data;
+	files = 0;
+	for (i = 0; i < mdt->groups_per_desc_block; i++)
+		files += (entries_per_group - groups[i].bg_nfrees);
+
+	brelse(bp);
+	VOP_UNLOCK(NTOV(ifile), 0);
+
+	fsdata = &nandfsdev->nd_fsdata;
+	sb = &nandfsdev->nd_super;
+
+	sbp->f_bsize = nandfsdev->nd_blocksize;
+	sbp->f_iosize = sbp->f_bsize;
+	sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments;
+	sbp->f_bfree = sb->s_free_blocks_count;
+	sbp->f_bavail = sbp->f_bfree;
+	sbp->f_files = files;
+	sbp->f_ffree = 0;
+
+	return (0);
+}
+
+/*
+ * VFS root entry point: obtain the node for NANDFS_ROOT_INO and return its
+ * vnode in *vpp.  Returns 0 on success or the nandfs_get_node() error.
+ */
+static int
+nandfs_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+	struct nandfsmount *nmp;
+	struct nandfs_node *root;
+	int error;
+
+	nmp = VFSTONANDFS(mp);
+	error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &root);
+	if (error != 0)
+		return (error);
+
+	KASSERT(NTOV(root)->v_vflag & VV_ROOT,
+	    ("root_vp->v_vflag & VV_ROOT"));
+
+	*vpp = NTOV(root);
+
+	return (0);
+}
+
+/*
+ * VFS vget entry point: look up the node for inode number `ino' and return
+ * its vnode in *vpp.
+ *
+ * Returns 0 on success.  On failure the nandfs_get_node() error is returned
+ * and *vpp is set to NULL.
+ */
+static int
+nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
+{
+	struct nandfsmount *nmp = VFSTONANDFS(mp);
+	struct nandfs_node *node;
+	int error;
+
+	/*
+	 * Start from a known value: the lookup is not guaranteed to store
+	 * anything in `node' when it fails.
+	 */
+	node = NULL;
+	error = nandfs_get_node(nmp, ino, &node);
+	if (error != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	if (node != NULL)
+		*vpp = NTOV(node);
+
+	return (0);
+}
+
+/*
+ * VFS sync entry point.  MNT_LAZY and MNT_SUSPEND requests are ignored;
+ * any other request triggers a syncer run and waits for it.
+ * Always returns 0.
+ */
+static int
+nandfs_sync(struct mount *mp, int waitfor)
+{
+	struct nandfsmount *nmp;
+
+	nmp = VFSTONANDFS(mp);
+
+	DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor));
+
+	/*
+	 * XXX: A hack to be removed soon
+	 */
+	if (waitfor == MNT_LAZY || waitfor == MNT_SUSPEND)
+		return (0);
+
+	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_VFS_SYNC);
+	return (0);
+}
+
+/*
+ * VFS operations vector for nandfs, registered under the name "nandfs".
+ */
+static struct vfsops nandfs_vfsops = {
+	/* Module setup and teardown */
+	.vfs_init = nandfs_init,
+	.vfs_uninit = nandfs_uninit,
+	/* Mount life cycle */
+	.vfs_mount = nandfs_mount,
+	.vfs_unmount = nandfs_unmount,
+	.vfs_sync = nandfs_sync,
+	/* Queries and vnode lookup */
+	.vfs_statfs = nandfs_statfs,
+	.vfs_root = nandfs_root,
+	.vfs_vget = nandfs_vget,
+};
+
+VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK);
diff --git a/sys/fs/nandfs/nandfs_vnops.c b/sys/fs/nandfs/nandfs_vnops.c
new file mode 100644
index 0000000..b226d30
--- /dev/null
+++ b/sys/fs/nandfs/nandfs_vnops.c
@@ -0,0 +1,2455 @@
+/*-
+ * Copyright (c) 2010-2012 Semihalf
+ * Copyright (c) 2008, 2009 Reinoud Zandijk
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/lockf.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/bio.h>
+#include <sys/fcntl.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+#include <sys/priv.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vnode_pager.h>
+
+#include <machine/_inttypes.h>
+
+#include <fs/nandfs/nandfs_mount.h>
+#include <fs/nandfs/nandfs.h>
+#include <fs/nandfs/nandfs_subr.h>
+
+extern uma_zone_t nandfs_node_zone;
+static void nandfs_read_filebuf(struct nandfs_node *, struct buf *);
+static void nandfs_itimes_locked(struct vnode *);
+static int nandfs_truncate(struct vnode *, uint64_t);
+
+static vop_pathconf_t nandfs_pathconf;
+
+#define UPDATE_CLOSE 0
+#define UPDATE_WAIT 0
+
+/*
+ * VOP_INACTIVE: called when the last reference to the vnode goes away.
+ *
+ * A node whose mode is non-zero and whose link count has dropped to zero is
+ * truncated to length 0, destroyed with nandfs_node_destroy() and its vnode
+ * is recycled.  In every other case nothing is done.  Returns 0, or the
+ * error from nandfs_node_destroy().
+ */
+static int
+nandfs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	int error;
+
+	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
+
+	if (node == NULL) {
+		DPRINTF(NODE, ("%s: inactive NULL node\n", __func__));
+		return (0);
+	}
+
+	/* Zero mode or still linked: keep the node as it is. */
+	if (node->nn_inode.i_mode == 0 || node->nn_inode.i_links_count)
+		return (0);
+
+	nandfs_truncate(vp, 0);
+	error = nandfs_node_destroy(node);
+	if (error != 0)
+		nandfs_error("%s: destroy node: %p\n", __func__, node);
+	node->nn_flags = 0;
+	vrecycle(vp);
+
+	return (error);
+}
+
+/*
+ * VOP_RECLAIM: detach the nandfs node from a vnode that is being reclaimed.
+ *
+ * Name cache entries and the VM object are dropped, non-system vnodes are
+ * removed from the vfs hash and the node is disposed of.  For non-system
+ * nodes the device write lock is released at the end; the matching
+ * acquisition is not in this function.  Always returns 0.
+ */
+static int
+nandfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	/* Saved now: 'node' must not be touched once it has been disposed. */
+	struct nandfs_device *fsdev = node->nn_nandfsdev;
+	uint64_t ino = node->nn_ino;
+	int user_node = !NANDFS_SYS_NODE(ino);
+
+	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
+
+	/* Drop every name cache entry that refers to this vnode. */
+	cache_purge(vp);
+
+	/* Tear down the VM object together with its pages. */
+	vnode_destroy_vobject(vp);
+
+	/* System vnodes are not kept in the vfs hash. */
+	if (user_node)
+		vfs_hash_remove(vp);
+
+	/* Forget everything we know about the node. */
+	nandfs_dispose_node(&node);
+
+	if (user_node)
+		NANDFS_WRITEUNLOCK(fsdev);
+
+	return (0);
+}
+
+/*
+ * VOP_READ: copy file data into the caller's uio, one block at a time.
+ *
+ * The transfer is clipped to the bytes between the current offset and the
+ * end of the file.  Each block is fetched with nandfs_bread() and copied out
+ * with uiomove().  Returns 0 on success (including reads at or past end of
+ * file) or the first error from nandfs_bread()/uiomove().
+ */
+static int
+nandfs_read(struct vop_read_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
+	struct uio *uio = ap->a_uio;
+	struct buf *bp;
+	uint64_t size;
+	uint32_t blocksize;
+	off_t bytesinfile;
+	ssize_t chunk, blkoff;
+	daddr_t lbn;
+	ssize_t left;
+	int error = 0;
+
+	if (uio->uio_resid == 0)
+		return (0);
+
+	size = node->nn_inode.i_size;
+	if (uio->uio_offset >= size)
+		return (0);
+
+	blocksize = nandfsdev->nd_blocksize;
+	bytesinfile = size - uio->uio_offset;
+
+	/* 'left' is decremented only after a block was copied successfully. */
+	for (left = omin(uio->uio_resid, bytesinfile); left; left -= chunk) {
+		lbn = uio->uio_offset / blocksize;
+		blkoff = uio->uio_offset & (blocksize - 1);
+
+		chunk = omin(left, blocksize - blkoff);
+
+		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
+		    (uintmax_t)lbn, chunk, blocksize));
+
+		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			break;
+		}
+
+		/* The buffer is released whether or not the copy worked. */
+		error = uiomove(bp->b_data + blkoff, chunk, uio);
+		brelse(bp);
+		if (error)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * VOP_WRITE: copy data from the caller's uio into the file, one file system
+ * block at a time.
+ *
+ * For every block touched, nandfs_bmap_lookup() tells whether the logical
+ * block is already mapped: mapped blocks are read with nandfs_bread(),
+ * unmapped ones are created with nandfs_bcreate().  The data is copied in
+ * with uiomove() and the buffer is handed to nandfs_dirty_buf().
+ *
+ * Returns 0 on success, ENOSPC when nandfs_fs_full() reports a full file
+ * system, EINVAL for a negative offset, EISDIR for directories, or the
+ * first error returned by one of the helpers above.
+ */
+static int
+nandfs_write(struct vop_write_args *ap)
+{
+	struct nandfs_device *fsdev;
+	struct nandfs_node *node;
+	struct vnode *vp;
+	struct uio *uio;
+	struct buf *bp;
+	uint64_t file_size, vblk;
+	uint32_t blocksize;
+	ssize_t towrite, off;
+	daddr_t lbn;
+	ssize_t resid;
+	int error, ioflag, modified;
+
+	vp = ap->a_vp;
+	uio = ap->a_uio;
+	ioflag = ap->a_ioflag;
+	node = VTON(vp);
+	fsdev = node->nn_nandfsdev;
+
+	if (nandfs_fs_full(fsdev))
+		return (ENOSPC);
+
+	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
+	    uio->uio_resid, (uintmax_t)uio->uio_offset));
+
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_resid == 0)
+		return (0);
+
+	blocksize = fsdev->nd_blocksize;
+	file_size = node->nn_inode.i_size;
+
+	switch (vp->v_type) {
+	case VREG:
+	case VLNK:
+		break;
+	case VDIR:
+		return (EISDIR);
+	default:
+		panic("%s: bad file type vp: %p", __func__, vp);
+	}
+
+	/* An append always starts at the current end of file. */
+	if (ioflag & IO_APPEND)
+		uio->uio_offset = file_size;
+
+	resid = uio->uio_resid;
+	modified = error = 0;
+
+	while (uio->uio_resid) {
+		lbn = uio->uio_offset / blocksize;
+		off = uio->uio_offset & (blocksize - 1);
+
+		towrite = omin(uio->uio_resid, blocksize - off);
+
+		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n",
+		    __func__, (uintmax_t)lbn, towrite, blocksize));
+
+		error = nandfs_bmap_lookup(node, lbn, &vblk);
+		if (error)
+			break;
+
+		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) "
+		    "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize,
+		    (uintmax_t)vblk));
+
+		/*
+		 * Start every iteration from a known state so the error path
+		 * below never looks at an uninitialised pointer or at the
+		 * buffer of the previous iteration.
+		 */
+		bp = NULL;
+		if (vblk != 0)
+			error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
+		else
+			error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp);
+
+		DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__,
+		    vp, bp, (uintmax_t)lbn));
+		if (error) {
+			if (bp != NULL)
+				brelse(bp);
+			break;
+		}
+
+		error = uiomove((char *)bp->b_data + off, (int)towrite, uio);
+		if (error) {
+			/* Do not leak the buffer when the copy-in fails. */
+			brelse(bp);
+			break;
+		}
+
+		/*
+		 * NOTE(review): no brelse() on failure here; presumably
+		 * nandfs_dirty_buf() disposes of the buffer itself, as
+		 * nandfs_truncate() relies on the same behaviour -- confirm.
+		 */
+		error = nandfs_dirty_buf(bp, 0);
+		if (error)
+			break;
+
+		modified++;
+	}
+
+	/* XXX proper handling when only part of file was properly written */
+	if (modified) {
+		/* Unprivileged writers lose the set-id bits. */
+		if (resid > uio->uio_resid && ap->a_cred &&
+		    ap->a_cred->cr_uid != 0)
+			node->nn_inode.i_mode &= ~(ISUID | ISGID);
+
+		if (file_size < uio->uio_offset + uio->uio_resid) {
+			node->nn_inode.i_size = uio->uio_offset +
+			    uio->uio_resid;
+			node->nn_flags |= IN_CHANGE | IN_UPDATE;
+			vnode_pager_setsize(vp, uio->uio_offset +
+			    uio->uio_resid);
+			nandfs_itimes(vp);
+		}
+	}
+
+	DPRINTF(WRITE, ("%s: return:%d\n", __func__, error));
+
+	return (error);
+}
+
+/*
+ * VOP_CACHEDLOOKUP: resolve one path component inside directory a_dvp.
+ *
+ * Three cases are handled: ".", ".." and ordinary names.  On success the
+ * resulting vnode is stored in *a_vpp.  Return values:
+ *   0            the name was found,
+ *   ENOENT       the name does not exist (or is a whiteout on LOOKUP),
+ *   EJUSTRETURN  the name does not exist but may be created by the caller
+ *                (CREATE/RENAME on the last component), or a whiteout is
+ *                to be removed,
+ *   EROFS        DELETE/RENAME of the last component on a read-only mount,
+ *   EPERM        the sticky-bit check refused a DELETE,
+ *   other        errors from VOP_ACCESS(), nandfs_lookup_name_in_dir() or
+ *                nandfs_get_node().
+ */
+static int
+nandfs_lookup(struct vop_cachedlookup_args *ap)
+{
+	struct vnode *dvp, **vpp;
+	struct componentname *cnp;
+	struct ucred *cred;
+	struct thread *td;
+	struct nandfs_node *dir_node, *node;
+	struct nandfsmount *nmp;
+	uint64_t ino, off;
+	const char *name;
+	int namelen, nameiop, islastcn, mounted_ro;
+	int error, found;
+
+	DPRINTF(VNCALL, ("%s\n", __func__));
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	*vpp = NULL;
+
+	cnp = ap->a_cnp;
+	cred = cnp->cn_cred;
+	td = cnp->cn_thread;
+
+	dir_node = VTON(dvp);
+	nmp = dir_node->nn_nmp;
+
+	/* Simplify/clarification flags */
+	nameiop = cnp->cn_nameiop;
+	islastcn = cnp->cn_flags & ISLASTCN;
+	mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY;
+
+	/*
+	 * If requesting a modify on the last path element on a read-only
+	 * file system, reject the lookup.
+	 */
+	if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME))
+		return (EROFS);
+
+	/* Nothing can be found in a directory that was already removed. */
+	if (dir_node->nn_inode.i_links_count == 0)
+		return (ENOENT);
+
+	/*
+	 * Obviously, the file is not (anymore) in the namecache, we have to
+	 * search for it. There are three basic cases: '.', '..' and others.
+	 *
+	 * Following the guidelines of VOP_LOOKUP manpage and tmpfs.
+	 */
+	error = 0;
+	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
+		DPRINTF(LOOKUP, ("\tlookup '.'\n"));
+		/* Special case 1 '.' */
+		VREF(dvp);
+		*vpp = dvp;
+		/* Done */
+	} else if (cnp->cn_flags & ISDOTDOT) {
+		/* Special case 2 '..' */
+		DPRINTF(LOOKUP, ("\tlookup '..'\n"));
+
+		/* Get our node */
+		name = "..";
+		namelen = 2;
+		error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino,
+		    &found, &off);
+		if (error)
+			goto out;
+		if (!found)
+			error = ENOENT;
+
+		/* First unlock parent */
+		VOP_UNLOCK(dvp, 0);
+
+		if (error == 0) {
+			DPRINTF(LOOKUP, ("\tfound '..'\n"));
+			/* Try to create/reuse the node */
+			error = nandfs_get_node(nmp, ino, &node);
+
+			if (!error) {
+				DPRINTF(LOOKUP,
+				    ("\tnode retrieved/created OK\n"));
+				*vpp = NTOV(node);
+			}
+		}
+
+		/* Try to relock parent */
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+	} else {
+		DPRINTF(LOOKUP, ("\tlookup file\n"));
+		/* All other files */
+		/* Look up filename in the directory returning its inode */
+		name = cnp->cn_nameptr;
+		namelen = cnp->cn_namelen;
+		error = nandfs_lookup_name_in_dir(dvp, name, namelen,
+		    &ino, &found, &off);
+		if (error)
+			goto out;
+		if (!found) {
+			DPRINTF(LOOKUP, ("\tNOT found\n"));
+			/*
+			 * UGH, didn't find name. If we're creating or
+			 * renaming on the last name this is OK and we ought
+			 * to return EJUSTRETURN if it is allowed to be
+			 * created.
+			 */
+			error = ENOENT;
+			if ((nameiop == CREATE || nameiop == RENAME) &&
+			    islastcn) {
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    td);
+				if (!error) {
+					/* keep the component name */
+					cnp->cn_flags |= SAVENAME;
+					error = EJUSTRETURN;
+				}
+			}
+			/* Done */
+		} else {
+			if (ino == NANDFS_WHT_INO)
+				cnp->cn_flags |= ISWHITEOUT;
+
+			if ((cnp->cn_flags & ISWHITEOUT) &&
+			    (nameiop == LOOKUP))
+				return (ENOENT);
+
+			if ((nameiop == DELETE) && islastcn) {
+				if ((cnp->cn_flags & ISWHITEOUT) &&
+				    (cnp->cn_flags & DOWHITEOUT)) {
+					cnp->cn_flags |= SAVENAME;
+					dir_node->nn_diroff = off;
+					return (EJUSTRETURN);
+				}
+
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    cnp->cn_thread);
+				if (error)
+					return (error);
+
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (error) {
+					/*
+					 * 'node' is not valid here; the
+					 * sticky-bit check below must not
+					 * run.
+					 */
+					goto out;
+				}
+				*vpp = NTOV(node);
+				node->nn_diroff = off;
+
+				/*
+				 * In a sticky directory only root, the
+				 * directory owner or the file owner may
+				 * delete the entry.
+				 */
+				if ((dir_node->nn_inode.i_mode & ISVTX) &&
+				    cred->cr_uid != 0 &&
+				    cred->cr_uid != dir_node->nn_inode.i_uid &&
+				    node->nn_inode.i_uid != cred->cr_uid) {
+					vput(*vpp);
+					*vpp = NULL;
+					return (EPERM);
+				}
+			} else if ((nameiop == RENAME) && islastcn) {
+				error = VOP_ACCESS(dvp, VWRITE, cred,
+				    cnp->cn_thread);
+				if (error)
+					return (error);
+
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (!error) {
+					*vpp = NTOV(node);
+					node->nn_diroff = off;
+				}
+			} else {
+				/* Try to create/reuse the node */
+				error = nandfs_get_node(nmp, ino, &node);
+				if (!error) {
+					*vpp = NTOV(node);
+					node->nn_diroff = off;
+				}
+			}
+		}
+	}
+
+out:
+	/*
+	 * Store result in the cache if requested. If we are creating a file,
+	 * the file might not be found and thus putting it into the namecache
+	 * might be seen as negative caching.
+	 *
+	 * Hard failures (I/O errors, failed node retrieval, denied access)
+	 * leave *vpp NULL without proving that the name is absent, so they
+	 * must not be recorded as negative entries.
+	 */
+	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE &&
+	    (error == 0 || error == ENOENT || error == EJUSTRETURN))
+		cache_enter(dvp, *vpp, cnp);
+
+	return (error);
+
+}
+
+/*
+ * VOP_GETATTR: fill a struct vattr from the in-core inode.
+ *
+ * Pending time stamp updates are applied first with nandfs_itimes().  The
+ * access time is reported from the inode's modification time fields.
+ * Always returns 0.
+ */
+static int
+nandfs_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *ip = &node->nn_inode;
+
+	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
+	nandfs_itimes(vp);
+
+	VATTR_NULL(vap);
+
+	/* Identity */
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_fileid = node->nn_ino;
+	vap->va_gen = 0;
+	vap->va_filerev = 0;
+	vap->va_vaflags = 0;
+
+	/* Type, permissions and ownership */
+	vap->va_type = IFTOVT(ip->i_mode);
+	vap->va_mode = ip->i_mode & ~S_IFMT;
+	vap->va_nlink = ip->i_links_count;
+	vap->va_uid = ip->i_uid;
+	vap->va_gid = ip->i_gid;
+	vap->va_rdev = ip->i_special;
+	vap->va_flags = ip->i_flags;
+
+	/* Sizes; va_bytes relies on va_blocksize having been set. */
+	vap->va_size = ip->i_size;
+	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
+	vap->va_bytes = ip->i_blocks * vap->va_blocksize;
+
+	/* Time stamps */
+	vap->va_atime.tv_sec = ip->i_mtime;
+	vap->va_atime.tv_nsec = ip->i_mtime_nsec;
+	vap->va_mtime.tv_sec = ip->i_mtime;
+	vap->va_mtime.tv_nsec = ip->i_mtime_nsec;
+	vap->va_ctime.tv_sec = ip->i_ctime;
+	vap->va_ctime.tv_nsec = ip->i_ctime_nsec;
+
+	return (0);
+}
+
+/*
+ * Invalidate and release every buffer of 'vp' whose logical block number is
+ * at or beyond 'nblks'.  The clean list of the vnode's buffer object is
+ * processed first, then the dirty list; for each dirty buffer dropped the
+ * device's dirty buffer count is decremented.  Always returns 0.
+ */
+static int
+nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
+{
+ struct nandfs_device *nffsdev;
+ struct bufobj *bo;
+ struct buf *bp, *nbp;
+
+ bo = &vp->v_bufobj;
+ nffsdev = VTON(vp)->nn_nandfsdev;
+
+ ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
+restart:
+ BO_LOCK(bo);
+restart_locked:
+ /* Pass 1: clean buffers. */
+ TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
+ if (bp->b_lblkno < nblks)
+ continue;
+ /* Non-blocking attempt; on failure rescan with bo still locked. */
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
+ goto restart_locked;
+
+ bremfree(bp);
+ bp->b_flags |= (B_INVAL | B_RELBUF);
+ bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+ /* brelse() is called with the buffer object lock dropped. */
+ BO_UNLOCK(bo);
+ brelse(bp);
+ BO_LOCK(bo);
+ }
+
+ /* Pass 2: dirty buffers. */
+ TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+ if (bp->b_lblkno < nblks)
+ continue;
+ /*
+ * BO_MTX(bo) is handed over as interlock; the code after this
+ * call re-takes BO_LOCK, so presumably BUF_LOCK releases it --
+ * TODO confirm.  ENOLCK restarts the whole scan.
+ */
+ if (BUF_LOCK(bp,
+ LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+ BO_MTX(bo)) == ENOLCK)
+ goto restart;
+ bp->b_flags |= (B_INVAL | B_RELBUF);
+ bp->b_flags &= ~(B_ASYNC | B_MANAGED);
+ brelse(bp);
+ nandfs_dirty_bufs_decrement(nffsdev);
+ BO_LOCK(bo);
+ }
+
+ BO_UNLOCK(bo);
+
+ return (0);
+}
+
+/*
+ * Change the length of a file to 'newsize' bytes.
+ *
+ * Growing the file (or keeping its size) only updates the inode and the
+ * pager.  Shrinking zeroes the tail of the new last block when it is only
+ * partly used, drops the block mappings beyond the new end with
+ * nandfs_bmap_truncate_mapping() and discards the matching buffers with
+ * nandfs_vtruncbuf().  Returns 0 or the first error from a helper.
+ */
+static int
+nandfs_truncate(struct vnode *vp, uint64_t newsize)
+{
+	struct nandfs_device *nffsdev;
+	struct nandfs_node *node;
+	struct nandfs_inode *inode;
+	struct buf *bp = NULL;
+	uint64_t oblks, nblks, lastblk, vblk, size, rest;
+	int error;
+
+	node = VTON(vp);
+	nffsdev = node->nn_nandfsdev;
+	inode = &node->nn_inode;
+	size = inode->i_size;
+
+	/* No blocks go away: only the inode (and maybe the size) changes. */
+	if (newsize >= size) {
+		if (newsize > size) {
+			inode->i_size = newsize;
+			vnode_pager_setsize(vp, newsize);
+		}
+		node->nn_flags |= IN_CHANGE | IN_UPDATE;
+		nandfs_itimes(vp);
+		return (0);
+	}
+
+	nblks = howmany(newsize, nffsdev->nd_blocksize);
+	oblks = howmany(size, nffsdev->nd_blocksize);
+	rest = newsize % nffsdev->nd_blocksize;
+	lastblk = nblks - 1;
+
+	if (rest != 0) {
+		/* The new last block is partly used: clear its tail. */
+		error = nandfs_bmap_lookup(node, lastblk, &vblk);
+		if (error != 0)
+			return (error);
+
+		error = (vblk != 0) ?
+		    nandfs_bread(node, lastblk, NOCRED, 0, &bp) :
+		    nandfs_bcreate(node, lastblk, NOCRED, 0, &bp);
+		if (error != 0) {
+			if (bp != NULL)
+				brelse(bp);
+			return (error);
+		}
+
+		bzero((char *)bp->b_data + rest,
+		    (u_int)(nffsdev->nd_blocksize - rest));
+		error = nandfs_dirty_buf(bp, 0);
+		if (error != 0)
+			return (error);
+	}
+
+	DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks,
+	    nblks));
+
+	error = nandfs_bmap_truncate_mapping(node, oblks - 1, lastblk);
+	if (error == 0)
+		error = nandfs_vtruncbuf(vp, nblks);
+	if (error != 0) {
+		/* Take back the tail block dirtied above. */
+		if (bp != NULL)
+			nandfs_undirty_buf(bp);
+		return (error);
+	}
+
+	inode->i_size = newsize;
+	vnode_pager_setsize(vp, newsize);
+	node->nn_flags |= IN_CHANGE | IN_UPDATE;
+	nandfs_itimes(vp);
+
+	return (0);
+}
+
+/*
+ * Apply pending time stamp updates to the in-core inode.  The vnode
+ * interlock must be held by the caller.
+ *
+ * IN_UPDATE refreshes the modification time and IN_CHANGE the change time,
+ * both from vfs_timestamp().  The node is marked IN_MODIFIED unless the
+ * mount is being (or is) suspended and only IN_ACCESS is pending.  The
+ * three request flags are cleared on return.
+ */
+static void
+nandfs_itimes_locked(struct vnode *vp)
+{
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	struct timespec now;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+
+	/* Nothing requested, nothing to do. */
+	if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
+		return;
+
+	if ((vp->v_mount->mnt_kern_flag &
+	    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0 ||
+	    (node->nn_flags & (IN_CHANGE | IN_UPDATE)) != 0)
+		node->nn_flags |= IN_MODIFIED;
+
+	vfs_timestamp(&now);
+	if ((node->nn_flags & IN_UPDATE) != 0) {
+		inode->i_mtime = now.tv_sec;
+		inode->i_mtime_nsec = now.tv_nsec;
+	}
+	if ((node->nn_flags & IN_CHANGE) != 0) {
+		inode->i_ctime = now.tv_sec;
+		inode->i_ctime_nsec = now.tv_nsec;
+	}
+
+	node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+}
+
+/*
+ * Apply pending time stamp updates to the inode of 'vp'.  This is the
+ * variant for callers that do not hold the vnode interlock: it takes the
+ * interlock, calls nandfs_itimes_locked() and releases it again.
+ */
+void
+nandfs_itimes(struct vnode *vp)
+{
+
+ VI_LOCK(vp);
+ nandfs_itimes_locked(vp);
+ VI_UNLOCK(vp);
+}
+
+/*
+ * Change the permission bits of a file on behalf of nandfs_setattr().
+ *
+ * The caller needs VADMIN access.  Extra privilege is required to set the
+ * sticky bit on a non-directory (else EFTYPE), to set the setgid bit for a
+ * group the caller is not a member of, and to set the setuid bit on a file
+ * owned by somebody else.  On success the ALLPERMS part of i_mode is
+ * replaced and the node is flagged IN_CHANGE.  Returns 0 or an error.
+ */
+static int
+nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
+{
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	uint16_t newmode;
+	int error;
+
+	DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
+	    mode, cred, td));
+
+	/* Only holders of VADMIN may touch the permissions. */
+	error = VOP_ACCESS(vp, VADMIN, cred, td);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Sticky bit on a non-directory and setgid for a foreign group both
+	 * need privilege; both are allowed in jail(8).
+	 */
+	if (vp->v_type != VDIR && (mode & S_ISTXT) &&
+	    priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
+		return (EFTYPE);
+
+	if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Setuid may only be set by the owner or with privilege. */
+	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
+		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
+		if (error != 0)
+			return (error);
+	}
+
+	newmode = (inode->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
+	inode->i_mode = newmode;
+	node->nn_flags |= IN_CHANGE;
+
+	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, newmode));
+
+	return (0);
+}
+
+/*
+ * Change owner and/or group of a file on behalf of nandfs_setattr().
+ *
+ * A uid or gid of VNOVAL means "leave unchanged".  The caller needs
+ * VWRITE_OWNER access, plus PRIV_VFS_CHOWN when giving the file to another
+ * user or to a group the caller is not a member of.  If ownership really
+ * changes, the set-id bits are cleared unless the caller holds
+ * PRIV_VFS_RETAINSUGID.  Returns 0 or an error.
+ */
+static int
+nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *td)
+{
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	uid_t prev_uid;
+	gid_t prev_gid;
+	int error;
+
+	if (uid == (uid_t)VNOVAL)
+		uid = inode->i_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = inode->i_gid;
+
+	/* The caller must be allowed to change the ownership at all. */
+	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Handing the file to another user, or to a group we do not belong
+	 * to, requires privilege.
+	 */
+	if ((uid != inode->i_uid && uid != cred->cr_uid) ||
+	    (gid != inode->i_gid && !groupmember(gid, cred))) {
+		error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
+		if (error != 0)
+			return (error);
+	}
+
+	prev_gid = inode->i_gid;
+	prev_uid = inode->i_uid;
+
+	inode->i_gid = gid;
+	inode->i_uid = uid;
+	node->nn_flags |= IN_CHANGE;
+
+	/* A real change of ownership drops the set-id bits. */
+	if ((inode->i_mode & (ISUID | ISGID)) &&
+	    (prev_uid != uid || prev_gid != gid) &&
+	    priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
+		inode->i_mode &= ~(ISUID | ISGID);
+
+	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
+	    cred, td));
+	return (0);
+}
+
+/*
+ * VOP_SETATTR: change file flags, size, ownership, mode or time stamps.
+ *
+ * Attributes that cannot be set (type, link count, fsid, fileid, block
+ * size, rdev, bytes, generation) yield EINVAL.  The remaining requests are
+ * handled in this order: flags, size, uid/gid, mode, times.  A size change
+ * returns right after the truncate, so attributes later in that order are
+ * not applied in the same call.  Files carrying IMMUTABLE or APPEND flags
+ * refuse everything except a flag change (EPERM).
+ *
+ * Returns 0 on success, otherwise EINVAL, EROFS, EPERM, EISDIR, EFBIG or
+ * the error from VOP_ACCESS(), securelevel_gt(), nandfs_truncate(),
+ * nandfs_chown() or nandfs_chmod().
+ */
+static int
+nandfs_setattr(struct vop_setattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_inode *inode = &node->nn_inode;
+	struct vattr *vap = ap->a_vap;
+	struct ucred *cred = ap->a_cred;
+	struct thread *td = curthread;
+	uint32_t flags;
+	int error = 0;
+
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
+		return (EINVAL);
+	}
+
+	if (vap->va_flags != VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
+		    td, vap->va_flags));
+
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/*
+		 * Callers may only modify the file flags on objects they
+		 * have VADMIN rights for.
+		 */
+		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
+			return (error);
+		/*
+		 * Unprivileged processes are not permitted to unset system
+		 * flags, or modify flags if any system flags are set.
+		 * Privileged non-jail processes may not modify system flags
+		 * if securelevel > 0 and any existing system flags are set.
+		 * Privileged jail processes behave like privileged non-jail
+		 * processes if the security.jail.chflags_allowed sysctl is
+		 * non-zero; otherwise, they behave like unprivileged
+		 * processes.
+		 */
+
+		flags = inode->i_flags;
+		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
+			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
+				error = securelevel_gt(cred, 0);
+				if (error)
+					return (error);
+			}
+			/* Snapshot flag cannot be set or cleared */
+			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
+			    (flags & SF_SNAPSHOT) == 0) ||
+			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
+			    (flags & SF_SNAPSHOT) != 0))
+				return (EPERM);
+
+			inode->i_flags = vap->va_flags;
+		} else {
+			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
+			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
+				return (EPERM);
+
+			flags &= SF_SETTABLE;
+			flags |= (vap->va_flags & UF_SETTABLE);
+			inode->i_flags = flags;
+		}
+		node->nn_flags |= IN_CHANGE;
+		if (vap->va_flags & (IMMUTABLE | APPEND))
+			return (0);
+	}
+	if (inode->i_flags & (IMMUTABLE | APPEND))
+		return (EPERM);
+
+	if (vap->va_size != (u_quad_t)VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td,
+		    (uintmax_t)vap->va_size));
+
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			if ((inode->i_flags & SF_SNAPSHOT) != 0)
+				return (EPERM);
+			break;
+		default:
+			return (0);
+		}
+
+		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
+			return (EFBIG);
+
+		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
+		/* A failed resize must be reported, not silently dropped. */
+		error = nandfs_truncate(vp, vap->va_size);
+		if (error)
+			return (error);
+		node->nn_flags |= IN_CHANGE;
+
+		return (0);
+	}
+
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
+		    vp, td, vap->va_uid, vap->va_gid));
+		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
+		if (error)
+			return (error);
+	}
+
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
+		    vap->va_mode));
+
+		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
+		if (error)
+			return (error);
+	}
+	if (vap->va_atime.tv_sec != VNOVAL ||
+	    vap->va_mtime.tv_sec != VNOVAL ||
+	    vap->va_birthtime.tv_sec != VNOVAL) {
+		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
+		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
+		    (uintmax_t)vap->va_mtime.tv_sec,
+		    (uintmax_t)vap->va_birthtime.tv_sec));
+
+		/*
+		 * Only update requests are recorded here; the values stored
+		 * are the ones nandfs_itimes() obtains, not the times passed
+		 * in by the caller.
+		 */
+		if (vap->va_atime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_ACCESS;
+		if (vap->va_mtime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_CHANGE | IN_UPDATE;
+		if (vap->va_birthtime.tv_sec != VNOVAL)
+			node->nn_flags |= IN_MODIFIED;
+		nandfs_itimes(vp);
+		return (0);
+	}
+
+	return (0);
+}
+
+/*
+ * VOP_OPEN: validate an open request and set up the VM object.
+ *
+ * Character and block devices are refused with EOPNOTSUPP.  A file flagged
+ * APPEND may only be opened for writing together with O_APPEND, otherwise
+ * EPERM is returned.  On success a VM object of the file's current size is
+ * created for the vnode and 0 is returned.
+ */
+static int
+nandfs_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+
+	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));
+
+	switch (vp->v_type) {
+	case VCHR:
+	case VBLK:
+		return (EOPNOTSUPP);
+	default:
+		break;
+	}
+
+	/* Append-only files must not be opened for plain writing. */
+	if ((node->nn_inode.i_flags & APPEND) != 0 &&
+	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
+		return (EPERM);
+
+	vnode_create_vobject(vp, node->nn_inode.i_size, ap->a_td);
+
+	return (0);
+}
+
+/*
+ * VOP_CLOSE: when the vnode stays in use by others (use count above one),
+ * apply pending time stamp updates under the vnode interlock.  Always
+ * returns 0.
+ */
+static int
+nandfs_close(struct vop_close_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, VTON(vp)));
+
+	VI_LOCK(vp);
+	if (vp->v_usecount > 1)
+		nandfs_itimes_locked(vp);
+	VI_UNLOCK(vp);
+
+	return (0);
+}
+
+/*
+ * First stage of the access check: is the requested access possible at
+ * all, regardless of who is asking?
+ *
+ * Returns EROFS for a write to a directory, symlink or regular file on a
+ * read-only mount, EINVAL for an unknown vnode type, EPERM for a write to
+ * an IMMUTABLE file and 0 otherwise.
+ */
+static int
+nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
+{
+	int wants_write = (mode & VWRITE) != 0;
+
+	if (vap->va_type == VDIR || vap->va_type == VLNK ||
+	    vap->va_type == VREG) {
+		/* Normal nodes cannot be written on a read-only mount. */
+		if (wants_write && (vp->v_mount->mnt_flag & MNT_RDONLY))
+			return (EROFS);
+	} else if (vap->va_type != VBLK && vap->va_type != VCHR &&
+	    vap->va_type != VSOCK && vap->va_type != VFIFO) {
+		/* Neither a normal nor a special node: no idea what it is. */
+		return (EINVAL);
+	}
+	/*
+	 * Special nodes (block, character, socket, fifo) stay writable on a
+	 * read-only mount if the permissions allow it.
+	 */
+
+	/* No one may write immutable files. */
+	if (wants_write && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
+		return (EPERM);
+
+	return (0);
+}
+
+/*
+ * Second stage of the access check: let vaccess() decide whether 'cred'
+ * is granted 'mode' given the file's mode, owner and group from 'vap'.
+ * Returns the result of vaccess().
+ */
+static int
+nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
+    struct ucred *cred)
+{
+	int error;
+
+	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
+	    mode, cred, NULL);
+
+	return (error);
+}
+
+/*
+ * VOP_ADVLOCK: advisory record locking, delegated to lf_advlock() with the
+ * node's lock list and the current file size.
+ */
+static int
+nandfs_advlock(struct vop_advlock_args *ap)
+{
+	struct nandfs_node *node = VTON(ap->a_vp);
+	quad_t filesize = node->nn_inode.i_size;
+
+	return (lf_advlock(ap, &node->nn_lockf, filesize));
+}
+
+/*
+ * VOP_ACCESS: check whether 'a_cred' may access the vnode with 'a_accmode'.
+ *
+ * The attributes are fetched with VOP_GETATTR(), then the request goes
+ * through nandfs_check_possible() and nandfs_check_permitted().  The first
+ * failing step determines the return value; 0 means access is granted.
+ */
+static int
+nandfs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	accmode_t accmode = ap->a_accmode;
+	struct vattr va;
+	int error;
+
+	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));
+
+	error = VOP_GETATTR(vp, &va, NULL);
+	if (error == 0)
+		error = nandfs_check_possible(vp, &va, accmode);
+	if (error == 0)
+		error = nandfs_check_permitted(vp, &va, accmode, ap->a_cred);
+
+	return (error);
+}
+
+/*
+ * VOP_PRINT: print the vnode and node pointers, the inode number and the
+ * decoded node flags.  Always returns 0.
+ */
+static int
+nandfs_print(struct vop_print_args *ap)
+{
+	struct nandfs_node *node = VTON(ap->a_vp);
+
+	printf("\tvp=%p, nandfs_node=%p\n", ap->a_vp, node);
+	printf("nandfs inode %#jx\n", (uintmax_t)node->nn_ino);
+	printf("flags = 0x%b\n", (u_int)node->nn_flags, PRINT_NODE_FLAGS);
+
+	return (0);
+}
+
+/*
+ * Start the read of one file block into 'bp'.
+ *
+ * The logical block number found in bp->b_blkno is translated to a virtual
+ * block with nandfs_bmap_lookup() and then to a physical block with
+ * nandfs_vtop().  A virtual block of 0 denotes a hole: the buffer is cleared
+ * and completed immediately.  Otherwise the buffer is redirected to the
+ * physical location and passed to the underlying device's strategy routine.
+ * Translation failures complete the buffer with BIO_ERROR and EINVAL.
+ * The buffer must describe exactly one file system block, else we panic.
+ */
+static void
+nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
+{
+ struct nandfs_device *nandfsdev = node->nn_nandfsdev;
+ struct buf *nbp;
+ nandfs_daddr_t vblk, pblk;
+ nandfs_lbn_t from;
+ uint32_t blocksize;
+ int error = 0;
+ /* Number of DEV_BSIZE sectors per file system block. */
+ int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+
+ /*
+ * Translate all the block sectors into a series of buffers to read
+ * asynchronously from the nandfs device. Note that this lookup may
+ * induce readin's too.
+ */
+
+ blocksize = nandfsdev->nd_blocksize;
+ if (bp->b_bcount / blocksize != 1)
+ panic("invalid b_count in bp %p\n", bp);
+
+ from = bp->b_blkno;
+
+ DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
+ " count %#lx\n", (uintmax_t)node->nn_ino, from,
+ bp->b_bcount));
+
+ /* Get virtual block numbers for the vnode's buffer span */
+ error = nandfs_bmap_lookup(node, from, &vblk);
+ if (error) {
+ /* The helper's own error code is replaced by EINVAL. */
+ bp->b_error = EINVAL;
+ bp->b_ioflags |= BIO_ERROR;
+ bufdone(bp);
+ return;
+ }
+
+ /* Translate virtual block numbers to physical block numbers */
+ /* NOTE(review): called before the vblk == 0 check below -- confirm nandfs_vtop() accepts 0. */
+ error = nandfs_vtop(node, vblk, &pblk);
+ if (error) {
+ bp->b_error = EINVAL;
+ bp->b_ioflags |= BIO_ERROR;
+ bufdone(bp);
+ return;
+ }
+
+ /* Issue translated blocks */
+ bp->b_resid = bp->b_bcount;
+
+ /* Note virtual block 0 marks not mapped */
+ if (vblk == 0) {
+ vfs_bio_clrbuf(bp);
+ bufdone(bp);
+ return;
+ }
+
+ /* nbp is only another name for bp; no second buffer is involved. */
+ nbp = bp;
+ nbp->b_blkno = pblk * blk2dev;
+ bp->b_iooffset = dbtob(nbp->b_blkno);
+ MPASS(bp->b_iooffset >= 0);
+ BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
+ /* NOTE(review): bp is touched after being handed to the device -- confirm this cannot race with I/O completion. */
+ nandfs_vblk_set(bp, vblk);
+ DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
+ "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
+ (uintmax_t)(from), (uintmax_t)vblk,
+ (uintmax_t)pblk, nbp));
+}
+
+/*
+ * Pass a write buffer on to the underlying device.  The byte offset of the
+ * transfer is derived from bp->b_blkno, which is used as it stands.
+ */
+static void
+nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
+{
+	struct nandfs_device *fsdev = node->nn_nandfsdev;
+
+	bp->b_iooffset = dbtob(bp->b_blkno);
+	MPASS(bp->b_iooffset >= 0);
+
+	BO_STRATEGY(&fsdev->nd_devvp->v_bufobj, bp);
+}
+
+/*
+ * VOP_STRATEGY: dispatch a buffer I/O request.  Reads go through
+ * nandfs_read_filebuf(), which translates the block number; everything
+ * else goes to nandfs_write_filebuf().  Always returns 0; I/O errors are
+ * reported through the buffer.
+ */
+static int
+nandfs_strategy(struct vop_strategy_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct buf *bp = ap->a_bp;
+	struct nandfs_node *node = VTON(vp);
+
+	/* Device nodes are never served by this routine. */
+	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
+	    ("nandfs_strategy on type %d", vp->v_type));
+
+	if (bp->b_iocmd == BIO_READ)
+		nandfs_read_filebuf(node, bp);	/* translate and read */
+	else
+		nandfs_write_filebuf(node, bp);	/* send to segment collector */
+
+	return (0);
+}
+
+/*
+ * VOP_READDIR: convert on-disk directory entries into struct dirent records
+ * and copy them to the caller's uio.
+ *
+ * Reading starts at uio_offset (a byte offset into the directory) and
+ * continues until the directory ends or the next record no longer fits.
+ * Entries with inode number 0 are skipped.  On return uio_offset is the
+ * offset of the first entry that was not transferred, and *a_eofflag (if
+ * requested) tells whether the end of the directory was reached.
+ *
+ * Returns 0 on success, ENOTDIR if the vnode is not a directory, EIO if a
+ * block cannot be read or an entry has a zero record length, or the error
+ * from uiomove().
+ */
+static int
+nandfs_readdir(struct vop_readdir_args *ap)
+{
+	struct uio *uio = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct nandfs_node *node = VTON(vp);
+	struct nandfs_dir_entry *ndirent;
+	struct dirent dirent;
+	struct buf *bp;
+	uint64_t file_size, diroffset, transoffset, blkoff;
+	uint64_t blocknr;
+	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
+	uint8_t *pos, name_len;
+	int error;
+
+	DPRINTF(READDIR, ("nandfs_readdir called\n"));
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	file_size = node->nn_inode.i_size;
+	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
+	    (uintmax_t)file_size, uio->uio_resid ));
+
+	/* We are called just as long as we keep on pushing data in */
+	error = 0;
+	if ((uio->uio_offset < file_size) &&
+	    (uio->uio_resid >= sizeof(struct dirent))) {
+		diroffset = uio->uio_offset;
+		transoffset = diroffset;
+
+		blocknr = diroffset / blocksize;
+		blkoff = diroffset % blocksize;
+		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (EIO);
+		}
+		while (diroffset < file_size) {
+			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
+			    diroffset));
+			if (blkoff >= blocksize) {
+				blkoff = 0; blocknr++;
+				brelse(bp);
+				error = nandfs_bread(node, blocknr, NOCRED, 0,
+				    &bp);
+				if (error) {
+					brelse(bp);
+					return (EIO);
+				}
+			}
+
+			/* Read in one dirent */
+			pos = (uint8_t *)bp->b_data + blkoff;
+			ndirent = (struct nandfs_dir_entry *)pos;
+
+			/*
+			 * A record of length zero would never advance the
+			 * offsets below and the loop could not terminate.
+			 */
+			if (ndirent->rec_len == 0) {
+				error = EIO;
+				break;
+			}
+
+			name_len = ndirent->name_len;
+			memset(&dirent, 0, sizeof(struct dirent));
+			dirent.d_fileno = ndirent->inode;
+			if (dirent.d_fileno) {
+				dirent.d_type = ndirent->file_type;
+				dirent.d_namlen = name_len;
+				strncpy(dirent.d_name, ndirent->name, name_len);
+				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
+				DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
+				    name_len, dirent.d_name));
+			}
+
+			/*
+			 * If there isn't enough space in the uio to return a
+			 * whole dirent, break off read
+			 */
+			if (uio->uio_resid < GENERIC_DIRSIZ(&dirent))
+				break;
+
+			/* Transfer; unused entries (inode 0) are skipped */
+			if (dirent.d_fileno) {
+				error = uiomove(&dirent,
+				    GENERIC_DIRSIZ(&dirent), uio);
+				if (error)
+					break;
+			}
+
+			/* Advance */
+			diroffset += ndirent->rec_len;
+			blkoff += ndirent->rec_len;
+
+			/* Remember the last entry we transferred */
+			transoffset = diroffset;
+		}
+		brelse(bp);
+
+		/* Pass on last transferred offset */
+		uio->uio_offset = transoffset;
+	}
+
+	if (ap->a_eofflag)
+		*ap->a_eofflag = (uio->uio_offset >= file_size);
+
+	return (error);
+}
+
+/*
+ * Check whether directory 'dvp' contains nothing but "." and "..".
+ *
+ * Every block of the directory is scanned.  Entries with inode number 0 or
+ * with an empty name are ignored.  Returns 1 if the directory is empty and
+ * 0 if it is not.  A block that cannot be read, or an entry with a zero
+ * record length, also yields 0, so that a directory which cannot be
+ * verified is never reported as empty.  'cred' is only used for tracing.
+ */
+static int
+nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred)
+{
+	struct nandfs_node *dnode = VTON(dvp);
+	struct nandfs_dir_entry *dirent;
+	uint64_t file_size = dnode->nn_inode.i_size;
+	uint64_t blockcount = dnode->nn_inode.i_blocks;
+	uint64_t blocknr;
+	uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize;
+	uint32_t limit;
+	uint32_t off;
+	uint8_t *pos;
+	struct buf *bp;
+	int error;
+
+	DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp,
+	    (uintmax_t)parentino, cred));
+
+	KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp));
+
+	blocknr = 0;
+	while (blocknr < blockcount) {
+		error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp);
+		if (error) {
+			brelse(bp);
+			return (0);
+		}
+
+		pos = (uint8_t *)bp->b_data;
+		off = 0;
+
+		/*
+		 * NOTE(review): for a size that is an exact multiple of the
+		 * block size this makes 'limit' 0 and the last block is not
+		 * scanned -- confirm against how directory sizes are kept.
+		 */
+		if (blocknr == (blockcount - 1))
+			limit = file_size % blocksize;
+		else
+			limit = blocksize;
+
+		while (off < limit) {
+			dirent = (struct nandfs_dir_entry *)(pos + off);
+
+			/* A zero-length record would never advance 'off'. */
+			if (dirent->rec_len == 0)
+				goto notempty;
+			off += dirent->rec_len;
+
+			if (dirent->inode == 0)
+				continue;
+
+			switch (dirent->name_len) {
+			case 0:
+				break;
+			case 1:
+				if (dirent->name[0] != '.')
+					goto notempty;
+
+				KASSERT(dirent->inode == dnode->nn_ino,
+				    (".'s inode does not match dir"));
+				break;
+			case 2:
+				/*
+				 * Only the exact name ".." may be ignored;
+				 * names such as ".a" or "a." are real
+				 * entries.
+				 */
+				if (dirent->name[0] != '.' ||
+				    dirent->name[1] != '.')
+					goto notempty;
+
+				KASSERT(dirent->inode == parentino,
+				    ("..'s inode does not match parent"));
+				break;
+			default:
+				goto notempty;
+			}
+		}
+
+		brelse(bp);
+		blocknr++;
+	}
+
+	return (1);
+notempty:
+	brelse(bp);
+	return (0);
+}
+
+ /*
+  * VOP_LINK: create a new name (a_cnp) in directory a_tdvp for the existing
+  * vnode a_vp.
+  *
+  * Returns EXDEV when the two vnodes are on different mounts, EMLINK when
+  * the link count is already at LINK_MAX, EPERM for IMMUTABLE/APPEND files,
+  * or the error reported by nandfs_add_dirent().  On failure the link count
+  * is left unchanged.
+  */
+ static int
+ nandfs_link(struct vop_link_args *ap)
+ {
+ 	struct vnode *tdvp = ap->a_tdvp;
+ 	struct vnode *vp = ap->a_vp;
+ 	struct componentname *cnp = ap->a_cnp;
+ 	struct nandfs_node *node = VTON(vp);
+ 	struct nandfs_inode *inode = &node->nn_inode;
+ 	int error;
+
+ 	if (tdvp->v_mount != vp->v_mount)
+ 		return (EXDEV);
+
+ 	if (inode->i_links_count >= LINK_MAX)
+ 		return (EMLINK);
+
+ 	if (inode->i_flags & (IMMUTABLE | APPEND))
+ 		return (EPERM);
+
+ 	/* Update link count */
+ 	inode->i_links_count++;
+
+ 	/* Add dir entry */
+ 	error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr,
+ 	    cnp->cn_namelen, IFTODT(inode->i_mode));
+ 	if (error) {
+ 		/* Nothing was linked: undo the bump and report the failure. */
+ 		inode->i_links_count--;
+ 		return (error);
+ 	}
+
+ 	node->nn_flags |= IN_CHANGE;
+ 	nandfs_itimes(vp);
+ 	DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n",
+ 	    __func__, tdvp, vp, cnp));
+
+ 	return (0);
+ }
+
+ /*
+  * VOP_CREATE: allocate a new node and enter it into directory a_dvp under
+  * the name a_cnp.  The new node takes the directory's gid and the caller's
+  * uid.  On success *a_vpp is the new vnode; on a directory-entry failure
+  * the fresh node is destroyed again.
+  */
+ static int
+ nandfs_create(struct vop_create_args *ap)
+ {
+ 	struct componentname *cnp;
+ 	struct nandfs_node *parent, *created;
+ 	struct nandfsmount *nmp;
+ 	struct vnode *dvp;
+ 	struct vnode **vpp;
+ 	uint16_t mode;
+ 	int error;
+
+ 	dvp = ap->a_dvp;
+ 	vpp = ap->a_vpp;
+ 	cnp = ap->a_cnp;
+ 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+ 	parent = VTON(dvp);
+ 	nmp = parent->nn_nmp;
+
+ 	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
+
+ 	if (nandfs_fs_full(parent->nn_nandfsdev))
+ 		return (ENOSPC);
+
+ 	/* Allocate the node/vnode pair. */
+ 	error = nandfs_node_create(nmp, &created, mode);
+ 	if (error != 0)
+ 		return (error);
+ 	created->nn_inode.i_gid = parent->nn_inode.i_gid;
+ 	created->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+ 	/* Enter the name into the parent directory. */
+ 	error = nandfs_add_dirent(dvp, created->nn_ino, cnp->cn_nameptr,
+ 	    cnp->cn_namelen, IFTODT(mode));
+ 	if (error != 0) {
+ 		if (nandfs_node_destroy(created) != 0)
+ 			nandfs_error("%s: error destroying node %p\n",
+ 			    __func__, created);
+ 		return (error);
+ 	}
+
+ 	*vpp = NTOV(created);
+ 	DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp,
+ 	    created, (uintmax_t)created->nn_ino));
+ 	return (0);
+ }
+
+ /*
+  * VOP_REMOVE: unlink the non-directory a_vp, named a_cnp, from a_dvp.
+  *
+  * Returns EISDIR for directories, EPERM when the file is IMMUTABLE, APPEND
+  * or NOUNLINK or the directory is APPEND, or the error reported by
+  * nandfs_remove_dirent().  The link count is only dropped once the
+  * directory entry has actually been removed.
+  */
+ static int
+ nandfs_remove(struct vop_remove_args *ap)
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct vnode *dvp = ap->a_dvp;
+ 	struct nandfs_node *node = VTON(vp);
+ 	struct nandfs_node *dnode = VTON(dvp);
+ 	struct componentname *cnp = ap->a_cnp;
+ 	int error;
+
+ 	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
+ 	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
+ 	    node->nn_inode.i_links_count));
+
+ 	if (vp->v_type == VDIR)
+ 		return (EISDIR);
+
+ 	/* Files marked as immutable or append-only cannot be deleted. */
+ 	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
+ 	    (dnode->nn_inode.i_flags & APPEND))
+ 		return (EPERM);
+
+ 	/* Keep the link count in step with the directory contents. */
+ 	error = nandfs_remove_dirent(dvp, node, cnp);
+ 	if (error)
+ 		return (error);
+
+ 	node->nn_inode.i_links_count--;
+ 	node->nn_flags |= IN_CHANGE;
+
+ 	return (0);
+ }
+
+/*
+ * Check if source directory is in the path of the target directory.
+ * Target is supplied locked, source is unlocked.
+ * The target is always vput before returning.
+ */
+static int
+nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
+ struct ucred *cred)
+{
+ struct vnode *vp;
+ int error, rootino;
+ struct nandfs_dir_entry dirent;
+
+ vp = NTOV(dest);
+ if (src->nn_ino == dest->nn_ino) {
+ error = EEXIST;
+ goto out;
+ }
+ rootino = NANDFS_ROOT_INO;
+ error = 0;
+ if (dest->nn_ino == rootino)
+ goto out;
+
+ for (;;) {
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ break;
+ }
+
+ error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
+ NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
+ IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
+ NULL, NULL);
+ if (error != 0)
+ break;
+ if (dirent.name_len != 2 ||
+ dirent.name[0] != '.' ||
+ dirent.name[1] != '.') {
+ error = ENOTDIR;
+ break;
+ }
+ if (dirent.inode == src->nn_ino) {
+ error = EINVAL;
+ break;
+ }
+ if (dirent.inode == rootino)
+ break;
+ vput(vp);
+ if ((error = VFS_VGET(vp->v_mount, dirent.inode,
+ LK_EXCLUSIVE, &vp)) != 0) {
+ vp = NULL;
+ break;
+ }
+ }
+
+out:
+ if (error == ENOTDIR)
+ printf("checkpath: .. not a directory\n");
+ if (vp != NULL)
+ vput(vp);
+ return (error);
+}
+
+ /*
+ * VOP_RENAME for nandfs: move the entry a_fcnp of directory a_fdvp (vnode
+ * a_fvp) to the name a_tcnp in directory a_tdvp, replacing the existing
+ * target a_tvp when there is one.
+ *
+ * Outline, as implemented below:
+ * 1. reject cross-mount renames and flag-protected sources/targets;
+ * 2. lock fvp, mark a directory source IN_RENAME and temporarily bump the
+ * source link count;
+ * 3. for a directory moving to a new parent, run nandfs_checkpath() and
+ * relookup the target name;
+ * 4. add (no target) or rewrite (existing target) the entry in tdvp;
+ * 5. relookup the source name and remove it from fdvp.
+ *
+ * Returns 0 or an errno value.
+ */
+ static int
+ nandfs_rename(struct vop_rename_args *ap)
+ {
+ struct vnode *tvp = ap->a_tvp;
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *fdvp = ap->a_fdvp;
+ struct componentname *tcnp = ap->a_tcnp;
+ struct componentname *fcnp = ap->a_fcnp;
+ int doingdirectory = 0, oldparent = 0, newparent = 0;
+ int error = 0;
+
+ struct nandfs_node *fdnode, *fnode, *fnode1;
+ struct nandfs_node *tdnode = VTON(tdvp);
+ struct nandfs_node *tnode;
+
+ uint32_t tdflags, fflags, fdflags;
+ uint16_t mode;
+
+ DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
+ fvp, tdvp, tvp));
+
+ /*
+ * Check for cross-device rename.
+ */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ /* Common early-failure exit: release all four vnodes, return error. */
+ abortit:
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ vrele(fdvp);
+ vrele(fvp);
+ return (error);
+ }
+
+ tdflags = tdnode->nn_inode.i_flags;
+ if (tvp &&
+ ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+ (tdflags & APPEND))) {
+ error = EPERM;
+ goto abortit;
+ }
+
+ /*
+ * Renaming a file to itself has no effect. The upper layers should
+ * not call us in that case. Temporarily just warn if they do.
+ */
+ if (fvp == tvp) {
+ printf("nandfs_rename: fvp == tvp (can't happen)\n");
+ error = 0;
+ goto abortit;
+ }
+
+ if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
+ goto abortit;
+
+ fdnode = VTON(fdvp);
+ fnode = VTON(fvp);
+
+ /*
+ * The source link count is bumped temporarily further down, so a
+ * source already at LINK_MAX is refused.
+ */
+ if (fnode->nn_inode.i_links_count >= LINK_MAX) {
+ VOP_UNLOCK(fvp, 0);
+ error = EMLINK;
+ goto abortit;
+ }
+
+ fflags = fnode->nn_inode.i_flags;
+ fdflags = fdnode->nn_inode.i_flags;
+
+ if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
+ (fdflags & APPEND)) {
+ VOP_UNLOCK(fvp, 0);
+ error = EPERM;
+ goto abortit;
+ }
+
+ mode = fnode->nn_inode.i_mode;
+ if ((mode & S_IFMT) == S_IFDIR) {
+ /*
+ * Avoid ".", "..", and aliases of "." for obvious reasons.
+ */
+
+ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+ (fdvp == fvp) ||
+ ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
+ (fnode->nn_flags & IN_RENAME)) {
+ VOP_UNLOCK(fvp, 0);
+ error = EINVAL;
+ goto abortit;
+ }
+ fnode->nn_flags |= IN_RENAME;
+ doingdirectory = 1;
+ DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
+ tdvp));
+ oldparent = fdnode->nn_ino;
+ }
+
+ /*
+ * NOTE(review): the fdvp reference is dropped here, yet fdvp is still
+ * compared against tdvp and re-referenced with VREF() before the source
+ * relookup below -- confirm something else keeps the vnode alive.
+ */
+ vrele(fdvp);
+
+ tnode = NULL;
+ if (tvp)
+ tnode = VTON(tvp);
+
+ /*
+ * Bump link count on fvp while we are moving stuff around. If we
+ * crash before completing the work, the link count may be wrong
+ * but correctable.
+ */
+ fnode->nn_inode.i_links_count++;
+
+ /* Check for in path moving XXX */
+ /*
+ * NOTE(review): the VOP_ACCESS() result is examined only on the
+ * directory/new-parent path; on every other path it is overwritten by
+ * a later assignment to error.
+ */
+ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
+ VOP_UNLOCK(fvp, 0);
+ /* For a non-directory oldparent is still 0 here. */
+ if (oldparent != tdnode->nn_ino)
+ newparent = tdnode->nn_ino;
+ if (doingdirectory && newparent) {
+ if (error) /* write access check above */
+ goto bad;
+ if (tnode != NULL)
+ vput(tvp);
+
+ /* nandfs_checkpath() always vputs its target (tdvp). */
+ error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
+ if (error)
+ goto out;
+
+ VREF(tdvp);
+ error = relookup(tdvp, &tvp, tcnp);
+ if (error)
+ goto out;
+ vrele(tdvp);
+ tdnode = VTON(tdvp);
+ tnode = NULL;
+ if (tvp)
+ tnode = VTON(tvp);
+ }
+
+ /*
+ * If the target doesn't exist, link the target to the source and
+ * unlink the source. Otherwise, rewrite the target directory to
+ * reference the source and remove the original entry.
+ */
+
+ if (tvp == NULL) {
+ /*
+ * Account for ".." in new directory.
+ */
+ if (doingdirectory && fdvp != tdvp)
+ tdnode->nn_inode.i_links_count++;
+
+ DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
+ /*
+ * Add name in new directory.
+ */
+ error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr,
+ tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode));
+ if (error) {
+ if (doingdirectory && fdvp != tdvp)
+ tdnode->nn_inode.i_links_count--;
+ goto bad;
+ }
+
+ vput(tdvp);
+ } else {
+ /*
+ * If the parent directory is "sticky", then the user must
+ * own the parent directory, or the destination of the rename,
+ * otherwise the destination may not be changed (except by
+ * root). This implements append-only directories.
+ */
+ if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
+ tcnp->cn_cred->cr_uid != 0 &&
+ tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
+ tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
+ error = EPERM;
+ goto bad;
+ }
+ /*
+ * Target must be empty if a directory and have no links
+ * to it. Also, ensure source and target are compatible
+ * (both directories, or both not directories).
+ */
+ mode = tnode->nn_inode.i_mode;
+ if ((mode & S_IFMT) == S_IFDIR) {
+ if (!nandfs_dirempty(tvp, tdnode->nn_ino,
+ tcnp->cn_cred)) {
+ error = ENOTEMPTY;
+ goto bad;
+ }
+ if (!doingdirectory) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ /*
+ * Update name cache since directory is going away.
+ */
+ cache_purge(tdvp);
+ } else if (doingdirectory) {
+ error = EISDIR;
+ goto bad;
+ }
+
+ DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
+ /*
+ * Change name tcnp in tdvp to point at fvp.
+ */
+ error = nandfs_update_dirent(tdvp, fnode, tnode);
+ if (error)
+ goto bad;
+
+ if (doingdirectory && !newparent)
+ tdnode->nn_inode.i_links_count--;
+
+ vput(tdvp);
+
+ tnode->nn_inode.i_links_count--;
+ vput(tvp);
+ tnode = NULL;
+ }
+
+ /*
+ * Unlink the source.
+ */
+ fcnp->cn_flags &= ~MODMASK;
+ fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+ VREF(fdvp);
+ error = relookup(fdvp, &fvp, fcnp);
+ if (error == 0)
+ vrele(fdvp);
+ if (fvp != NULL) {
+ fnode1 = VTON(fvp);
+ fdnode = VTON(fdvp);
+ } else {
+ /*
+ * From name has disappeared.
+ */
+ if (doingdirectory)
+ panic("nandfs_rename: lost dir entry");
+ vrele(ap->a_fvp);
+ return (0);
+ }
+
+ DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));
+
+ /*
+ * Ensure that the directory entry still exists and has not
+ * changed while the new name has been entered. If the source is
+ * a file then the entry may have been unlinked or renamed. In
+ * either case there is no further work to be done. If the source
+ * is a directory then it cannot have been rmdir'ed; its link
+ * count of three would cause a rmdir to fail with ENOTEMPTY.
+ * The IN_RENAME flag ensures that it cannot be moved by another
+ * rename.
+ */
+ if (fnode != fnode1) {
+ if (doingdirectory)
+ panic("nandfs: lost dir entry");
+ } else {
+ /*
+ * If the source is a directory with a
+ * new parent, the link count of the old
+ * parent directory must be decremented
+ * and ".." set to point to the new parent.
+ */
+ if (doingdirectory && newparent) {
+ DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
+ __func__, (uintmax_t) oldparent,
+ (uintmax_t) newparent));
+ error = nandfs_update_parent_dir(fvp, newparent);
+ if (!error) {
+ fdnode->nn_inode.i_links_count--;
+ fdnode->nn_flags |= IN_CHANGE;
+ }
+ }
+ /*
+ * NOTE(review): an error from nandfs_update_parent_dir() above
+ * is overwritten by the result of this call.
+ */
+ error = nandfs_remove_dirent(fdvp, fnode, fcnp);
+ if (!error) {
+ /* Drops the temporary link count bump taken earlier. */
+ fnode->nn_inode.i_links_count--;
+ fnode->nn_flags |= IN_CHANGE;
+ }
+ fnode->nn_flags &= ~IN_RENAME;
+ }
+ if (fdnode)
+ vput(fdvp);
+ if (fnode)
+ vput(fvp);
+ vrele(ap->a_fvp);
+ return (error);
+
+ /* Failure after the link count bump: release the target side first. */
+ bad:
+ DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
+ if (tnode)
+ vput(NTOV(tnode));
+ vput(NTOV(tdnode));
+ /* Undo the temporary link count bump on the source and release it. */
+ out:
+ if (doingdirectory)
+ fnode->nn_flags &= ~IN_RENAME;
+ if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
+ fnode->nn_inode.i_links_count--;
+ fnode->nn_flags |= IN_CHANGE;
+ fnode->nn_flags &= ~IN_RENAME;
+ vput(fvp);
+ } else
+ vrele(fvp);
+ return (error);
+ }
+
+ /*
+  * VOP_MKDIR: create a directory named a_cnp inside a_dvp.
+  *
+  * A new node is allocated, entered into the parent, the parent's link
+  * count is raised and nandfs_init_dir() is called on the new directory.
+  * *a_vpp is set to the new vnode; it is vput again when a later step
+  * fails.
+  */
+ static int
+ nandfs_mkdir(struct vop_mkdir_args *ap)
+ {
+ 	struct componentname *cnp;
+ 	struct nandfs_inode *parent_inode;
+ 	struct nandfs_node *parent, *child;
+ 	struct nandfsmount *nmp;
+ 	struct vnode *dvp, *newvp;
+ 	struct vnode **vpp;
+ 	uint16_t mode;
+ 	int error;
+
+ 	dvp = ap->a_dvp;
+ 	vpp = ap->a_vpp;
+ 	cnp = ap->a_cnp;
+ 	parent = VTON(dvp);
+ 	parent_inode = &parent->nn_inode;
+ 	nmp = parent->nn_nmp;
+ 	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+
+ 	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
+
+ 	if (nandfs_fs_full(parent->nn_nandfsdev))
+ 		return (ENOSPC);
+
+ 	/* The parent's link count is raised for the new subdirectory. */
+ 	if (parent_inode->i_links_count >= LINK_MAX)
+ 		return (EMLINK);
+
+ 	error = nandfs_node_create(nmp, &child, mode);
+ 	if (error != 0)
+ 		return (error);
+
+ 	child->nn_inode.i_gid = parent_inode->i_gid;
+ 	child->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+ 	newvp = NTOV(child);
+ 	*vpp = newvp;
+
+ 	error = nandfs_add_dirent(dvp, child->nn_ino, cnp->cn_nameptr,
+ 	    cnp->cn_namelen, IFTODT(mode));
+ 	if (error != 0) {
+ 		vput(newvp);
+ 		return (error);
+ 	}
+
+ 	parent_inode->i_links_count++;
+ 	parent->nn_flags |= IN_CHANGE;
+
+ 	error = nandfs_init_dir(newvp, child->nn_ino, parent->nn_ino);
+ 	if (error != 0) {
+ 		vput(newvp);
+ 		return (error);
+ 	}
+
+ 	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp,
+ 	    child, (uintmax_t)child->nn_ino));
+ 	return (0);
+ }
+
+ /*
+  * VOP_MKNOD: create a special file named a_cnp inside a_dvp.
+  *
+  * The new node takes the directory's gid and the caller's uid; va_rdev is
+  * stored in i_special unless it is VNOVAL.  *a_vpp is set to the new
+  * vnode.  Returns ENOSPC when the file system is full, or the error from
+  * nandfs_node_create()/nandfs_add_dirent(); when the directory entry
+  * cannot be added the new vnode is vput.
+  */
+ static int
+ nandfs_mknod(struct vop_mknod_args *ap)
+ {
+ 	struct vnode *dvp = ap->a_dvp;
+ 	struct vnode **vpp = ap->a_vpp;
+ 	struct vattr *vap = ap->a_vap;
+ 	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ 	struct componentname *cnp = ap->a_cnp;
+ 	struct nandfs_node *dir_node = VTON(dvp);
+ 	struct nandfsmount *nmp = dir_node->nn_nmp;
+ 	struct nandfs_node *node;
+ 	int error;
+
+ 	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+ 		return (ENOSPC);
+
+ 	error = nandfs_node_create(nmp, &node, mode);
+ 	if (error)
+ 		return (error);
+ 	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+ 	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+ 	if (vap->va_rdev != VNOVAL)
+ 		node->nn_inode.i_special = vap->va_rdev;
+
+ 	*vpp = NTOV(node);
+
+ 	/* Report the real failure reason, as nandfs_create() does. */
+ 	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+ 	    cnp->cn_namelen, IFTODT(mode));
+ 	if (error) {
+ 		vput(*vpp);
+ 		return (error);
+ 	}
+
+ 	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+
+ 	return (0);
+ }
+
+ /*
+  * VOP_SYMLINK: create a symbolic link named a_cnp inside a_dvp whose
+  * contents are the string a_target.
+  *
+  * A node of type S_IFLNK is created and entered into the directory, then
+  * the target path is written into it with vn_rdwr().  *a_vpp is set to the
+  * new vnode.  Returns ENOSPC when the file system is full, or the error
+  * from nandfs_node_create(), nandfs_add_dirent() or vn_rdwr(); the new
+  * vnode is vput when one of the latter two fails.
+  *
+  * NOTE(review): when the vn_rdwr() write fails the directory entry added
+  * just before is left in place -- confirm whether that is intended.
+  */
+ static int
+ nandfs_symlink(struct vop_symlink_args *ap)
+ {
+ 	struct vnode **vpp = ap->a_vpp;
+ 	struct vnode *dvp = ap->a_dvp;
+ 	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+ 	struct componentname *cnp = ap->a_cnp;
+ 	struct nandfs_node *dir_node = VTON(dvp);
+ 	struct nandfsmount *nmp = dir_node->nn_nmp;
+ 	struct nandfs_node *node;
+ 	int len, error;
+
+ 	if (nandfs_fs_full(dir_node->nn_nandfsdev))
+ 		return (ENOSPC);
+
+ 	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
+ 	if (error)
+ 		return (error);
+ 	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
+ 	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
+
+ 	*vpp = NTOV(node);
+
+ 	/* Report the real failure reason, as nandfs_create() does. */
+ 	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
+ 	    cnp->cn_namelen, IFTODT(mode));
+ 	if (error) {
+ 		vput(*vpp);
+ 		return (error);
+ 	}
+
+ 	/* The link body is stored as the file's data. */
+ 	len = strlen(ap->a_target);
+ 	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
+ 	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
+ 	    cnp->cn_cred, NOCRED, NULL, NULL);
+ 	if (error)
+ 		vput(*vpp);
+
+ 	return (error);
+ }
+
+ /*
+  * VOP_READLINK: the link target is stored as the file's data, so this is
+  * a plain VOP_READ() of the vnode with no I/O flags.
+  */
+ static int
+ nandfs_readlink(struct vop_readlink_args *ap)
+ {
+ 	int error;
+
+ 	error = VOP_READ(ap->a_vp, ap->a_uio, 0, ap->a_cred);
+ 	return (error);
+ }
+
+ /*
+  * VOP_RMDIR: remove the empty directory a_vp, named a_cnp, from a_dvp.
+  *
+  * Returns EPERM when the directory is IMMUTABLE, APPEND or NOUNLINK or the
+  * parent is APPEND, EINVAL when the link count is below 2 or something is
+  * mounted on the directory, ENOTEMPTY when it still has entries, or the
+  * error from nandfs_remove_dirent()/nandfs_truncate().
+  */
+ static int
+ nandfs_rmdir(struct vop_rmdir_args *ap)
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct vnode *dvp = ap->a_dvp;
+ 	struct componentname *cnp = ap->a_cnp;
+ 	struct nandfs_node *node, *dnode;
+ 	int error = 0;
+
+ 	node = VTON(vp);
+ 	dnode = VTON(dvp);
+
+ 	/* Files marked as immutable or append-only cannot be deleted. */
+ 	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
+ 	    (dnode->nn_inode.i_flags & APPEND))
+ 		return (EPERM);
+
+ 	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
+ 	    dvp, vp, node, (uintmax_t)node->nn_ino));
+
+ 	if (node->nn_inode.i_links_count < 2)
+ 		return (EINVAL);
+
+ 	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
+ 		return (ENOTEMPTY);
+
+ 	if (vp->v_mountedhere != NULL)
+ 		return (EINVAL);
+
+ 	/* Only touch the link counts once the entry is really gone. */
+ 	error = nandfs_remove_dirent(dvp, node, cnp);
+ 	if (error)
+ 		return (error);
+
+ 	/* The parent loses the link from the removed directory's "..". */
+ 	dnode->nn_inode.i_links_count -= 1;
+ 	dnode->nn_flags |= IN_CHANGE;
+
+ 	cache_purge(dvp);
+
+ 	error = nandfs_truncate(vp, (uint64_t)0);
+ 	if (error)
+ 		return (error);
+
+ 	/* Drop both the entry in the parent and the directory's own ".". */
+ 	node->nn_inode.i_links_count -= 2;
+ 	node->nn_flags |= IN_CHANGE;
+
+ 	cache_purge(vp);
+
+ 	return (error);
+ }
+
+ /*
+  * VOP_FSYNC: when the inode is marked modified or the vnode has dirty
+  * buffers, drop the vnode lock, call nandfs_wakeup_wait_sync() with
+  * SYNCER_FSYNC and take the lock again in its previous mode.  Always
+  * returns 0.
+  */
+ static int
+ nandfs_fsync(struct vop_fsync_args *ap)
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct nandfs_node *node = VTON(vp);
+ 	int lkstate;
+
+ 	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
+ 	    node, (uintmax_t)node->nn_ino));
+
+ 	/* Nothing to write back: clean inode and no dirty buffers. */
+ 	if ((node->nn_flags & IN_MODIFIED) == 0 &&
+ 	    vp->v_bufobj.bo_dirty.bv_cnt == 0)
+ 		return (0);
+
+ 	/* The vnode lock is not held across the wait. */
+ 	lkstate = VOP_ISLOCKED(vp);
+ 	VOP_UNLOCK(vp, 0);
+ 	nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
+ 	VOP_LOCK(vp, lkstate | LK_RETRY);
+
+ 	return (0);
+ }
+
+ /*
+  * VOP_BMAP: translate the logical block a_bn of the vnode into a device
+  * block number in DEV_BSIZE units.
+  *
+  * *a_bop (when requested) is set to the buffer object of the device vnode,
+  * *a_runp and *a_runb to 0 (no read-ahead/read-behind clustering).  *a_bnp
+  * receives the translated block, or -1 when the logical block is not
+  * mapped (virtual block 0).  Returns 0 or the error reported by
+  * nandfs_bmap_lookup()/nandfs_vtop().
+  */
+ static int
+ nandfs_bmap(struct vop_bmap_args *ap)
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct nandfs_node *nnode = VTON(vp);
+ 	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
+ 	nandfs_daddr_t l2vmap, v2pmap;
+ 	int error;
+ 	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
+
+ 	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
+ 	    nnode, (uintmax_t)nnode->nn_ino));
+
+ 	if (ap->a_bop != NULL)
+ 		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
+ 	if (ap->a_bnp == NULL)
+ 		return (0);
+ 	if (ap->a_runp != NULL)
+ 		*ap->a_runp = 0;
+ 	if (ap->a_runb != NULL)
+ 		*ap->a_runb = 0;
+
+ 	/*
+ 	 * Translate all the block sectors into a series of buffers to read
+ 	 * asynchronously from the nandfs device. Note that this lookup may
+ 	 * induce readin's too.
+ 	 */
+
+ 	/*
+ 	 * Get virtual block numbers for the vnode's buffer span.  Failures
+ 	 * are passed on as errno values; a literal -1 would read as ERESTART
+ 	 * inside the kernel.
+ 	 */
+ 	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
+ 	if (error)
+ 		return (error);
+
+ 	/* Translate virtual block numbers to physical block numbers */
+ 	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
+ 	if (error)
+ 		return (error);
+
+ 	/* Note virtual block 0 marks not mapped */
+ 	if (l2vmap == 0)
+ 		*ap->a_bnp = -1;
+ 	else
+ 		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */
+
+ 	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
+ 	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn,
+ 	    (uintmax_t)*ap->a_bnp ));
+
+ 	return (0);
+ }
+
+ /*
+  * Set NANDFS_FORCE_SYNCER on the mount and call nandfs_wakeup_wait_sync()
+  * on its device with SYNCER_FFORCE.
+  */
+ static void
+ nandfs_force_syncer(struct nandfsmount *nmp)
+ {
+ 	struct nandfs_device *fsdev;
+
+ 	fsdev = nmp->nm_nandfsdev;
+ 	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
+ 	nandfs_wakeup_wait_sync(fsdev, SYNCER_FFORCE);
+ }
+
+ /*
+  * VOP_IOCTL: dispatch the NANDFS_IOCTL_* commands.
+  *
+  * The caller must pass priv_check(PRIV_VFS_MOUNT).  On a read-only mount
+  * only the NANDFS_IOCTL_GET_* queries are accepted; everything else fails
+  * with EROFS.  Commands that change checkpoints or snapshots call
+  * nandfs_force_syncer() afterwards, whether or not the change succeeded.
+  * Unknown commands return ENOTTY.
+  */
+ static int
+ nandfs_ioctl(struct vop_ioctl_args *ap)
+ {
+ 	struct nandfs_node *node = VTON(ap->a_vp);
+ 	struct nandfs_device *fsdev = node->nn_nandfsdev;
+ 	struct nandfsmount *nmp = node->nn_nmp;
+ 	u_long cmd = ap->a_command;
+ 	caddr_t data = ap->a_data;
+ 	uint64_t *args;
+ 	int error;
+
+ 	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)cmd));
+
+ 	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
+ 	if (error != 0)
+ 		return (error);
+
+ 	/* Read-only mounts accept the pure query commands only. */
+ 	if (nmp->nm_ronly &&
+ 	    cmd != NANDFS_IOCTL_GET_FSINFO &&
+ 	    cmd != NANDFS_IOCTL_GET_SUSTAT &&
+ 	    cmd != NANDFS_IOCTL_GET_CPINFO &&
+ 	    cmd != NANDFS_IOCTL_GET_CPSTAT &&
+ 	    cmd != NANDFS_IOCTL_GET_SUINFO &&
+ 	    cmd != NANDFS_IOCTL_GET_VINFO &&
+ 	    cmd != NANDFS_IOCTL_GET_BDESCS)
+ 		return (EROFS);
+
+ 	switch (cmd) {
+ 	case NANDFS_IOCTL_GET_FSINFO:
+ 		return (nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data));
+ 	case NANDFS_IOCTL_GET_SUSTAT:
+ 		return (nandfs_get_seg_stat(fsdev,
+ 		    (struct nandfs_seg_stat *)data));
+ 	case NANDFS_IOCTL_CHANGE_CPMODE:
+ 		error = nandfs_chng_cpmode(fsdev->nd_cp_node,
+ 		    (struct nandfs_cpmode *)data);
+ 		nandfs_force_syncer(nmp);
+ 		return (error);
+ 	case NANDFS_IOCTL_GET_CPINFO:
+ 		return (nandfs_get_cpinfo_ioctl(fsdev->nd_cp_node,
+ 		    (struct nandfs_argv *)data));
+ 	case NANDFS_IOCTL_DELETE_CP:
+ 		/* data carries two 64-bit values, passed on in order. */
+ 		args = (uint64_t *)data;
+ 		error = nandfs_delete_cp(fsdev->nd_cp_node, args[0], args[1]);
+ 		nandfs_force_syncer(nmp);
+ 		return (error);
+ 	case NANDFS_IOCTL_GET_CPSTAT:
+ 		return (nandfs_get_cpstat(fsdev->nd_cp_node,
+ 		    (struct nandfs_cpstat *)data));
+ 	case NANDFS_IOCTL_GET_SUINFO:
+ 		return (nandfs_get_segment_info_ioctl(fsdev,
+ 		    (struct nandfs_argv *)data));
+ 	case NANDFS_IOCTL_GET_VINFO:
+ 		return (nandfs_get_dat_vinfo_ioctl(fsdev,
+ 		    (struct nandfs_argv *)data));
+ 	case NANDFS_IOCTL_GET_BDESCS:
+ 		return (nandfs_get_dat_bdescs_ioctl(fsdev,
+ 		    (struct nandfs_argv *)data));
+ 	case NANDFS_IOCTL_SYNC:
+ 		/* Force a sync, then hand back nd_last_cno. */
+ 		args = (uint64_t *)data;
+ 		nandfs_force_syncer(nmp);
+ 		*args = fsdev->nd_last_cno;
+ 		return (0);
+ 	case NANDFS_IOCTL_MAKE_SNAP:
+ 		error = nandfs_make_snap(fsdev, (uint64_t *)data);
+ 		nandfs_force_syncer(nmp);
+ 		return (error);
+ 	case NANDFS_IOCTL_DELETE_SNAP:
+ 		args = (uint64_t *)data;
+ 		error = nandfs_delete_snap(fsdev, *args);
+ 		nandfs_force_syncer(nmp);
+ 		return (error);
+ 	default:
+ 		return (ENOTTY);
+ 	}
+ }
+
+/*
+ * Whiteout vnode call
+ */
+static int
+nandfs_whiteout(struct vop_whiteout_args *ap)
+{
+ struct vnode *dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ int error = 0;
+
+ switch (ap->a_flags) {
+ case LOOKUP:
+ return (0);
+ case CREATE:
+ /* Create a new directory whiteout */
+#ifdef INVARIANTS
+ if ((cnp->cn_flags & SAVENAME) == 0)
+ panic("ufs_whiteout: missing name");
+#endif
+ error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr,
+ cnp->cn_namelen, DT_WHT);
+ break;
+
+ case DELETE:
+ /* Remove an existing directory whiteout */
+ cnp->cn_flags &= ~DOWHITEOUT;
+ error = nandfs_remove_dirent(dvp, NULL, cnp);
+ break;
+ default:
+ panic("nandf_whiteout: unknown op: %d", ap->a_flags);
+ }
+
+ return (error);
+}
+
+ /*
+  * VOP_PATHCONF: store the value of the configuration variable a_name in
+  * *a_retval.  Returns 0 for a known variable and EINVAL otherwise, in
+  * which case *a_retval is left untouched.
+  */
+ static int
+ nandfs_pathconf(struct vop_pathconf_args *ap)
+ {
+
+ 	switch (ap->a_name) {
+ 	case _PC_LINK_MAX:
+ 		*ap->a_retval = LINK_MAX;
+ 		return (0);
+ 	case _PC_NAME_MAX:
+ 		*ap->a_retval = NAME_MAX;
+ 		return (0);
+ 	case _PC_PATH_MAX:
+ 		*ap->a_retval = PATH_MAX;
+ 		return (0);
+ 	case _PC_PIPE_BUF:
+ 		*ap->a_retval = PIPE_BUF;
+ 		return (0);
+ 	case _PC_CHOWN_RESTRICTED:
+ 	case _PC_NO_TRUNC:
+ 		/* Both are reported as 1. */
+ 		*ap->a_retval = 1;
+ 		return (0);
+ 	case _PC_ACL_EXTENDED:
+ 		*ap->a_retval = 0;
+ 		return (0);
+ 	case _PC_ALLOC_SIZE_MIN:
+ 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
+ 		return (0);
+ 	case _PC_FILESIZEBITS:
+ 		*ap->a_retval = 64;
+ 		return (0);
+ 	case _PC_REC_INCR_XFER_SIZE:
+ 	case _PC_REC_MIN_XFER_SIZE:
+ 		/* Both report the mount's f_iosize. */
+ 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
+ 		return (0);
+ 	case _PC_REC_MAX_XFER_SIZE:
+ 		*ap->a_retval = -1;	/* means ``unlimited'' */
+ 		return (0);
+ 	default:
+ 		return (EINVAL);
+ 	}
+ }
+
+ /*
+ * vop_lock1 implementation: take the lock managed by
+ * NANDFS_WRITELOCKFLAGS()/NANDFS_WRITEUNLOCK() on the node's device first,
+ * then the vnode lock through vop_stdlock(). If the vnode lock cannot be
+ * taken the device lock is released again.
+ *
+ * Returns 0 or the error from either step.
+ */
+ static int
+ nandfs_vnlock1(struct vop_lock1_args *ap)
+ {
+ struct vnode *vp = ap->a_vp;
+ struct nandfs_node *node = VTON(vp);
+ int error, vi_locked;
+
+ /*
+ * XXX can vnode go away while we are sleeping?
+ */
+ /* The vnode interlock is not held across NANDFS_WRITELOCKFLAGS(). */
+ vi_locked = mtx_owned(&vp->v_interlock);
+ if (vi_locked)
+ VI_UNLOCK(vp);
+ /* Only the LK_NOWAIT bit of the caller's flags is passed on. */
+ error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
+ ap->a_flags & LK_NOWAIT);
+ /* On failure the interlock stays released when we return. */
+ if (vi_locked && !error)
+ VI_LOCK(vp);
+ if (error)
+ return (error);
+
+ error = vop_stdlock(ap);
+ if (error) {
+ NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
+ return (error);
+ }
+
+ return (0);
+ }
+
+ /*
+  * vop_unlock implementation: release the vnode lock through
+  * vop_stdunlock() and, when that succeeds, the device lock with
+  * NANDFS_WRITEUNLOCK().  Returns 0 or the vop_stdunlock() error.
+  */
+ static int
+ nandfs_vnunlock(struct vop_unlock_args *ap)
+ {
+ 	struct nandfs_device *fsdev;
+ 	int error;
+
+ 	fsdev = VTON(ap->a_vp)->nn_nandfsdev;
+
+ 	error = vop_stdunlock(ap);
+ 	if (error == 0)
+ 		NANDFS_WRITEUNLOCK(fsdev);
+
+ 	return (error);
+ }
+
+/*
+ * Global vfs data structures
+ */
+struct vop_vector nandfs_vnodeops = {
+ .vop_default = &default_vnodeops,
+ .vop_access = nandfs_access,
+ .vop_advlock = nandfs_advlock,
+ .vop_bmap = nandfs_bmap,
+ .vop_close = nandfs_close,
+ .vop_create = nandfs_create,
+ .vop_fsync = nandfs_fsync,
+ .vop_getattr = nandfs_getattr,
+ .vop_inactive = nandfs_inactive,
+ .vop_cachedlookup = nandfs_lookup,
+ .vop_ioctl = nandfs_ioctl,
+ .vop_link = nandfs_link,
+ .vop_lookup = vfs_cache_lookup,
+ .vop_mkdir = nandfs_mkdir,
+ .vop_mknod = nandfs_mknod,
+ .vop_open = nandfs_open,
+ .vop_pathconf = nandfs_pathconf,
+ .vop_print = nandfs_print,
+ .vop_read = nandfs_read,
+ .vop_readdir = nandfs_readdir,
+ .vop_readlink = nandfs_readlink,
+ .vop_reclaim = nandfs_reclaim,
+ .vop_remove = nandfs_remove,
+ .vop_rename = nandfs_rename,
+ .vop_rmdir = nandfs_rmdir,
+ .vop_whiteout = nandfs_whiteout,
+ .vop_write = nandfs_write,
+ .vop_setattr = nandfs_setattr,
+ .vop_strategy = nandfs_strategy,
+ .vop_symlink = nandfs_symlink,
+ .vop_lock1 = nandfs_vnlock1,
+ .vop_unlock = nandfs_vnunlock,
+};
+
+ /*
+  * Second operations vector: only buffer and lifecycle operations are
+  * implemented, every name-space or user-visible operation is wired to
+  * VOP_PANIC.
+  * NOTE(review): presumably installed on the file system's internal
+  * (system) nodes -- confirm where this vector is assigned.
+  */
+ struct vop_vector nandfs_system_vnodeops = {
+ 	.vop_default =		&default_vnodeops,
+
+ 	/* Implemented operations. */
+ 	.vop_bmap =		nandfs_bmap,
+ 	.vop_close =		nandfs_close,
+ 	.vop_fsync =		nandfs_fsync,
+ 	.vop_inactive =		nandfs_inactive,
+ 	.vop_reclaim =		nandfs_reclaim,
+ 	.vop_strategy =		nandfs_strategy,
+
+ 	/* Operations that must never be invoked through this vector. */
+ 	.vop_access =		VOP_PANIC,
+ 	.vop_advlock =		VOP_PANIC,
+ 	.vop_cachedlookup =	VOP_PANIC,
+ 	.vop_create =		VOP_PANIC,
+ 	.vop_getattr =		VOP_PANIC,
+ 	.vop_ioctl =		VOP_PANIC,
+ 	.vop_link =		VOP_PANIC,
+ 	.vop_lookup =		VOP_PANIC,
+ 	.vop_mkdir =		VOP_PANIC,
+ 	.vop_mknod =		VOP_PANIC,
+ 	.vop_open =		VOP_PANIC,
+ 	.vop_pathconf =		VOP_PANIC,
+ 	.vop_print =		VOP_PANIC,
+ 	.vop_read =		VOP_PANIC,
+ 	.vop_readdir =		VOP_PANIC,
+ 	.vop_readlink =		VOP_PANIC,
+ 	.vop_remove =		VOP_PANIC,
+ 	.vop_rename =		VOP_PANIC,
+ 	.vop_rmdir =		VOP_PANIC,
+ 	.vop_setattr =		VOP_PANIC,
+ 	.vop_symlink =		VOP_PANIC,
+ 	.vop_whiteout =		VOP_PANIC,
+ 	.vop_write =		VOP_PANIC,
+ };
+
+ /*
+  * vop_close for fifos: while holding the vnode interlock, update the
+  * inode times if the vnode has more than one user, then hand the close
+  * over to fifo_specops.
+  */
+ static int
+ nandfsfifo_close(struct vop_close_args *ap)
+ {
+ 	struct vnode *vp;
+ 	struct nandfs_node *node;
+
+ 	vp = ap->a_vp;
+ 	node = VTON(vp);
+
+ 	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
+
+ 	VI_LOCK(vp);
+ 	if (vp->v_usecount > 1)
+ 		nandfs_itimes_locked(vp);
+ 	VI_UNLOCK(vp);
+
+ 	return (fifo_specops.vop_close(ap));
+ }
+
+ /*
+  * Operations vector installed on VFIFO vnodes by nandfs_vinit().  Anything
+  * not listed falls through to fifo_specops; fsync, read and write are
+  * wired to VOP_PANIC.  Entries are kept in alphabetical order.
+  */
+ struct vop_vector nandfs_fifoops = {
+ 	.vop_default =		&fifo_specops,
+ 	.vop_access =		nandfs_access,
+ 	.vop_close =		nandfsfifo_close,
+ 	.vop_fsync =		VOP_PANIC,
+ 	.vop_getattr =		nandfs_getattr,
+ 	.vop_inactive =		nandfs_inactive,
+ 	.vop_lock1 =		nandfs_vnlock1,
+ 	.vop_print =		nandfs_print,
+ 	.vop_read =		VOP_PANIC,
+ 	.vop_reclaim =		nandfs_reclaim,
+ 	.vop_setattr =		nandfs_setattr,
+ 	.vop_unlock =		nandfs_vnunlock,
+ 	.vop_write =		VOP_PANIC,
+ };
+
+ /*
+  * Finish setting up a locked vnode for inode number ino: flag the root
+  * vnode with VV_ROOT, derive v_type from the inode mode (the GC inode is
+  * always VREG) and switch fifos over to nandfs_fifoops.  Always returns 0.
+  */
+ int
+ nandfs_vinit(struct vnode *vp, uint64_t ino)
+ {
+ 	struct nandfs_node *node;
+
+ 	ASSERT_VOP_LOCKED(vp, __func__);
+
+ 	node = VTON(vp);
+
+ 	/* Check if we're fetching the root */
+ 	if (ino == NANDFS_ROOT_INO)
+ 		vp->v_vflag |= VV_ROOT;
+
+ 	/* The GC inode's mode is not consulted. */
+ 	vp->v_type = (ino == NANDFS_GC_INO) ? VREG :
+ 	    IFTOVT(node->nn_inode.i_mode);
+
+ 	if (vp->v_type == VFIFO)
+ 		vp->v_op = &nandfs_fifoops;
+
+ 	return (0);
+ }
OpenPOWER on IntegriCloud