summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jb <jb@FreeBSD.org> 2008-03-28 00:08:47 +0000
committer jb <jb@FreeBSD.org> 2008-03-28 00:08:47 +0000
commit 291b24b7551a7f0ab80c6cc152155e317f53be0a (patch)
tree 9edd0223cc16fd15bec321b8d36de00f35487d3b
parent 5794ada908cc2e195ebcb0217b7164e3bd207e1b (diff)
download FreeBSD-src-291b24b7551a7f0ab80c6cc152155e317f53be0a.zip
FreeBSD-src-291b24b7551a7f0ab80c6cc152155e317f53be0a.tar.gz
Remove files that have been repo copied to their new location
in cddl-specific parts of the source tree.
-rw-r--r--compat/opensolaris/include/alloca.h35
-rw-r--r--compat/opensolaris/include/devid.h54
-rw-r--r--compat/opensolaris/include/fcntl.h37
-rw-r--r--compat/opensolaris/include/fsshare.h35
-rw-r--r--compat/opensolaris/include/libintl.h11
-rw-r--r--compat/opensolaris/include/mnttab.h19
-rw-r--r--compat/opensolaris/include/priv.h18
-rw-r--r--compat/opensolaris/include/solaris.h8
-rw-r--r--compat/opensolaris/include/stdio.h39
-rw-r--r--compat/opensolaris/include/stdlib.h37
-rw-r--r--compat/opensolaris/include/strings.h37
-rw-r--r--compat/opensolaris/include/unistd.h39
-rw-r--r--compat/opensolaris/include/zone.h6
-rw-r--r--compat/opensolaris/lib/libumem/umem.c167
-rw-r--r--compat/opensolaris/lib/libumem/umem.h85
-rw-r--r--compat/opensolaris/misc/deviceid.c119
-rw-r--r--compat/opensolaris/misc/fsshare.c259
-rw-r--r--compat/opensolaris/misc/mkdirp.c213
-rw-r--r--compat/opensolaris/misc/mnttab.c160
-rw-r--r--compat/opensolaris/misc/zmount.c102
-rw-r--r--compat/opensolaris/misc/zone.c46
-rw-r--r--contrib/opensolaris/OPENSOLARIS.LICENSE384
-rw-r--r--contrib/opensolaris/cmd/zdb/zdb.893
-rw-r--r--contrib/opensolaris/cmd/zdb/zdb.c2193
-rw-r--r--contrib/opensolaris/cmd/zdb/zdb_il.c354
-rw-r--r--contrib/opensolaris/cmd/zfs/zfs.81843
-rw-r--r--contrib/opensolaris/cmd/zfs/zfs_iter.c405
-rw-r--r--contrib/opensolaris/cmd/zfs/zfs_iter.h52
-rw-r--r--contrib/opensolaris/cmd/zfs/zfs_main.c3253
-rw-r--r--contrib/opensolaris/cmd/zfs/zfs_util.h44
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool.81140
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool_iter.c245
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool_main.c3602
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool_util.c79
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool_util.h72
-rw-r--r--contrib/opensolaris/cmd/zpool/zpool_vdev.c883
-rw-r--r--contrib/opensolaris/cmd/ztest/ztest.c3495
-rw-r--r--contrib/opensolaris/head/assert.h82
-rw-r--r--contrib/opensolaris/head/atomic.h34
-rw-r--r--contrib/opensolaris/head/libintl.h124
-rw-r--r--contrib/opensolaris/head/stdio_ext.h32
-rw-r--r--contrib/opensolaris/head/synch.h264
-rw-r--r--contrib/opensolaris/head/thread.h99
-rw-r--r--contrib/opensolaris/lib/libnvpair/libnvpair.c266
-rw-r--r--contrib/opensolaris/lib/libnvpair/libnvpair.h46
-rw-r--r--contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c59
-rw-r--r--contrib/opensolaris/lib/libuutil/common/libuutil.h384
-rw-r--r--contrib/opensolaris/lib/libuutil/common/libuutil_common.h46
-rw-r--r--contrib/opensolaris/lib/libuutil/common/libuutil_impl.h181
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_alloc.c85
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_avl.c567
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_dprintf.c128
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_ident.c122
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_list.c711
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_misc.c250
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_open.c70
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_pname.c205
-rw-r--r--contrib/opensolaris/lib/libuutil/common/uu_strtoint.c300
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs.h443
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c599
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_config.c360
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c3855
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_graph.c646
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_impl.h171
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_import.c1023
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_mount.c986
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_pool.c2055
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_status.c303
-rw-r--r--contrib/opensolaris/lib/libzfs/common/libzfs_util.c853
-rw-r--r--contrib/opensolaris/lib/libzpool/common/kernel.c852
-rw-r--r--contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h509
-rw-r--r--contrib/opensolaris/lib/libzpool/common/taskq.c250
-rw-r--r--contrib/opensolaris/lib/libzpool/common/util.c144
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_atomic.c133
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_kmem.c260
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_kobj.c220
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_kstat.c131
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_misc.c56
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_policy.c261
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_string.c71
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_vfs.c280
-rw-r--r--sys/compat/opensolaris/kern/opensolaris_zone.c237
-rw-r--r--sys/compat/opensolaris/machine/endian.h44
-rw-r--r--sys/compat/opensolaris/rpc/xdr.h110
-rw-r--r--sys/compat/opensolaris/sys/acl.h241
-rw-r--r--sys/compat/opensolaris/sys/atomic.h114
-rw-r--r--sys/compat/opensolaris/sys/bitmap.h116
-rw-r--r--sys/compat/opensolaris/sys/byteorder.h65
-rw-r--r--sys/compat/opensolaris/sys/callb.h217
-rw-r--r--sys/compat/opensolaris/sys/cmn_err.h90
-rw-r--r--sys/compat/opensolaris/sys/cpupart.h36
-rw-r--r--sys/compat/opensolaris/sys/cpuvar.h95
-rw-r--r--sys/compat/opensolaris/sys/cred.h51
-rw-r--r--sys/compat/opensolaris/sys/cyclic.h39
-rw-r--r--sys/compat/opensolaris/sys/debug.h48
-rw-r--r--sys/compat/opensolaris/sys/dirent.h44
-rw-r--r--sys/compat/opensolaris/sys/dkio.h85
-rw-r--r--sys/compat/opensolaris/sys/dnlc.h40
-rw-r--r--sys/compat/opensolaris/sys/elf.h116
-rw-r--r--sys/compat/opensolaris/sys/kcondvar.h62
-rw-r--r--sys/compat/opensolaris/sys/kmem.h76
-rw-r--r--sys/compat/opensolaris/sys/kobj.h60
-rw-r--r--sys/compat/opensolaris/sys/kstat.h66
-rw-r--r--sys/compat/opensolaris/sys/lock.h46
-rw-r--r--sys/compat/opensolaris/sys/misc.h45
-rw-r--r--sys/compat/opensolaris/sys/mman.h37
-rw-r--r--sys/compat/opensolaris/sys/mntent.h56
-rw-r--r--sys/compat/opensolaris/sys/mnttab.h36
-rw-r--r--sys/compat/opensolaris/sys/modctl.h38
-rw-r--r--sys/compat/opensolaris/sys/mount.h39
-rw-r--r--sys/compat/opensolaris/sys/mutex.h79
-rw-r--r--sys/compat/opensolaris/sys/objfs.h33
-rw-r--r--sys/compat/opensolaris/sys/param.h37
-rw-r--r--sys/compat/opensolaris/sys/pcpu.h39
-rw-r--r--sys/compat/opensolaris/sys/policy.h63
-rw-r--r--sys/compat/opensolaris/sys/proc.h87
-rw-r--r--sys/compat/opensolaris/sys/random.h37
-rw-r--r--sys/compat/opensolaris/sys/rwlock.h95
-rw-r--r--sys/compat/opensolaris/sys/sdt.h46
-rw-r--r--sys/compat/opensolaris/sys/stat.h38
-rw-r--r--sys/compat/opensolaris/sys/string.h37
-rw-r--r--sys/compat/opensolaris/sys/sunddi.h36
-rw-r--r--sys/compat/opensolaris/sys/sysmacros.h137
-rw-r--r--sys/compat/opensolaris/sys/systm.h49
-rw-r--r--sys/compat/opensolaris/sys/taskq.h84
-rw-r--r--sys/compat/opensolaris/sys/taskq_impl.h135
-rw-r--r--sys/compat/opensolaris/sys/time.h75
-rw-r--r--sys/compat/opensolaris/sys/types.h86
-rw-r--r--sys/compat/opensolaris/sys/uio.h63
-rw-r--r--sys/compat/opensolaris/sys/varargs.h38
-rw-r--r--sys/compat/opensolaris/sys/vfs.h116
-rw-r--r--sys/compat/opensolaris/sys/vnode.h267
-rw-r--r--sys/compat/opensolaris/sys/zone.h68
-rw-r--r--sys/contrib/opensolaris/OPENSOLARIS.LICENSE384
-rw-r--r--sys/contrib/opensolaris/common/atomic/i386/atomic.S98
-rw-r--r--sys/contrib/opensolaris/common/atomic/ia64/atomic.S82
-rw-r--r--sys/contrib/opensolaris/common/avl/avl.c969
-rw-r--r--sys/contrib/opensolaris/common/nvpair/nvpair.c2953
-rw-r--r--sys/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c118
-rw-r--r--sys/contrib/opensolaris/common/zfs/zfs_namecheck.c287
-rw-r--r--sys/contrib/opensolaris/common/zfs/zfs_namecheck.h56
-rw-r--r--sys/contrib/opensolaris/common/zfs/zfs_prop.c657
-rw-r--r--sys/contrib/opensolaris/common/zfs/zfs_prop.h56
-rw-r--r--sys/contrib/opensolaris/uts/common/Makefile.files101
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/gfs.c884
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/arc.c2859
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/bplist.c312
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dbuf.c2247
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu.c1029
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c160
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c1037
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c1009
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c888
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c992
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c655
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dnode.c1369
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c623
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c2035
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c1215
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c256
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c501
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c196
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/fletcher.c145
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/gzip.c69
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/lzjb.c129
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/metaslab.c1023
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/refcount.c194
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sha256.c131
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/spa.c3301
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/spa_config.c375
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c440
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/spa_history.c354
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c1130
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/space_map.c501
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h109
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h89
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h334
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h587
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h237
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h125
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h120
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h134
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h75
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h267
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h185
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h143
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h82
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h77
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h77
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h69
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h81
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h103
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h491
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h168
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h162
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h120
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h77
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock.h50
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h63
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h56
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h132
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h52
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_file.h46
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h298
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h359
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h204
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h234
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h115
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h120
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h71
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h75
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h71
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h163
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h89
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h100
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h298
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h276
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h111
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h366
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h75
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h82
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h205
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h68
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/txg.c611
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/uberblock.c63
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/unique.c107
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev.c1915
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c394
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c363
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c225
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c583
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c1011
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c495
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c89
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c323
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c1237
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c118
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zap.c1071
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c741
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c857
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs.conf28
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c1608
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c99
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c1119
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c797
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c335
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c1826
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c349
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c430
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c594
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c1021
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c3623
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c1072
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zil.c1607
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zio.c1861
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c172
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c148
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c315
-rw-r--r--sys/contrib/opensolaris/uts/common/fs/zfs/zvol.c801
-rw-r--r--sys/contrib/opensolaris/uts/common/os/callb.c363
-rw-r--r--sys/contrib/opensolaris/uts/common/os/list.c193
-rw-r--r--sys/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c63
-rw-r--r--sys/contrib/opensolaris/uts/common/os/taskq.c1020
-rw-r--r--sys/contrib/opensolaris/uts/common/rpc/xdr.c673
-rw-r--r--sys/contrib/opensolaris/uts/common/rpc/xdr.h605
-rw-r--r--sys/contrib/opensolaris/uts/common/rpc/xdr_array.c123
-rw-r--r--sys/contrib/opensolaris/uts/common/rpc/xdr_mem.c209
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/asm_linkage.h110
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/avl.h298
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/avl_impl.h164
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/bitmap.h194
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/byteorder.h137
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/callb.h214
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/ccompile.h127
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/compress.h46
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/cred.h154
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/debug.h129
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/dkio.h477
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/dklabel.h268
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/errorq.h83
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/feature_tests.h397
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h75
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/fm/protocol.h301
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/fm/util.h103
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/fs/zfs.h437
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/gfs.h139
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/isa_defs.h485
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/list.h63
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/list_impl.h53
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/note.h56
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/nvpair.h260
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/nvpair_impl.h73
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/processor.h146
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/procset.h162
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/sdt.h176
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/synch.h161
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/sysevent.h227
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/sysmacros.h290
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/vmem.h142
-rw-r--r--sys/contrib/opensolaris/uts/common/sys/zmod.h68
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/adler32.c149
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/crc32.c428
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/crc32.h443
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/deflate.c1742
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/deflate.h331
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inffast.c320
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inffast.h13
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inffixed.h96
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inflate.c1395
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inflate.h117
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inftrees.c331
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/inftrees.h57
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/trees.c1219
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zconf.h117
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zlib.h1359
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zmod.c109
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zmod_subr.c84
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zutil.c324
-rw-r--r--sys/contrib/opensolaris/uts/common/zmod/zutil.h274
319 files changed, 0 insertions, 128164 deletions
diff --git a/compat/opensolaris/include/alloca.h b/compat/opensolaris/include/alloca.h
deleted file mode 100644
index b8433c1..0000000
--- a/compat/opensolaris/include/alloca.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_ALLOCA_H_
-#define _COMPAT_OPENSOLARIS_ALLOCA_H_
-
-#include <stdlib.h>
-
-#endif
diff --git a/compat/opensolaris/include/devid.h b/compat/opensolaris/include/devid.h
deleted file mode 100644
index 6718ce2..0000000
--- a/compat/opensolaris/include/devid.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _OPENSOLARIS_DEVID_H_
-#define _OPENSOLARIS_DEVID_H_
-
-#include <sys/param.h>
-#include <sys/disk.h>
-#include <stdlib.h>
-
-typedef struct ddi_devid {
- char devid[DISK_IDENT_SIZE];
-} ddi_devid_t;
-
-typedef struct devid_nmlist {
- char devname[MAXPATHLEN];
- dev_t dev;
-} devid_nmlist_t;
-
-int devid_str_decode(char *devidstr, ddi_devid_t *retdevid,
- char **retminor_name);
-int devid_deviceid_to_nmlist(char *search_path, ddi_devid_t devid,
- char *minor_name, devid_nmlist_t **retlist);
-void devid_str_free(char *str);
-void devid_free(ddi_devid_t devid);
-void devid_free_nmlist(devid_nmlist_t *list);
-int devid_get(int fd, ddi_devid_t *retdevid);
-int devid_get_minor_name(int fd, char **retminor_name);
-char *devid_str_encode(ddi_devid_t devid, char *minor_name);
-
-#endif /* !_OPENSOLARIS_DEVID_H_ */
diff --git a/compat/opensolaris/include/fcntl.h b/compat/opensolaris/include/fcntl.h
deleted file mode 100644
index 9b6c3f9..0000000
--- a/compat/opensolaris/include/fcntl.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_FCNTL_H_
-#define _COMPAT_OPENSOLARIS_FCNTL_H_
-
-#include_next <fcntl.h>
-
-#define open64 open
-
-#endif
diff --git a/compat/opensolaris/include/fsshare.h b/compat/opensolaris/include/fsshare.h
deleted file mode 100644
index 11a8dbf0..0000000
--- a/compat/opensolaris/include/fsshare.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_FSSHARE_H_
-#define _OPENSOLARIS_FSSHARE_H_
-
-int fsshare(const char *, const char *, const char *);
-int fsunshare(const char *, const char *);
-
-#endif /* !_OPENSOLARIS_FSSHARE_H_ */
diff --git a/compat/opensolaris/include/libintl.h b/compat/opensolaris/include/libintl.h
deleted file mode 100644
index 5954344..0000000
--- a/compat/opensolaris/include/libintl.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _LIBINTL_H_
-#define _LIBINTL_H_
-
-#include <sys/cdefs.h>
-#include <stdio.h>
-
-#define textdomain(domain) 0
-#define gettext(...) (__VA_ARGS__)
-#define dgettext(domain, ...) (__VA_ARGS__)
-
-#endif /* !_SOLARIS_H_ */
diff --git a/compat/opensolaris/include/mnttab.h b/compat/opensolaris/include/mnttab.h
deleted file mode 100644
index 6e7d28c..0000000
--- a/compat/opensolaris/include/mnttab.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _OPENSOLARIS_MNTTAB_H_
-#define _OPENSOLARIS_MNTTAB_H_
-
-#include <stdio.h>
-#include <paths.h>
-
-#define MNTTAB _PATH_DEVNULL
-#define MNT_LINE_MAX 1024
-
-struct mnttab {
- char *mnt_special;
- char *mnt_mountp;
- char *mnt_fstype;
- char *mnt_mntopts;
-};
-
-int getmntany(FILE *fd, struct mnttab *mgetp, struct mnttab *mrefp);
-
-#endif /* !_OPENSOLARIS_MNTTAB_H_ */
diff --git a/compat/opensolaris/include/priv.h b/compat/opensolaris/include/priv.h
deleted file mode 100644
index 78eae1a..0000000
--- a/compat/opensolaris/include/priv.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _OPENSOLARIS_PRIV_H_
-#define _OPENSOLARIS_PRIV_H_
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <assert.h>
-
-#define PRIV_SYS_CONFIG 0
-
-static __inline int
-priv_ineffect(priv)
-{
-
- assert(priv == PRIV_SYS_CONFIG);
- return (geteuid() == 0);
-}
-
-#endif /* !_OPENSOLARIS_PRIV_H_ */
diff --git a/compat/opensolaris/include/solaris.h b/compat/opensolaris/include/solaris.h
deleted file mode 100644
index fbdd8c6..0000000
--- a/compat/opensolaris/include/solaris.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SOLARIS_H_
-#define _SOLARIS_H_
-
-#include <sys/ccompile.h>
-
-#define dirent64 dirent
-
-#endif /* !_SOLARIS_H_ */
diff --git a/compat/opensolaris/include/stdio.h b/compat/opensolaris/include/stdio.h
deleted file mode 100644
index eba3487..0000000
--- a/compat/opensolaris/include/stdio.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_STDIO_H_
-#define _COMPAT_OPENSOLARIS_STDIO_H_
-
-#include_next <stdio.h>
-
-#define ftello64 ftello
-#define lseek64 lseek
-#define fseeko64 fseeko
-
-#endif
diff --git a/compat/opensolaris/include/stdlib.h b/compat/opensolaris/include/stdlib.h
deleted file mode 100644
index 4e21ea6..0000000
--- a/compat/opensolaris/include/stdlib.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_STDLIB_H_
-#define _COMPAT_OPENSOLARIS_STDLIB_H_
-
-#include_next <stdlib.h>
-
-/* Map the name getexecname onto getprogname. */
-#define getexecname getprogname
-
-#endif
diff --git a/compat/opensolaris/include/strings.h b/compat/opensolaris/include/strings.h
deleted file mode 100644
index a62bbc8..0000000
--- a/compat/opensolaris/include/strings.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_STRINGS_H_
-#define _COMPAT_OPENSOLARIS_STRINGS_H_
-
-#include_next <strings.h>
-
-#include <string.h>
-
-#endif
diff --git a/compat/opensolaris/include/unistd.h b/compat/opensolaris/include/unistd.h
deleted file mode 100644
index e1a1bc8..0000000
--- a/compat/opensolaris/include/unistd.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_UNISTD_H_
-#define _COMPAT_OPENSOLARIS_UNISTD_H_
-
-#include_next <unistd.h>
-
-/* Map fork1 and the *64 names onto the unsuffixed functions. */
-#define fork1 fork
-#define ftruncate64 ftruncate
-#define pread64 pread
-
-#endif
diff --git a/compat/opensolaris/include/zone.h b/compat/opensolaris/include/zone.h
deleted file mode 100644
index 487f02f..0000000
--- a/compat/opensolaris/include/zone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ZONE_H_
-#define _ZONE_H_
-
-#include <sys/zone.h>
-
-#endif /* !_ZONE_H_ */
diff --git a/compat/opensolaris/lib/libumem/umem.c b/compat/opensolaris/lib/libumem/umem.c
deleted file mode 100644
index e9662ce..0000000
--- a/compat/opensolaris/lib/libumem/umem.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Ricardo Correia. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <umem.h>
-#include <stdlib.h>
-#include <assert.h>
-
-/*
- * Callback stored by umem_nofail_callback(); NULL until one is registered.
- * It is invoked just before abort() when a UMEM_NOFAIL allocation fails.
- */
-static umem_nofail_callback_t *nofail_cb = NULL;
-
-/*
- * Cache descriptor.  It only records the values handed to
- * umem_cache_create() that the other cache routines in this file read.
- */
-struct umem_cache {
- /* Run on every buffer handed out by umem_cache_alloc(); may be NULL. */
- umem_constructor_t *constructor;
- /* Run on every buffer returned through umem_cache_free(); may be NULL. */
- umem_destructor_t *destructor;
- /* Opaque argument passed to the constructor and the destructor. */
- void *callback_data;
- /* Number of bytes malloc()ed for each buffer. */
- size_t bufsize;
-};
-
-/*
- * malloc()-backed stand-in for umem_alloc().
- *
- * A zero-byte request yields NULL.  If malloc() fails and the caller did not
- * pass UMEM_NOFAIL, NULL is returned; with UMEM_NOFAIL the registered nofail
- * callback (if any) is invoked and the process is then aborted.
- */
-void *
-umem_alloc(size_t size, int flags)
-{
-	void *mem;
-
-	assert(flags == UMEM_DEFAULT || flags == UMEM_NOFAIL);
-
-	if (size == 0)
-		return (NULL);
-
-	mem = malloc(size);
-	if (mem != NULL)
-		return (mem);
-
-	/* Out of memory. */
-	if ((flags & UMEM_NOFAIL) == 0)
-		return (NULL);
-	if (nofail_cb != NULL)
-		nofail_cb();
-	abort();
-}
-
-/*
- * calloc()-backed stand-in for umem_zalloc(): like umem_alloc(), but the
- * returned memory is zero-filled.
- *
- * A zero-byte request yields NULL.  On allocation failure NULL is returned
- * unless UMEM_NOFAIL was passed, in which case the registered nofail callback
- * (if any) is invoked and the process is then aborted.
- */
-void *
-umem_zalloc(size_t size, int flags)
-{
-	void *mem;
-
-	assert(flags == UMEM_DEFAULT || flags == UMEM_NOFAIL);
-
-	if (size == 0)
-		return (NULL);
-
-	mem = calloc(1, size);
-	if (mem != NULL)
-		return (mem);
-
-	/* Out of memory. */
-	if ((flags & UMEM_NOFAIL) == 0)
-		return (NULL);
-	if (nofail_cb != NULL)
-		nofail_cb();
-	abort();
-}
-
-/*
- * Stand-in for umem_free(): hands the buffer straight to free().  The size
- * argument is accepted for interface compatibility and otherwise ignored.
- */
-void
-umem_free(void *buf, size_t size)
-{
-	(void)size;
-	free(buf);
-}
-
-/*
- * Stand-in for umem_nofail_callback(): remember the callback that the
- * allocation routines invoke before aborting on a UMEM_NOFAIL failure.
- * Passing NULL clears any previously registered callback.
- */
-void
-umem_nofail_callback(umem_nofail_callback_t *callback)
-{
-	nofail_cb = callback;
-}
-
-/*
- * Stand-in for umem_cache_create().
- *
- * Only bufsize, constructor, destructor and callback_data are recorded; the
- * remaining arguments are accepted but not used, except that 'source' must
- * be NULL (asserted).  Returns the new cache, or NULL if malloc() fails.
- */
-umem_cache_t *
-umem_cache_create(char *debug_name, size_t bufsize, size_t align,
-    umem_constructor_t *constructor, umem_destructor_t *destructor,
-    umem_reclaim_t *reclaim, void *callback_data, void *source, int cflags)
-{
-	umem_cache_t *cp;
-
-	assert(source == NULL);
-
-	cp = malloc(sizeof(*cp));
-	if (cp != NULL) {
-		cp->bufsize = bufsize;
-		cp->callback_data = callback_data;
-		cp->constructor = constructor;
-		cp->destructor = destructor;
-	}
-	return (cp);
-}
-
-/*
- * Stand-in for umem_cache_alloc().
- *
- * Allocates cache->bufsize bytes and, when the cache has a constructor, runs
- * it on the new buffer.  If either step fails, the buffer (if any) is freed
- * and NULL is returned, unless UMEM_NOFAIL was passed: then the registered
- * nofail callback (if any) is invoked and the process is aborted.
- */
-void *
-umem_cache_alloc(umem_cache_t *cache, int flags)
-{
-	void *obj;
-
-	obj = malloc(cache->bufsize);
-	if (obj != NULL) {
-		if (cache->constructor == NULL ||
-		    cache->constructor(obj, cache->callback_data, flags) == 0)
-			return (obj);
-		/* The constructor rejected the buffer; give it back. */
-		free(obj);
-	}
-
-	/* Either malloc() or the constructor failed. */
-	if (!(flags & UMEM_NOFAIL))
-		return (NULL);
-	if (nofail_cb != NULL)
-		nofail_cb();
-	abort();
-}
-
-/*
- * Stand-in for umem_cache_free(): run the cache's destructor (if it has one)
- * on the buffer, then release the buffer with free().
- */
-void
-umem_cache_free(umem_cache_t *cache, void *buffer)
-{
-	umem_destructor_t *dtor = cache->destructor;
-
-	if (dtor != NULL)
-		dtor(buffer, cache->callback_data);
-	free(buffer);
-}
-
-/*
- * Stand-in for umem_cache_destroy(): release the cache descriptor itself.
- * Buffers previously handed out by umem_cache_alloc() are not touched.
- */
-void
-umem_cache_destroy(umem_cache_t *cache)
-{
-	free(cache);
-}
diff --git a/compat/opensolaris/lib/libumem/umem.h b/compat/opensolaris/lib/libumem/umem.h
deleted file mode 100644
index 971cde3..0000000
--- a/compat/opensolaris/lib/libumem/umem.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _UMEM_H
-#define _UMEM_H
-
-
-
-#include <sys/types.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define UMEM_DEFAULT 0x0000 /* normal -- may fail */
-#define UMEM_NOFAIL 0x0100 /* Never fails -- may call exit(2) */
-
-#define UMEM_FLAGS 0xffff /* all settable umem flags */
-
-extern void *umem_alloc(size_t, int);
-extern void *umem_alloc_align(size_t, size_t, int);
-extern void *umem_zalloc(size_t, int);
-extern void umem_free(void *, size_t);
-extern void umem_free_align(void *, size_t);
-
-/*
- * Flags for umem_cache_create()
- */
-#define UMC_NOTOUCH 0x00010000
-#define UMC_NODEBUG 0x00020000
-#define UMC_NOMAGAZINE 0x00040000
-#define UMC_NOHASH 0x00080000
-
-struct umem_cache; /* cache structure is opaque to umem clients */
-
-typedef struct umem_cache umem_cache_t;
-typedef int umem_constructor_t(void *, void *, int);
-typedef void umem_destructor_t(void *, void *);
-typedef void umem_reclaim_t(void *);
-
-typedef int umem_nofail_callback_t(void);
-#define UMEM_CALLBACK_RETRY 0
-#define UMEM_CALLBACK_EXIT(status) (0x100 | ((status) & 0xFF))
-
-extern void umem_nofail_callback(umem_nofail_callback_t *);
-
-extern umem_cache_t *umem_cache_create(char *, size_t,
- size_t, umem_constructor_t *, umem_destructor_t *, umem_reclaim_t *,
- void *, void *, int);
-extern void umem_cache_destroy(umem_cache_t *);
-
-extern void *umem_cache_alloc(umem_cache_t *, int);
-extern void umem_cache_free(umem_cache_t *, void *);
-
-extern void umem_reap(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UMEM_H */
diff --git a/compat/opensolaris/misc/deviceid.c b/compat/opensolaris/misc/deviceid.c
deleted file mode 100644
index f37b1d6..0000000
--- a/compat/opensolaris/misc/deviceid.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/ioctl.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <libgeom.h>
-#include <devid.h>
-
-/*
- * Decode a device id string.
- *
- * The string is copied verbatim into retdevid->devid; EINVAL is returned if
- * it does not fit.  *retminor_name is set to a newly allocated empty string,
- * which the caller releases (devid_str_free() calls free()).
- *
- * Returns 0 on success, EINVAL if devidstr is too long, or ENOMEM if the
- * minor-name string cannot be allocated.
- */
-int
-devid_str_decode(char *devidstr, ddi_devid_t *retdevid, char **retminor_name)
-{
-
-	if (strlcpy(retdevid->devid, devidstr, sizeof(retdevid->devid)) >=
-	    sizeof(retdevid->devid)) {
-		return (EINVAL);
-	}
-	*retminor_name = strdup("");
-	/*
-	 * The condition must guard the return directly: a ';' right after
-	 * it would turn the test into an empty statement and make this
-	 * function report ENOMEM unconditionally.
-	 */
-	if (*retminor_name == NULL)
-		return (ENOMEM);
-	return (0);
-}
-
-/*
- * Resolve a device id to a one-element name list.
- *
- * The provider name is looked up with g_get_name() and copied into the
- * devname field of a freshly malloc()ed devid_nmlist_t, which is stored in
- * *retlist; the caller releases it (devid_free_nmlist() calls free()).
- * search_path and minor_name are not used.
- *
- * Returns 0 on success, errno if g_get_name() fails (*retlist is left
- * untouched), ENOMEM if the list cannot be allocated, or ENAMETOOLONG if the
- * name does not fit.  In the last two cases *retlist is set to NULL so the
- * caller never sees a pointer to freed memory.
- */
-int
-devid_deviceid_to_nmlist(char *search_path, ddi_devid_t devid, char *minor_name,
-    devid_nmlist_t **retlist)
-{
-	char path[MAXPATHLEN];
-	devid_nmlist_t *list;
-
-	if (g_get_name(devid.devid, path, sizeof(path)) == -1)
-		return (errno);
-	list = malloc(sizeof(*list));
-	if (list == NULL) {
-		*retlist = NULL;
-		return (ENOMEM);
-	}
-	if (strlcpy(list[0].devname, path, sizeof(list[0].devname)) >=
-	    sizeof(list[0].devname)) {
-		free(list);
-		*retlist = NULL;
-		return (ENAMETOOLONG);
-	}
-	*retlist = list;
-	return (0);
-}
-
-/* Release a string obtained from one of the devid_* routines in this file. */
-void
-devid_str_free(char *str)
-{
-	free(str);
-}
-
-/* Intentionally empty: there is nothing to release for the by-value devid. */
-void
-devid_free(ddi_devid_t devid)
-{
-	(void)devid;
-}
-
-/* Release a name list; it is a single malloc()ed allocation. */
-void
-devid_free_nmlist(devid_nmlist_t *list)
-{
-	free(list);
-}
-
-/*
- * Fetch the identifier of the device open on 'fd' via the DIOCGIDENT ioctl.
- *
- * Returns 0 on success, errno if the ioctl fails, or ENOENT if the ioctl
- * succeeds but leaves an empty identifier.
- */
-int
-devid_get(int fd, ddi_devid_t *retdevid)
-{
-	int rc = 0;
-
-	if (ioctl(fd, DIOCGIDENT, retdevid->devid) == -1)
-		rc = errno;
-	else if (retdevid->devid[0] == '\0')
-		rc = ENOENT;
-	return (rc);
-}
-
-/*
- * Produce the minor name for 'fd'.  The result is always a newly allocated
- * empty string; 'fd' itself is not consulted.
- *
- * Returns 0 on success or ENOMEM if the string cannot be allocated.
- */
-int
-devid_get_minor_name(int fd, char **retminor_name)
-{
-	char *empty;
-
-	(void)fd;
-	empty = strdup("");
-	*retminor_name = empty;
-	return (empty != NULL ? 0 : ENOMEM);
-}
-
-/*
- * Encode a device id as a string: returns a strdup()ed copy of devid.devid
- * (NULL if the allocation fails).  minor_name is not used.
- */
-char *
-devid_str_encode(ddi_devid_t devid, char *minor_name)
-{
-	char *encoded;
-
-	(void)minor_name;
-	encoded = strdup(devid.devid);
-	return (encoded);
-}
diff --git a/compat/opensolaris/misc/fsshare.c b/compat/opensolaris/misc/fsshare.c
deleted file mode 100644
index ca82710..0000000
--- a/compat/opensolaris/misc/fsshare.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
-#include <errno.h>
-#include <libutil.h>
-#include <assert.h>
-#include <pathnames.h> /* _PATH_MOUNTDPID */
-#include <fsshare.h>
-
-/* Banner written at the top of the regenerated exports file. */
-#define FILE_HEADER "# !!! DO NOT EDIT THIS FILE MANUALLY !!!\n\n"
-/* Size of the option buffers used by translate_opts(). */
-#define OPTSSIZE 1024
-/* Size of the line buffer used by getline(). */
-#define MAXLINESIZE (PATH_MAX + OPTSSIZE)
-
-/*
- * Ask a running mountd(8) to re-read its configuration by sending SIGHUP.
- *
- * The pid is obtained by trying to take mountd's pidfile: if that succeeds,
- * mountd is not running and the pidfile is removed again; if it fails with
- * EEXIST, pidfile_open() has stored the owner's pid in mountdpid.  Any other
- * failure is silently ignored.
- */
-static void
-restart_mountd(void)
-{
-	struct pidfh *pfh;
-	pid_t mountdpid;
-
-	pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &mountdpid);
-	if (pfh != NULL) {
-		/* We got the pidfile, so mountd is not running. */
-		pidfile_remove(pfh);
-		return;
-	}
-	/* Only EEXIST means mountdpid holds the pid of a live mountd(8). */
-	if (errno == EEXIST)
-		kill(mountdpid, SIGHUP);
-}
-
-/*
- * Read the next meaningful line from 'fd'.
- *
- * Empty lines and lines starting with '#' are skipped.  When 'skip' is not
- * NULL, a line that starts with 'skip' followed by a tab, a space or the end
- * of the line is skipped as well (this drops the entry of one mountpoint).
- * A trailing newline is removed.
- *
- * Returns a pointer to a static buffer that the next call overwrites, or
- * NULL once fgets() reports end of file or an error.
- */
-static char *
-getline(FILE *fd, const char *skip)
-{
-	static char line[MAXLINESIZE];
-	size_t len, skiplen;
-	char *s, last;
-
-	/* Always initialized, so nothing below reads an indeterminate value. */
-	skiplen = (skip != NULL) ? strlen(skip) : 0;
-	for (;;) {
-		s = fgets(line, sizeof(line), fd);
-		if (s == NULL)
-			return (NULL);
-		/* Skip empty lines and comments. */
-		if (line[0] == '\n' || line[0] == '#')
-			continue;
-		len = strlen(line);
-		/* len is 0 if the data starts with a NUL byte. */
-		if (len > 0 && line[len - 1] == '\n')
-			line[len - 1] = '\0';
-		/*
-		 * Skip the given mountpoint.  line[skiplen] is examined only
-		 * after the prefix matched, which guarantees the line holds
-		 * at least skiplen characters before its terminator.
-		 */
-		if (skip != NULL && strncmp(skip, line, skiplen) == 0) {
-			last = line[skiplen];
-			if (last == '\t' || last == ' ' || last == '\0')
-				continue;
-		}
-		break;
-	}
-	return (line);
-}
-
-/*
- * Translate share options into the format accepted by exports(5), e.g.:
- *
- *	-ro -network=192.168.0.0 -mask=255.255.255.0 -maproot=0 freefall.freebsd.org 69.147.83.54
- *
- * Accepted input formats:
- *
- *	ro,network=192.168.0.0,mask=255.255.255.0,maproot=0,freefall.freebsd.org
- *	ro network=192.168.0.0 mask=255.255.255.0 maproot=0 freefall.freebsd.org
- *	-ro,-network=192.168.0.0,-mask=255.255.255.0,-maproot=0,freefall.freebsd.org
- *	-ro -network=192.168.0.0 -mask=255.255.255.0 -maproot=0 freefall.freebsd.org
- *
- * Recognized keywords:
- *
- *	ro, maproot, mapall, mask, network, alldirs, public, webnfs, index, quiet
- *
- * The input is split on '-', ',' and ' '.  Every non-empty token is emitted
- * followed by a space, and recognized keywords get a leading '-'.
- */
-static const char *known_opts[] = { "ro", "maproot", "mapall", "mask",
-    "network", "alldirs", "public", "webnfs", "index", "quiet", NULL };
-
-/* Return non-zero if 'opt' is a recognized keyword, bare or as "keyword=...". */
-static int
-is_known_opt(const char *opt)
-{
-	const char **kw;
-	size_t kwlen;
-
-	for (kw = known_opts; *kw != NULL; kw++) {
-		kwlen = strlen(*kw);
-		if (strncmp(*kw, opt, kwlen) == 0 &&
-		    (opt[kwlen] == '\0' || opt[kwlen] == '='))
-			return (1);
-	}
-	return (0);
-}
-
-/*
- * Returns a pointer to a static buffer that the next call overwrites.  Both
- * the input copy and the result are limited to OPTSSIZE bytes; anything
- * longer is truncated by strlcpy()/strlcat().
- */
-static char *
-translate_opts(const char *shareopts)
-{
-	static char newopts[OPTSSIZE];
-	char oldopts[OPTSSIZE];
-	char *tok, *rest;
-
-	strlcpy(oldopts, shareopts, sizeof(oldopts));
-	newopts[0] = '\0';
-	rest = oldopts;
-	while ((tok = strsep(&rest, "-, ")) != NULL) {
-		/* Adjacent separators produce empty tokens. */
-		if (tok[0] == '\0')
-			continue;
-		if (is_known_opt(tok))
-			strlcat(newopts, "-", sizeof(newopts));
-		strlcat(newopts, tok, sizeof(newopts));
-		strlcat(newopts, " ", sizeof(newopts));
-	}
-	return (newopts);
-}
-
-/*
- * Regenerate the exports file 'file' with respect to 'mountpoint'.
- *
- * A temporary file is created next to 'file', FILE_HEADER is written to it,
- * and every line getline() returns from the old file is copied over
- * (getline() is asked to skip the entry for 'mountpoint').  When 'share' is
- * non-zero a new entry for 'mountpoint' with the translated 'shareopts' is
- * appended.  If no error was recorded the temporary file is renamed over
- * 'file' and restart_mountd() is called; otherwise the temporary file is
- * unlinked.
- *
- * Returns 0 on success and a non-zero value on failure (ENAMETOOLONG, an
- * errno value, or the result of ferror()).
- */
-static int
-fsshare_main(const char *file, const char *mountpoint, const char *shareopts,
- int share)
-{
- char tmpfile[PATH_MAX];
- char *line;
- FILE *newfd, *oldfd;
- int fd, error;
-
- newfd = oldfd = NULL;
- error = 0;
-
- /*
- * Create temporary file in the same directory, so we can atomically
- * rename it.
- */
- if (strlcpy(tmpfile, file, sizeof(tmpfile)) >= sizeof(tmpfile))
- return (ENAMETOOLONG);
- if (strlcat(tmpfile, ".XXXXXXXX", sizeof(tmpfile)) >= sizeof(tmpfile))
- return (ENAMETOOLONG);
- fd = mkstemp(tmpfile);
- if (fd == -1)
- return (errno);
- /*
- * File name is random, so we don't really need file lock now, but it
- * will be needed after rename(2).
- */
- /*
- * NOTE(review): 'error' keeps flock()'s return value here.  It is only
- * reset to 0 in the branch where the old file was opened, so a -1 with
- * EOPNOTSUPP appears to survive into the share-with-no-old-file path --
- * confirm.
- */
- error = flock(fd, LOCK_EX);
- assert(error == 0 || (error == -1 && errno == EOPNOTSUPP));
- /* NOTE(review): with NDEBUG a failed fdopen() is not handled -- confirm. */
- newfd = fdopen(fd, "r+");
- assert(newfd != NULL);
- /* Open old exports file. */
- oldfd = fopen(file, "r");
- if (oldfd == NULL) {
- if (share) {
- /* Sharing without an existing exports file is fine. */
- if (errno != ENOENT) {
- error = errno;
- goto out;
- }
- } else {
- /* If there is no exports file, ignore the error. */
- if (errno == ENOENT)
- errno = 0;
- error = errno;
- goto out;
- }
- } else {
- error = flock(fileno(oldfd), LOCK_EX);
- assert(error == 0 || (error == -1 && errno == EOPNOTSUPP));
- error = 0;
- }
-
- /* Place big, fat warning at the beginning of the file. */
- fprintf(newfd, "%s", FILE_HEADER);
- /* Copy every line of the old file that getline() does not filter out. */
- while (oldfd != NULL && (line = getline(oldfd, mountpoint)) != NULL)
- fprintf(newfd, "%s\n", line);
- /*
- * NOTE(review): ferror() yields a non-zero flag rather than an errno
- * value -- confirm callers only test the result for non-zero.
- */
- if (oldfd != NULL && ferror(oldfd) != 0) {
- error = ferror(oldfd);
- goto out;
- }
- if (ferror(newfd) != 0) {
- error = ferror(newfd);
- goto out;
- }
- if (share) {
- fprintf(newfd, "%s\t%s\n", mountpoint,
- translate_opts(shareopts));
- }
-
-out:
- /* Discard the temporary file on error, otherwise move it into place. */
- if (error != 0)
- unlink(tmpfile);
- else {
- if (rename(tmpfile, file) == -1) {
- error = errno;
- unlink(tmpfile);
- } else {
- /*
- * Send SIGHUP to mountd, but unlock exports file later.
- */
- restart_mountd();
- }
- }
- if (oldfd != NULL) {
- flock(fileno(oldfd), LOCK_UN);
- fclose(oldfd);
- }
- if (newfd != NULL) {
- flock(fileno(newfd), LOCK_UN);
- fclose(newfd);
- }
- return (error);
-}
-
-/*
- * Publish 'mountpoint' with the options 'shareopts' in the exports file
- * 'file'.  Returns the result of fsshare_main().
- */
-int
-fsshare(const char *file, const char *mountpoint, const char *shareopts)
-{
-	int error;
-
-	error = fsshare_main(file, mountpoint, shareopts, 1);
-	return (error);
-}
-
-/*
- * Drop the entry for 'mountpoint' from the exports file 'file'.  Returns the
- * result of fsshare_main().
- */
-int
-fsunshare(const char *file, const char *mountpoint)
-{
-	int error;
-
-	error = fsshare_main(file, mountpoint, NULL, 0);
-	return (error);
-}
diff --git a/compat/opensolaris/misc/mkdirp.c b/compat/opensolaris/misc/mkdirp.c
deleted file mode 100644
index 06cec5b..0000000
--- a/compat/opensolaris/misc/mkdirp.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/* Copyright (c) 1988 AT&T */
-/* All Rights Reserved */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "@(#)mkdirp.c 1.15 06/01/04 SMI"
-
-/*
- * Creates directory and its parents if the parents do not
- * exist yet.
- *
- * Returns -1 if fails for reasons other than non-existing
- * parents.
- * Does NOT simplify pathnames with . or .. in them.
- */
-
-#include <sys/types.h>
-#include <libgen.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/stat.h>
-
-static char *simplify(const char *str);
-
-/*
- * Create directory 'd' with permissions 'mode', creating missing parent
- * directories as needed.
- *
- * All work is done on a private copy of the path returned by simplify(); the
- * copy is freed on every return path.
- *
- * Returns 0 on success and -1 on failure.
- */
-int
-mkdirp(const char *d, mode_t mode)
-{
- char *endptr, *ptr, *slash, *str;
-
- str = simplify(d);
-
- /* If space couldn't be allocated for the simplified names, return. */
-
- if (str == NULL)
- return (-1);
-
- /* Try to make the directory */
-
- if (mkdir(str, mode) == 0) {
- free(str);
- return (0);
- }
- /* Only a missing parent (ENOENT) is handled below. */
- if (errno != ENOENT) {
- free(str);
- return (-1);
- }
- /* endptr marks the terminator of the complete path. */
- endptr = strrchr(str, '\0');
- slash = strrchr(str, '/');
-
- /* Search upward for the non-existing parent */
-
- while (slash != NULL) {
-
- /* Cut the path at the last '/', leaving only the parent in str. */
- ptr = slash;
- *ptr = '\0';
-
- /* If reached an existing parent, break */
-
- if (access(str, F_OK) == 0)
- break;
-
- /* If non-existing parent */
-
- else {
- slash = strrchr(str, '/');
-
- /* If under / or current directory, make it. */
-
- if (slash == NULL || slash == str) {
- if (mkdir(str, mode) != 0 && errno != EEXIST) {
- free(str);
- return (-1);
- }
- break;
- }
- }
- }
-
- /* Create directories starting from upmost non-existing parent */
-
- /*
- * Each pass puts back one '/' that the loop above replaced with a
- * terminator, until the string ends at endptr again.
- */
- while ((ptr = strchr(str, '\0')) != endptr) {
- *ptr = '/';
- if (mkdir(str, mode) != 0 && errno != EEXIST) {
- /*
- * If the mkdir fails because str already
- * exists (EEXIST), then str has the form
- * "existing-dir/..", and this is really
- * ok. (Remember, this loop is creating the
- * portion of the path that didn't exist)
- */
- free(str);
- return (-1);
- }
- }
- free(str);
- return (0);
-}
-
-/*
- * simplify - given a pathname, simplify that path by collapsing every run
- *	      of contiguous slashes into a single slash.
- *
- *	      A simplified copy of the argument is returned to the
- *	      caller, or NULL is returned on error (NULL argument,
- *	      allocation failure, or a path that cannot be converted
- *	      to or from wide characters).
- *
- *	      The caller should handle error reporting based upon the
- *	      returned value, and should free the returned value,
- *	      when appropriate.
- */
-
-static char *
-simplify(const char *str)
-{
-	size_t i;		/* index; same type as the lengths below */
-	size_t mbPathlen;	/* length of multi-byte path */
-	size_t wcPathlen;	/* length of wide-character path */
-	wchar_t *wptr;		/* scratch pointer */
-	wchar_t *wcPath;	/* wide-character version of the path */
-	char *mbPath;		/* The copy of the path to be returned */
-
-	/*
-	 * bail out if there is nothing there.
-	 */
-
-	if (str == NULL)
-		return (NULL);
-
-	/*
-	 * Get a copy of the argument.
-	 */
-
-	if ((mbPath = strdup(str)) == NULL)
-		return (NULL);
-
-	/*
-	 * convert the multi-byte version of the path to a
-	 * wide-character rendering, for doing our figuring.
-	 * The buffer is zero-filled and one element longer than the
-	 * longest possible result, so it is always terminated.
-	 */
-
-	mbPathlen = strlen(mbPath);
-
-	if ((wcPath = calloc(mbPathlen + 1, sizeof (wchar_t))) == NULL) {
-		free(mbPath);
-		return (NULL);
-	}
-
-	if ((wcPathlen = mbstowcs(wcPath, mbPath, mbPathlen)) == (size_t)-1) {
-		free(mbPath);
-		free(wcPath);
-		return (NULL);
-	}
-
-	/*
-	 * remove duplicate slashes first ("//../" -> "/"): copy each
-	 * character down in place and, after a slash, step over the
-	 * remainder of the run.
-	 */
-
-	for (wptr = wcPath, i = 0; i < wcPathlen; i++) {
-		*wptr++ = wcPath[i];
-
-		if (wcPath[i] == L'/') {
-			while (wcPath[i + 1] == L'/')
-				i++;
-		}
-	}
-
-	*wptr = L'\0';
-
-	/*
-	 * now convert back to the multi-byte format.  mbPath holds
-	 * mbPathlen + 1 bytes and the result is never longer than the
-	 * original, so passing the full size leaves room for the
-	 * terminator.
-	 */
-
-	if (wcstombs(mbPath, wcPath, mbPathlen + 1) == (size_t)-1) {
-		free(mbPath);
-		free(wcPath);
-		return (NULL);
-	}
-
-	free(wcPath);
-	return (mbPath);
-}
diff --git a/compat/opensolaris/misc/mnttab.c b/compat/opensolaris/misc/mnttab.c
deleted file mode 100644
index c1971ca..0000000
--- a/compat/opensolaris/misc/mnttab.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*-
- * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * This file implements Solaris compatible getmntany() and hasmntopt()
- * functions.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/mount.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <stdio.h>
-
-/*
- * Split off the next comma-separated option from the string at *p.
- *
- * Leading white space is skipped, the option's terminating ',' (if any) is
- * overwritten with a terminator, and *p is advanced past it.  The string is
- * modified in place.
- *
- * Returns a pointer to the start of the option; it points at an empty string
- * once the input is exhausted.
- */
-static char *
-mntopt(char **p)
-{
-	char *cp = *p;
-	char *retstr;
-
-	/* <ctype.h> classifiers need an unsigned char value (or EOF). */
-	while (*cp != '\0' && isspace((unsigned char)*cp))
-		cp++;
-
-	retstr = cp;
-	while (*cp != '\0' && *cp != ',')
-		cp++;
-
-	if (*cp != '\0') {
-		*cp = '\0';
-		cp++;
-	}
-
-	*p = cp;
-	return (retstr);
-}
-
-/*
- * Look for option 'opt' in mnt->mnt_mntopts.
- *
- * The option string is copied into a local buffer and walked with mntopt().
- * An entry matches when it starts with 'opt' (prefix comparison over
- * strlen(opt) characters).
- *
- * Returns a pointer to the matching entry inside mnt->mnt_mntopts, or NULL
- * if there is no option string, no entry matches, or the option string does
- * not fit into the MNT_LINE_MAX-byte scratch buffer.
- */
-char *
-hasmntopt(struct mnttab *mnt, char *opt)
-{
-	char tmpopts[MNT_LINE_MAX];
-	char *f, *opts = tmpopts;
-
-	if (mnt->mnt_mntopts == NULL)
-		return (NULL);
-	/* Bounded copy: an over-long option string must not overrun tmpopts. */
-	if (strlcpy(tmpopts, mnt->mnt_mntopts, sizeof(tmpopts)) >=
-	    sizeof(tmpopts))
-		return (NULL);
-	f = mntopt(&opts);
-	for (; *f; f = mntopt(&opts)) {
-		/* Translate the offset in the copy back into the original. */
-		if (strncmp(opt, f, strlen(opt)) == 0)
-			return (f - tmpopts + mnt->mnt_mntopts);
-	}
-	return (NULL);
-}
-
-/*
- * Append 'opt' to the comma-separated option list in 'mntopts', a buffer of
- * 'size' bytes.  A ',' is inserted first unless the list is still empty;
- * strlcat() truncates whatever does not fit.
- */
-static void
-optadd(char *mntopts, size_t size, const char *opt)
-{
-	const int need_sep = (mntopts[0] != '\0');
-
-	if (need_sep)
-		strlcat(mntopts, ",", size);
-	strlcat(mntopts, opt, size);
-}
-
-/*
- * Find the first mounted file system that matches 'mrefp' and describe it
- * in 'mgetp'.
- *
- * The mount table is obtained with getfsstat(); 'fd' is unused.  A non-NULL
- * mnt_special, mnt_mountp or mnt_fstype in 'mrefp' must compare equal
- * (strcmp) to the entry's f_mntfromname, f_mntonname or f_fstypename
- * respectively; NULL fields match anything.
- *
- * On a match the string fields of 'mgetp' point into static storage (the
- * statfs array and the mntopts buffer), which is released or overwritten by
- * the next call.
- *
- * Returns 0 on a match, and -1 if getfsstat() or malloc() fails or nothing
- * matches.
- */
-int
-getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp)
-{
- static struct statfs *sfs = NULL;
- static char mntopts[MNTMAXSTR];
- /* NOTE(review): 'o' is declared but not used in this function. */
- struct opt *o;
- long i, n, flags;
-
- /* Drop the table left over from a previous successful call. */
- if (sfs != NULL) {
- free(sfs);
- sfs = NULL;
- }
- mntopts[0] = '\0';
-
- /* First call only asks for the number of mounted file systems. */
- n = getfsstat(NULL, 0, MNT_NOWAIT);
- if (n == -1)
- return (-1);
- /* From here to the second getfsstat() 'n' is a byte count; 8 spare entries. */
- n = sizeof(*sfs) * (n + 8);
- sfs = malloc(n);
- if (sfs == NULL)
- return (-1);
- n = getfsstat(sfs, n, MNT_WAIT);
- if (n == -1) {
- free(sfs);
- sfs = NULL;
- return (-1);
- }
- for (i = 0; i < n; i++) {
- if (mrefp->mnt_special != NULL &&
- strcmp(mrefp->mnt_special, sfs[i].f_mntfromname) != 0) {
- continue;
- }
- if (mrefp->mnt_mountp != NULL &&
- strcmp(mrefp->mnt_mountp, sfs[i].f_mntonname) != 0) {
- continue;
- }
- if (mrefp->mnt_fstype != NULL &&
- strcmp(mrefp->mnt_fstype, sfs[i].f_fstypename) != 0) {
- continue;
- }
- /* Build the option string from the mount flags of the match. */
- flags = sfs[i].f_flags;
-#define OPTADD(opt) optadd(mntopts, sizeof(mntopts), (opt))
- if (flags & MNT_RDONLY)
- OPTADD(MNTOPT_RO);
- else
- OPTADD(MNTOPT_RW);
- if (flags & MNT_NOSUID)
- OPTADD(MNTOPT_NOSUID);
- else
- OPTADD(MNTOPT_SETUID);
- if (flags & MNT_UPDATE)
- OPTADD(MNTOPT_REMOUNT);
- if (flags & MNT_NOATIME)
- OPTADD(MNTOPT_NOATIME);
- else
- OPTADD(MNTOPT_ATIME);
- /* Added unconditionally, whatever the flags say. */
- OPTADD(MNTOPT_NOXATTR);
- if (flags & MNT_NOEXEC)
- OPTADD(MNTOPT_NOEXEC);
- else
- OPTADD(MNTOPT_EXEC);
-#undef OPTADD
- /* sfs is kept (not freed) because mgetp points into it. */
- mgetp->mnt_special = sfs[i].f_mntfromname;
- mgetp->mnt_mountp = sfs[i].f_mntonname;
- mgetp->mnt_fstype = sfs[i].f_fstypename;
- mgetp->mnt_mntopts = mntopts;
- return (0);
- }
- /* No match: nothing refers to the table, so release it now. */
- free(sfs);
- sfs = NULL;
- return (-1);
-}
diff --git a/compat/opensolaris/misc/zmount.c b/compat/opensolaris/misc/zmount.c
deleted file mode 100644
index d80a3b3..0000000
--- a/compat/opensolaris/misc/zmount.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*-
- * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * This file implements a Solaris-compatible zmount() function.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/mount.h>
-#include <sys/uio.h>
-#include <sys/mntent.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-/*
- * Append one name/value pair to the iovec array that is later passed to
- * nmount(2).
- *
- * `*iov' grows by two entries: the first receives a strdup(3)'d copy of
- * `name' (its length includes the terminating NUL), the second receives
- * `val' itself, which is not copied.  A `len' of (size_t)-1 means "work it
- * out": strlen(val) + 1 for a non-NULL `val', 0 otherwise.  `*iovlen' is
- * advanced by two.
- *
- * If memory runs out, everything built so far is released, `*iov' is set to
- * NULL and `*iovlen' to -1; once `*iovlen' is negative further calls do
- * nothing.  This assumes every even slot was filled in by this function.
- */
-static void
-build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
-    size_t len)
-{
-	struct iovec *grown;
-	char *namecopy;
-	int i, j;
-
-	if (*iovlen < 0)
-		return;
-	i = *iovlen;
-
-	namecopy = strdup(name);
-	grown = NULL;
-	if (namecopy != NULL)
-		grown = realloc(*iov, sizeof(**iov) * (i + 2));
-	if (grown == NULL) {
-		/*
-		 * realloc(3) leaves the old array untouched on failure, so
-		 * free it and the name copies held in its even slots rather
-		 * than dropping the only reference to them.
-		 */
-		free(namecopy);
-		for (j = 0; j < i; j += 2)
-			free((*iov)[j].iov_base);
-		free(*iov);
-		*iov = NULL;
-		*iovlen = -1;
-		return;
-	}
-	*iov = grown;
-
-	(*iov)[i].iov_base = namecopy;
-	(*iov)[i].iov_len = strlen(name) + 1;
-	i++;
-	(*iov)[i].iov_base = val;
-	if (len == (size_t)-1) {
-		if (val != NULL)
-			len = strlen(val) + 1;
-		else
-			len = 0;
-	}
-	/* iov_len is a size_t; store the length without narrowing it. */
-	(*iov)[i].iov_len = len;
-	*iovlen = ++i;
-}
-
-/*
- * Solaris-compatible zmount(): mount the ZFS dataset `spec' on `dir' by
- * translating the request into an nmount(2) call.
- *
- * Only the form used for ZFS is supported, and that is enforced with
- * assertions: `mflag' must be 0, `fstype' must be MNTTYPE_ZFS, no binary
- * mount data may be passed, and `optptr' must be a non-NULL option string
- * with `optlen' > 0.  Returns whatever nmount(2) returns.
- */
-int
-zmount(const char *spec, const char *dir, int mflag, char *fstype,
-    char *dataptr, int datalen, char *optptr, int optlen)
-{
-	struct iovec *iov;
-	char *optstr, *p;
-	int i, iovlen, rv;
-
-	assert(spec != NULL);
-	assert(dir != NULL);
-	assert(mflag == 0);
-	assert(fstype != NULL);
-	assert(strcmp(fstype, MNTTYPE_ZFS) == 0);
-	assert(dataptr == NULL);
-	assert(datalen == 0);
-	assert(optptr != NULL);
-	assert(optlen > 0);
-
-	/* Work on a private copy: strsep(3) below modifies the string. */
-	optstr = strdup(optptr);
-	assert(optstr != NULL);
-
-	iov = NULL;
-	iovlen = 0;
-	build_iovec(&iov, &iovlen, "fstype", fstype, (size_t)-1);
-	build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir),
-	    (size_t)-1);
-	build_iovec(&iov, &iovlen, "from", __DECONST(char *, spec), (size_t)-1);
-	/*
-	 * NOTE(review): the loop body runs before strsep(3) terminates the
-	 * current token, so each name handed to build_iovec() is the whole
-	 * remainder of the option string, not a single option.  Left as is
-	 * because changing what reaches nmount(2) needs checking against the
-	 * kernel side -- confirm whether this is intended.
-	 */
-	for (p = optstr; p != NULL; strsep(&p, ",/ "))
-		build_iovec(&iov, &iovlen, p, NULL, (size_t)-1);
-	rv = nmount(iov, iovlen, 0);
-
-	/*
-	 * The vector is no longer needed: free the name copies kept in its
-	 * even slots and the array itself.  After an allocation failure in
-	 * build_iovec() iovlen is -1 and iov is NULL, so this is a no-op.
-	 */
-	for (i = 0; i < iovlen; i += 2)
-		free(iov[i].iov_base);
-	free(iov);
-	free(optstr);
-	return (rv);
-}
diff --git a/compat/opensolaris/misc/zone.c b/compat/opensolaris/misc/zone.c
deleted file mode 100644
index 1ce77cf..0000000
--- a/compat/opensolaris/misc/zone.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#include <sys/zone.h>
-
-/*
- * Solaris-compatible getzoneid().  Returns the value of the
- * security.jail.jailed sysctl.  A failed lookup trips an assertion; when
- * assertions are compiled out (NDEBUG) it yields 0 instead of an
- * indeterminate value.
- */
-int
-getzoneid(void)
-{
-	size_t size;
-	int jailid = 0;
-
-	/* Knowing whether or not we are in a jail is enough for our needs. */
-	size = sizeof(jailid);
-	if (sysctlbyname("security.jail.jailed", &jailid, &size, NULL, 0) == -1)
-		assert(!"No security.jail.jailed sysctl!");
-	return (jailid);
-}
diff --git a/contrib/opensolaris/OPENSOLARIS.LICENSE b/contrib/opensolaris/OPENSOLARIS.LICENSE
deleted file mode 100644
index da23621..0000000
--- a/contrib/opensolaris/OPENSOLARIS.LICENSE
+++ /dev/null
@@ -1,384 +0,0 @@
-Unless otherwise noted, all files in this distribution are released
-under the Common Development and Distribution License (CDDL).
-Exceptions are noted within the associated source files.
-
---------------------------------------------------------------------
-
-
-COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0
-
-1. Definitions.
-
- 1.1. "Contributor" means each individual or entity that creates
- or contributes to the creation of Modifications.
-
- 1.2. "Contributor Version" means the combination of the Original
- Software, prior Modifications used by a Contributor (if any),
- and the Modifications made by that particular Contributor.
-
- 1.3. "Covered Software" means (a) the Original Software, or (b)
- Modifications, or (c) the combination of files containing
- Original Software with files containing Modifications, in
- each case including portions thereof.
-
- 1.4. "Executable" means the Covered Software in any form other
- than Source Code.
-
- 1.5. "Initial Developer" means the individual or entity that first
- makes Original Software available under this License.
-
- 1.6. "Larger Work" means a work which combines Covered Software or
- portions thereof with code not governed by the terms of this
- License.
-
- 1.7. "License" means this document.
-
- 1.8. "Licensable" means having the right to grant, to the maximum
- extent possible, whether at the time of the initial grant or
- subsequently acquired, any and all of the rights conveyed
- herein.
-
- 1.9. "Modifications" means the Source Code and Executable form of
- any of the following:
-
- A. Any file that results from an addition to, deletion from or
- modification of the contents of a file containing Original
- Software or previous Modifications;
-
- B. Any new file that contains any part of the Original
- Software or previous Modifications; or
-
- C. Any new file that is contributed or otherwise made
- available under the terms of this License.
-
- 1.10. "Original Software" means the Source Code and Executable
- form of computer software code that is originally released
- under this License.
-
- 1.11. "Patent Claims" means any patent claim(s), now owned or
- hereafter acquired, including without limitation, method,
- process, and apparatus claims, in any patent Licensable by
- grantor.
-
- 1.12. "Source Code" means (a) the common form of computer software
- code in which modifications are made and (b) associated
- documentation included in or with such code.
-
- 1.13. "You" (or "Your") means an individual or a legal entity
- exercising rights under, and complying with all of the terms
- of, this License. For legal entities, "You" includes any
- entity which controls, is controlled by, or is under common
- control with You. For purposes of this definition,
- "control" means (a) the power, direct or indirect, to cause
- the direction or management of such entity, whether by
- contract or otherwise, or (b) ownership of more than fifty
- percent (50%) of the outstanding shares or beneficial
- ownership of such entity.
-
-2. License Grants.
-
- 2.1. The Initial Developer Grant.
-
- Conditioned upon Your compliance with Section 3.1 below and
- subject to third party intellectual property claims, the Initial
- Developer hereby grants You a world-wide, royalty-free,
- non-exclusive license:
-
- (a) under intellectual property rights (other than patent or
- trademark) Licensable by Initial Developer, to use,
- reproduce, modify, display, perform, sublicense and
- distribute the Original Software (or portions thereof),
- with or without Modifications, and/or as part of a Larger
- Work; and
-
- (b) under Patent Claims infringed by the making, using or
- selling of Original Software, to make, have made, use,
- practice, sell, and offer for sale, and/or otherwise
- dispose of the Original Software (or portions thereof).
-
- (c) The licenses granted in Sections 2.1(a) and (b) are
- effective on the date Initial Developer first distributes
- or otherwise makes the Original Software available to a
- third party under the terms of this License.
-
- (d) Notwithstanding Section 2.1(b) above, no patent license is
- granted: (1) for code that You delete from the Original
- Software, or (2) for infringements caused by: (i) the
- modification of the Original Software, or (ii) the
- combination of the Original Software with other software
- or devices.
-
- 2.2. Contributor Grant.
-
- Conditioned upon Your compliance with Section 3.1 below and
- subject to third party intellectual property claims, each
- Contributor hereby grants You a world-wide, royalty-free,
- non-exclusive license:
-
- (a) under intellectual property rights (other than patent or
- trademark) Licensable by Contributor to use, reproduce,
- modify, display, perform, sublicense and distribute the
- Modifications created by such Contributor (or portions
- thereof), either on an unmodified basis, with other
- Modifications, as Covered Software and/or as part of a
- Larger Work; and
-
- (b) under Patent Claims infringed by the making, using, or
- selling of Modifications made by that Contributor either
- alone and/or in combination with its Contributor Version
- (or portions of such combination), to make, use, sell,
- offer for sale, have made, and/or otherwise dispose of:
- (1) Modifications made by that Contributor (or portions
- thereof); and (2) the combination of Modifications made by
- that Contributor with its Contributor Version (or portions
- of such combination).
-
- (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
- effective on the date Contributor first distributes or
- otherwise makes the Modifications available to a third
- party.
-
- (d) Notwithstanding Section 2.2(b) above, no patent license is
- granted: (1) for any code that Contributor has deleted
- from the Contributor Version; (2) for infringements caused
- by: (i) third party modifications of Contributor Version,
- or (ii) the combination of Modifications made by that
- Contributor with other software (except as part of the
- Contributor Version) or other devices; or (3) under Patent
- Claims infringed by Covered Software in the absence of
- Modifications made by that Contributor.
-
-3. Distribution Obligations.
-
- 3.1. Availability of Source Code.
-
- Any Covered Software that You distribute or otherwise make
- available in Executable form must also be made available in Source
- Code form and that Source Code form must be distributed only under
- the terms of this License. You must include a copy of this
- License with every copy of the Source Code form of the Covered
- Software You distribute or otherwise make available. You must
- inform recipients of any such Covered Software in Executable form
- as to how they can obtain such Covered Software in Source Code
- form in a reasonable manner on or through a medium customarily
- used for software exchange.
-
- 3.2. Modifications.
-
- The Modifications that You create or to which You contribute are
- governed by the terms of this License. You represent that You
- believe Your Modifications are Your original creation(s) and/or
- You have sufficient rights to grant the rights conveyed by this
- License.
-
- 3.3. Required Notices.
-
- You must include a notice in each of Your Modifications that
- identifies You as the Contributor of the Modification. You may
- not remove or alter any copyright, patent or trademark notices
- contained within the Covered Software, or any notices of licensing
- or any descriptive text giving attribution to any Contributor or
- the Initial Developer.
-
- 3.4. Application of Additional Terms.
-
- You may not offer or impose any terms on any Covered Software in
- Source Code form that alters or restricts the applicable version
- of this License or the recipients' rights hereunder. You may
- choose to offer, and to charge a fee for, warranty, support,
- indemnity or liability obligations to one or more recipients of
- Covered Software. However, you may do so only on Your own behalf,
- and not on behalf of the Initial Developer or any Contributor.
- You must make it absolutely clear that any such warranty, support,
- indemnity or liability obligation is offered by You alone, and You
- hereby agree to indemnify the Initial Developer and every
- Contributor for any liability incurred by the Initial Developer or
- such Contributor as a result of warranty, support, indemnity or
- liability terms You offer.
-
- 3.5. Distribution of Executable Versions.
-
- You may distribute the Executable form of the Covered Software
- under the terms of this License or under the terms of a license of
- Your choice, which may contain terms different from this License,
- provided that You are in compliance with the terms of this License
- and that the license for the Executable form does not attempt to
- limit or alter the recipient's rights in the Source Code form from
- the rights set forth in this License. If You distribute the
- Covered Software in Executable form under a different license, You
- must make it absolutely clear that any terms which differ from
- this License are offered by You alone, not by the Initial
- Developer or Contributor. You hereby agree to indemnify the
- Initial Developer and every Contributor for any liability incurred
- by the Initial Developer or such Contributor as a result of any
- such terms You offer.
-
- 3.6. Larger Works.
-
- You may create a Larger Work by combining Covered Software with
- other code not governed by the terms of this License and
- distribute the Larger Work as a single product. In such a case,
- You must make sure the requirements of this License are fulfilled
- for the Covered Software.
-
-4. Versions of the License.
-
- 4.1. New Versions.
-
- Sun Microsystems, Inc. is the initial license steward and may
- publish revised and/or new versions of this License from time to
- time. Each version will be given a distinguishing version number.
- Except as provided in Section 4.3, no one other than the license
- steward has the right to modify this License.
-
- 4.2. Effect of New Versions.
-
- You may always continue to use, distribute or otherwise make the
- Covered Software available under the terms of the version of the
- License under which You originally received the Covered Software.
- If the Initial Developer includes a notice in the Original
- Software prohibiting it from being distributed or otherwise made
- available under any subsequent version of the License, You must
- distribute and make the Covered Software available under the terms
- of the version of the License under which You originally received
- the Covered Software. Otherwise, You may also choose to use,
- distribute or otherwise make the Covered Software available under
- the terms of any subsequent version of the License published by
- the license steward.
-
- 4.3. Modified Versions.
-
- When You are an Initial Developer and You want to create a new
- license for Your Original Software, You may create and use a
- modified version of this License if You: (a) rename the license
- and remove any references to the name of the license steward
- (except to note that the license differs from this License); and
- (b) otherwise make it clear that the license contains terms which
- differ from this License.
-
-5. DISCLAIMER OF WARRANTY.
-
- COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
- BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
- INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
- SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
- PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
- PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY
- COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
- INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
- NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
- WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
- ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
- DISCLAIMER.
-
-6. TERMINATION.
-
- 6.1. This License and the rights granted hereunder will terminate
- automatically if You fail to comply with terms herein and fail to
- cure such breach within 30 days of becoming aware of the breach.
- Provisions which, by their nature, must remain in effect beyond
- the termination of this License shall survive.
-
- 6.2. If You assert a patent infringement claim (excluding
- declaratory judgment actions) against Initial Developer or a
- Contributor (the Initial Developer or Contributor against whom You
- assert such claim is referred to as "Participant") alleging that
- the Participant Software (meaning the Contributor Version where
- the Participant is a Contributor or the Original Software where
- the Participant is the Initial Developer) directly or indirectly
- infringes any patent, then any and all rights granted directly or
- indirectly to You by such Participant, the Initial Developer (if
- the Initial Developer is not the Participant) and all Contributors
- under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
- notice from Participant terminate prospectively and automatically
- at the expiration of such 60 day notice period, unless if within
- such 60 day period You withdraw Your claim with respect to the
- Participant Software against such Participant either unilaterally
- or pursuant to a written agreement with Participant.
-
- 6.3. In the event of termination under Sections 6.1 or 6.2 above,
- all end user licenses that have been validly granted by You or any
- distributor hereunder prior to termination (excluding licenses
- granted to You by any distributor) shall survive termination.
-
-7. LIMITATION OF LIABILITY.
-
- UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
- (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
- INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
- COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
- LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
- CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
- LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
- STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
- COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
- INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
- LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
- INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
- APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
- NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
- CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
- APPLY TO YOU.
-
-8. U.S. GOVERNMENT END USERS.
-
- The Covered Software is a "commercial item," as that term is
- defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
- computer software" (as that term is defined at 48
- C.F.R. 252.227-7014(a)(1)) and "commercial computer software
- documentation" as such terms are used in 48 C.F.R. 12.212
- (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48
- C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
- U.S. Government End Users acquire Covered Software with only those
- rights set forth herein. This U.S. Government Rights clause is in
- lieu of, and supersedes, any other FAR, DFAR, or other clause or
- provision that addresses Government rights in computer software
- under this License.
-
-9. MISCELLANEOUS.
-
- This License represents the complete agreement concerning subject
- matter hereof. If any provision of this License is held to be
- unenforceable, such provision shall be reformed only to the extent
- necessary to make it enforceable. This License shall be governed
- by the law of the jurisdiction specified in a notice contained
- within the Original Software (except to the extent applicable law,
- if any, provides otherwise), excluding such jurisdiction's
- conflict-of-law provisions. Any litigation relating to this
- License shall be subject to the jurisdiction of the courts located
- in the jurisdiction and venue specified in a notice contained
- within the Original Software, with the losing party responsible
- for costs, including, without limitation, court costs and
- reasonable attorneys' fees and expenses. The application of the
- United Nations Convention on Contracts for the International Sale
- of Goods is expressly excluded. Any law or regulation which
- provides that the language of a contract shall be construed
- against the drafter shall not apply to this License. You agree
- that You alone are responsible for compliance with the United
- States export administration regulations (and the export control
- laws and regulation of any other countries) when You use,
- distribute or otherwise make available any Covered Software.
-
-10. RESPONSIBILITY FOR CLAIMS.
-
- As between Initial Developer and the Contributors, each party is
- responsible for claims and damages arising, directly or
- indirectly, out of its utilization of rights under this License
- and You agree to work with Initial Developer and Contributors to
- distribute such responsibility on an equitable basis. Nothing
- herein is intended or shall be deemed to constitute any admission
- of liability.
-
---------------------------------------------------------------------
-
-NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND
-DISTRIBUTION LICENSE (CDDL)
-
-For Covered Software in this distribution, this License shall
-be governed by the laws of the State of California (excluding
-conflict-of-law provisions).
-
-Any litigation relating to this License shall be subject to the
-jurisdiction of the Federal Courts of the Northern District of
-California and the state courts of the State of California, with
-venue lying in Santa Clara County, California.
diff --git a/contrib/opensolaris/cmd/zdb/zdb.8 b/contrib/opensolaris/cmd/zdb/zdb.8
deleted file mode 100644
index 87913f6..0000000
--- a/contrib/opensolaris/cmd/zdb/zdb.8
+++ /dev/null
@@ -1,93 +0,0 @@
-'\" te
-.\" CDDL HEADER START
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\" CDDL HEADER END
-.\" Copyright (c) 2004, Sun Microsystems, Inc. All Rights Reserved.
-.TH zdb 1M "31 Oct 2005" "SunOS 5.11" "System Administration Commands"
-.SH NAME
-zdb \- ZFS debugger
-.SH SYNOPSIS
-.LP
-.nf
-\fBzdb\fR \fIpool\fR
-.fi
-
-.SH DESCRIPTION
-.LP
-The \fBzdb\fR command is used by support engineers to diagnose failures and gather statistics. Since the \fBZFS\fR file system is always consistent on disk and is self-repairing, \fBzdb\fR should only be run under the direction of a support engineer.
-.LP
-If no arguments are specified, \fBzdb\fR performs basic consistency checks on the pool and associated datasets, and reports any problems detected.
-.LP
-Any options supported by this command are internal to Sun and subject to change at any time.
-.SH EXIT STATUS
-.LP
-The following exit values are returned:
-.sp
-.ne 2
-.mk
-.na
-\fB\fB0\fR\fR
-.ad
-.RS 5n
-.rt
-The pool is consistent.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB1\fR\fR
-.ad
-.RS 5n
-.rt
-An error was detected.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB2\fR\fR
-.ad
-.RS 5n
-.rt
-Invalid command line options were specified.
-.RE
-
-.SH ATTRIBUTES
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-tab(;) box;
-cw(2.75i) |cw(2.75i)
-lw(2.75i) |lw(2.75i)
-.
-ATTRIBUTE TYPE;ATTRIBUTE VALUE
-_
-Availability;SUNWzfsu
-_
-Interface Stability;Unstable
-.TE
-
-.SH SEE ALSO
-.LP
-\fBzfs\fR(1M), \fBzpool\fR(1M), \fBattributes\fR(5)
diff --git a/contrib/opensolaris/cmd/zdb/zdb.c b/contrib/opensolaris/cmd/zdb/zdb.c
deleted file mode 100644
index 2dc459d..0000000
--- a/contrib/opensolaris/cmd/zdb/zdb.c
+++ /dev/null
@@ -1,2193 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <stdio.h>
-#include <stdio_ext.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/dmu.h>
-#include <sys/zap.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_znode.h>
-#include <sys/vdev.h>
-#include <sys/vdev_impl.h>
-#include <sys/metaslab_impl.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_pool.h>
-#include <sys/dbuf.h>
-#include <sys/zil.h>
-#include <sys/zil_impl.h>
-#include <sys/stat.h>
-#include <sys/resource.h>
-#include <sys/dmu_traverse.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
-
-const char cmdname[] = "zdb";
-uint8_t dump_opt[256];
-
-typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
-
-extern void dump_intent_log(zilog_t *);
-uint64_t *zopt_object = NULL;
-int zopt_objects = 0;
-int zdb_advance = ADVANCE_PRE;
-zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
-
-/*
- * These libumem hooks provide a reasonable set of defaults for the allocator's
- * debugging facilities.
- */
-const char *
-_umem_debug_init()
-{
- return ("default,verbose"); /* $UMEM_DEBUG setting */
-}
-
-const char *
-_umem_logging_init(void)
-{
- return ("fail,contents"); /* $UMEM_LOGGING setting */
-}
-
-static void
-usage(void)
-{
- (void) fprintf(stderr,
- "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] "
- "dataset [object...]\n"
- " %s -C [pool]\n"
- " %s -l dev\n"
- " %s -R vdev:offset:size:flags\n",
- cmdname, cmdname, cmdname, cmdname);
-
- (void) fprintf(stderr, " -u uberblock\n");
- (void) fprintf(stderr, " -d datasets\n");
- (void) fprintf(stderr, " -C cached pool configuration\n");
- (void) fprintf(stderr, " -i intent logs\n");
- (void) fprintf(stderr, " -b block statistics\n");
- (void) fprintf(stderr, " -c checksum all data blocks\n");
- (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
- (void) fprintf(stderr, " -v verbose (applies to all others)\n");
- (void) fprintf(stderr, " -l dump label contents\n");
- (void) fprintf(stderr, " -L live pool (allows some errors)\n");
- (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> "
- "visitation order\n");
- (void) fprintf(stderr, " -U use zpool.cache in /tmp\n");
- (void) fprintf(stderr, " -B objset:object:level:blkid -- "
- "simulate bad block\n");
- (void) fprintf(stderr, " -R read and display block from a"
- "device\n");
- (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
- "to make only that option verbose\n");
- (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
- exit(1);
-}
-
-static void
-fatal(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- (void) fprintf(stderr, "%s: ", cmdname);
- (void) vfprintf(stderr, fmt, ap);
- va_end(ap);
- (void) fprintf(stderr, "\n");
-
- exit(1);
-}
-
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
- nvpair_t *elem = NULL;
-
- while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
-
- VERIFY(nvpair_value_string(elem, &value) == 0);
- (void) printf("%*s%s='%s'\n", indent, "",
- nvpair_name(elem), value);
- }
- break;
-
- case DATA_TYPE_UINT64:
- {
- uint64_t value;
-
- VERIFY(nvpair_value_uint64(elem, &value) == 0);
- (void) printf("%*s%s=%llu\n", indent, "",
- nvpair_name(elem), (u_longlong_t)value);
- }
- break;
-
- case DATA_TYPE_NVLIST:
- {
- nvlist_t *value;
-
- VERIFY(nvpair_value_nvlist(elem, &value) == 0);
- (void) printf("%*s%s\n", indent, "",
- nvpair_name(elem));
- dump_nvlist(value, indent + 4);
- }
- break;
-
- case DATA_TYPE_NVLIST_ARRAY:
- {
- nvlist_t **value;
- uint_t c, count;
-
- VERIFY(nvpair_value_nvlist_array(elem, &value,
- &count) == 0);
-
- for (c = 0; c < count; c++) {
- (void) printf("%*s%s[%u]\n", indent, "",
- nvpair_name(elem), c);
- dump_nvlist(value[c], indent + 8);
- }
- }
- break;
-
- default:
-
- (void) printf("bad config type %d for %s\n",
- nvpair_type(elem), nvpair_name(elem));
- }
- }
-}
-
-/* ARGSUSED */
-static void
-dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
-{
- nvlist_t *nv;
- size_t nvsize = *(uint64_t *)data;
- char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
-
- VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
-
- VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
-
- umem_free(packed, nvsize);
-
- dump_nvlist(nv, 8);
-
- nvlist_free(nv);
-}
-
-const char dump_zap_stars[] = "****************************************";
-const int dump_zap_width = sizeof (dump_zap_stars) - 1;
-
-static void
-dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
-{
- int i;
- int minidx = ZAP_HISTOGRAM_SIZE - 1;
- int maxidx = 0;
- uint64_t max = 0;
-
- for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
- if (histo[i] > max)
- max = histo[i];
- if (histo[i] > 0 && i > maxidx)
- maxidx = i;
- if (histo[i] > 0 && i < minidx)
- minidx = i;
- }
-
- if (max < dump_zap_width)
- max = dump_zap_width;
-
- for (i = minidx; i <= maxidx; i++)
- (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
- &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
-}
-
-static void
-dump_zap_stats(objset_t *os, uint64_t object)
-{
- int error;
- zap_stats_t zs;
-
- error = zap_get_stats(os, object, &zs);
- if (error)
- return;
-
- if (zs.zs_ptrtbl_len == 0) {
- ASSERT(zs.zs_num_blocks == 1);
- (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
- (u_longlong_t)zs.zs_blocksize,
- (u_longlong_t)zs.zs_num_entries);
- return;
- }
-
- (void) printf("\tFat ZAP stats:\n");
-
- (void) printf("\t\tPointer table:\n");
- (void) printf("\t\t\t%llu elements\n",
- (u_longlong_t)zs.zs_ptrtbl_len);
- (void) printf("\t\t\tzt_blk: %llu\n",
- (u_longlong_t)zs.zs_ptrtbl_zt_blk);
- (void) printf("\t\t\tzt_numblks: %llu\n",
- (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
- (void) printf("\t\t\tzt_shift: %llu\n",
- (u_longlong_t)zs.zs_ptrtbl_zt_shift);
- (void) printf("\t\t\tzt_blks_copied: %llu\n",
- (u_longlong_t)zs.zs_ptrtbl_blks_copied);
- (void) printf("\t\t\tzt_nextblk: %llu\n",
- (u_longlong_t)zs.zs_ptrtbl_nextblk);
-
- (void) printf("\t\tZAP entries: %llu\n",
- (u_longlong_t)zs.zs_num_entries);
- (void) printf("\t\tLeaf blocks: %llu\n",
- (u_longlong_t)zs.zs_num_leafs);
- (void) printf("\t\tTotal blocks: %llu\n",
- (u_longlong_t)zs.zs_num_blocks);
- (void) printf("\t\tzap_block_type: 0x%llx\n",
- (u_longlong_t)zs.zs_block_type);
- (void) printf("\t\tzap_magic: 0x%llx\n",
- (u_longlong_t)zs.zs_magic);
- (void) printf("\t\tzap_salt: 0x%llx\n",
- (u_longlong_t)zs.zs_salt);
-
- (void) printf("\t\tLeafs with 2^n pointers:\n");
- dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
-
- (void) printf("\t\tBlocks with n*5 entries:\n");
- dump_zap_histogram(zs.zs_blocks_with_n5_entries);
-
- (void) printf("\t\tBlocks n/10 full:\n");
- dump_zap_histogram(zs.zs_blocks_n_tenths_full);
-
- (void) printf("\t\tEntries with n chunks:\n");
- dump_zap_histogram(zs.zs_entries_using_n_chunks);
-
- (void) printf("\t\tBuckets with n entries:\n");
- dump_zap_histogram(zs.zs_buckets_with_n_entries);
-}
-
-/*ARGSUSED*/
-static void
-dump_none(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-/*ARGSUSED*/
-void
-dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-/*ARGSUSED*/
-static void
-dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-/*ARGSUSED*/
-static void
-dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
-{
- zap_cursor_t zc;
- zap_attribute_t attr;
- void *prop;
- int i;
-
- dump_zap_stats(os, object);
- (void) printf("\n");
-
- for (zap_cursor_init(&zc, os, object);
- zap_cursor_retrieve(&zc, &attr) == 0;
- zap_cursor_advance(&zc)) {
- (void) printf("\t\t%s = ", attr.za_name);
- if (attr.za_num_integers == 0) {
- (void) printf("\n");
- continue;
- }
- prop = umem_zalloc(attr.za_num_integers *
- attr.za_integer_length, UMEM_NOFAIL);
- (void) zap_lookup(os, object, attr.za_name,
- attr.za_integer_length, attr.za_num_integers, prop);
- if (attr.za_integer_length == 1) {
- (void) printf("%s", (char *)prop);
- } else {
- for (i = 0; i < attr.za_num_integers; i++) {
- switch (attr.za_integer_length) {
- case 2:
- (void) printf("%u ",
- ((uint16_t *)prop)[i]);
- break;
- case 4:
- (void) printf("%u ",
- ((uint32_t *)prop)[i]);
- break;
- case 8:
- (void) printf("%lld ",
- (u_longlong_t)((int64_t *)prop)[i]);
- break;
- }
- }
- }
- (void) printf("\n");
- umem_free(prop, attr.za_num_integers * attr.za_integer_length);
- }
- zap_cursor_fini(&zc);
-}
-
-static void
-dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
-{
- uint64_t alloc, offset, entry;
- uint8_t mapshift = sm->sm_shift;
- uint64_t mapstart = sm->sm_start;
- char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
- "INVALID", "INVALID", "INVALID", "INVALID" };
-
- if (smo->smo_object == 0)
- return;
-
- /*
- * Print out the freelist entries in both encoded and decoded form.
- */
- alloc = 0;
- for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
- VERIFY(0 == dmu_read(os, smo->smo_object, offset,
- sizeof (entry), &entry));
- if (SM_DEBUG_DECODE(entry)) {
- (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
- (u_longlong_t)(offset / sizeof (entry)),
- ddata[SM_DEBUG_ACTION_DECODE(entry)],
- (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
- (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
- } else {
- (void) printf("\t\t[%4llu] %c range:"
- " %08llx-%08llx size: %06llx\n",
- (u_longlong_t)(offset / sizeof (entry)),
- SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + mapstart),
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
- mapshift)),
- (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
- if (SM_TYPE_DECODE(entry) == SM_ALLOC)
- alloc += SM_RUN_DECODE(entry) << mapshift;
- else
- alloc -= SM_RUN_DECODE(entry) << mapshift;
- }
- }
- if (alloc != smo->smo_alloc) {
- (void) printf("space_map_object alloc (%llu) INCONSISTENT "
- "with space map summary (%llu)\n",
- (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
- }
-}
-
-static void
-dump_metaslab(metaslab_t *msp)
-{
- char freebuf[5];
- space_map_obj_t *smo = &msp->ms_smo;
- vdev_t *vd = msp->ms_group->mg_vd;
- spa_t *spa = vd->vdev_spa;
-
- nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
-
- if (dump_opt['d'] <= 5) {
- (void) printf("\t%10llx %10llu %5s\n",
- (u_longlong_t)msp->ms_map.sm_start,
- (u_longlong_t)smo->smo_object,
- freebuf);
- return;
- }
-
- (void) printf(
- "\tvdev %llu offset %08llx spacemap %4llu free %5s\n",
- (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
- (u_longlong_t)smo->smo_object, freebuf);
-
- ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
-
- dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
-}
-
-static void
-dump_metaslabs(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m;
-
- (void) printf("\nMetaslabs:\n");
-
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
-
- spa_config_enter(spa, RW_READER, FTAG);
- (void) printf("\n vdev %llu = %s\n\n",
- (u_longlong_t)vd->vdev_id, vdev_description(vd));
- spa_config_exit(spa, FTAG);
-
- if (dump_opt['d'] <= 5) {
- (void) printf("\t%10s %10s %5s\n",
- "offset", "spacemap", "free");
- (void) printf("\t%10s %10s %5s\n",
- "------", "--------", "----");
- }
- for (m = 0; m < vd->vdev_ms_count; m++)
- dump_metaslab(vd->vdev_ms[m]);
- (void) printf("\n");
- }
-}
-
-static void
-dump_dtl(vdev_t *vd, int indent)
-{
- avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
- spa_t *spa = vd->vdev_spa;
- space_seg_t *ss;
- vdev_t *pvd;
- int c;
-
- if (indent == 0)
- (void) printf("\nDirty time logs:\n\n");
-
- spa_config_enter(spa, RW_READER, FTAG);
- (void) printf("\t%*s%s\n", indent, "", vdev_description(vd));
- spa_config_exit(spa, FTAG);
-
- for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
- /*
- * Everything in this DTL must appear in all parent DTL unions.
- */
- for (pvd = vd; pvd; pvd = pvd->vdev_parent)
- ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
- ss->ss_start, ss->ss_end - ss->ss_start));
- (void) printf("\t%*soutage [%llu,%llu] length %llu\n",
- indent, "",
- (u_longlong_t)ss->ss_start,
- (u_longlong_t)ss->ss_end - 1,
- (u_longlong_t)(ss->ss_end - ss->ss_start));
- }
-
- (void) printf("\n");
-
- if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
- dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
- &vd->vdev_dtl_map);
- (void) printf("\n");
- }
-
- for (c = 0; c < vd->vdev_children; c++)
- dump_dtl(vd->vdev_child[c], indent + 4);
-}
-
-/*ARGSUSED*/
-static void
-dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-static uint64_t
-blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
-{
- if (level < 0)
- return (blkid);
-
- return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
- dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
-}
-
-static void
-sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
-{
- dva_t *dva = bp->blk_dva;
- int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
- int i;
-
- blkbuf[0] = '\0';
-
- for (i = 0; i < ndvas; i++)
- (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
- (u_longlong_t)DVA_GET_VDEV(&dva[i]),
- (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
- (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
-
- (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
- (u_longlong_t)BP_GET_LSIZE(bp),
- (u_longlong_t)BP_GET_PSIZE(bp),
- (u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_birth);
-}
-
-/* ARGSUSED */
-static int
-zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
-{
- zbookmark_t *zb = &bc->bc_bookmark;
- blkptr_t *bp = &bc->bc_blkptr;
- void *data = bc->bc_data;
- dnode_phys_t *dnp = bc->bc_dnode;
- char blkbuf[BP_SPRINTF_LEN + 80];
- int l;
-
- if (bc->bc_errno) {
- (void) sprintf(blkbuf,
- "Error %d reading <%llu, %llu, %lld, %llu>: ",
- bc->bc_errno,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid);
- goto out;
- }
-
- if (zb->zb_level == -1) {
- ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
- ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
- } else {
- ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
- ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
- }
-
- if (zb->zb_level > 0) {
- uint64_t fill = 0;
- blkptr_t *bpx, *bpend;
-
- for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
- bpx < bpend; bpx++) {
- if (bpx->blk_birth != 0) {
- fill += bpx->blk_fill;
- } else {
- ASSERT(bpx->blk_fill == 0);
- }
- }
- ASSERT3U(fill, ==, bp->blk_fill);
- }
-
- if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
- uint64_t fill = 0;
- dnode_phys_t *dnx, *dnend;
-
- for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
- dnx < dnend; dnx++) {
- if (dnx->dn_type != DMU_OT_NONE)
- fill++;
- }
- ASSERT3U(fill, ==, bp->blk_fill);
- }
-
- (void) sprintf(blkbuf, "%16llx ",
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
-
- ASSERT(zb->zb_level >= 0);
-
- for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
- if (l == zb->zb_level) {
- (void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
- (u_longlong_t)zb->zb_level);
- } else {
- (void) sprintf(blkbuf + strlen(blkbuf), " ");
- }
- }
-
-out:
- if (bp->blk_birth == 0) {
- (void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
- (void) printf("%s\n", blkbuf);
- } else {
- sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
- dump_opt['d'] > 5 ? 1 : 0);
- (void) printf("%s\n", blkbuf);
- }
-
- return (bc->bc_errno ? ERESTART : 0);
-}
-
-/*ARGSUSED*/
-static void
-dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
-{
- traverse_handle_t *th;
- uint64_t objset = dmu_objset_id(os);
- int advance = zdb_advance;
-
- (void) printf("Indirect blocks:\n");
-
- if (object == 0)
- advance |= ADVANCE_DATA;
-
- th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
- ZIO_FLAG_CANFAIL);
- th->th_noread = zdb_noread;
-
- traverse_add_dnode(th, 0, -1ULL, objset, object);
-
- while (traverse_more(th) == EAGAIN)
- continue;
-
- (void) printf("\n");
-
- traverse_fini(th);
-}
-
-/*ARGSUSED*/
-static void
-dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
-{
- dsl_dir_phys_t *dd = data;
- time_t crtime;
- char used[6], compressed[6], uncompressed[6], quota[6], resv[6];
-
- if (dd == NULL)
- return;
-
- ASSERT(size == sizeof (*dd));
-
- crtime = dd->dd_creation_time;
- nicenum(dd->dd_used_bytes, used);
- nicenum(dd->dd_compressed_bytes, compressed);
- nicenum(dd->dd_uncompressed_bytes, uncompressed);
- nicenum(dd->dd_quota, quota);
- nicenum(dd->dd_reserved, resv);
-
- (void) printf("\t\tcreation_time = %s", ctime(&crtime));
- (void) printf("\t\thead_dataset_obj = %llu\n",
- (u_longlong_t)dd->dd_head_dataset_obj);
- (void) printf("\t\tparent_dir_obj = %llu\n",
- (u_longlong_t)dd->dd_parent_obj);
- (void) printf("\t\tclone_parent_obj = %llu\n",
- (u_longlong_t)dd->dd_clone_parent_obj);
- (void) printf("\t\tchild_dir_zapobj = %llu\n",
- (u_longlong_t)dd->dd_child_dir_zapobj);
- (void) printf("\t\tused_bytes = %s\n", used);
- (void) printf("\t\tcompressed_bytes = %s\n", compressed);
- (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
- (void) printf("\t\tquota = %s\n", quota);
- (void) printf("\t\treserved = %s\n", resv);
- (void) printf("\t\tprops_zapobj = %llu\n",
- (u_longlong_t)dd->dd_props_zapobj);
-}
-
-/*ARGSUSED*/
-static void
-dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
-{
- dsl_dataset_phys_t *ds = data;
- time_t crtime;
- char used[6], compressed[6], uncompressed[6], unique[6];
- char blkbuf[BP_SPRINTF_LEN];
-
- if (ds == NULL)
- return;
-
- ASSERT(size == sizeof (*ds));
- crtime = ds->ds_creation_time;
- nicenum(ds->ds_used_bytes, used);
- nicenum(ds->ds_compressed_bytes, compressed);
- nicenum(ds->ds_uncompressed_bytes, uncompressed);
- nicenum(ds->ds_unique_bytes, unique);
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
-
- (void) printf("\t\tdataset_obj = %llu\n",
- (u_longlong_t)ds->ds_dir_obj);
- (void) printf("\t\tprev_snap_obj = %llu\n",
- (u_longlong_t)ds->ds_prev_snap_obj);
- (void) printf("\t\tprev_snap_txg = %llu\n",
- (u_longlong_t)ds->ds_prev_snap_txg);
- (void) printf("\t\tnext_snap_obj = %llu\n",
- (u_longlong_t)ds->ds_next_snap_obj);
- (void) printf("\t\tsnapnames_zapobj = %llu\n",
- (u_longlong_t)ds->ds_snapnames_zapobj);
- (void) printf("\t\tnum_children = %llu\n",
- (u_longlong_t)ds->ds_num_children);
- (void) printf("\t\tcreation_time = %s", ctime(&crtime));
- (void) printf("\t\tcreation_txg = %llu\n",
- (u_longlong_t)ds->ds_creation_txg);
- (void) printf("\t\tdeadlist_obj = %llu\n",
- (u_longlong_t)ds->ds_deadlist_obj);
- (void) printf("\t\tused_bytes = %s\n", used);
- (void) printf("\t\tcompressed_bytes = %s\n", compressed);
- (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
- (void) printf("\t\tunique = %s\n", unique);
- (void) printf("\t\tfsid_guid = %llu\n",
- (u_longlong_t)ds->ds_fsid_guid);
- (void) printf("\t\tguid = %llu\n",
- (u_longlong_t)ds->ds_guid);
- (void) printf("\t\tflags = %llx\n",
- (u_longlong_t)ds->ds_flags);
- (void) printf("\t\tbp = %s\n", blkbuf);
-}
-
-static void
-dump_bplist(objset_t *mos, uint64_t object, char *name)
-{
- bplist_t bpl = { 0 };
- blkptr_t blk, *bp = &blk;
- uint64_t itor = 0;
- char bytes[6];
- char comp[6];
- char uncomp[6];
-
- if (dump_opt['d'] < 3)
- return;
-
- VERIFY(0 == bplist_open(&bpl, mos, object));
- if (bplist_empty(&bpl)) {
- bplist_close(&bpl);
- return;
- }
-
- nicenum(bpl.bpl_phys->bpl_bytes, bytes);
- if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
- nicenum(bpl.bpl_phys->bpl_comp, comp);
- nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
- (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
- bytes, comp, uncomp);
- } else {
- (void) printf("\n %s: %llu entries, %s\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
- }
-
- if (dump_opt['d'] < 5) {
- bplist_close(&bpl);
- return;
- }
-
- (void) printf("\n");
-
- while (bplist_iterate(&bpl, &itor, bp) == 0) {
- char blkbuf[BP_SPRINTF_LEN];
-
- ASSERT(bp->blk_birth != 0);
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
- (void) printf("\tItem %3llu: %s\n",
- (u_longlong_t)itor - 1, blkbuf);
- }
-
- bplist_close(&bpl);
-}
-
-/*ARGSUSED*/
-static void
-dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
-{
- znode_phys_t *zp = data;
- time_t z_crtime, z_atime, z_mtime, z_ctime;
- char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
- int error;
-
- ASSERT(size >= sizeof (znode_phys_t));
-
- error = zfs_obj_to_path(os, object, path, sizeof (path));
- if (error != 0) {
- (void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
- (u_longlong_t)object);
- }
-
- if (dump_opt['d'] < 3) {
- (void) printf("\t%s\n", path);
- return;
- }
-
- z_crtime = (time_t)zp->zp_crtime[0];
- z_atime = (time_t)zp->zp_atime[0];
- z_mtime = (time_t)zp->zp_mtime[0];
- z_ctime = (time_t)zp->zp_ctime[0];
-
- (void) printf("\tpath %s\n", path);
- (void) printf("\tatime %s", ctime(&z_atime));
- (void) printf("\tmtime %s", ctime(&z_mtime));
- (void) printf("\tctime %s", ctime(&z_ctime));
- (void) printf("\tcrtime %s", ctime(&z_crtime));
- (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen);
- (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode);
- (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size);
- (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
- (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links);
- (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr);
- (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev);
-}
-
-/*ARGSUSED*/
-static void
-dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-/*ARGSUSED*/
-static void
-dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
-{
-}
-
-static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
- dump_none, /* unallocated */
- dump_zap, /* object directory */
- dump_uint64, /* object array */
- dump_none, /* packed nvlist */
- dump_packed_nvlist, /* packed nvlist size */
- dump_none, /* bplist */
- dump_none, /* bplist header */
- dump_none, /* SPA space map header */
- dump_none, /* SPA space map */
- dump_none, /* ZIL intent log */
- dump_dnode, /* DMU dnode */
- dump_dmu_objset, /* DMU objset */
- dump_dsl_dir, /* DSL directory */
- dump_zap, /* DSL directory child map */
- dump_zap, /* DSL dataset snap map */
- dump_zap, /* DSL props */
- dump_dsl_dataset, /* DSL dataset */
- dump_znode, /* ZFS znode */
- dump_acl, /* ZFS ACL */
- dump_uint8, /* ZFS plain file */
- dump_zap, /* ZFS directory */
- dump_zap, /* ZFS master node */
- dump_zap, /* ZFS delete queue */
- dump_uint8, /* zvol object */
- dump_zap, /* zvol prop */
- dump_uint8, /* other uint8[] */
- dump_uint64, /* other uint64[] */
- dump_zap, /* other ZAP */
- dump_zap, /* persistent error log */
- dump_uint8, /* SPA history */
- dump_uint64, /* SPA history offsets */
- dump_zap, /* Pool properties */
-};
-
-static void
-dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
-{
- dmu_buf_t *db = NULL;
- dmu_object_info_t doi;
- dnode_t *dn;
- void *bonus = NULL;
- size_t bsize = 0;
- char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
- char aux[50];
- int error;
-
- if (*print_header) {
- (void) printf("\n Object lvl iblk dblk lsize"
- " asize type\n");
- *print_header = 0;
- }
-
- if (object == 0) {
- dn = os->os->os_meta_dnode;
- } else {
- error = dmu_bonus_hold(os, object, FTAG, &db);
- if (error)
- fatal("dmu_bonus_hold(%llu) failed, errno %u",
- object, error);
- bonus = db->db_data;
- bsize = db->db_size;
- dn = ((dmu_buf_impl_t *)db)->db_dnode;
- }
- dmu_object_info_from_dnode(dn, &doi);
-
- nicenum(doi.doi_metadata_block_size, iblk);
- nicenum(doi.doi_data_block_size, dblk);
- nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
- lsize);
- nicenum(doi.doi_physical_blks << 9, asize);
- nicenum(doi.doi_bonus_size, bonus_size);
-
- aux[0] = '\0';
-
- if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6)
- (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
- zio_checksum_table[doi.doi_checksum].ci_name);
-
- if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6)
- (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
- zio_compress_table[doi.doi_compress].ci_name);
-
- (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
- asize, dmu_ot[doi.doi_type].ot_name, aux);
-
- if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
- (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
- "", "", "", "", bonus_size, "bonus",
- dmu_ot[doi.doi_bonus_type].ot_name);
- }
-
- if (verbosity >= 4) {
- object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
- object_viewer[doi.doi_type](os, object, NULL, 0);
- *print_header = 1;
- }
-
- if (verbosity >= 5)
- dump_indirect(os, object, NULL, 0);
-
- if (verbosity >= 5) {
- /*
- * Report the list of segments that comprise the object.
- */
- uint64_t start = 0;
- uint64_t end;
- uint64_t blkfill = 1;
- int minlvl = 1;
-
- if (dn->dn_type == DMU_OT_DNODE) {
- minlvl = 0;
- blkfill = DNODES_PER_BLOCK;
- }
-
- for (;;) {
- error = dnode_next_offset(dn, B_FALSE, &start, minlvl,
- blkfill, 0);
- if (error)
- break;
- end = start;
- error = dnode_next_offset(dn, B_TRUE, &end, minlvl,
- blkfill, 0);
- nicenum(end - start, segsize);
- (void) printf("\t\tsegment [%016llx, %016llx)"
- " size %5s\n", (u_longlong_t)start,
- (u_longlong_t)end, segsize);
- if (error)
- break;
- start = end;
- }
- }
-
- if (db != NULL)
- dmu_buf_rele(db, FTAG);
-}
-
-static char *objset_types[DMU_OST_NUMTYPES] = {
- "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
-
-/*ARGSUSED*/
-static void
-dump_dir(objset_t *os)
-{
- dmu_objset_stats_t dds;
- uint64_t object, object_count;
- uint64_t refdbytes, usedobjs, scratch;
- char numbuf[8];
- char blkbuf[BP_SPRINTF_LEN];
- char osname[MAXNAMELEN];
- char *type = "UNKNOWN";
- int verbosity = dump_opt['d'];
- int print_header = 1;
- int i, error;
-
- dmu_objset_fast_stat(os, &dds);
-
- if (dds.dds_type < DMU_OST_NUMTYPES)
- type = objset_types[dds.dds_type];
-
- if (dds.dds_type == DMU_OST_META) {
- dds.dds_creation_txg = TXG_INITIAL;
- usedobjs = os->os->os_rootbp->blk_fill;
- refdbytes =
- os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes;
- } else {
- dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
- }
-
- ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
-
- nicenum(refdbytes, numbuf);
-
- if (verbosity >= 4) {
- (void) strcpy(blkbuf, ", rootbp ");
- sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
- } else {
- blkbuf[0] = '\0';
- }
-
- dmu_objset_name(os, osname);
-
- (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
- "%s, %llu objects%s\n",
- osname, type, (u_longlong_t)dmu_objset_id(os),
- (u_longlong_t)dds.dds_creation_txg,
- numbuf, (u_longlong_t)usedobjs, blkbuf);
-
- dump_intent_log(dmu_objset_zil(os));
-
- if (dmu_objset_ds(os) != NULL)
- dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
- dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
-
- if (verbosity < 2)
- return;
-
- if (zopt_objects != 0) {
- for (i = 0; i < zopt_objects; i++)
- dump_object(os, zopt_object[i], verbosity,
- &print_header);
- (void) printf("\n");
- return;
- }
-
- dump_object(os, 0, verbosity, &print_header);
- object_count = 1;
-
- object = 0;
- while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
- dump_object(os, object, verbosity, &print_header);
- object_count++;
- }
-
- ASSERT3U(object_count, ==, usedobjs);
-
- (void) printf("\n");
-
- if (error != ESRCH)
- fatal("dmu_object_next() = %d", error);
-}
-
-static void
-dump_uberblock(uberblock_t *ub)
-{
- time_t timestamp = ub->ub_timestamp;
-
- (void) printf("Uberblock\n\n");
- (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
- (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
- (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
- (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
- (void) printf("\ttimestamp = %llu UTC = %s",
- (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
- if (dump_opt['u'] >= 3) {
- char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
- (void) printf("\trootbp = %s\n", blkbuf);
- }
- (void) printf("\n");
-}
-
-static void
-dump_config(const char *pool)
-{
- spa_t *spa = NULL;
-
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (pool == NULL)
- (void) printf("%s\n", spa_name(spa));
- if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
- dump_nvlist(spa->spa_config, 4);
- }
- mutex_exit(&spa_namespace_lock);
-}
-
-static void
-dump_label(const char *dev)
-{
- int fd;
- vdev_label_t label;
- char *buf = label.vl_vdev_phys.vp_nvlist;
- size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
- struct stat64 statbuf;
- uint64_t psize;
- int l;
-
- if ((fd = open64(dev, O_RDONLY)) < 0) {
- (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
- exit(1);
- }
-
- if (fstat64(fd, &statbuf) != 0) {
- (void) printf("failed to stat '%s': %s\n", dev,
- strerror(errno));
- exit(1);
- }
-
- if (S_ISCHR(statbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &psize) != 0) {
- (void) printf("failed to get size '%s': %s\n", dev,
- strerror(errno));
- exit(1);
- }
- } else
- psize = statbuf.st_size;
-
- psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
-
- for (l = 0; l < VDEV_LABELS; l++) {
-
- nvlist_t *config = NULL;
-
- (void) printf("--------------------------------------------\n");
- (void) printf("LABEL %d\n", l);
- (void) printf("--------------------------------------------\n");
-
- if (pread64(fd, &label, sizeof (label),
- vdev_label_offset(psize, l, 0)) != sizeof (label)) {
- (void) printf("failed to read label %d\n", l);
- continue;
- }
-
- if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
- (void) printf("failed to unpack label %d\n", l);
- continue;
- }
- dump_nvlist(config, 4);
- nvlist_free(config);
- }
-}
-
-/*ARGSUSED*/
-static int
-dump_one_dir(char *dsname, void *arg)
-{
- int error;
- objset_t *os;
-
- error = dmu_objset_open(dsname, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- if (error) {
- (void) printf("Could not open %s\n", dsname);
- return (0);
- }
- dump_dir(os);
- dmu_objset_close(os);
- return (0);
-}
-
-/*
- * Load the on-disk allocation space map of every metaslab of every child
- * of the root vdev into that metaslab's ms_allocmap[0].  A load failure
- * is fatal.
- */
-static void
-zdb_space_map_load(spa_t *spa)
-{
-	vdev_t *root = spa->spa_root_vdev;
-	int vi, mi;
-
-	for (vi = 0; vi < root->vdev_children; vi++) {
-		vdev_t *child = root->vdev_child[vi];
-
-		for (mi = 0; mi < child->vdev_ms_count; mi++) {
-			metaslab_t *ms = child->vdev_ms[mi];
-			int err;
-
-			mutex_enter(&ms->ms_lock);
-			err = space_map_load(&ms->ms_allocmap[0], NULL,
-			    SM_ALLOC, &ms->ms_smo, spa->spa_meta_objset);
-			mutex_exit(&ms->ms_lock);
-
-			if (err != 0) {
-				fatal("%s bad space map #%d, error %d",
-				    spa->spa_name, vi, err);
-			}
-		}
-	}
-}
-
-/*
- * Claim every DVA of a block pointer against the loaded space maps: each
- * DVA's range is removed from its metaslab's ms_allocmap[0] and added to
- * ms_freemap[0].  For a gang block the gang header is read and each of
- * its block pointers (up to the first hole) is claimed recursively.
- *
- * Returns 0 on success, ENXIO if a DVA names an unknown top-level vdev or
- * an offset beyond that vdev's metaslabs, EAGAIN if the range is already
- * in the free map (claimed before), ESTALE if it is not in the allocation
- * map, or the error from the gang header read / recursive claim.
- */
-static int
-zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb)
-{
- dva_t *dva = bp->blk_dva;
- vdev_t *vd;
- metaslab_t *msp;
- space_map_t *allocmap, *freemap;
- int error;
- int d;
- blkptr_t blk = *bp;
-
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- uint64_t vdev = DVA_GET_VDEV(&dva[d]);
- uint64_t offset = DVA_GET_OFFSET(&dva[d]);
- uint64_t size = DVA_GET_ASIZE(&dva[d]);
-
- if ((vd = vdev_lookup_top(spa, vdev)) == NULL)
- return (ENXIO);
-
- /* The metaslab index is the offset shifted by vdev_ms_shift. */
- if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
- return (ENXIO);
-
- msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- allocmap = &msp->ms_allocmap[0];
- freemap = &msp->ms_freemap[0];
-
- /* Prepare our copy of the bp in case we need to read GBHs */
- if (DVA_GET_GANG(&dva[d])) {
- size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
- DVA_SET_ASIZE(&blk.blk_dva[d], size);
- DVA_SET_GANG(&blk.blk_dva[d], 0);
- }
-
- mutex_enter(&msp->ms_lock);
- if (space_map_contains(freemap, offset, size)) {
- mutex_exit(&msp->ms_lock);
- return (EAGAIN); /* allocated more than once */
- }
-
- if (!space_map_contains(allocmap, offset, size)) {
- mutex_exit(&msp->ms_lock);
- return (ESTALE); /* not allocated at all */
- }
-
- /* Move the range from "allocated" to "claimed by this traversal". */
- space_map_remove(allocmap, offset, size);
- space_map_add(freemap, offset, size);
-
- mutex_exit(&msp->ms_lock);
- }
-
- if (BP_IS_GANG(bp)) {
- zio_gbh_phys_t gbh;
- int g;
-
- /* LINTED - compile time assert */
- ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE);
-
- /* Read the gang header itself through the modified copy of the bp. */
- BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER);
- BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE);
- BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE);
- BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF);
- error = zio_wait(zio_read(NULL, spa, &blk, &gbh,
- SPA_GANGBLOCKSIZE, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb));
- if (error)
- return (error);
- if (BP_SHOULD_BYTESWAP(&blk))
- byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE);
- /* Claim the gang members; the first hole ends the list. */
- for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- if (BP_IS_HOLE(&gbh.zg_blkptr[g]))
- break;
- error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb);
- if (error)
- return (error);
- }
- }
-
- return (0);
-}
-
-/*
- * space_map_vacate() callback used on an allocation map: every segment
- * still present is reported as leaked space.  The owning metaslab is
- * recovered from the address of its embedded ms_allocmap[0].
- */
-static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
-{
-	/* LINTED */
-	metaslab_t *owner =
-	    (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0]));
-	u_longlong_t vdev_id = (u_longlong_t)owner->ms_group->mg_vd->vdev_id;
-
-	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
-	    vdev_id, (u_longlong_t)start, (u_longlong_t)size);
-}
-
-/*
- * Empty the space maps used for leak detection.  For each metaslab the
- * allocation map is vacated through zdb_leak(), which reports whatever is
- * still in it, then unloaded; the free map is vacated silently.
- */
-static void
-zdb_space_map_unload(spa_t *spa)
-{
-	vdev_t *root = spa->spa_root_vdev;
-	int vi, mi;
-
-	for (vi = 0; vi < root->vdev_children; vi++) {
-		vdev_t *child = root->vdev_child[vi];
-
-		for (mi = 0; mi < child->vdev_ms_count; mi++) {
-			metaslab_t *ms = child->vdev_ms[mi];
-			space_map_t *allocated = &ms->ms_allocmap[0];
-			space_map_t *claimed = &ms->ms_freemap[0];
-
-			mutex_enter(&ms->ms_lock);
-			space_map_vacate(allocated, zdb_leak, allocated);
-			space_map_unload(allocated);
-			space_map_vacate(claimed, NULL, NULL);
-			mutex_exit(&ms->ms_lock);
-		}
-	}
-}
-
-/*
- * Re-read the uberblock from the root vdev and, if one was found
- * (non-zero ub_txg), install it as spa->spa_ubsync.
- */
-static void
-zdb_refresh_ubsync(spa_t *spa)
-{
-	uberblock_t fresh = { 0 };
-	zio_t *root_zio;
-
-	root_zio = zio_root(spa, NULL, NULL,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
-	vdev_uberblock_load(root_zio, spa->spa_root_vdev, &fresh);
-	(void) zio_wait(root_zio);
-
-	/* A zero txg means no uberblock was loaded; keep the current one. */
-	if (fresh.ub_txg == 0)
-		return;
-
-	spa->spa_ubsync = fresh;
-}
-
-/*
- * Verify that the sum of the sizes of all blocks in the pool adds up
- * to the SPA's sa_alloc total.
- *
- * One zdb_blkstats_t holds the running totals for a single
- * (level, object type) bucket; zdb_count_block() adds each block to it.
- */
-typedef struct zdb_blkstats {
- uint64_t zb_asize; /* sum of BP_GET_ASIZE() of the counted blocks */
- uint64_t zb_lsize; /* sum of BP_GET_LSIZE() of the counted blocks */
- uint64_t zb_psize; /* sum of BP_GET_PSIZE() of the counted blocks */
- uint64_t zb_count; /* number of blocks counted */
-} zdb_blkstats_t;
-
-/*
- * Extra bucket indices for zcb_type[][]: deferred frees are accounted in
- * the DMU_OT_NONE slot, and one slot past the last real type / level holds
- * the totals across all types / all levels.
- */
-#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_TOTAL DMU_OT_NUMTYPES
-
-#define ZB_TOTAL ZB_MAXLEVEL
-
-/* State shared by the block traversal callbacks of dump_block_stats(). */
-typedef struct zdb_cb {
- zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; /* [level][type] */
- uint64_t zcb_errors[256]; /* read-error counts, indexed by errno */
- traverse_blk_cache_t *zcb_cache; /* supplies the bookmark for claims */
- int zcb_readfails; /* consecutive read failures; reset on success */
- int zcb_haderrors; /* set once any read error has been recorded */
-} zdb_cb_t;
-
-/*
- * Account for one block.  Its sizes are added to four buckets of
- * zcb->zcb_type: (its level, all types), (its level, its type),
- * (all levels, all types) and (all levels, its type).  Unless -L is in
- * effect the block is then claimed against the space maps, and any claim
- * failure is fatal.
- */
-static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
-{
-	int pass, err;
-
-	for (pass = 0; pass < 4; pass++) {
-		int level = (pass < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
-		int ot = (pass & 1) ? type : DMU_OT_TOTAL;
-		zdb_blkstats_t *bucket = &zcb->zcb_type[level][ot];
-
-		bucket->zb_asize += BP_GET_ASIZE(bp);
-		bucket->zb_lsize += BP_GET_LSIZE(bp);
-		bucket->zb_psize += BP_GET_PSIZE(bp);
-		bucket->zb_count++;
-	}
-
-	/* With -L no space maps were loaded, so there is nothing to claim. */
-	if (dump_opt['L'])
-		return;
-
-	err = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark);
-	if (err == 0)
-		return;
-
-	if (err == EAGAIN)
-		(void) fatal("double-allocation, bp=%p", bp);
-
-	if (err == ESTALE)
-		(void) fatal("reference to freed block, bp=%p", bp);
-
-	(void) fatal("fatal error %d in bp %p", err, bp);
-}
-
-/*
- * Per-block callback registered with traverse_init().
- *
- * On a read error (bc->bc_errno != 0): with -L, up to ten consecutive
- * failures refresh the uberblock and return EAGAIN so the read is retried;
- * otherwise the error is tallied in zcb_errors[] and ERESTART is returned
- * so the block is skipped.  Either way the failure is printed.
- *
- * On success the block is (optionally) printed and handed to
- * zdb_count_block(); returns 0.
- */
-static int
-zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
-	zbookmark_t *zb = &bc->bc_bookmark;
-	zdb_cb_t *zcb = arg;
-	blkptr_t *bp = &bc->bc_blkptr;
-	dmu_object_type_t type = BP_GET_TYPE(bp);
-	char blkbuf[BP_SPRINTF_LEN];
-	int error = 0;
-
-	if (bc->bc_errno) {
-		if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
-			zdb_refresh_ubsync(spa);
-			error = EAGAIN;
-		} else {
-			zcb->zcb_haderrors = 1;
-			/*
-			 * zcb_errors[] has a fixed size; only tally errno
-			 * values that fit so an unexpected value cannot
-			 * write outside the array.
-			 */
-			if (bc->bc_errno > 0 && (size_t)bc->bc_errno <
-			    sizeof (zcb->zcb_errors) /
-			    sizeof (zcb->zcb_errors[0]))
-				zcb->zcb_errors[bc->bc_errno]++;
-			error = ERESTART;
-		}
-
-		/* bc_errno is known to be non-zero on this path. */
-		if (dump_opt['b'] >= 2)
-			sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
-		else
-			blkbuf[0] = '\0';
-
-		(void) printf("zdb_blkptr_cb: Got error %d reading "
-		    "<%llu, %llu, %lld, %llx> %s -- %s\n",
-		    bc->bc_errno,
-		    (u_longlong_t)zb->zb_objset,
-		    (u_longlong_t)zb->zb_object,
-		    (longlong_t)zb->zb_level,
-		    (u_longlong_t)zb->zb_blkid,
-		    blkbuf,
-		    error == EAGAIN ? "retrying" : "skipping");
-
-		return (error);
-	}
-
-	zcb->zcb_readfails = 0;
-
-	ASSERT(!BP_IS_HOLE(bp));
-
-	if (dump_opt['b'] >= 4) {
-		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
-		(void) printf("objset %llu object %llu offset 0x%llx %s\n",
-		    (u_longlong_t)zb->zb_objset,
-		    (u_longlong_t)zb->zb_object,
-		    (u_longlong_t)blkid2offset(bc->bc_dnode,
-		    zb->zb_level, zb->zb_blkid),
-		    blkbuf);
-	}
-
-	zdb_count_block(spa, zcb, bp, type);
-
-	return (0);
-}
-
-/*
- * Traverse every block in the pool, accumulating per-level / per-type
- * statistics in a zdb_cb_t and, unless -L was given, claiming each block
- * against the space maps so that double allocations, references to freed
- * blocks and leaks are detected.  Deferred frees are counted first.
- * Prints a summary, with a per-type table when dump_opt['b'] >= 2.
- *
- * Returns 2 if the traversed allocated size differs from spa_get_alloc()
- * or no blocks were counted, 3 if read errors were recorded, 0 otherwise.
- */
-static int
-dump_block_stats(spa_t *spa)
-{
- traverse_handle_t *th;
- zdb_cb_t zcb = { 0 };
- traverse_blk_cache_t dummy_cache = { 0 };
- zdb_blkstats_t *zb, *tzb;
- uint64_t alloc, space;
- int leaks = 0;
- int advance = zdb_advance;
- int flags;
- int e;
-
- zcb.zcb_cache = &dummy_cache;
-
- if (dump_opt['c'])
- advance |= ADVANCE_DATA;
-
- advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
-
- (void) printf("\nTraversing all blocks to %sverify"
- " nothing leaked ...\n",
- dump_opt['c'] ? "verify checksums and " : "");
-
- /*
- * Load all space maps. As we traverse the pool, if we find a block
- * that's not in its space map, that indicates a double-allocation,
- * reference to a freed block, or an unclaimed block. Otherwise we
- * remove the block from the space map. If the space maps are not
- * empty when we're done, that indicates leaked blocks.
- */
- if (!dump_opt['L'])
- zdb_space_map_load(spa);
-
- /*
- * If there's a deferred-free bplist, process that first.
- */
- if (spa->spa_sync_bplist_obj != 0) {
- bplist_t *bpl = &spa->spa_sync_bplist;
- blkptr_t blk;
- uint64_t itor = 0;
-
- VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
- spa->spa_sync_bplist_obj));
-
- while (bplist_iterate(bpl, &itor, &blk) == 0) {
- if (dump_opt['b'] >= 4) {
- char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
- (void) printf("[%s] %s\n",
- "deferred free", blkbuf);
- }
- zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
- }
-
- bplist_close(bpl);
- }
-
- /*
- * Now traverse the pool. If we're reading all data to verify
- * checksums, do a scrubbing read so that we validate all copies.
- */
- flags = ZIO_FLAG_CANFAIL;
- if (advance & ADVANCE_DATA)
- flags |= ZIO_FLAG_SCRUB;
- th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
- th->th_noread = zdb_noread;
-
- traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
-
- /* Keep going for as long as the traversal asks to be called again. */
- while (traverse_more(th) == EAGAIN)
- continue;
-
- traverse_fini(th);
-
- if (zcb.zcb_haderrors) {
- (void) printf("\nError counts:\n\n");
- (void) printf("\t%5s %s\n", "errno", "count");
- for (e = 0; e < 256; e++) {
- if (zcb.zcb_errors[e] != 0) {
- (void) printf("\t%5d %llu\n",
- e, (u_longlong_t)zcb.zcb_errors[e]);
- }
- }
- }
-
- /*
- * Report any leaked segments.
- */
- if (!dump_opt['L'])
- zdb_space_map_unload(spa);
-
- if (dump_opt['L'])
- (void) printf("\n\n *** Live pool traversal; "
- "block counts are only approximate ***\n\n");
-
- alloc = spa_get_alloc(spa);
- space = spa_get_space(spa);
-
- /* Grand total across all levels and all object types. */
- tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
-
- if (tzb->zb_asize == alloc) {
- (void) printf("\n\tNo leaks (block sum matches space"
- " maps exactly)\n");
- } else {
- (void) printf("block traversal size %llu != alloc %llu "
- "(leaked %lld)\n",
- (u_longlong_t)tzb->zb_asize,
- (u_longlong_t)alloc,
- (u_longlong_t)(alloc - tzb->zb_asize));
- leaks = 1;
- }
-
- /* Nothing counted: bail out before the averages divide by zb_count. */
- if (tzb->zb_count == 0)
- return (2);
-
- (void) printf("\n");
- (void) printf("\tbp count: %10llu\n",
- (u_longlong_t)tzb->zb_count);
- (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
- (u_longlong_t)tzb->zb_lsize,
- (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
- (void) printf("\tbp physical: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
- (u_longlong_t)tzb->zb_psize,
- (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
- (double)tzb->zb_lsize / tzb->zb_psize);
- (void) printf("\tbp allocated: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
- (u_longlong_t)tzb->zb_asize,
- (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
- (double)tzb->zb_lsize / tzb->zb_asize);
- (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
- (u_longlong_t)alloc, 100.0 * alloc / space);
-
- if (dump_opt['b'] >= 2) {
- int l, t, level;
- (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
- "\t avg\t comp\t%%Total\tType\n");
-
- for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
- char csize[6], lsize[6], psize[6], asize[6], avg[6];
- char *typename;
-
- typename = t == DMU_OT_DEFERRED ? "deferred free" :
- t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
-
- if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
- (void) printf("%6s\t%5s\t%5s\t%5s"
- "\t%5s\t%5s\t%6s\t%s\n",
- "-",
- "-",
- "-",
- "-",
- "-",
- "-",
- "-",
- typename);
- continue;
- }
-
- /* Highest level first; l == -1 selects the all-levels row. */
- for (l = ZB_TOTAL - 1; l >= -1; l--) {
- level = (l == -1 ? ZB_TOTAL : l);
- zb = &zcb.zcb_type[level][t];
-
- if (zb->zb_asize == 0)
- continue;
-
- if (dump_opt['b'] < 3 && level != ZB_TOTAL)
- continue;
-
- /* Skip the L0 row when it would just repeat the total row. */
- if (level == 0 && zb->zb_asize ==
- zcb.zcb_type[ZB_TOTAL][t].zb_asize)
- continue;
-
- nicenum(zb->zb_count, csize);
- nicenum(zb->zb_lsize, lsize);
- nicenum(zb->zb_psize, psize);
- nicenum(zb->zb_asize, asize);
- nicenum(zb->zb_asize / zb->zb_count, avg);
-
- (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
- "\t%5.2f\t%6.2f\t",
- csize, lsize, psize, asize, avg,
- (double)zb->zb_lsize / zb->zb_psize,
- 100.0 * zb->zb_asize / tzb->zb_asize);
-
- if (level == ZB_TOTAL)
- (void) printf("%s\n", typename);
- else
- (void) printf(" L%d %s\n",
- level, typename);
- }
- }
- }
-
- (void) printf("\n");
-
- if (leaks)
- return (2);
-
- if (zcb.zcb_haderrors)
- return (3);
-
- return (0);
-}
-
-/*
- * Dump the pool-wide state selected in dump_opt[]: the uberblock ('u'),
- * the meta objset plus every dataset ('d' or 'i', with deferred frees,
- * DTLs and metaslabs at 'd' >= 3), block statistics ('b' or 'c') and pool
- * statistics ('s').  If dump_block_stats() returned non-zero the process
- * exits with that status after everything else has been printed.
- */
-static void
-dump_zpool(spa_t *spa)
-{
-	dsl_pool_t *pool = spa_get_dsl(spa);
-	int status = 0;
-
-	if (dump_opt['u'])
-		dump_uberblock(&spa->spa_uberblock);
-
-	if (dump_opt['d'] || dump_opt['i']) {
-		dump_dir(pool->dp_meta_objset);
-
-		if (dump_opt['d'] >= 3) {
-			dump_bplist(pool->dp_meta_objset,
-			    spa->spa_sync_bplist_obj, "Deferred frees");
-			dump_dtl(spa->spa_root_vdev, 0);
-			dump_metaslabs(spa);
-		}
-
-		(void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL,
-		    DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
-	}
-
-	if (dump_opt['b'] || dump_opt['c'])
-		status = dump_block_stats(spa);
-
-	if (dump_opt['s'])
-		show_pool_stats(spa);
-
-	if (status != 0)
-		exit(status);
-}
-
-/*
- * Option bits for the block-read (-R) code.  The flag letter that selects
- * each bit is assigned in main(); it is noted alongside.
- */
-#define ZDB_FLAG_CHECKSUM 0x0001 /* 'c' */
-#define ZDB_FLAG_DECOMPRESS 0x0002 /* 'd' */
-#define ZDB_FLAG_BSWAP 0x0004 /* 'e' */
-#define ZDB_FLAG_GBH 0x0008 /* 'g' */
-#define ZDB_FLAG_INDIRECT 0x0010 /* 'i' */
-#define ZDB_FLAG_PHYS 0x0020 /* 'p' */
-#define ZDB_FLAG_RAW 0x0040 /* 'r' */
-#define ZDB_FLAG_PRINT_BLKPTR 0x0080 /* 'b' */
-
-/* Maps a flag character to its ZDB_FLAG_* bit; 0 means "not a flag". */
-int flagbits[256];
-
-/*
- * Print the fields of a block pointer: every DVA (in three forms), the
- * logical/physical sizes, byte order, object type, birth txg, level, fill
- * count, checksum and compression function names, and the checksum words.
- *
- * With ZDB_FLAG_BSWAP the blkptr is byteswapped in place first, so the
- * caller's copy is modified.
- */
-static void
-zdb_print_blkptr(blkptr_t *bp, int flags)
-{
- dva_t *dva = bp->blk_dva;
- int d;
-
- if (flags & ZDB_FLAG_BSWAP)
- byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
- /*
- * Super-ick warning: This code is also duplicated in
- * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
- * replication, too.
- */
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
- (longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]));
- (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
- "ASIZE: %llx\n", d,
- DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
- (longlong_t)DVA_GET_GRID(&dva[d]),
- (longlong_t)DVA_GET_ASIZE(&dva[d]));
- /* Colon-separated vdev:offset:psize:flag-letters form of the DVA. */
- (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
- (u_longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]),
- (longlong_t)BP_GET_PSIZE(bp),
- BP_SHOULD_BYTESWAP(bp) ? "e" : "",
- !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
- "d" : "",
- DVA_GET_GANG(&dva[d]) ? "g" : "",
- BP_GET_COMPRESS(bp) != 0 ? "d" : "");
- }
- (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
- (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
- /* NOTE(review): type/checksum/compress index their tables unchecked. */
- (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
- BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
- dmu_ot[BP_GET_TYPE(bp)].ot_name);
- (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
- (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
- (u_longlong_t)bp->blk_fill);
- (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
- (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
-}
-
-/*
- * Print nbps consecutive block pointers starting at bp, each through
- * zdb_print_blkptr() with the given flags.
- */
-static void
-zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
-{
-	blkptr_t *cur = bp;
-	int remaining = nbps;
-
-	while (remaining-- > 0)
-		zdb_print_blkptr(cur++, flags);
-}
-
-/*
- * Print a buffer as a gang block header, i.e. as an array of
- * SPA_GBH_NBLKPTRS block pointers.
- */
-static void
-zdb_dump_gbh(void *buf, int flags)
-{
-	blkptr_t *gang_bps = (blkptr_t *)buf;
-
-	zdb_dump_indirect(gang_bps, SPA_GBH_NBLKPTRS, flags);
-}
-
-/*
- * Write the block to stdout as raw bytes.  If ZDB_FLAG_BSWAP is set the
- * buffer is first byteswapped in place as an array of 64-bit words.
- */
-static void
-zdb_dump_block_raw(void *buf, uint64_t size, int flags)
-{
-	if (flags & ZDB_FLAG_BSWAP)
-		byteswap_uint64_array(buf, size);
-
-	/*
-	 * The 'r' flag is documented as dumping raw data to stdout, so write
-	 * to stdout's descriptor rather than descriptor 2 (stderr).  Flush
-	 * stdio first so the raw bytes land after any text already printed.
-	 */
-	(void) fflush(stdout);
-	(void) write(fileno(stdout), buf, size);
-}
-
-/*
- * Hex-dump a buffer, two 64-bit words per output line, followed by the
- * same sixteen bytes as characters ('.' for non-printable bytes).  With
- * ZDB_FLAG_BSWAP each word is byteswapped for display and the column
- * header is reordered to match; the buffer itself is not modified.
- *
- * Words are consumed in pairs, so size is expected to be a multiple of
- * 16 bytes.
- */
-static void
-zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
-{
-	uint64_t *d = (uint64_t *)buf;
-	int nwords = size / sizeof (uint64_t);
-	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
-	int i, j;
-	char *hdr;
-	unsigned char *c;
-
-	if (do_bswap)
-		hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
-	else
-		hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
-
-	(void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr);
-
-	for (i = 0; i < nwords; i += 2) {
-		(void) printf("%06llx: %016llx %016llx ",
-		    (u_longlong_t)(i * sizeof (uint64_t)),
-		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
-		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
-
-		/*
-		 * isprint() is only defined for values representable as
-		 * unsigned char (or EOF), so look at the bytes as unsigned;
-		 * a plain char may be negative for bytes >= 0x80.
-		 */
-		c = (unsigned char *)&d[i];
-		for (j = 0; j < (int)(2 * sizeof (uint64_t)); j++)
-			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
-		(void) printf("\n");
-	}
-}
-
-/*
- * There are two acceptable formats:
- *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
- *	child[.child]*    - For example: 0.1.1
- *
- * The second form can be used to specify arbitrary vdevs anywhere
- * in the hierarchy.  For example, in a pool with a mirror of
- * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
- *
- * A leaf name matches a child's full vdev_path, its last path component,
- * or the last path component with a trailing "s0" left off.
- *
- * Returns the matching vdev, or NULL if vdev is NULL or nothing matches.
- */
-static vdev_t *
-zdb_vdev_lookup(vdev_t *vdev, char *path)
-{
-	char *s, *p, *q;
-	unsigned long idx;
-	size_t len;
-	int i;
-
-	if (vdev == NULL)
-		return (NULL);
-
-	/* First, assume the x.x.x.x format */
-	idx = strtoul(path, &s, 10);
-	if (s == path || (*s != '.' && *s != '\0'))
-		goto name;
-	/*
-	 * Compare as unsigned: narrowing to int first could wrap an
-	 * out-of-range number onto a valid child index.
-	 */
-	if (idx >= vdev->vdev_children)
-		return (NULL);
-
-	vdev = vdev->vdev_child[idx];
-	if (*s == '\0')
-		return (vdev);
-	return (zdb_vdev_lookup(vdev, s + 1));
-
-name:
-	for (i = 0; i < vdev->vdev_children; i++) {
-		vdev_t *vc = vdev->vdev_child[i];
-
-		/* No path: an interior vdev, so search below it. */
-		if (vc->vdev_path == NULL) {
-			vc = zdb_vdev_lookup(vc, path);
-			if (vc == NULL)
-				continue;
-			else
-				return (vc);
-		}
-
-		p = strrchr(vc->vdev_path, '/');
-		p = p ? p + 1 : vc->vdev_path;
-
-		if (strcmp(vc->vdev_path, path) == 0)
-			return (vc);
-		if (strcmp(p, path) == 0)
-			return (vc);
-
-		/*
-		 * Allow a trailing "s0" on the vdev's name to be omitted.
-		 * Only look at the last two characters when the path has
-		 * that many, and require a non-empty stem (q > p) so that
-		 * a component that is just "s0" does not match everything.
-		 */
-		len = strlen(vc->vdev_path);
-		if (len < 2)
-			continue;
-		q = &vc->vdev_path[len - 2];
-		if (q > p && strcmp(q, "s0") == 0 &&
-		    strncmp(p, path, q - p) == 0)
-			return (vc);
-	}
-
-	return (NULL);
-}
-
-/*
- * Read a block from a pool and print it out.  The syntax of the
- * block descriptor is:
- *
- *	pool:vdev_specifier:offset:size[:flags]
- *
- *	pool           - The name of the pool you wish to read from
- *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
- *	offset         - offset, in hex, in bytes
- *	size           - Amount of data to read, in hex, in bytes
- *	flags          - A string of characters specifying options
- *		 b: Decode a blkptr at given offset within block
- *		*c: Calculate and display checksums
- *		*d: Decompress data before dumping
- *		 e: Byteswap data before dumping
- *		*g: Display data as a gang block header
- *		*i: Display as an indirect block
- *		 p: Do I/O to physical offset
- *		 r: Dump raw data to stdout
- *
- *              * = not yet implemented
- *
- * *spap is the pool left open by a previous call (or NULL).  It is reused
- * when the pool name matches; otherwise it is closed and the named pool is
- * opened in its place.  Failure to open the pool is fatal; every other
- * problem is reported and the function returns.
- */
-static void
-zdb_read_block(char *thing, spa_t **spap)
-{
-	spa_t *spa = *spap;
-	int flags = 0;
-	uint64_t offset = 0, size = 0, blkptr_offset = 0;
-	zio_t *zio;
-	vdev_t *vd;
-	void *buf;
-	char *s, *p, *dup, *spa_name, *vdev, *flagstr;
-	int i, error, zio_flags;
-
-	/* strtok() writes into its argument, so work on a private copy. */
-	dup = strdup(thing);
-	if (dup == NULL)
-		fatal("out of memory");
-
-	s = strtok(dup, ":");
-	spa_name = s ? s : "";
-	s = strtok(NULL, ":");
-	vdev = s ? s : "";
-	s = strtok(NULL, ":");
-	offset = strtoull(s ? s : "", NULL, 16);
-	s = strtok(NULL, ":");
-	size = strtoull(s ? s : "", NULL, 16);
-	s = strtok(NULL, ":");
-	flagstr = s ? s : "";
-
-	s = NULL;
-	if (size == 0)
-		s = "size must not be zero";
-	if (!IS_P2ALIGNED(size, DEV_BSIZE))
-		s = "size must be a multiple of sector size";
-	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
-		s = "offset must be a multiple of sector size";
-	if (s) {
-		(void) printf("Invalid block specifier: %s - %s\n", thing, s);
-		free(dup);
-		return;
-	}
-
-	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
-		for (i = 0; s[i]; i++) {
-			int bit = flagbits[(uchar_t)s[i]];
-
-			if (bit == 0) {
-				(void) printf("***Invalid flag: %c\n",
-				    s[i]);
-				continue;
-			}
-			flags |= bit;
-
-			/* If it's not something with an argument, keep going */
-			if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
-			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
-				continue;
-
-			p = &s[i + 1];
-			if (bit == ZDB_FLAG_PRINT_BLKPTR)
-				blkptr_offset = strtoull(p, &p, 16);
-			if (*p != ':' && *p != '\0') {
-				(void) printf("***Invalid flag arg: '%s'\n", s);
-				free(dup);
-				return;
-			}
-
-			/*
-			 * Step over the argument just consumed; otherwise
-			 * its hex digits would be scanned again as flag
-			 * letters ('b'..'e' are both).
-			 */
-			i = (int)(p - s) - 1;
-		}
-	}
-
-	/* The blkptr to decode must lie entirely inside the block read. */
-	if ((flags & ZDB_FLAG_PRINT_BLKPTR) &&
-	    (blkptr_offset > size ||
-	    size - blkptr_offset < sizeof (blkptr_t))) {
-		(void) printf("***Invalid blkptr offset: %llx\n",
-		    (u_longlong_t)blkptr_offset);
-		free(dup);
-		return;
-	}
-
-	if (spa == NULL || spa->spa_name == NULL ||
-	    strcmp(spa->spa_name, spa_name)) {
-		if (spa && spa->spa_name)
-			spa_close(spa, (void *)zdb_read_block);
-		error = spa_open(spa_name, spap, (void *)zdb_read_block);
-		if (error)
-			fatal("Failed to open pool '%s': %s",
-			    spa_name, strerror(error));
-		spa = *spap;
-	}
-
-	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
-	if (vd == NULL) {
-		(void) printf("***Invalid vdev: %s\n", vdev);
-		free(dup);
-		return;
-	} else {
-		if (vd->vdev_path)
-			(void) printf("Found vdev: %s\n", vd->vdev_path);
-		else
-			(void) printf("Found vdev type: %s\n",
-			    vd->vdev_ops->vdev_op_type);
-	}
-
-	buf = umem_alloc(size, UMEM_NOFAIL);
-
-	zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
-	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK;
-
-	if (flags & ZDB_FLAG_PHYS)
-		zio_flags |= ZIO_FLAG_PHYSICAL;
-
-	zio = zio_root(spa, NULL, NULL, 0);
-	/* XXX todo - cons up a BP so RAID-Z will be happy */
-	zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
-	    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
-	error = zio_wait(zio);
-
-	if (error) {
-		(void) printf("Read of %s failed, error: %d\n", thing, error);
-		goto out;
-	}
-
-	/* Exactly one output form is used; earlier tests take precedence. */
-	if (flags & ZDB_FLAG_PRINT_BLKPTR)
-		zdb_print_blkptr((blkptr_t *)(void *)
-		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
-	else if (flags & ZDB_FLAG_RAW)
-		zdb_dump_block_raw(buf, size, flags);
-	else if (flags & ZDB_FLAG_INDIRECT)
-		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
-		    flags);
-	else if (flags & ZDB_FLAG_GBH)
-		zdb_dump_gbh(buf, flags);
-	else
-		zdb_dump_block(thing, buf, size, flags);
-
-out:
-	umem_free(buf, size);
-	free(dup);
-}
-
-/*
- * zdb entry point.  Parses the options into dump_opt[] (a count per
- * option letter, raised by the number of -v's for every selected option),
- * initialises the userland kernel environment, and then either
- *   - prints the cached configuration (-C with no further arguments),
- *   - dumps the labels of a device (-l),
- *   - reads raw blocks described by the remaining arguments (-R), or
- *   - opens the named dataset (a name containing '/') or pool and dumps
- *     it; any further arguments are object numbers to restrict the dump.
- * When none of the selecting options is given, everything except 'L',
- * 'l' and 'R' is enabled.
- */
-int
-main(int argc, char **argv)
-{
-	int i, c;
-	struct rlimit rl = { 1024, 1024 };
-	spa_t *spa;
-	objset_t *os = NULL;
-	char *endstr;
-	int dump_all = 1;
-	int verbose = 0;
-	int error;
-	int flag = 0, set;
-
-	(void) setrlimit(RLIMIT_NOFILE, &rl);
-	(void) enable_extended_FILE_stdio(-1, -1);
-
-	dprintf_setup(&argc, argv);
-
-	while ((c = getopt(argc, argv, "udibcsvCLO:B:UlR")) != -1) {
-		switch (c) {
-		case 'u':
-		case 'd':
-		case 'i':
-		case 'b':
-		case 'c':
-		case 's':
-		case 'C':
-		case 'l':
-		case 'R':
-			dump_opt[c]++;
-			dump_all = 0;
-			break;
-		case 'L':
-			dump_opt[c]++;
-			break;
-		case 'O':
-			/* A leading '!' clears the named flag instead. */
-			endstr = optarg;
-			if (endstr[0] == '!') {
-				endstr++;
-				set = 0;
-			} else {
-				set = 1;
-			}
-			if (strcmp(endstr, "post") == 0) {
-				/* "post" is the inverse of "pre" */
-				flag = ADVANCE_PRE;
-				set = !set;
-			} else if (strcmp(endstr, "pre") == 0) {
-				flag = ADVANCE_PRE;
-			} else if (strcmp(endstr, "prune") == 0) {
-				flag = ADVANCE_PRUNE;
-			} else if (strcmp(endstr, "data") == 0) {
-				flag = ADVANCE_DATA;
-			} else if (strcmp(endstr, "holes") == 0) {
-				flag = ADVANCE_HOLES;
-			} else {
-				usage();
-			}
-			if (set)
-				zdb_advance |= flag;
-			else
-				zdb_advance &= ~flag;
-			break;
-		case 'B':
-			/*
-			 * Four numbers, each followed by one separator
-			 * character.
-			 * NOTE(review): nothing checks that all four fields
-			 * are present; a short argument makes the later
-			 * strtoull() calls start beyond the terminating NUL.
-			 */
-			endstr = optarg - 1;
-			zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
-			zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
-			zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
-			zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
-			(void) printf("simulating bad block "
-			    "<%llu, %llu, %lld, %llx>\n",
-			    (u_longlong_t)zdb_noread.zb_objset,
-			    (u_longlong_t)zdb_noread.zb_object,
-			    (u_longlong_t)zdb_noread.zb_level,
-			    (u_longlong_t)zdb_noread.zb_blkid);
-			break;
-		case 'v':
-			verbose++;
-			break;
-		case 'U':
-			spa_config_dir = "/tmp";
-			break;
-		default:
-			usage();
-			break;
-		}
-	}
-
-	kernel_init(FREAD);
-
-	/*
-	 * Disable vdev caching.  If we don't do this, live pool traversal
-	 * won't make progress because it will never see disk updates.
-	 */
-	zfs_vdev_cache_size = 0;
-
-	for (c = 0; c < 256; c++) {
-		if (dump_all && c != 'L' && c != 'l' && c != 'R')
-			dump_opt[c] = 1;
-		if (dump_opt[c])
-			dump_opt[c] += verbose;
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	if (argc < 1) {
-		if (dump_opt['C']) {
-			dump_config(NULL);
-			return (0);
-		}
-		usage();
-	}
-
-	if (dump_opt['l']) {
-		dump_label(argv[0]);
-		return (0);
-	}
-
-	if (dump_opt['R']) {
-		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
-		flagbits['c'] = ZDB_FLAG_CHECKSUM;
-		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
-		flagbits['e'] = ZDB_FLAG_BSWAP;
-		flagbits['g'] = ZDB_FLAG_GBH;
-		flagbits['i'] = ZDB_FLAG_INDIRECT;
-		flagbits['p'] = ZDB_FLAG_PHYS;
-		flagbits['r'] = ZDB_FLAG_RAW;
-
-		/* zdb_read_block() keeps the pool open between calls. */
-		spa = NULL;
-		while (argv[0]) {
-			zdb_read_block(argv[0], &spa);
-			argv++;
-			argc--;
-		}
-		if (spa)
-			spa_close(spa, (void *)zdb_read_block);
-		return (0);
-	}
-
-	if (dump_opt['C'])
-		dump_config(argv[0]);
-
-	/* A '/' in the name means a dataset; otherwise it is a pool. */
-	if (strchr(argv[0], '/') != NULL) {
-		error = dmu_objset_open(argv[0], DMU_OST_ANY,
-		    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
-	} else {
-		error = spa_open(argv[0], &spa, FTAG);
-	}
-
-	if (error)
-		fatal("can't open %s: %s", argv[0], strerror(error));
-
-	argv++;
-	if (--argc > 0) {
-		zopt_objects = argc;
-		zopt_object = calloc(zopt_objects, sizeof (uint64_t));
-		if (zopt_object == NULL)
-			fatal("out of memory");
-		for (i = 0; i < zopt_objects; i++) {
-			/*
-			 * Reject range errors as well as arguments that
-			 * are not entirely a number; strtoull() reports
-			 * the latter only through the end pointer.
-			 */
-			errno = 0;
-			zopt_object[i] = strtoull(argv[i], &endstr, 0);
-			if (errno != 0 || endstr == argv[i] ||
-			    *endstr != '\0')
-				fatal("bad object number %s: %s",
-				    argv[i],
-				    strerror(errno != 0 ? errno : EINVAL));
-		}
-	}
-
-	if (os != NULL) {
-		dump_dir(os);
-		dmu_objset_close(os);
-	} else {
-		dump_zpool(spa);
-		spa_close(spa, FTAG);
-	}
-
-	kernel_fini();
-
-	return (0);
-}
diff --git a/contrib/opensolaris/cmd/zdb/zdb_il.c b/contrib/opensolaris/cmd/zdb/zdb_il.c
deleted file mode 100644
index 10dfe20..0000000
--- a/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Print intent log header and statistics.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/stat.h>
-#include <sys/resource.h>
-#include <sys/zil.h>
-#include <sys/zil_impl.h>
-
-extern uint8_t dump_opt[256];
-
-/*
- * Print a block pointer, formatted by sprintf_blkptr(), on one line
- * preceded by the given prefix.
- */
-static void
-print_log_bp(const blkptr_t *bp, const char *prefix)
-{
-	char text[BP_SPRINTF_LEN];
-
-	sprintf_blkptr(text, sizeof (text), bp);
-	(void) printf("%s%s\n", prefix, text);
-}
-
-/*
- * Print a create-style log record (used for TX_CREATE, TX_MKDIR,
- * TX_MKXATTR and TX_SYMLINK).  The new name is the NUL-terminated string
- * that follows the fixed record; for TX_SYMLINK a second string, the link
- * contents, follows the name.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
-{
-	time_t created = lr->lr_crtime[0];
-	char *name = (char *)(lr + 1);
-
-	if (txtype == TX_SYMLINK) {
-		char *target = name + strlen(name) + 1;
-
-		(void) printf("\t\t\t%s -> %s\n", name, target);
-	} else {
-		(void) printf("\t\t\t%s\n", name);
-	}
-
-	(void) printf("\t\t\t%s", ctime(&created));
-	(void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
-	    (u_longlong_t)lr->lr_doid,
-	    (u_longlong_t)lr->lr_foid,
-	    (longlong_t)lr->lr_mode);
-	(void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
-	    (u_longlong_t)lr->lr_uid,
-	    (u_longlong_t)lr->lr_gid,
-	    (u_longlong_t)lr->lr_gen,
-	    (u_longlong_t)lr->lr_rdev);
-}
-
-/*
- * Print a remove record (TX_REMOVE, TX_RMDIR): the directory object and
- * the name, which is stored immediately after the fixed record.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
-{
-	char *name = (char *)(lr + 1);
-	u_longlong_t dir_obj = (u_longlong_t)lr->lr_doid;
-
-	(void) printf("\t\t\tdoid %llu, name %s\n", dir_obj, name);
-}
-
-/*
- * Print a TX_LINK record: the directory object, the object being linked,
- * and the new name, which is stored immediately after the fixed record.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
-{
-	char *name = (char *)(lr + 1);
-	u_longlong_t dir_obj = (u_longlong_t)lr->lr_doid;
-	u_longlong_t target_obj = (u_longlong_t)lr->lr_link_obj;
-
-	(void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
-	    dir_obj, target_obj, name);
-}
-
-/*
- * Print a TX_RENAME record.  Two NUL-terminated strings follow the fixed
- * record: the source name, then the target name.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
-{
-	char *from = (char *)(lr + 1);
-	char *to = from + strlen(from) + 1;
-
-	(void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
-	    (u_longlong_t)lr->lr_sdoid,
-	    (u_longlong_t)lr->lr_tdoid);
-	(void) printf("\t\t\tsrc %s tgt %s\n", from, to);
-}
-
-/*
- * Print a TX_WRITE record.  The header line is always printed; at
- * verbosity 5 and above the written data is dumped as well (the first 20
- * bytes, or everything at verbosity 6+).
- *
- * A record whose length is exactly sizeof (lr_write_t) carries no inline
- * data: the data lives in the block named by lr_blkptr, which is read
- * (or taken as zeroes when blk_birth is 0) and indexed by lr_blkoff.
- * Otherwise the data follows the fixed record directly.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
-{
-	char *data, *dlimit;
-	blkptr_t *bp = &lr->lr_blkptr;
-	char buf[SPA_MAXBLOCKSIZE];
-	int verbose = MAX(dump_opt['d'], dump_opt['i']);
-	uint64_t maxlen;
-	int error;
-
-	(void) printf("\t\t\tfoid %llu, offset 0x%llx,"
-	    " length 0x%llx, blkoff 0x%llx\n",
-	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
-	    (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
-
-	if (verbose < 5)
-		return;
-
-	maxlen = (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE);
-
-	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
-		(void) printf("\t\t\thas blkptr, %s\n",
-		    bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
-		    "will claim" : "won't claim");
-		print_log_bp(bp, "\t\t\t");
-		if (bp->blk_birth == 0) {
-			bzero(buf, sizeof (buf));
-		} else {
-			zbookmark_t zb;
-
-			ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
-			    dmu_objset_id(zilog->zl_os));
-
-			zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
-			zb.zb_object = 0;
-			zb.zb_level = -1;
-			zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
-
-			error = zio_wait(zio_read(NULL, zilog->zl_spa,
-			    bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
-			    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
-			if (error)
-				return;
-		}
-
-		/*
-		 * lr_blkoff comes from the log record; never let the dump
-		 * start or run beyond the end of buf.
-		 */
-		if (lr->lr_blkoff >= sizeof (buf))
-			return;
-		maxlen = MIN(maxlen, sizeof (buf) - lr->lr_blkoff);
-		data = buf + lr->lr_blkoff;
-	} else {
-		data = (char *)(lr + 1);
-	}
-
-	dlimit = data + MIN(lr->lr_length, maxlen);
-
-	(void) printf("\t\t\t");
-	while (data < dlimit) {
-		/*
-		 * Treat each byte as unsigned: isprint() is undefined for
-		 * negative values, and a negative char passed to %X would
-		 * print sign-extended (FFFFFFxx).
-		 */
-		unsigned char ch = (unsigned char)*data;
-
-		if (isprint(ch))
-			(void) printf("%c ", ch);
-		else
-			(void) printf("%2X", ch);
-		data++;
-	}
-	(void) printf("\n");
-}
-
-/*
- * Print a TX_TRUNCATE record: the file object and the offset and length
- * stored in the record.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
-{
-	u_longlong_t file_obj = (u_longlong_t)lr->lr_foid;
-	longlong_t start = (longlong_t)lr->lr_offset;
-	u_longlong_t nbytes = (u_longlong_t)lr->lr_length;
-
-	(void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
-	    file_obj, start, nbytes);
-}
-
-/*
- * Print a TX_SETATTR record: the file object and attribute mask, then one
- * line for each attribute whose AT_* bit is set in the mask.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
-{
-	(void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
-	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
-
-	if (lr->lr_mask & AT_MODE)
-		(void) printf("\t\t\tAT_MODE %llo\n",
-		    (longlong_t)lr->lr_mode);
-
-	if (lr->lr_mask & AT_UID)
-		(void) printf("\t\t\tAT_UID %llu\n",
-		    (u_longlong_t)lr->lr_uid);
-
-	if (lr->lr_mask & AT_GID)
-		(void) printf("\t\t\tAT_GID %llu\n",
-		    (u_longlong_t)lr->lr_gid);
-
-	if (lr->lr_mask & AT_SIZE)
-		(void) printf("\t\t\tAT_SIZE %llu\n",
-		    (u_longlong_t)lr->lr_size);
-
-	if (lr->lr_mask & AT_ATIME) {
-		/* ctime() supplies the trailing newline */
-		time_t accessed = (time_t)lr->lr_atime[0];
-
-		(void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
-		    (u_longlong_t)lr->lr_atime[0],
-		    (u_longlong_t)lr->lr_atime[1],
-		    ctime(&accessed));
-	}
-
-	if (lr->lr_mask & AT_MTIME) {
-		time_t modified = (time_t)lr->lr_mtime[0];
-
-		(void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
-		    (u_longlong_t)lr->lr_mtime[0],
-		    (u_longlong_t)lr->lr_mtime[1],
-		    ctime(&modified));
-	}
-}
-
-/*
- * Print a TX_ACL record: the file object and the ACL entry count.
- */
-/* ARGSUSED */
-static void
-zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
-{
-	u_longlong_t file_obj = (u_longlong_t)lr->lr_foid;
-	u_longlong_t entries = (u_longlong_t)lr->lr_aclcnt;
-
-	(void) printf("\t\t\tfoid %llu, aclcnt %llu\n", file_obj, entries);
-}
-
-/*
- * Record printer.  Declared without a prototype because each printer takes
- * its own lr_*_t record type; all are called as (zilog, txtype, lr).
- */
-typedef void (*zil_prt_rec_func_t)();
-typedef struct zil_rec_info {
- zil_prt_rec_func_t zri_print; /* detail printer for this record type */
- char *zri_name; /* label printed for this record type */
- uint64_t zri_count; /* records of this type seen so far */
-} zil_rec_info_t;
-
-/*
- * Indexed by transaction type (lrc_txtype).  Slot 0 is not a record type:
- * it has no printer and accumulates the total across all types.
- */
-static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
- { NULL, "Total " },
- { zil_prt_rec_create, "TX_CREATE " },
- { zil_prt_rec_create, "TX_MKDIR " },
- { zil_prt_rec_create, "TX_MKXATTR " },
- { zil_prt_rec_create, "TX_SYMLINK " },
- { zil_prt_rec_remove, "TX_REMOVE " },
- { zil_prt_rec_remove, "TX_RMDIR " },
- { zil_prt_rec_link, "TX_LINK " },
- { zil_prt_rec_rename, "TX_RENAME " },
- { zil_prt_rec_write, "TX_WRITE " },
- { zil_prt_rec_truncate, "TX_TRUNCATE" },
- { zil_prt_rec_setattr, "TX_SETATTR " },
- { zil_prt_rec_acl, "TX_ACL " },
-};
-
-/*
- * zil_parse() record callback: print the one-line summary of a log
- * record, call the type-specific printer at verbosity 3 and above, and
- * bump both the per-type and the total record counters.
- */
-/* ARGSUSED */
-static void
-print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
-{
-	int txtype = lr->lrc_txtype;
-	int verbose = MAX(dump_opt['d'], dump_opt['i']);
-	zil_rec_info_t *info;
-
-	ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE);
-	ASSERT(lr->lrc_txg);
-
-	info = &zil_rec_info[txtype];
-
-	(void) printf("\t\t%s len %6llu, txg %llu, seq %llu\n",
-	    info->zri_name,
-	    (u_longlong_t)lr->lrc_reclen,
-	    (u_longlong_t)lr->lrc_txg,
-	    (u_longlong_t)lr->lrc_seq);
-
-	/* Slot 0 (the total) has no printer. */
-	if (txtype != 0 && verbose >= 3)
-		info->zri_print(zilog, txtype, lr);
-
-	info->zri_count++;
-	zil_rec_info[0].zri_count++;
-}
-
-/*
- * zil_parse() block callback: at verbosity above 3 print the sequence
- * number and claim state of a log block, followed by the formatted block
- * pointer at verbosity 5 and above.
- */
-/* ARGSUSED */
-static void
-print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
-{
-	char detail[BP_SPRINTF_LEN];
-	int verbose = MAX(dump_opt['d'], dump_opt['i']);
-	char *state;
-
-	if (verbose <= 3)
-		return;
-
-	detail[0] = '\0';
-	if (verbose >= 5) {
-		size_t used;
-
-		(void) strcpy(detail, ", ");
-		used = strlen(detail);
-		sprintf_blkptr(detail + used, BP_SPRINTF_LEN - used, bp);
-	}
-
-	if (claim_txg != 0) {
-		state = "already claimed";
-	} else if (bp->blk_birth >= spa_first_txg(zilog->zl_spa)) {
-		state = "will claim";
-	} else {
-		state = "won't claim";
-	}
-
-	(void) printf("\tBlock seqno %llu, %s%s\n",
-	    (u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], state, detail);
-}
-
-/*
- * Print the per-type record counts gathered in zil_rec_info[], right
- * aligned to the width of the total.  Types with a zero count are listed
- * only at verbosity 3 and above; nothing is listed when the total is zero.
- */
-static void
-print_log_stats(int verbose)
-{
-	uint64_t total = zil_rec_info[0].zri_count;
-	uint64_t rest;
-	int i, w;
-
-	if (verbose > 3)
-		(void) printf("\n");
-
-	if (total == 0)
-		return;
-
-	/*
-	 * Column width = number of decimal digits in the total.  Counting
-	 * by division avoids the signed overflow that multiplying an int
-	 * power of ten would hit for totals of 10^9 and above.
-	 */
-	for (w = 1, rest = total; rest >= 10; rest /= 10)
-		w++;
-
-	for (i = 0; i < TX_MAX_TYPE; i++)
-		if (zil_rec_info[i].zri_count || verbose >= 3)
-			(void) printf("\t\t%s %*llu\n",
-			    zil_rec_info[i].zri_name, w,
-			    (u_longlong_t)zil_rec_info[i].zri_count);
-	(void) printf("\n");
-}
-
-/*
- * Print the intent log of a dataset: the ZIL header, then every log
- * block and record via zil_parse(), then the per-type record counts.
- * Nothing is printed below verbosity 2 or when the header's log block
- * pointer has a zero birth txg.
- */
-/* ARGSUSED */
-void
-dump_intent_log(zilog_t *zilog)
-{
-	const zil_header_t *hdr = zilog->zl_header;
-	int verbose = MAX(dump_opt['d'], dump_opt['i']);
-	int type;
-
-	if (verbose < 2 || hdr->zh_log.blk_birth == 0)
-		return;
-
-	(void) printf("\n ZIL header: claim_txg %llu, seq %llu\n",
-	    (u_longlong_t)hdr->zh_claim_txg,
-	    (u_longlong_t)hdr->zh_replay_seq);
-
-	if (verbose >= 4)
-		print_log_bp(&hdr->zh_log, "\n\tfirst block: ");
-
-	/* The counters are file-scope; start every log from zero. */
-	for (type = 0; type < TX_MAX_TYPE; type++)
-		zil_rec_info[type].zri_count = 0;
-
-	/* verbose >= 2 is guaranteed by the early return above. */
-	(void) printf("\n");
-	(void) zil_parse(zilog, print_log_block, print_log_record, NULL,
-	    hdr->zh_claim_txg);
-	print_log_stats(verbose);
-}
diff --git a/contrib/opensolaris/cmd/zfs/zfs.8 b/contrib/opensolaris/cmd/zfs/zfs.8
deleted file mode 100644
index d49cb87..0000000
--- a/contrib/opensolaris/cmd/zfs/zfs.8
+++ /dev/null
@@ -1,1843 +0,0 @@
-'\" te
-.\" CDDL HEADER START
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\" CDDL HEADER END
-.\" Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
-.TH zfs 1M "16 Mar 2007" "SunOS 5.11" "System Administration Commands"
-.SH NAME
-zfs \- configures ZFS file systems
-.SH SYNOPSIS
-.LP
-.nf
-\fBzfs\fR [\fB-?\fR]
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBcreate\fR [[\fB-o\fR property=\fIvalue\fR]]... \fIfilesystem\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBcreate\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=\fIvalue\fR]]... \fB-V\fR \fIsize\fR \fIvolume\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBclone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBpromote\fR \fIfilesystem\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBrename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
- [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBsnapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...]
- [\fB-s\fR \fIprop\fR [\fB-s\fR \fIprop\fR]...] [\fB-S\fR \fIprop\fR [\fB-S\fR \fIprop\fR]...]
- [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|.\fI/pathname\fR] ...
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR ...
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBget\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...]
- [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]...
- \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR ...
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBmount\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fB-a\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fIfilesystem\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB-a\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBshare\fR \fB-a\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBshare\fR \fIfilesystem\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB-a\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBsend\fR [\fB-i\fR \fIsnapshot1\fR] \fB\fIsnapshot2\fR\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
-.fi
-
-.LP
-.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fB-d\fR \fB\fIfilesystem\fR\fR
-.fi
-.LP
-.nf
-\fBzfs\fR \fBjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
-.fi
-.LP
-.nf
-\fBzfs\fR \fBunjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
-.fi
-
-.SH DESCRIPTION
-.LP
-The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(1M). A
-dataset is identified by a unique path within the \fBZFS\fR namespace. For example:
-.sp
-.in +2
-.nf
-pool/{filesystem,volume,snapshot}
-.fi
-.in -2
-.sp
-
-.LP
-where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
-.LP
-A dataset can be one of the following:
-.sp
-.ne 2
-.mk
-.na
-\fB\fIfile system\fR\fR
-.ad
-.RS 15n
-.rt
-A standard \fBPOSIX\fR file system. \fBZFS\fR file systems can be mounted within the standard file system namespace and behave like any other file system.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fIvolume\fR\fR
-.ad
-.RS 15n
-.rt
-A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. Volumes cannot be used in a non-global zone.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fIsnapshot\fR\fR
-.ad
-.RS 15n
-.rt
-A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR.
-.RE
-
-.SS "ZFS File System Hierarchy"
-.LP
-A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy.
-.LP
-The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(1M) command.
-.LP
-See \fBzpool\fR(1M) for more information on creating and administering pools.
-.SS "Snapshots"
-.LP
-A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more space, because it continues to reference the old data that would otherwise have been shared with the active dataset.
-.LP
-Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently.
-.LP
-File system snapshots can be accessed under the ".zfs/snapshot" directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the ".zfs" directory can be controlled by the "snapdir"
-property.
-.SS "Clones"
-.LP
-A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space.
-.LP
-Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The "origin"
-property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
-.LP
-The clone parent-child dependency relationship can be reversed by using the "\fBpromote\fR" subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone
-was created from.
-.SS "Mount Points"
-.LP
-Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system will likely be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/vfstab\fR file.
-All automatically managed file systems are mounted by \fBZFS\fR at boot time.
-.LP
-By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed.
-.LP
-A file system can also have a mount point set in the "mountpoint" property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the "\fBzfs mount -a\fR" command is invoked (without editing \fB/etc/vfstab\fR). The mountpoint property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR.
-.LP
-A file system mountpoint property of "none" prevents the file system from being mounted.
-.LP
-If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to "legacy", \fBZFS\fR makes no attempt to manage
-the file system, and the administrator is responsible for mounting and unmounting the file system.
-.SS "Zones"
-.LP
-A \fBZFS\fR file system can be added to a non-global zone by using zonecfg's "\fBadd fs\fR" subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its mountpoint property set to legacy.
-.LP
-The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
-.LP
-A dataset can also be delegated to a non-global zone by using zonecfg's "\fBadd dataset\fR" subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or
-any of its children. However, the "quota" property is controlled by the global administrator.
-.LP
-A \fBZFS\fR volume can be added as a device to a non-global zone by using zonecfg's "\fBadd device\fR" subcommand. However, its physical properties can only be modified by the global administrator.
-.LP
-For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
-.LP
-After a dataset is delegated to a non-global zone, the "zoned" property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value.
-.LP
-The global administrator can forcibly clear the "zoned" property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property.
-.SS "Native Properties"
-.LP
-Properties are divided into two types, native properties and user defined properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior,
-but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section.
-.LP
-Every dataset has a set of properties that export statistics about the dataset as well as control various behavior. Properties are inherited from the parent unless overridden by the child. Snapshot properties cannot be edited; they always inherit their inheritable properties. Properties
-that are not applicable to snapshots are not displayed.
-.LP
-The values of numeric properties can be specified using the following human-readable suffixes (for example, "k", "KB", "M", "Gb", etc, up to Z for zettabyte). The following are all valid (and equal) specifications:
-.sp
-.in +2
-.nf
-"1536M", "1.5g", "1.50GB".
-.fi
-.in -2
-.sp
-
-.LP
-The values of non-numeric properties are case sensitive and must be lowercase, except for "mountpoint" and "sharenfs".
-.LP
-The first set of properties consist of read-only statistics about the dataset. These properties cannot be set, nor are they inherited. Native properties apply to all dataset types unless otherwise noted.
-.sp
-.ne 2
-.mk
-.na
-\fBtype\fR
-.ad
-.RS 17n
-.rt
-The type of dataset: "filesystem", "volume", "snapshot", or "clone".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBcreation\fR
-.ad
-.RS 17n
-.rt
-The time this dataset was created.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBused\fR
-.ad
-.RS 17n
-.rt
-The amount of space consumed by this dataset and all its descendants. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendant datasets.
-The amount of space that a dataset consumes from its parent, as well as the amount of space that will be freed if this dataset is recursively destroyed, is the greater of its space used and its reservation.
-.sp
-When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in
-the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots.
-.sp
-The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(3C) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBavailable\fR
-.ad
-.RS 17n
-.rt
-The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets
-within the pool.
-.sp
-This property can also be referred to by its shortened column name, "avail".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBreferenced\fR
-.ad
-.RS 17n
-.rt
-The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are
-identical.
-.sp
-This property can also be referred to by its shortened column name, "refer".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBcompressratio\fR
-.ad
-.RS 17n
-.rt
-The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running "zfs set compression=on \fIdataset\fR". Compression is "off" by default.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBmounted\fR
-.ad
-.RS 17n
-.rt
-For file systems, indicates whether the file system is currently mounted. This property can be either "yes" or "no".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBorigin\fR
-.ad
-.RS 17n
-.rt
-For cloned file systems or volumes, the snapshot from which the clone was created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR options) so long as a clone exists.
-.RE
-
-.LP
-The following two properties can be set to control the way space is allocated between datasets. These properties are not inherited, but do affect their descendants.
-.sp
-.ne 2
-.mk
-.na
-\fBquota=\fIsize\fR | \fInone\fR\fR
-.ad
-.sp .6
-.RS 4n
-Limits the amount of space a dataset and its descendants can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendants, including file systems and snapshots. Setting a quota on a descendant of a dataset that already
-has a quota does not override the ancestor's quota, but rather imposes an additional limit.
-.sp
-Quotas cannot be set on volumes, as the "volsize" property acts as an implicit quota.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBreservation=\fIsize\fR | \fInone\fR\fR
-.ad
-.sp .6
-.RS 4n
-The minimum amount of space guaranteed to a dataset and its descendants. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space
-used, and count against the parent datasets' quotas and reservations.
-.sp
-This property can also be referred to by its shortened column name, "reserv".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBvolsize=\fIsize\fR\fR
-.ad
-.sp .6
-.RS 4n
-For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. Any changes to \fBvolsize\fR are reflected in an equivalent change to the reservation. The \fBvolsize\fR can only be set to a
-multiple of \fBvolblocksize\fR, and cannot be zero.
-.sp
-The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when
-the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size.
-.sp
-Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the "\fBzfs create -V\fR" command, or by changing the reservation after the volume has been created.
-A "sparse volume" is a volume where the reservation is less than the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBvolblocksize=\fIblocksize\fR\fR
-.ad
-.sp .6
-.RS 4n
-For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes
-to 128 Kbytes is valid.
-.sp
-This property can also be referred to by its shortened column name, "volblock".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBrecordsize=\fIsize\fR\fR
-.ad
-.sp .6
-.RS 4n
-Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical
-access patterns.
-.sp
-For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a "recordsize" greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general
-purpose file systems is strongly discouraged, and may adversely affect performance.
-.sp
-The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes.
-.sp
-Changing the file system's \fBrecordsize\fR only affects files created afterward; existing files are unaffected.
-.sp
-This property can also be referred to by its shortened column name, "recsize".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBmountpoint=\fIpath\fR | \fInone\fR | \fIlegacy\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used.
-.sp
-When the mountpoint property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is "legacy", then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was
-previously "legacy" or "none", or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBsharenfs=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a sharenfs property of "off" is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the "\fBzfs share\fR" and "\fBzfs unshare\fR" commands. If the property is set to "on", the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
-.sp
-When the "sharenfs" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is "off",
-the file systems are unshared.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBshareiscsi=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Like the "sharenfs" property, "shareiscsi" indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are "on", "off", and "type=disk".
-The default value is "off". In the future, other target types might be supported. For example, "tape".
-.sp
-You might want to set "shareiscsi=on" for a file system so that all \fBZFS\fR volumes within the file system are shared by default. Setting this property on a file system has no direct effect, however.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBchecksum=\fIon\fR | \fIoff\fR | \fIfletcher2\fR | \fIfletcher4\fR | \fIsha256\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls the checksum used to verify data integrity. The default value is "on", which automatically selects an appropriate algorithm (currently, \fIfletcher2\fR, but this may change in future releases). The value "off" disables integrity
-checking on user data. Disabling checksums is NOT a recommended practice.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBcompression=\fIon\fR | \fIoff\fR | \fIlzjb\fR | \fIgzip\fR | \fIgzip-N\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls the compression algorithm used for this dataset. The "lzjb" compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip"
-compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR",
-where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
-.sp
-This property can also be referred to by its shortened column name "compress".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBatime=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value
-is "on".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBdevices=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether device nodes can be opened on this file system. The default value is "on".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBexec=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether processes can be executed from within this file system. The default value is "on".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBsetuid=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is "on".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBreadonly=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether this dataset can be modified. The default value is "off".
-.sp
-This property can also be referred to by its shortened column name, "rdonly".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBzoned=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is "off".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBsnapdir=\fIhidden\fR | \fIvisible\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the ".zfs" directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is "hidden".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBaclmode=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an "aclmode" property of "\fBdiscard\fR"
-deletes all \fBACL\fR entries that do not represent the mode of the file. An "aclmode" property of "\fBgroupmask\fR" (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission
-bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an "aclmode" property of "\fBpassthrough\fR" indicates that no changes will be made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBaclinherit=\fBdiscard\fR | \fBnoallow\fR | \fBsecure\fR | \fBpassthrough\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an "aclinherit" property of "\fBdiscard\fR" does not inherit any \fBACL\fR entries. A file system with an "aclinherit"
-property value of "\fBnoallow\fR" only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value "\fBsecure\fR" (the default) removes the "\fBwrite_acl\fR" and "\fBwrite_owner\fR" permissions when the \fBACL\fR entry is inherited. A file system with an "aclinherit" property value of "\fBpassthrough\fR" inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBcanmount=\fBon\fR | \fBoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-If this property is set to "\fBoff\fR", the file system cannot be mounted, and is ignored by "\fBzfs mount -a\fR". This is similar to setting the "mountpoint" property to "\fBnone\fR", except
-that the dataset still has a normal "mountpoint" property which can be inherited. This allows datasets to be used solely as a mechanism to inherit properties. One use case is to have two logically separate datasets have the same mountpoint, so that the children of both datasets appear
-in the same directory, but may have different inherited characteristics. The default value is "\fBon\fR".
-.sp
-This property is not inherited.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBxattr=\fBon\fR | \fBoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether extended attributes are enabled for this file system. The default value is "\fBon\fR".
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or raid-z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated
-file and dataset, changing the "used" property and counting against quotas and reservations.
-.sp
-Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the "\fB-o\fR copies=" option.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBjailed=\fIon\fR | \fIoff\fR\fR
-.ad
-.sp .6
-.RS 4n
-Controls whether the dataset is managed from within a jail. The default value is "off".
-.RE
-
-.SS "iscsioptions"
-.LP
-This read-only property, which is hidden, is used by the \fBiSCSI\fR target daemon to store persistent information, such as the \fBIQN\fR. It cannot be viewed or modified using the \fBzfs\fR command. The contents are not intended for external consumers.
-.SS "Temporary Mount Point Properties"
-.LP
-When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the "\fBzfs mount\fR" command for normal file systems,
-its mount options are set according to its properties. The correlation between properties and mount options is as follows:
-.sp
-.in +2
-.nf
- PROPERTY MOUNT OPTION
- devices devices/nodevices
- exec exec/noexec
- readonly ro/rw
- setuid setuid/nosetuid
- xattr xattr/noxattr
-.fi
-.in -2
-.sp
-
-.LP
-In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fBnosuid\fR option is an alias for "nodevices,nosetuid".
-These properties are reported as "temporary" by the "\fBzfs get\fR" command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings.
-.SS "User Properties"
-.LP
-In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets.
-.LP
-User property names must contain a colon (":") character, to distinguish them from native properties. They might contain lowercase letters, numbers, and the following punctuation characters: colon (":"), dash ("-"), period ("."), and underscore
-("_"). The expected convention is that the property name is divided into two portions such as "\fImodule\fR:\fIproperty\fR", but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters,
-and cannot begin with a dash ("-").
-.LP
-When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for
-different purposes. Property names beginning with "com.sun." are reserved for use by Sun Microsystems.
-.LP
-The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties ("zfs list", "zfs get", "zfs set", etc.) can be used to manipulate both native properties and user properties.
-Use the "\fBzfs inherit\fR" command to clear a user property. If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters.
-.SS "Volumes as Swap or Dump Devices"
-.LP
-To set up a swap area, create a \fBZFS\fR volume of a specific size and then enable swap on that device. For more information, see the EXAMPLES section.
-.LP
-Do not swap to a file on a \fBZFS\fR file system. A \fBZFS\fR swap file configuration is not supported.
-.LP
-Using a \fBZFS\fR volume as a dump device is not supported.
-.SH SUBCOMMANDS
-.LP
-All subcommands that modify state are logged persistently to the pool in their original form.
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs ?\fR\fR
-.ad
-.sp .6
-.RS 4n
-Displays a help message.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs create\fR [[\fB-o\fR property=value]...] \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the "mountpoint" property inherited from the parent.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR property=value\fR
-.ad
-.RS 21n
-.rt
-Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An
-error results if the same property is specified in multiple \fB-o\fR options.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs create\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=value]...] \fB-V\fR \fIsize\fR \fIvolume\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fIpath\fR\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents
-the logical size as exported by the device. By default, a reservation of equal size is created.
-.sp
-\fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR\fR
-.ad
-.RS 21n
-.rt
-Creates a sparse volume with no reservation. See "volsize" in the Native Properties section for more information about sparse volumes.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR property=value\fR
-.ad
-.RS 21n
-.rt
-Sets the specified property as if "\fBzfs set property=value\fR" was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An
-error results if the same property is specified in multiple \fB-o\fR options.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-b\fR \fIblocksize\fR\fR
-.ad
-.RS 21n
-.rt
-Equivalent to "\fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR". If this option is specified in conjunction with "\fB\fR\fB-o\fR \fBvolblocksize\fR", the resulting
-behavior is undefined.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
-.ad
-.sp .6
-.RS 4n
-Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children, snapshots, clones).
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-R\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Force an unmount of any file systems using the "\fBunmount -f\fR" command. This option has no effect on non-file systems or unmounted file systems.
-.RE
-
-Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs clone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs promote\fR \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the "origin" file system
-becomes a clone of the specified file system.
-.sp
-The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the "origin" file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed
-by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The "\fBrename\fR" subcommand can be used to rename any conflicting snapshots.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs rename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
-.ad
-.sp .6
-.RS 4n
-Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does
-not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs snapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a snapshot with the given name. See the "Snapshots" section for details.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively create snapshots of all descendant datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR
-.ad
-.sp .6
-.RS 4n
-Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than
-the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option. The file system is unmounted and remounted, if necessary.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively destroy any snapshots more recent than the one specified.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-R\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively destroy any more recent snapshots, as well as any clones of those snapshots.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Force an unmount of any file systems using the "\fBunmount -f\fR" command.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIprop\fR [\fB-s\fR \fIprop\fR]...] [\fB-S\fR \fIprop\fR [\fB-S\fR \fIprop\fR]...] [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|.\fI/pathname\fR ...]\fR
-.ad
-.sp .6
-.RS 4n
-Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all datasets are displayed and contain the following fields:
-.sp
-.in +2
-.nf
-name,used,available,referenced,mountpoint
-.fi
-.in -2
-.sp
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-H\fR\fR
-.ad
-.RS 11n
-.rt
-Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary whitespace.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 11n
-.rt
-Recursively display any children of the dataset on the command line.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIprop\fR\fR
-.ad
-.RS 11n
-.rt
-A comma-separated list of properties to display. The property must be one of the properties described in the "Native Properties" section, or the special value "name" to display the dataset name.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR \fIprop\fR\fR
-.ad
-.RS 11n
-.rt
-A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value "name" to sort by the dataset name. Multiple
-properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
-.sp
-The following is a list of sorting criteria:
-.RS +4
-.TP
-.ie t \(bu
-.el o
-Numeric types sort in numeric order.
-.RE
-.RS +4
-.TP
-.ie t \(bu
-.el o
-String types sort in alphabetical order.
-.RE
-.RS +4
-.TP
-.ie t \(bu
-.el o
-Types inappropriate for a row sort that row to the literal bottom, regardless of the specified ordering.
-.RE
-.RS +4
-.TP
-.ie t \(bu
-.el o
-If no sorting options are specified the existing behavior of "\fBzfs list\fR" is preserved.
-.RE
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-S\fR \fIprop\fR\fR
-.ad
-.RS 11n
-.rt
-Same as the \fB-s\fR option, but sorts by property in descending order.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-t\fR \fItype\fR\fR
-.ad
-.RS 11n
-.rt
-A comma-separated list of types to display, where "type" is one of "filesystem", "snapshot" or "volume". For example, specifying "\fB-t snapshot\fR" displays only snapshots.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs set\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable
-form with a suffix of "B", "K", "M", "G", "T", "P", "E", "Z" (for bytes, Kbytes, Mbytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs get\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Displays properties for the given datasets. If no datasets are specified, then the command displays properties for all datasets on the system. For each property, the following columns are displayed:
-.sp
-.in +2
-.nf
- name Dataset name
- property Property name
- value Property value
- source Property source. Can either be local, default,
- temporary, inherited, or none (-).
-.fi
-.in -2
-.sp
-
-All columns are displayed by default, though this can be controlled by using the \fB-o\fR option. This command takes a comma-separated list of properties as described in the "Native Properties" and "User Properties" sections.
-.sp
-The special value "all" can be used to display all properties for the given dataset.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 13n
-.rt
-Recursively display properties for any children.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-H\fR\fR
-.ad
-.RS 13n
-.rt
-Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab instead of an arbitrary amount of space.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIfield\fR\fR
-.ad
-.RS 13n
-.rt
-A comma-separated list of columns to display. "name,property,value,source" is the default value.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR \fIsource\fR\fR
-.ad
-.RS 13n
-.rt
-A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: "local,default,inherited,temporary,none". The default value is all sources.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-p\fR\fR
-.ad
-.RS 13n
-.rt
-Display numbers in parsable (exact) values.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs inherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Clears the specified property, causing it to be inherited from an ancestor. If no ancestor has the property set, then the default value is used. See the "Properties" section for a listing of default values, and details on which properties can be inherited.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-r\fR\fR
-.ad
-.RS 6n
-.rt
-Recursively inherit the given property for all children.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs mount\fR\fR
-.ad
-.sp .6
-.RS 4n
-Displays all \fBZFS\fR file systems currently mounted.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs mount\fR [\fB-o\fR \fIopts\fR] [\fB-O\fR] \fB-a\fR\fR
-.ad
-.sp .6
-.RS 4n
-Mounts all available \fBZFS\fR file systems. Invoked automatically as part of the boot process.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIopts\fR\fR
-.ad
-.RS 11n
-.rt
-An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-O\fR\fR
-.ad
-.RS 11n
-.rt
-Perform an overlay mount. See \fBmount\fR(1M) for more information.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs mount\fR [\fB-o\fR \fIopts\fR] [\fB-O\fR] \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Mounts a specific \fBZFS\fR file system. This is typically not necessary, as file systems are automatically mounted when they are created or the mountpoint property has changed. See the "Mount Points" section for details.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIopts\fR\fR
-.ad
-.RS 11n
-.rt
-An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-O\fR\fR
-.ad
-.RS 11n
-.rt
-Perform an overlay mount. See \fBmount\fR(1M) for more information.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs unmount\fR \fB-a\fR\fR
-.ad
-.sp .6
-.RS 4n
-Unmounts all currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs unmount\fR [\fB-f\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
-.ad
-.sp .6
-.RS 4n
-Unmounts the given file system. The command can also be given a path to a \fBZFS\fR file system mount point on the system.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forcefully unmount the file system, even if it is currently in use.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs share\fR \fB-a\fR\fR
-.ad
-.sp .6
-.RS 4n
-Shares all available \fBZFS\fR file systems. This is invoked automatically as part of the boot process.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs share\fR \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Shares a specific \fBZFS\fR file system according to the "sharenfs" property. File systems are shared when the "sharenfs" property is set.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs unshare\fR \fB-a\fR\fR
-.ad
-.sp .6
-.RS 4n
-Unshares all currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs unshare\fR [\fB-F\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
-.ad
-.sp .6
-.RS 4n
-Unshares the given file system. The command can also be given a path to a \fBZFS\fR file system shared on the system.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-F\fR\fR
-.ad
-.RS 6n
-.rt
-Forcefully unshare the file system, even if it is currently in use.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs send\fR [\fB-i\fR \fIsnapshot1\fR] \fIsnapshot2\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a stream representation of snapshot2, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1)). By default, a full stream is generated.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-i\fR \fIsnapshot1\fR\fR
-.ad
-.RS 16n
-.rt
-Generate an incremental stream from \fIsnapshot1\fR to \fIsnapshot2\fR. The incremental source \fIsnapshot1\fR can be specified as the last component of the snapshot name (for example, the part after the "@"),
-and it is assumed to be from the same file system as \fIsnapshot2\fR.
-.RE
-
-.RE
-
-.LP
-The format of the stream is evolving. No backwards compatibility is guaranteed. You may not be able to receive your streams on future versions of \fBZFS\fR.
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs receive\fR [\fB-vnF\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
-.ad
-.br
-.na
-\fB\fBzfs receive\fR [\fB-vnF\fR] \fB-d\fR \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the "\fBzfs send\fR" subcommand, which by default creates a full
-stream. "\fBzfs recv\fR" can be used as an alias for "\fBzfs receive\fR".
-.sp
-If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. The destination file system and all of its child file systems are unmounted and cannot be accessed during the receive operation.
-.sp
-The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
-.sp
-If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR.
-If the \fB-d\fR option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified \fIfilesystem\fR. If the \fB-d\fR option is specified, any required file systems within the specified one are created.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-d\fR\fR
-.ad
-.RS 6n
-.rt
-Use the name of the sent snapshot to determine the name of the new snapshot as described in the paragraph above.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Print verbose information about the stream and the time required to perform the receive operation.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-n\fR\fR
-.ad
-.RS 6n
-.rt
-Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to determine what name the receive operation would use.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-F\fR\fR
-.ad
-.RS 6n
-.rt
-Force a rollback of the \fIfilesystem\fR to the most recent snapshot before performing the receive operation.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs jail\fR \fIjailid\fR \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Attaches the given file system to the given jail. From now on this file system tree can be managed from within a jail if the "\fBjailed\fR" property has been set.
-To use this functionality, sysctl \fBsecurity.jail.enforce_statfs\fR should be set to 0 and sysctl \fBsecurity.jail.mount_allowed\fR should be set to 1.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzfs unjail\fR \fIjailid\fR \fIfilesystem\fR\fR
-.ad
-.sp .6
-.RS 4n
-Detaches the given file system from the given jail.
-.RE
-
-.SH EXAMPLES
-.LP
-\fBExample 1 \fRCreating a ZFS File System Hierarchy
-.LP
-The following commands create a file system named "\fBpool/home\fR" and a file system named "\fBpool/home/bob\fR". The mount point "\fB/export/home\fR" is set for the parent file system, and automatically inherited
-by the child file system.
-
-.sp
-.in +2
-.nf
-# zfs create pool/home
-# zfs set mountpoint=/export/home pool/home
-# zfs create pool/home/bob
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 2 \fRCreating a ZFS Snapshot
-.LP
-The following command creates a snapshot named "yesterday". This snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of the "\fBpool/home/bob\fR" file system.
-
-.sp
-.in +2
-.nf
-# zfs snapshot pool/home/bob@yesterday
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 3 \fRTaking and destroying multiple snapshots
-.LP
-The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home\fR" and all of its descendant file systems. Each snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of its file system. The
-second command destroys the newly created snapshots.
-
-.sp
-.in +2
-.nf
-# \fBzfs snapshot -r pool/home@yesterday\fR
-\fB# zfs destroy -r pool/home@yesterday\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 4 \fRTurning Off Compression
-.LP
-The following commands turn compression off for all file systems under "\fBpool/home\fR", but explicitly turn it on for "\fBpool/home/anne\fR".
-
-.sp
-.in +2
-.nf
-\fB# zfs set compression=off pool/home
-# zfs set compression=on pool/home/anne\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 5 \fRListing ZFS Datasets
-.LP
-The following command lists all active file systems and volumes in the system.
-
-.sp
-.in +2
-.nf
-\fB# zfs list\fR
-
-
- NAME USED AVAIL REFER MOUNTPOINT
- pool 100G 60G - /pool
- pool/home 100G 60G - /export/home
- pool/home/bob 40G 60G 40G /export/home/bob
- pool/home/bob@yesterday 3M - 40G -
- pool/home/anne 60G 60G 40G /export/home/anne
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 6 \fRSetting a Quota on a ZFS File System
-.LP
-The following command sets a quota of 50 Gbytes for "\fBpool/home/bob\fR".
-
-.sp
-.in +2
-.nf
-\fB# zfs set quota=50G pool/home/bob\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 7 \fRListing ZFS Properties
-.LP
-The following command lists all properties for "\fBpool/home/bob\fR".
-
-.sp
-.in +2
-.nf
-\fB# zfs get all pool/home/bob\fR
-
-
- NAME PROPERTY VALUE SOURCE
- pool/home/bob type filesystem -
- pool/home/bob creation Fri Feb 23 14:20 2007 -
- pool/home/bob used 24.5K -
- pool/home/bob available 50.0G -
- pool/home/bob referenced 24.5K -
- pool/home/bob compressratio 1.00x -
- pool/home/bob mounted yes -
- pool/home/bob quota 50G local
- pool/home/bob reservation none default
- pool/home/bob recordsize 128K default
- pool/home/bob mountpoint /pool/home/bob default
- pool/home/bob sharenfs off default
- pool/home/bob shareiscsi off default
- pool/home/bob checksum on default
- pool/home/bob compression off default
- pool/home/bob atime on default
- pool/home/bob devices on default
- pool/home/bob exec on default
- pool/home/bob setuid on default
- pool/home/bob readonly off default
- pool/home/bob zoned off default
- pool/home/bob snapdir hidden default
- pool/home/bob aclmode groupmask default
- pool/home/bob aclinherit secure default
- pool/home/bob canmount on default
- pool/home/bob xattr on default
-
-
-.fi
-.in -2
-.sp
-
-.LP
-The following command gets a single property value.
-
-.sp
-.in +2
-.nf
-\fB# zfs get -H -o value compression pool/home/bob\fR
-on
-.fi
-.in -2
-.sp
-
-.LP
-The following command lists all properties with local settings for "\fBpool/home/bob\fR".
-
-.sp
-.in +2
-.nf
-\fB# zfs get -r -s local -o name,property,value all pool/home/bob\fR
-
- NAME PROPERTY VALUE
- pool compression on
- pool/home checksum off
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 8 \fRRolling Back a ZFS File System
-.LP
-The following command reverts the contents of "\fBpool/home/anne\fR" to the snapshot named "\fByesterday\fR", deleting all intermediate snapshots.
-
-.sp
-.in +2
-.nf
-\fB# zfs rollback -r pool/home/anne@yesterday\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 9 \fRCreating a ZFS Clone
-.LP
-The following command creates a writable file system whose initial contents are the same as "\fBpool/home/bob@yesterday\fR".
-
-.sp
-.in +2
-.nf
-\fB# zfs clone pool/home/bob@yesterday pool/clone\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 10 \fRPromoting a ZFS Clone
-.LP
-The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming:
-
-.sp
-.in +2
-.nf
-\fB# zfs create pool/project/production\fR
- populate /pool/project/production with data
-\fB# zfs snapshot pool/project/production@today
-# zfs clone pool/project/production@today pool/project/beta\fR
- make changes to /pool/project/beta and test them
-\fB# zfs promote pool/project/beta
-# zfs rename pool/project/production pool/project/legacy
-# zfs rename pool/project/beta pool/project/production\fR
- once the legacy version is no longer needed, it can be
- destroyed
-\fB# zfs destroy pool/project/legacy\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 11 \fRInheriting ZFS Properties
-.LP
-The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to inherit the "checksum" property from their parent.
-
-.sp
-.in +2
-.nf
-\fB# zfs inherit checksum pool/home/bob pool/home/anne\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 12 \fRRemotely Replicating ZFS Data
-.LP
-The following commands send a full stream and then an incremental stream to a remote machine, restoring them into "\fBpoolB/received/fs@a\fR" and "\fBpoolB/received/fs@b\fR", respectively. "\fBpoolB\fR" must contain
-the file system "\fBpoolB/received\fR", and must not initially contain "\fBpoolB/received/fs\fR".
-
-.sp
-.in +2
-.nf
-# zfs send pool/fs@a | \e
- ssh host zfs receive poolB/received/fs@a
-# zfs send -i a pool/fs@b | ssh host \e
- zfs receive poolB/received/fs
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 13 \fRUsing the zfs receive -d Option
-.LP
-The following command sends a full stream of "\fBpoolA/fsA/fsB@snap\fR" to a remote machine, receiving it into "\fBpoolB/received/fsA/fsB@snap\fR". The "\fBfsA/fsB@snap\fR" portion of the received snapshot's name
-is determined from the name of the sent snapshot. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR". If "\fBpoolB/received/fsA\fR" does not exist, it will be created as an empty file system.
-
-.sp
-.in +2
-.nf
-\fB# zfs send poolA/fsA/fsB@snap | \e
- ssh host zfs receive -d poolB/received
- \fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 14 \fRCreating a ZFS volume as a Swap Device
-.LP
-The following example shows how to create a 5-Gbyte ZFS volume and then add the volume as a swap device.
-
-.sp
-.in +2
-.nf
-\fB# zfs create -V 5gb tank/vol
-# swap -a /dev/zvol/dsk/tank/vol\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 15 \fRSetting User Properties
-.LP
-The following example sets the user defined "com.example:department" property for a dataset.
-
-.sp
-.in +2
-.nf
-\fB# zfs set com.example:department=12345 tank/accounting\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 16 \fRCreating a ZFS Volume as an iSCSI Target Device
-.LP
-The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target.
-
-.sp
-.in +2
-.nf
-\fB# zfs create -V 2g pool/volumes/vol1
-# zfs set shareiscsi=on pool/volumes/vol1
-# iscsitadm list target\fR
-Target: pool/volumes/vol1
-iSCSI Name:
-iqn.1986-03.com.sun:02:7b4b02a6-3277-eb1b-e686-a24762c52a8c
-Connections: 0
-.fi
-.in -2
-.sp
-
-.LP
-After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see the Solaris Administration Guide: Devices and File Systems.
-.SH EXIT STATUS
-.LP
-The following exit values are returned:
-.sp
-.ne 2
-.mk
-.na
-\fB\fB0\fR\fR
-.ad
-.RS 5n
-.rt
-Successful completion.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB1\fR\fR
-.ad
-.RS 5n
-.rt
-An error occurred.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB2\fR\fR
-.ad
-.RS 5n
-.rt
-Invalid command line options were specified.
-.RE
-
-.SH ATTRIBUTES
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-tab(:) box;
-cw(2.75i) |cw(2.75i)
-lw(2.75i) |lw(2.75i)
-.
-ATTRIBUTE TYPE:ATTRIBUTE VALUE
-_
-Availability:SUNWzfsu
-_
-Interface Stability:Evolving
-.TE
-
-.SH SEE ALSO
-.LP
-\fBgzip\fR(1), \fBssh\fR(1), \fBmount\fR(1M), \fBshare\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBfsync\fR(3c), \fBdfstab\fR(4), \fBattributes\fR(5)
diff --git a/contrib/opensolaris/cmd/zfs/zfs_iter.c b/contrib/opensolaris/cmd/zfs/zfs_iter.c
deleted file mode 100644
index eb6b8b1..0000000
--- a/contrib/opensolaris/cmd/zfs/zfs_iter.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <libintl.h>
-#include <libuutil.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-
-#include <libzfs.h>
-
-#include "zfs_util.h"
-#include "zfs_iter.h"
-
-/*
- * This is a private interface used to gather up all the datasets specified on
- * the command line so that we can iterate over them in order.
- *
- * First, we iterate over all filesystems, gathering them together into an
- * AVL tree. We report errors for any explicitly specified datasets
- * that we couldn't open.
- *
- * When finished, we have an AVL tree of ZFS handles. We go through and execute
- * the provided callback for each one, passing whatever data the user supplied.
- */
-
-typedef struct zfs_node { /* one entry in the AVL tree of gathered datasets */
- zfs_handle_t *zn_handle; /* dataset handle held by this entry */
- uu_avl_node_t zn_avlnode; /* tree linkage, initialized via uu_avl_node_init() */
-} zfs_node_t;
-
-typedef struct callback_data { /* state handed to zfs_callback() through its 'data' argument */
- uu_avl_t *cb_avl; /* tree that matching datasets are inserted into */
- int cb_recurse; /* nonzero: also iterate over children */
- zfs_type_t cb_types; /* mask tested against zfs_get_type() */
- zfs_sort_column_t *cb_sortcol; /* passed to uu_avl_find() as its private argument */
- zfs_proplist_t **cb_proplist; /* if non-NULL, expanded per dataset with zfs_expand_proplist() */
-} callback_data_t;
-
-uu_avl_pool_t *avl_pool; /* pool passed to uu_avl_node_init() for each zfs_node */
-
-/*
- * Called for each dataset. If the object is of an appropriate type (its type is set in cb_types),
- * add it to the avl tree and recurse over any children as necessary. 'data' is a callback_data_t. Returns 0, or -1 if zfs_expand_proplist() fails.
- */
-int
-zfs_callback(zfs_handle_t *zhp, void *data)
-{
- callback_data_t *cb = data;
- int dontclose = 0; /* set to 1 once zhp has been stored in the tree */
-
- /*
- * If this object is of the appropriate type, add it to the AVL tree.
- */
- if (zfs_get_type(zhp) & cb->cb_types) {
- uu_avl_index_t idx;
- zfs_node_t *node = safe_malloc(sizeof (zfs_node_t));
-
- node->zn_handle = zhp;
- uu_avl_node_init(node, &node->zn_avlnode, avl_pool);
- if (uu_avl_find(cb->cb_avl, node, cb->cb_sortcol,
- &idx) == NULL) { /* no entry comparing equal is in the tree yet */
- if (cb->cb_proplist &&
- zfs_expand_proplist(zhp, cb->cb_proplist) != 0) {
- free(node);
- return (-1); /* NOTE(review): zhp is not closed on this path -- confirm the caller does not leak it */
- }
- uu_avl_insert(cb->cb_avl, node, idx);
- dontclose = 1;
- } else {
- free(node); /* duplicate entry: drop the node; zhp is closed below */
- }
- }
-
- /*
- * Recurse if necessary.
- */
- if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM ||
- (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types &
- ZFS_TYPE_SNAPSHOT)))) /* volumes are descended only when snapshots were requested */
- (void) zfs_iter_children(zhp, zfs_callback, data); /* result is discarded */
-
- if (!dontclose)
- zfs_close(zhp);
-
- return (0);
-}
-
-int /* Append a column for 'name' to the list *sc; returns 0, or -1 if zfs_name_to_prop() and zfs_prop_user() both reject 'name'. */
-zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
- boolean_t reverse)
-{
- zfs_sort_column_t *col;
- zfs_prop_t prop;
-
- if ((prop = zfs_name_to_prop(name)) == ZFS_PROP_INVAL &&
- !zfs_prop_user(name))
- return (-1);
-
- col = safe_malloc(sizeof (zfs_sort_column_t)); /* NOTE(review): sc_next and sc_user_prop are never set explicitly -- presumably safe_malloc() zero-fills; confirm */
-
- col->sc_prop = prop;
- col->sc_reverse = reverse;
- if (prop == ZFS_PROP_INVAL) { /* not a native property: keep a private copy of the name */
- col->sc_user_prop = safe_malloc(strlen(name) + 1);
- (void) strcpy(col->sc_user_prop, name);
- }
-
- if (*sc == NULL) { /* first column becomes the head; the head's sc_last tracks the tail */
- col->sc_last = col;
- *sc = col;
- } else {
- (*sc)->sc_last->sc_next = col;
- (*sc)->sc_last = col;
- }
-
- return (0);
-}
-
-void /* Free every column in the list starting at 'sc', together with each column's sc_user_prop string. */
-zfs_free_sort_columns(zfs_sort_column_t *sc)
-{
- zfs_sort_column_t *col;
-
- while (sc != NULL) {
- col = sc->sc_next; /* read the link before sc is freed */
- free(sc->sc_user_prop);
- free(sc);
- sc = col;
- }
-}
-
-/* ARGSUSED */
-static int /* Order two zfs_node_t entries by dataset name; a dataset sorts before its own snapshots, and snapshots of one dataset sort by CREATETXG. */
-zfs_compare(const void *larg, const void *rarg, void *unused)
-{
- zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
- zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
- const char *lname = zfs_get_name(l);
- const char *rname = zfs_get_name(r);
- char *lat, *rat;
- uint64_t lcreate, rcreate;
- int ret;
-
- lat = (char *)strchr(lname, '@'); /* const is cast away: the '@' is overwritten below and restored before returning */
- rat = (char *)strchr(rname, '@');
-
- if (lat != NULL)
- *lat = '\0'; /* temporarily cut the name off at the snapshot separator */
- if (rat != NULL)
- *rat = '\0';
-
- ret = strcmp(lname, rname); /* compares the names without any "@snapshot" suffix */
- if (ret == 0) {
- /*
- * If we're comparing a dataset to one of its snapshots, we
- * always make the full dataset first.
- */
- if (lat == NULL) { /* NOTE(review): also taken when neither name has an '@', so two equal dataset names yield -1, not 0 */
- ret = -1;
- } else if (rat == NULL) {
- ret = 1;
- } else {
- /*
- * If we have two snapshots from the same dataset, then
- * we want to sort them according to creation time. We
- * use the hidden CREATETXG property to get an absolute
- * ordering of snapshots.
- */
- lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
- rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
-
- if (lcreate < rcreate) /* equal values leave ret at 0 */
- ret = -1;
- else if (lcreate > rcreate)
- ret = 1;
- }
- }
-
- if (lat != NULL)
- *lat = '@'; /* undo the temporary truncation */
- if (rat != NULL)
- *rat = '@';
-
- return (ret);
-}
-
-/*
- * Sort datasets by specified columns.
- *
- * o Numeric types sort in ascending order.
- * o String types sort in alphabetical order.
- * o Types inappropriate for a row sort that row to the literal
- * bottom, regardless of the specified ordering.
- *
- * If no sort columns are specified, or two datasets compare equally
- * across all specified columns, they are sorted alphabetically by name
- * with snapshots grouped under their parents.
- */
-static int
-zfs_sort(const void *larg, const void *rarg, void *data)
-{
- zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
- zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
- zfs_sort_column_t *sc = (zfs_sort_column_t *)data;
- zfs_sort_column_t *psc;
-
- for (psc = sc; psc != NULL; psc = psc->sc_next) {
- char lbuf[ZFS_MAXPROPLEN], rbuf[ZFS_MAXPROPLEN];
- char *lstr, *rstr;
- uint64_t lnum, rnum;
- boolean_t lvalid, rvalid;
- int ret = 0;
-
- /*
- * We group the checks below the generic code. If 'lstr' and
- * 'rstr' are non-NULL, then we do a string based comparison.
- * Otherwise, we compare 'lnum' and 'rnum'.
- */
- lstr = rstr = NULL;
- if (psc->sc_prop == ZFS_PROP_INVAL) {
- nvlist_t *luser, *ruser;
- nvlist_t *lval, *rval;
-
- luser = zfs_get_user_props(l);
- ruser = zfs_get_user_props(r);
-
- lvalid = (nvlist_lookup_nvlist(luser,
- psc->sc_user_prop, &lval) == 0);
- rvalid = (nvlist_lookup_nvlist(ruser,
- psc->sc_user_prop, &rval) == 0);
-
- if (lvalid)
- verify(nvlist_lookup_string(lval,
- ZFS_PROP_VALUE, &lstr) == 0);
- if (rvalid)
- verify(nvlist_lookup_string(rval,
- ZFS_PROP_VALUE, &rstr) == 0);
-
- } else if (zfs_prop_is_string(psc->sc_prop)) {
- lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf,
- sizeof (lbuf), NULL, NULL, 0, B_TRUE) == 0);
- rvalid = (zfs_prop_get(r, psc->sc_prop, rbuf,
- sizeof (rbuf), NULL, NULL, 0, B_TRUE) == 0);
-
- lstr = lbuf;
- rstr = rbuf;
- } else {
- lvalid = zfs_prop_valid_for_type(psc->sc_prop,
- zfs_get_type(l));
- rvalid = zfs_prop_valid_for_type(psc->sc_prop,
- zfs_get_type(r));
-
- if (lvalid)
- (void) zfs_prop_get_numeric(l, psc->sc_prop,
- &lnum, NULL, NULL, 0);
- if (rvalid)
- (void) zfs_prop_get_numeric(r, psc->sc_prop,
- &rnum, NULL, NULL, 0);
- }
-
- if (!lvalid && !rvalid)
- continue;
- else if (!lvalid)
- return (1);
- else if (!rvalid)
- return (-1);
-
- if (lstr)
- ret = strcmp(lstr, rstr);
- else if (lnum < rnum)
- ret = -1;
- else if (lnum > rnum)
- ret = 1;
-
- if (ret != 0) {
- if (psc->sc_reverse == B_TRUE)
- ret = (ret < 0) ? 1 : -1;
- return (ret);
- }
- }
-
- return (zfs_compare(larg, rarg, NULL));
-}
-
-int
-zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
- zfs_sort_column_t *sortcol, zfs_proplist_t **proplist, zfs_iter_f callback,
- void *data, boolean_t args_can_be_paths)
-{
- callback_data_t cb;
- int ret = 0;
- zfs_node_t *node;
- uu_avl_walk_t *walk;
-
- avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
- offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
-
- if (avl_pool == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
-
- cb.cb_sortcol = sortcol;
- cb.cb_recurse = recurse;
- cb.cb_proplist = proplist;
- cb.cb_types = types;
- if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
-
- if (argc == 0) {
- /*
- * If given no arguments, iterate over all datasets.
- */
- cb.cb_recurse = 1;
- ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
- } else {
- int i;
- zfs_handle_t *zhp;
- zfs_type_t argtype;
-
- /*
- * If we're recursive, then we always allow filesystems as
- * arguments. If we also are interested in snapshots, then we
- * can take volumes as well.
- */
- argtype = types;
- if (recurse) {
- argtype |= ZFS_TYPE_FILESYSTEM;
- if (types & ZFS_TYPE_SNAPSHOT)
- argtype |= ZFS_TYPE_VOLUME;
- }
-
- for (i = 0; i < argc; i++) {
- if (args_can_be_paths) {
- zhp = zfs_path_to_zhandle(g_zfs, argv[i],
- argtype);
- } else {
- zhp = zfs_open(g_zfs, argv[i], argtype);
- }
- if (zhp != NULL)
- ret |= zfs_callback(zhp, &cb);
- else
- ret = 1;
- }
- }
-
- /*
- * At this point we've got our AVL tree full of zfs handles, so iterate
- * over each one and execute the real user callback.
- */
- for (node = uu_avl_first(cb.cb_avl); node != NULL;
- node = uu_avl_next(cb.cb_avl, node))
- ret |= callback(node->zn_handle, data);
-
- /*
- * Finally, clean up the AVL tree.
- */
- if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
-
- while ((node = uu_avl_walk_next(walk)) != NULL) {
- uu_avl_remove(cb.cb_avl, node);
- zfs_close(node->zn_handle);
- free(node);
- }
-
- uu_avl_walk_end(walk);
- uu_avl_destroy(cb.cb_avl);
- uu_avl_pool_destroy(avl_pool);
-
- return (ret);
-}
diff --git a/contrib/opensolaris/cmd/zfs/zfs_iter.h b/contrib/opensolaris/cmd/zfs/zfs_iter.h
deleted file mode 100644
index 1f0ce8e..0000000
--- a/contrib/opensolaris/cmd/zfs/zfs_iter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef ZFS_ITER_H
-#define ZFS_ITER_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct zfs_sort_column {
- struct zfs_sort_column *sc_next;
- struct zfs_sort_column *sc_last;
- zfs_prop_t sc_prop;
- char *sc_user_prop;
- boolean_t sc_reverse;
-} zfs_sort_column_t;
-
-int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_sort_column_t *,
- zfs_proplist_t **, zfs_iter_f, void *, boolean_t);
-int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t);
-void zfs_free_sort_columns(zfs_sort_column_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ZFS_ITER_H */
diff --git a/contrib/opensolaris/cmd/zfs/zfs_main.c b/contrib/opensolaris/cmd/zfs/zfs_main.c
deleted file mode 100644
index de15b00..0000000
--- a/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ /dev/null
@@ -1,3253 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <libgen.h>
-#include <libintl.h>
-#include <libuutil.h>
-#include <locale.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <zone.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-
-#include <libzfs.h>
-
-#include "zfs_iter.h"
-#include "zfs_util.h"
-
-libzfs_handle_t *g_zfs;
-
-static FILE *mnttab_file;
-
-static int zfs_do_clone(int argc, char **argv);
-static int zfs_do_create(int argc, char **argv);
-static int zfs_do_destroy(int argc, char **argv);
-static int zfs_do_get(int argc, char **argv);
-static int zfs_do_inherit(int argc, char **argv);
-static int zfs_do_list(int argc, char **argv);
-static int zfs_do_mount(int argc, char **argv);
-static int zfs_do_rename(int argc, char **argv);
-static int zfs_do_rollback(int argc, char **argv);
-static int zfs_do_set(int argc, char **argv);
-static int zfs_do_snapshot(int argc, char **argv);
-static int zfs_do_unmount(int argc, char **argv);
-static int zfs_do_share(int argc, char **argv);
-static int zfs_do_unshare(int argc, char **argv);
-static int zfs_do_send(int argc, char **argv);
-static int zfs_do_receive(int argc, char **argv);
-static int zfs_do_promote(int argc, char **argv);
-static int zfs_do_jail(int argc, char **argv);
-static int zfs_do_unjail(int argc, char **argv);
-
-/*
- * These libumem hooks provide a reasonable set of defaults for the allocator's
- * debugging facilities.
- */
-const char *
-_umem_debug_init(void)
-{
- return ("default,verbose"); /* $UMEM_DEBUG setting */
-}
-
-const char *
-_umem_logging_init(void)
-{
- return ("fail,contents"); /* $UMEM_LOGGING setting */
-}
-
-typedef enum {
- HELP_CLONE,
- HELP_CREATE,
- HELP_DESTROY,
- HELP_GET,
- HELP_INHERIT,
- HELP_JAIL,
- HELP_UNJAIL,
- HELP_LIST,
- HELP_MOUNT,
- HELP_PROMOTE,
- HELP_RECEIVE,
- HELP_RENAME,
- HELP_ROLLBACK,
- HELP_SEND,
- HELP_SET,
- HELP_SHARE,
- HELP_SNAPSHOT,
- HELP_UNMOUNT,
- HELP_UNSHARE
-} zfs_help_t;
-
-typedef struct zfs_command {
- const char *name;
- int (*func)(int argc, char **argv);
- zfs_help_t usage;
-} zfs_command_t;
-
-/*
- * Master command table. Each ZFS command has a name, associated function, and
- * usage message. The usage messages need to be internationalized, so we have
- * to have a function to return the usage message based on a command index.
- *
- * These commands are organized according to how they are displayed in the usage
- * message. An empty command (one with a NULL name) indicates an empty line in
- * the generic usage message.
- */
-static zfs_command_t command_table[] = {
- { "create", zfs_do_create, HELP_CREATE },
- { "destroy", zfs_do_destroy, HELP_DESTROY },
- { NULL },
- { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT },
- { "rollback", zfs_do_rollback, HELP_ROLLBACK },
- { "clone", zfs_do_clone, HELP_CLONE },
- { "promote", zfs_do_promote, HELP_PROMOTE },
- { "rename", zfs_do_rename, HELP_RENAME },
- { NULL },
- { "list", zfs_do_list, HELP_LIST },
- { NULL },
- { "set", zfs_do_set, HELP_SET },
- { "get", zfs_do_get, HELP_GET },
- { "inherit", zfs_do_inherit, HELP_INHERIT },
- { NULL },
- { "mount", zfs_do_mount, HELP_MOUNT },
- { NULL },
- { "unmount", zfs_do_unmount, HELP_UNMOUNT },
- { NULL },
- { "share", zfs_do_share, HELP_SHARE },
- { NULL },
- { "unshare", zfs_do_unshare, HELP_UNSHARE },
- { NULL },
- { "send", zfs_do_send, HELP_SEND },
- { "receive", zfs_do_receive, HELP_RECEIVE },
- { NULL },
- { "jail", zfs_do_jail, HELP_JAIL },
- { "unjail", zfs_do_unjail, HELP_UNJAIL },
-};
-
-#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
-
-zfs_command_t *current_command;
-
-static const char *
-get_usage(zfs_help_t idx)
-{
- switch (idx) {
- case HELP_CLONE:
- return (gettext("\tclone <snapshot> <filesystem|volume>\n"));
- case HELP_CREATE:
- return (gettext("\tcreate [[-o property=value] ... ] "
- "<filesystem>\n"
- "\tcreate [-s] [-b blocksize] [[-o property=value] ...]\n"
- "\t -V <size> <volume>\n"));
- case HELP_DESTROY:
- return (gettext("\tdestroy [-rRf] "
- "<filesystem|volume|snapshot>\n"));
- case HELP_GET:
- return (gettext("\tget [-rHp] [-o field[,field]...] "
- "[-s source[,source]...]\n"
- "\t <all | property[,property]...> "
- "[filesystem|volume|snapshot] ...\n"));
- case HELP_INHERIT:
- return (gettext("\tinherit [-r] <property> "
- "<filesystem|volume> ...\n"));
- case HELP_JAIL:
- return (gettext("\tjail <jailid> <filesystem>\n"));
- case HELP_UNJAIL:
- return (gettext("\tunjail <jailid> <filesystem>\n"));
- case HELP_LIST:
- return (gettext("\tlist [-rH] [-o property[,property]...] "
- "[-t type[,type]...]\n"
- "\t [-s property [-s property]...]"
- " [-S property [-S property]...]\n"
- "\t [filesystem|volume|snapshot] ...\n"));
- case HELP_MOUNT:
- return (gettext("\tmount\n"
- "\tmount [-o opts] [-O] -a\n"
- "\tmount [-o opts] [-O] <filesystem>\n"));
- case HELP_PROMOTE:
- return (gettext("\tpromote <clone filesystem>\n"));
- case HELP_RECEIVE:
- return (gettext("\treceive [-vnF] <filesystem|volume|"
- "snapshot>\n"
- "\treceive [-vnF] -d <filesystem>\n"));
- case HELP_RENAME:
- return (gettext("\trename <filesystem|volume|snapshot> "
- "<filesystem|volume|snapshot>\n"
- "\trename -r <snapshot> <snapshot>\n"));
- case HELP_ROLLBACK:
- return (gettext("\trollback [-rRf] <snapshot>\n"));
- case HELP_SEND:
- return (gettext("\tsend [-i <snapshot>] <snapshot>\n"));
- case HELP_SET:
- return (gettext("\tset <property=value> "
- "<filesystem|volume> ...\n"));
- case HELP_SHARE:
- return (gettext("\tshare -a\n"
- "\tshare <filesystem>\n"));
- case HELP_SNAPSHOT:
- return (gettext("\tsnapshot [-r] "
- "<filesystem@name|volume@name>\n"));
- case HELP_UNMOUNT:
- return (gettext("\tunmount [-f] -a\n"
- "\tunmount [-f] <filesystem|mountpoint>\n"));
- case HELP_UNSHARE:
- return (gettext("\tunshare [-f] -a\n"
- "\tunshare [-f] <filesystem|mountpoint>\n"));
- }
-
- abort();
- /* NOTREACHED */
-}
-
-/*
- * Utility function to guarantee malloc() success.
- */
-void *
-safe_malloc(size_t size)
-{
- void *data;
-
- if ((data = calloc(1, size)) == NULL) {
- (void) fprintf(stderr, "internal error: out of memory\n");
- exit(1);
- }
-
- return (data);
-}
-
-/*
- * Callback routine that will print out information for each of
- * the properties.
- */
-static zfs_prop_t
-usage_prop_cb(zfs_prop_t prop, void *cb)
-{
- FILE *fp = cb;
-
- (void) fprintf(fp, "\t%-13s ", zfs_prop_to_name(prop));
-
- if (zfs_prop_readonly(prop))
- (void) fprintf(fp, " NO ");
- else
- (void) fprintf(fp, " YES ");
-
- if (zfs_prop_inheritable(prop))
- (void) fprintf(fp, " YES ");
- else
- (void) fprintf(fp, " NO ");
-
- if (zfs_prop_values(prop) == NULL)
- (void) fprintf(fp, "-\n");
- else
- (void) fprintf(fp, "%s\n", zfs_prop_values(prop));
-
- return (ZFS_PROP_CONT);
-}
-
-/*
- * Display usage message. If we're inside a command, display only the usage for
- * that command. Otherwise, iterate over the entire command table and display
- * a complete usage message.
- */
-static void
-usage(boolean_t requested)
-{
- int i;
- boolean_t show_properties = B_FALSE;
- FILE *fp = requested ? stdout : stderr;
-
- if (current_command == NULL) {
-
- (void) fprintf(fp, gettext("usage: zfs command args ...\n"));
- (void) fprintf(fp,
- gettext("where 'command' is one of the following:\n\n"));
-
- for (i = 0; i < NCOMMAND; i++) {
- if (command_table[i].name == NULL)
- (void) fprintf(fp, "\n");
- else
- (void) fprintf(fp, "%s",
- get_usage(command_table[i].usage));
- }
-
- (void) fprintf(fp, gettext("\nEach dataset is of the form: "
- "pool/[dataset/]*dataset[@name]\n"));
- } else {
- (void) fprintf(fp, gettext("usage:\n"));
- (void) fprintf(fp, "%s", get_usage(current_command->usage));
- }
-
- if (current_command != NULL &&
- (strcmp(current_command->name, "set") == 0 ||
- strcmp(current_command->name, "get") == 0 ||
- strcmp(current_command->name, "inherit") == 0 ||
- strcmp(current_command->name, "list") == 0))
- show_properties = B_TRUE;
-
- if (show_properties) {
-
- (void) fprintf(fp,
- gettext("\nThe following properties are supported:\n"));
-
- (void) fprintf(fp, "\n\t%-13s %s %s %s\n\n",
- "PROPERTY", "EDIT", "INHERIT", "VALUES");
-
- /* Iterate over all properties */
- (void) zfs_prop_iter(usage_prop_cb, fp, B_FALSE);
-
- (void) fprintf(fp, gettext("\nSizes are specified in bytes "
- "with standard units such as K, M, G, etc.\n"));
- (void) fprintf(fp, gettext("\n\nUser-defined properties can "
- "be specified by using a name containing a colon (:).\n"));
- } else {
- /*
- * TRANSLATION NOTE:
- * "zfs set|get" must not be localised this is the
- * command name and arguments.
- */
- (void) fprintf(fp,
- gettext("\nFor the property list, run: zfs set|get\n"));
- }
-
- /*
- * See comments at end of main().
- */
- if (getenv("ZFS_ABORT") != NULL) {
- (void) printf("dumping core by request\n");
- abort();
- }
-
- exit(requested ? 0 : 2);
-}
-
-/*
- * zfs clone <snap> <fs | vol>
- *
- * Given an existing dataset, create a writable copy whose initial contents
- * are the same as the source. The newly created dataset maintains a
- * dependency on the original; the original cannot be destroyed so long as
- * the clone exists.
- */
-static int
-zfs_do_clone(int argc, char **argv)
-{
- zfs_handle_t *zhp;
- int ret;
-
- /* check options */
- if (argc > 1 && argv[1][0] == '-') {
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- argv[1][1]);
- usage(B_FALSE);
- }
-
- /* check number of arguments */
- if (argc < 2) {
- (void) fprintf(stderr, gettext("missing source dataset "
- "argument\n"));
- usage(B_FALSE);
- }
- if (argc < 3) {
- (void) fprintf(stderr, gettext("missing target dataset "
- "argument\n"));
- usage(B_FALSE);
- }
- if (argc > 3) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- /* open the source dataset */
- if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
- return (1);
-
- /* pass to libzfs */
- ret = zfs_clone(zhp, argv[2], NULL);
-
- /* create the mountpoint if necessary */
- if (ret == 0) {
- zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY);
- if (clone != NULL) {
- if ((ret = zfs_mount(clone, NULL, 0)) == 0)
- ret = zfs_share(clone);
- zfs_close(clone);
- }
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
- }
-
- zfs_close(zhp);
-
- return (ret == 0 ? 0 : 1);
-}
-
-/*
- * zfs create [-o prop=value] ... fs
- * zfs create [-s] [-b blocksize] [-o prop=value] ... -V size vol
- *
- * Create a new dataset. This command can be used to create filesystems
- * and volumes. Snapshot creation is handled by 'zfs snapshot'.
- * For volumes, the user must specify a size to be used.
- *
- * The '-s' flag applies only to volumes, and indicates that we should not try
- * to set the reservation for this volume. By default we set a reservation
- * equal to the size for any volume.
- */
-static int
-zfs_do_create(int argc, char **argv)
-{
- zfs_type_t type = ZFS_TYPE_FILESYSTEM;
- zfs_handle_t *zhp = NULL;
- uint64_t volsize;
- int c;
- boolean_t noreserve = B_FALSE;
- int ret = 1;
- nvlist_t *props = NULL;
- uint64_t intval;
- char *propname;
- char *propval = NULL;
- char *strval;
-
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
-
- /* check options */
- while ((c = getopt(argc, argv, ":V:b:so:")) != -1) {
- switch (c) {
- case 'V':
- type = ZFS_TYPE_VOLUME;
- if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
- (void) fprintf(stderr, gettext("bad volume "
- "size '%s': %s\n"), optarg,
- libzfs_error_description(g_zfs));
- goto error;
- }
-
- if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
- volsize = intval;
- break;
- case 'b':
- if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
- (void) fprintf(stderr, gettext("bad volume "
- "block size '%s': %s\n"), optarg,
- libzfs_error_description(g_zfs));
- goto error;
- }
-
- if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
- break;
- case 'o':
- propname = optarg;
- if ((propval = strchr(propname, '=')) == NULL) {
- (void) fprintf(stderr, gettext("missing "
- "'=' for -o option\n"));
- goto error;
- }
- *propval = '\0';
- propval++;
- if (nvlist_lookup_string(props, propname,
- &strval) == 0) {
- (void) fprintf(stderr, gettext("property '%s' "
- "specified multiple times\n"), propname);
- goto error;
- }
- if (nvlist_add_string(props, propname, propval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
- break;
- case 's':
- noreserve = B_TRUE;
- break;
- case ':':
- (void) fprintf(stderr, gettext("missing size "
- "argument\n"));
- goto badusage;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- goto badusage;
- }
- }
-
- if (noreserve && type != ZFS_TYPE_VOLUME) {
- (void) fprintf(stderr, gettext("'-s' can only be used when "
- "creating a volume\n"));
- goto badusage;
- }
-
- argc -= optind;
- argv += optind;
-
- /* check number of arguments */
- if (argc == 0) {
- (void) fprintf(stderr, gettext("missing %s argument\n"),
- zfs_type_to_name(type));
- goto badusage;
- }
- if (argc > 1) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- goto badusage;
- }
-
- if (type == ZFS_TYPE_VOLUME && !noreserve &&
- nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_RESERVATION),
- &strval) != 0) {
- if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_RESERVATION),
- volsize) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- nvlist_free(props);
- return (1);
- }
- }
-
- /* pass to libzfs */
- if (zfs_create(g_zfs, argv[0], type, props) != 0)
- goto error;
-
- if (propval != NULL)
- *(propval - 1) = '=';
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
-
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
- goto error;
-
- /*
- * Mount and/or share the new filesystem as appropriate. We provide a
- * verbose error message to let the user know that their filesystem was
- * in fact created, even if we failed to mount or share it.
- */
- if (zfs_mount(zhp, NULL, 0) != 0) {
- (void) fprintf(stderr, gettext("filesystem successfully "
- "created, but not mounted\n"));
- ret = 1;
- } else if (zfs_share(zhp) != 0) {
- (void) fprintf(stderr, gettext("filesystem successfully "
- "created, but not shared\n"));
- ret = 1;
- } else {
- ret = 0;
- }
-
-error:
- if (zhp)
- zfs_close(zhp);
- nvlist_free(props);
- return (ret);
-badusage:
- nvlist_free(props);
- usage(B_FALSE);
- return (2);
-}
-
-/*
- * zfs destroy [-rRf] <fs, snap, vol>
- *
- * -r Recursively destroy all children
- * -R Recursively destroy all dependents, including clones
- * -f Force unmounting of any dependents
- *
- * Destroys the given dataset. By default, it will unmount any filesystems,
- * and refuse to destroy a dataset that has any dependents. A dependent can
- * either be a child, or a clone of a child.
- */
-typedef struct destroy_cbdata {
- boolean_t cb_first;
- int cb_force;
- int cb_recurse;
- int cb_error;
- int cb_needforce;
- int cb_doclones;
- boolean_t cb_closezhp;
- zfs_handle_t *cb_target;
- char *cb_snapname;
-} destroy_cbdata_t;
-
-/*
- * Check for any dependents based on the '-r' or '-R' flags.
- */
-static int
-destroy_check_dependent(zfs_handle_t *zhp, void *data)
-{
- destroy_cbdata_t *cbp = data;
- const char *tname = zfs_get_name(cbp->cb_target);
- const char *name = zfs_get_name(zhp);
-
- if (strncmp(tname, name, strlen(tname)) == 0 &&
- (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) {
- /*
- * This is a direct descendant, not a clone somewhere else in
- * the hierarchy.
- */
- if (cbp->cb_recurse)
- goto out;
-
- if (cbp->cb_first) {
- (void) fprintf(stderr, gettext("cannot destroy '%s': "
- "%s has children\n"),
- zfs_get_name(cbp->cb_target),
- zfs_type_to_name(zfs_get_type(cbp->cb_target)));
- (void) fprintf(stderr, gettext("use '-r' to destroy "
- "the following datasets:\n"));
- cbp->cb_first = B_FALSE;
- cbp->cb_error = 1;
- }
-
- (void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
- } else {
- /*
- * This is a clone. We only want to report this if '-r' was
- * specified, or the target is a snapshot.
- */
- if (!cbp->cb_recurse &&
- zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT)
- goto out;
-
- if (cbp->cb_first) {
- (void) fprintf(stderr, gettext("cannot destroy '%s': "
- "%s has dependent clones\n"),
- zfs_get_name(cbp->cb_target),
- zfs_type_to_name(zfs_get_type(cbp->cb_target)));
- (void) fprintf(stderr, gettext("use '-R' to destroy "
- "the following datasets:\n"));
- cbp->cb_first = B_FALSE;
- cbp->cb_error = 1;
- }
-
- (void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
- }
-
-out:
- zfs_close(zhp);
- return (0);
-}
-
-static int
-destroy_callback(zfs_handle_t *zhp, void *data)
-{
- destroy_cbdata_t *cbp = data;
-
- /*
- * Ignore pools (which we've already flagged as an error before getting
- * here).
- */
- if (strchr(zfs_get_name(zhp), '/') == NULL &&
- zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- zfs_close(zhp);
- return (0);
- }
-
- /*
- * Bail out on the first error.
- */
- if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
- zfs_destroy(zhp) != 0) {
- zfs_close(zhp);
- return (-1);
- }
-
- zfs_close(zhp);
- return (0);
-}
-
-static int
-destroy_snap_clones(zfs_handle_t *zhp, void *arg)
-{
- destroy_cbdata_t *cbp = arg;
- char thissnap[MAXPATHLEN];
- zfs_handle_t *szhp;
- boolean_t closezhp = cbp->cb_closezhp;
- int rv;
-
- (void) snprintf(thissnap, sizeof (thissnap),
- "%s@%s", zfs_get_name(zhp), cbp->cb_snapname);
-
- libzfs_print_on_error(g_zfs, B_FALSE);
- szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT);
- libzfs_print_on_error(g_zfs, B_TRUE);
- if (szhp) {
- /*
- * Destroy any clones of this snapshot
- */
- if (zfs_iter_dependents(szhp, B_FALSE, destroy_callback,
- cbp) != 0) {
- zfs_close(szhp);
- if (closezhp)
- zfs_close(zhp);
- return (-1);
- }
- zfs_close(szhp);
- }
-
- cbp->cb_closezhp = B_TRUE;
- rv = zfs_iter_filesystems(zhp, destroy_snap_clones, arg);
- if (closezhp)
- zfs_close(zhp);
- return (rv);
-}
-
-static int
-zfs_do_destroy(int argc, char **argv)
-{
- destroy_cbdata_t cb = { 0 };
- int c;
- zfs_handle_t *zhp;
- char *cp;
-
- /* check options */
- while ((c = getopt(argc, argv, "frR")) != -1) {
- switch (c) {
- case 'f':
- cb.cb_force = 1;
- break;
- case 'r':
- cb.cb_recurse = 1;
- break;
- case 'R':
- cb.cb_recurse = 1;
- cb.cb_doclones = 1;
- break;
- case '?':
- default:
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- /* check number of arguments */
- if (argc == 0) {
- (void) fprintf(stderr, gettext("missing path argument\n"));
- usage(B_FALSE);
- }
- if (argc > 1) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- /*
- * If we are doing recursive destroy of a snapshot, then the
- * named snapshot may not exist. Go straight to libzfs.
- */
- if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) {
- int ret;
-
- *cp = '\0';
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
- return (1);
- *cp = '@';
- cp++;
-
- if (cb.cb_doclones) {
- cb.cb_snapname = cp;
- if (destroy_snap_clones(zhp, &cb) != 0) {
- zfs_close(zhp);
- return (1);
- }
- }
-
- ret = zfs_destroy_snaps(zhp, cp);
- zfs_close(zhp);
- if (ret) {
- (void) fprintf(stderr,
- gettext("no snapshots destroyed\n"));
- } else {
- zpool_log_history(g_zfs, argc + optind, argv - optind,
- argv[0], B_FALSE, B_FALSE);
- }
- return (ret != 0);
- }
-
-
- /* Open the given dataset */
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
- return (1);
-
- cb.cb_target = zhp;
-
- /*
- * Perform an explicit check for pools before going any further.
- */
- if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL &&
- zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- (void) fprintf(stderr, gettext("cannot destroy '%s': "
- "operation does not apply to pools\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use 'zfs destroy -r "
- "%s' to destroy all datasets in the pool\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use 'zpool destroy %s' "
- "to destroy the pool itself\n"), zfs_get_name(zhp));
- zfs_close(zhp);
- return (1);
- }
-
- /*
- * Check for any dependents and/or clones.
- */
- cb.cb_first = B_TRUE;
- if (!cb.cb_doclones &&
- zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
- &cb) != 0) {
- zfs_close(zhp);
- return (1);
- }
-
-
- if (cb.cb_error ||
- zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
- zfs_close(zhp);
- return (1);
- }
-
- /*
- * Do the real thing. The callback will close the handle regardless of
- * whether it succeeds or not.
- */
- if (destroy_callback(zhp, &cb) != 0)
- return (1);
-
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
-
- return (0);
-}
-
-/*
- * zfs get [-rHp] [-o field[,field]...] [-s source[,source]...]
- * < all | property[,property]... > < fs | snap | vol > ...
- *
- * -r recurse over any child datasets
- * -H scripted mode. Headers are stripped, and fields are separated
- * by tabs instead of spaces.
- * -o Set of fields to display. One of "name,property,value,source".
- * Default is all four.
- * -s Set of sources to allow. One of
- * "local,default,inherited,temporary,none". Default is all
- * five.
- * -p Display values in parsable (literal) format.
- *
- * Prints properties for the given datasets. The user can control which
- * columns to display as well as which property types to allow.
- */
-
-/*
- * Invoked to display the properties for a single dataset.
- */
-static int
-get_callback(zfs_handle_t *zhp, void *data)
-{
- char buf[ZFS_MAXPROPLEN];
- zfs_source_t sourcetype;
- char source[ZFS_MAXNAMELEN];
- libzfs_get_cbdata_t *cbp = data;
- nvlist_t *userprop = zfs_get_user_props(zhp);
- zfs_proplist_t *pl = cbp->cb_proplist;
- nvlist_t *propval;
- char *strval;
- char *sourceval;
-
- for (; pl != NULL; pl = pl->pl_next) {
- /*
- * Skip the special fake placeholder. This will also skip over
- * the name property when 'all' is specified.
- */
- if (pl->pl_prop == ZFS_PROP_NAME &&
- pl == cbp->cb_proplist)
- continue;
-
- if (pl->pl_prop != ZFS_PROP_INVAL) {
- if (zfs_prop_get(zhp, pl->pl_prop, buf,
- sizeof (buf), &sourcetype, source,
- sizeof (source),
- cbp->cb_literal) != 0) {
- if (pl->pl_all)
- continue;
- if (!zfs_prop_valid_for_type(pl->pl_prop,
- ZFS_TYPE_ANY)) {
- (void) fprintf(stderr,
- gettext("No such property '%s'\n"),
- zfs_prop_to_name(pl->pl_prop));
- continue;
- }
- sourcetype = ZFS_SRC_NONE;
- (void) strlcpy(buf, "-", sizeof (buf));
- }
-
- libzfs_print_one_property(zfs_get_name(zhp), cbp,
- zfs_prop_to_name(pl->pl_prop),
- buf, sourcetype, source);
- } else {
- if (nvlist_lookup_nvlist(userprop,
- pl->pl_user_prop, &propval) != 0) {
- if (pl->pl_all)
- continue;
- sourcetype = ZFS_SRC_NONE;
- strval = "-";
- } else {
- verify(nvlist_lookup_string(propval,
- ZFS_PROP_VALUE, &strval) == 0);
- verify(nvlist_lookup_string(propval,
- ZFS_PROP_SOURCE, &sourceval) == 0);
-
- if (strcmp(sourceval,
- zfs_get_name(zhp)) == 0) {
- sourcetype = ZFS_SRC_LOCAL;
- } else {
- sourcetype = ZFS_SRC_INHERITED;
- (void) strlcpy(source,
- sourceval, sizeof (source));
- }
- }
-
- libzfs_print_one_property(zfs_get_name(zhp), cbp,
- pl->pl_user_prop, strval, sourcetype,
- source);
- }
- }
-
- return (0);
-}
-
-/*
- * Implementation of 'zfs get': parse the -r, -H, -p, -o and -s options, take
- * the first operand as the property list, and run get_callback() over the
- * remaining dataset operands.  Returns the result of zfs_for_each().
- */
-static int
-zfs_do_get(int argc, char **argv)
-{
-	/* '-o' sub-option names and, at the same index, the column chosen. */
-	static char *col_subopts[] =
-	    { "name", "property", "value", "source", NULL };
-	static const int col_ids[] = {
-		GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_SOURCE
-	};
-	/* '-s' sub-option names and, at the same index, the source flag. */
-	static char *source_subopts[] = {
-	    "local", "default", "inherited", "temporary", "none", NULL };
-	static const int source_flags[] = {
-		ZFS_SRC_LOCAL, ZFS_SRC_DEFAULT, ZFS_SRC_INHERITED,
-		ZFS_SRC_TEMPORARY, ZFS_SRC_NONE
-	};
-	libzfs_get_cbdata_t cb = { 0 };
-	zfs_proplist_t fake_name = { 0 };
-	boolean_t recurse = B_FALSE;
-	char *value;
-	int opt, which, ncols, ret;
-
-	/* Defaults: every source, and all four columns in the usual order. */
-	cb.cb_sources = ZFS_SRC_ALL;
-	cb.cb_columns[0] = GET_COL_NAME;
-	cb.cb_columns[1] = GET_COL_PROPERTY;
-	cb.cb_columns[2] = GET_COL_VALUE;
-	cb.cb_columns[3] = GET_COL_SOURCE;
-
-	while ((opt = getopt(argc, argv, ":o:s:rHp")) != -1) {
-		switch (opt) {
-		case 'H':
-			cb.cb_scripted = B_TRUE;
-			break;
-		case 'p':
-			cb.cb_literal = B_TRUE;
-			break;
-		case 'r':
-			recurse = B_TRUE;
-			break;
-		case 'o':
-			/*
-			 * An explicit column list replaces the defaults, so
-			 * start from an empty set.
-			 */
-			bzero(&cb.cb_columns, sizeof (cb.cb_columns));
-			ncols = 0;
-			while (*optarg != '\0') {
-				if (ncols == 4) {
-					(void) fprintf(stderr, gettext("too "
-					    "many fields given to -o "
-					    "option\n"));
-					usage(B_FALSE);
-				}
-
-				which = getsubopt(&optarg, col_subopts,
-				    &value);
-				if (which >= 0 && which <= 3) {
-					cb.cb_columns[ncols++] =
-					    col_ids[which];
-				} else {
-					(void) fprintf(stderr,
-					    gettext("invalid column name "
-					    "'%s'\n"), value);
-					usage(B_FALSE);
-				}
-			}
-			break;
-		case 's':
-			/* An explicit source list replaces ZFS_SRC_ALL. */
-			cb.cb_sources = 0;
-			while (*optarg != '\0') {
-				which = getsubopt(&optarg, source_subopts,
-				    &value);
-				if (which >= 0 && which <= 4) {
-					cb.cb_sources |= source_flags[which];
-				} else {
-					(void) fprintf(stderr,
-					    gettext("invalid source "
-					    "'%s'\n"), value);
-					usage(B_FALSE);
-				}
-			}
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing property "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
-
-	/* The first operand is the comma-separated property list. */
-	if (zfs_get_proplist(g_zfs, argv[0], &cb.cb_proplist) != 0)
-		usage(B_FALSE);
-
-	argc--;
-	argv++;
-
-	/*
-	 * The NAME (and SOURCE) column widths depend on the longest dataset
-	 * name, which is tracked through the 'name' property.  The user most
-	 * likely did not ask for 'name', so a stack-allocated entry for it is
-	 * put at the head of the list.  It is never handed to
-	 * zfs_free_proplist().
-	 */
-	if (cb.cb_proplist != NULL) {
-		fake_name.pl_prop = ZFS_PROP_NAME;
-		fake_name.pl_width = strlen(gettext("NAME"));
-		fake_name.pl_next = cb.cb_proplist;
-		cb.cb_proplist = &fake_name;
-	}
-
-	cb.cb_first = B_TRUE;
-
-	ret = zfs_for_each(argc, argv, recurse, ZFS_TYPE_ANY, NULL,
-	    &cb.cb_proplist, get_callback, &cb, B_FALSE);
-
-	/* Free only the heap-allocated part of the list. */
-	zfs_free_proplist(cb.cb_proplist == &fake_name ?
-	    fake_name.pl_next : cb.cb_proplist);
-
-	return (ret);
-}
-
-/*
- * inherit [-r] <property> <fs|vol> ...
- *
- * -r Recurse over all children
- *
- * For each dataset specified on the command line, inherit the given property
- * from its parent. Inheriting a property at the pool level will cause it to
- * use the default value. The '-r' flag will recurse over all children, and is
- * useful for setting a property on a hierarchy-wide basis, regardless of any
- * local modifications for each dataset.
- */
-typedef struct inherit_cbdata { /* state handed to inherit_callback() */
- char *cb_propname; /* name of the property passed to zfs_prop_inherit() */
- boolean_t cb_any_successful; /* set once zfs_prop_inherit() returns 0 */
-} inherit_cbdata_t;
-
-static int
-inherit_callback(zfs_handle_t *zhp, void *data)
-{
-	inherit_cbdata_t *state = data;
-
-	/* Report failure as 1; cb_any_successful is left untouched. */
-	if (zfs_prop_inherit(zhp, state->cb_propname) != 0)
-		return (1);
-
-	state->cb_any_successful = B_TRUE;
-	return (0);
-}
-
-static int
-zfs_do_inherit(int argc, char **argv)
-{
-	inherit_cbdata_t cb;
-	zfs_prop_t prop;
-	boolean_t recurse = B_FALSE;
-	int opt, ret;
-
-	/* '-r' is the only option; anything else is a usage error. */
-	while ((opt = getopt(argc, argv, "r")) != -1) {
-		if (opt == 'r') {
-			recurse = B_TRUE;
-			continue;
-		}
-		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-		    optopt);
-		usage(B_FALSE);
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* Need a property name followed by at least one dataset. */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing property argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing dataset argument\n"));
-		usage(B_FALSE);
-	}
-
-	cb.cb_propname = argv[0];
-	argc--;
-	argv++;
-
-	prop = zfs_name_to_prop(cb.cb_propname);
-	if (prop == ZFS_PROP_INVAL) {
-		/* Not a native property: it must then be a user property. */
-		if (!zfs_prop_user(cb.cb_propname)) {
-			(void) fprintf(stderr, gettext(
-			    "invalid property '%s'\n"),
-			    cb.cb_propname);
-			usage(B_FALSE);
-		}
-	} else if (zfs_prop_readonly(prop)) {
-		(void) fprintf(stderr, gettext(
-		    "%s property is read-only\n"),
-		    cb.cb_propname);
-		return (1);
-	} else if (!zfs_prop_inheritable(prop)) {
-		(void) fprintf(stderr, gettext("'%s' property cannot "
-		    "be inherited\n"), cb.cb_propname);
-		if (prop == ZFS_PROP_QUOTA ||
-		    prop == ZFS_PROP_RESERVATION)
-			(void) fprintf(stderr, gettext("use 'zfs set "
-			    "%s=none' to clear\n"), cb.cb_propname);
-		return (1);
-	}
-
-	cb.cb_any_successful = B_FALSE;
-
-	ret = zfs_for_each(argc, argv, recurse,
-	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL,
-	    inherit_callback, &cb, B_FALSE);
-
-	/*
-	 * argc/argv were advanced past the options and the property name, so
-	 * step back by optind + 1 to log the whole command line.
-	 */
-	if (cb.cb_any_successful) {
-		zpool_log_history(g_zfs, argc + optind + 1, argv - optind - 1,
-		    argv[0], B_FALSE, B_FALSE);
-	}
-
-	return (ret);
-}
-
-/*
- * list [-rH] [-o property[,property]...] [-t type[,type]...]
- * [-s property [-s property]...] [-S property [-S property]...]
- * <dataset> ...
- *
- * -r Recurse over all children
- * -H Scripted mode; elide headers and separate columns by tabs
- * -o Control which fields to display.
- * -t Control which object types to display.
- * -s Specify sort columns, ascending order.
- * -S Specify sort columns, descending order.
- *
- * When given no arguments, lists all filesystems in the system.
- * Otherwise, list the specified datasets, optionally recursing down them if
- * '-r' is specified.
- */
-typedef struct list_cbdata { /* state handed to list_callback() */
- boolean_t cb_first; /* B_TRUE until list_callback() has run once */
- boolean_t cb_scripted; /* -H given: no header, tab-separated columns */
- zfs_proplist_t *cb_proplist; /* columns to print, in order */
-} list_cbdata_t;
-
-/*
- * Given a list of columns to display, output appropriate headers for each one.
- *
- * Native properties use zfs_prop_column_name() as the header and are aligned
- * according to zfs_prop_align_right().  User properties use their own name
- * converted to upper case and are left-justified.  Columns are separated by a
- * single space, and a left-justified final column is printed without padding.
- */
-static void
-print_header(zfs_proplist_t *pl)
-{
-	char headerbuf[ZFS_MAXPROPLEN];
-	const char *header;
-	size_t i;
-	boolean_t first = B_TRUE;
-	boolean_t right_justify;
-
-	for (; pl != NULL; pl = pl->pl_next) {
-		if (!first) {
-			(void) printf(" ");
-		} else {
-			first = B_FALSE;
-		}
-
-		right_justify = B_FALSE;
-		if (pl->pl_prop != ZFS_PROP_INVAL) {
-			header = zfs_prop_column_name(pl->pl_prop);
-			right_justify = zfs_prop_align_right(pl->pl_prop);
-		} else {
-			/*
-			 * Never write past headerbuf, and hand toupper() an
-			 * unsigned char value: passing a negative plain char
-			 * is undefined behavior.
-			 */
-			for (i = 0; i < sizeof (headerbuf) - 1 &&
-			    pl->pl_user_prop[i] != '\0'; i++) {
-				headerbuf[i] = (char)toupper(
-				    (unsigned char)pl->pl_user_prop[i]);
-			}
-			headerbuf[i] = '\0';
-			header = headerbuf;
-		}
-
-		if (pl->pl_next == NULL && !right_justify)
-			(void) printf("%s", header);
-		else if (right_justify)
-			(void) printf("%*s", pl->pl_width, header);
-		else
-			(void) printf("%-*s", pl->pl_width, header);
-	}
-
-	(void) printf("\n");
-}
-
-/*
- * Given a dataset and a list of fields, print out all the properties according
- * to the described layout.
- */
-static void
-print_dataset(zfs_handle_t *zhp, zfs_proplist_t *pl, int scripted)
-{
-	nvlist_t *userprops = zfs_get_user_props(zhp);
-	zfs_proplist_t *head = pl;
-	zfs_proplist_t *col;
-	nvlist_t *entry;
-	char valbuf[ZFS_MAXPROPLEN];
-	char *text;
-	boolean_t align_right;
-
-	for (col = head; col != NULL; col = col->pl_next) {
-		/* Every column but the first is preceded by a separator. */
-		if (col != head)
-			(void) printf("%s", scripted ? "\t" : " ");
-
-		/* Work out the text for this column; "-" means no value. */
-		if (col->pl_prop == ZFS_PROP_INVAL) {
-			/* User property: look it up in the dataset's nvlist. */
-			align_right = B_FALSE;
-			if (nvlist_lookup_nvlist(userprops,
-			    col->pl_user_prop, &entry) == 0) {
-				verify(nvlist_lookup_string(entry,
-				    ZFS_PROP_VALUE, &text) == 0);
-			} else {
-				text = "-";
-			}
-		} else {
-			if (zfs_prop_get(zhp, col->pl_prop, valbuf,
-			    sizeof (valbuf), NULL, NULL, 0, B_FALSE) == 0)
-				text = valbuf;
-			else
-				text = "-";
-
-			align_right = zfs_prop_align_right(col->pl_prop);
-		}
-
-		/*
-		 * Scripted output, and a left-justified final column, are
-		 * printed without any padding.
-		 */
-		if (scripted || (col->pl_next == NULL && !align_right)) {
-			(void) printf("%s", text);
-		} else if (align_right) {
-			(void) printf("%*s", (int)col->pl_width, text);
-		} else {
-			(void) printf("%-*s", (int)col->pl_width, text);
-		}
-	}
-
-	(void) printf("\n");
-}
-
-/*
- * Generic callback function to list a dataset or snapshot.
- */
-static int
-list_callback(zfs_handle_t *zhp, void *data)
-{
-	list_cbdata_t *state = data;
-
-	/* The header goes out once, before the first row, unless scripted. */
-	if (state->cb_first && !state->cb_scripted)
-		print_header(state->cb_proplist);
-	state->cb_first = B_FALSE;
-
-	print_dataset(zhp, state->cb_proplist, state->cb_scripted);
-
-	return (0);
-}
-
-static int
-zfs_do_list(int argc, char **argv)
-{
-	static char default_fields[] =
-	    "name,used,available,referenced,mountpoint";
-	/* '-t' sub-option names and, at the same index, the type flag. */
-	static char *type_subopts[] =
-	    { "filesystem", "volume", "snapshot", NULL };
-	static const int type_flags[] = {
-		ZFS_TYPE_FILESYSTEM, ZFS_TYPE_VOLUME, ZFS_TYPE_SNAPSHOT
-	};
-	list_cbdata_t cb = { 0 };
-	zfs_sort_column_t *sortcol = NULL;
-	char *fields = default_fields;
-	char *value;
-	int types = ZFS_TYPE_ANY;
-	int opt, which, ret;
-
-	while ((opt = getopt(argc, argv, ":o:rt:Hs:S:")) != -1) {
-		switch (opt) {
-		case 'H':
-			cb.cb_scripted = B_TRUE;
-			break;
-		case 'o':
-			fields = optarg;
-			break;
-		case 'r':
-			recurse_set:
-			break;
-		case 's':
-		case 'S':
-			/* '-S' passes B_TRUE as the last argument, '-s' B_FALSE. */
-			if (zfs_add_sort_column(&sortcol, optarg,
-			    opt == 'S' ? B_TRUE : B_FALSE) != 0) {
-				(void) fprintf(stderr,
-				    gettext("invalid property '%s'\n"), optarg);
-				usage(B_FALSE);
-			}
-			break;
-		case 't':
-			/* An explicit type list replaces ZFS_TYPE_ANY. */
-			types = 0;
-			while (*optarg != '\0') {
-				which = getsubopt(&optarg, type_subopts,
-				    &value);
-				if (which >= 0 && which <= 2) {
-					types |= type_flags[which];
-				} else {
-					(void) fprintf(stderr,
-					    gettext("invalid type '%s'\n"),
-					    value);
-					usage(B_FALSE);
-				}
-			}
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/*
-	 * Build the column list from the '-o' argument, or from the default
-	 * field list when '-o' was not given.
-	 */
-	if (zfs_get_proplist(g_zfs, fields, &cb.cb_proplist) != 0)
-		usage(B_FALSE);
-
-	cb.cb_first = B_TRUE;
-
-	ret = zfs_for_each(argc, argv, B_FALSE, types, sortcol,
-	    &cb.cb_proplist, list_callback, &cb, B_TRUE);
-
-	zfs_free_proplist(cb.cb_proplist);
-	zfs_free_sort_columns(sortcol);
-
-	/* cb_first is still set only if list_callback() never ran. */
-	if (ret == 0 && cb.cb_first)
-		(void) printf(gettext("no datasets available\n"));
-
-	return (ret);
-}
-
-/*
- * zfs rename [-r] <fs | snap | vol> <fs | snap | vol>
- *
- * Renames the given dataset to another of the same type.
- */
-/* ARGSUSED */
-static int
-zfs_do_rename(int argc, char **argv)
-{
-	zfs_handle_t *zhp;
-	int recurse = 0;
-	int opt, failed;
-
-	/* '-r' is the only option; anything else is a usage error. */
-	while ((opt = getopt(argc, argv, "r")) != -1) {
-		if (opt == 'r') {
-			recurse = 1;
-			continue;
-		}
-		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-		    optopt);
-		usage(B_FALSE);
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* Exactly two operands are required: source and target. */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing source dataset "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing target dataset "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 2) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	/* A recursive rename needs a source name containing '@'. */
-	if (recurse && strchr(argv[0], '@') == NULL) {
-		(void) fprintf(stderr, gettext("source dataset for recursive "
-		    "rename must be a snapshot\n"));
-		usage(B_FALSE);
-	}
-
-	zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY);
-	if (zhp == NULL)
-		return (1);
-
-	failed = (zfs_rename(zhp, argv[1], recurse) != 0);
-
-	/* Log the full command line only when the rename succeeded. */
-	if (!failed) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[1],
-		    B_FALSE, B_FALSE);
-	}
-
-	zfs_close(zhp);
-	return (failed);
-}
-
-/*
- * zfs promote <fs>
- *
- * Promotes the given clone fs to be the parent
- */
-/* ARGSUSED */
-static int
-zfs_do_promote(int argc, char **argv)
-{
-	zfs_handle_t *clone;
-	int failed;
-
-	/* No options are accepted: reject anything that looks like one. */
-	if (argc > 1 && argv[1][0] == '-') {
-		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-		    argv[1][1]);
-		usage(B_FALSE);
-	}
-
-	/* Exactly one operand (argv[1]) is required. */
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing clone filesystem"
-		    " argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 2) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	clone = zfs_open(g_zfs, argv[1],
-	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-	if (clone == NULL)
-		return (1);
-
-	failed = (zfs_promote(clone) != 0);
-	if (!failed)
-		zpool_log_history(g_zfs, argc, argv, argv[1], B_FALSE, B_FALSE);
-
-	zfs_close(clone);
-	return (failed);
-}
-
-/*
- * zfs rollback [-rfR] <snapshot>
- *
- * -r Delete any intervening snapshots before doing rollback
- * -R Delete any snapshots and their clones
- * -f Force unmount filesystems, even if they are in use.
- *
- * Given a filesystem, rollback to a specific snapshot, discarding any changes
- * since then and making it the active dataset. If more recent snapshots exist,
- * the command will complain unless the '-r' flag is given.
- */
-typedef struct rollback_cbdata { /* state handed to rollback_check() */
- uint64_t cb_create; /* ZFS_PROP_CREATETXG of the target snapshot */
- boolean_t cb_first; /* cleared once the diagnostic heading is printed */
- int cb_doclones; /* set by -R; rollback_check() then reports nothing */
- char *cb_target; /* name of the snapshot being rolled back to */
- int cb_error; /* set to 1 when a diagnostic heading is printed */
- boolean_t cb_recurse; /* set by -r and -R */
- boolean_t cb_dependent; /* set while iterating a snapshot's dependents */
-} rollback_cbdata_t;
-
-/*
- * Report any snapshots more recent than the one specified. Used when '-r' is
- * not specified. We reuse this same callback for the snapshot dependents - if
- * 'cb_dependent' is set, then this is a dependent and we should report it
- * without checking the transaction group.
- */
-static int
-rollback_check(zfs_handle_t *zhp, void *data)
-{
-	rollback_cbdata_t *state = data;
-	int rv = 0;
-
-	/* With cb_doclones set there is nothing to report. */
-	if (state->cb_doclones)
-		goto done;
-
-	if (state->cb_dependent) {
-		/*
-		 * Called for a dependent of a more recent snapshot: list it,
-		 * preceded by a heading the first time through.
-		 */
-		if (state->cb_first && state->cb_recurse) {
-			(void) fprintf(stderr, gettext("cannot rollback to "
-			    "'%s': clones of previous snapshots exist\n"),
-			    state->cb_target);
-			(void) fprintf(stderr, gettext("use '-R' to "
-			    "force deletion of the following clones and "
-			    "dependents:\n"));
-			state->cb_first = B_FALSE;
-			state->cb_error = 1;
-		}
-
-		(void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
-		goto done;
-	}
-
-	/*
-	 * Only snapshots other than the target that were created after it
-	 * are of interest.
-	 */
-	if (strcmp(zfs_get_name(zhp), state->cb_target) == 0 ||
-	    zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT ||
-	    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= state->cb_create)
-		goto done;
-
-	if (!state->cb_recurse) {
-		/* Without '-r', a more recent snapshot is an error. */
-		if (state->cb_first) {
-			(void) fprintf(stderr, gettext("cannot "
-			    "rollback to '%s': more recent snapshots "
-			    "exist\n"),
-			    state->cb_target);
-			(void) fprintf(stderr, gettext("use '-r' to "
-			    "force deletion of the following "
-			    "snapshots:\n"));
-			state->cb_first = B_FALSE;
-			state->cb_error = 1;
-		}
-
-		(void) fprintf(stderr, "%s\n", zfs_get_name(zhp));
-		goto done;
-	}
-
-	/*
-	 * With '-r', re-enter this function for each dependent of the
-	 * snapshot.  cb_dependent is only cleared again on success.
-	 */
-	state->cb_dependent = B_TRUE;
-	if (zfs_iter_dependents(zhp, B_TRUE, rollback_check, state) != 0) {
-		rv = -1;
-		goto done;
-	}
-	state->cb_dependent = B_FALSE;
-
-done:
-	/* The handle is closed on every path. */
-	zfs_close(zhp);
-	return (rv);
-}
-
-static int
-zfs_do_rollback(int argc, char **argv)
-{
-	rollback_cbdata_t cb = { 0 };
-	zfs_handle_t *parent, *snap;
-	char parentname[ZFS_MAXNAMELEN];
-	char *at;
-	int force = 0;
-	int opt, ret;
-
-	while ((opt = getopt(argc, argv, "rfR")) != -1) {
-		switch (opt) {
-		case 'R':
-			cb.cb_recurse = 1;
-			cb.cb_doclones = 1;
-			break;
-		case 'r':
-			cb.cb_recurse = 1;
-			break;
-		case 'f':
-			force = 1;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* Exactly one operand, the snapshot, is required. */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing dataset argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT);
-	if (snap == NULL)
-		return (1);
-
-	/* The parent's name is the snapshot name up to the last '@'. */
-	(void) strlcpy(parentname, argv[0], sizeof (parentname));
-	verify((at = strrchr(parentname, '@')) != NULL);
-	*at = '\0';
-	parent = zfs_open(g_zfs, parentname, ZFS_TYPE_ANY);
-	if (parent == NULL) {
-		zfs_close(snap);
-		return (1);
-	}
-
-	/*
-	 * Let rollback_check() look at each child of the parent; what it
-	 * reports depends on whether '-r' and '-R' were given.
-	 */
-	cb.cb_target = argv[0];
-	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
-	cb.cb_first = B_TRUE;
-	cb.cb_error = 0;
-
-	ret = zfs_iter_children(parent, rollback_check, &cb);
-	if (ret == 0)
-		ret = cb.cb_error;
-
-	if (ret == 0) {
-		/* Nothing stands in the way: roll the parent back. */
-		ret = zfs_rollback(parent, snap, force);
-
-		if (ret == 0) {
-			zpool_log_history(g_zfs, argc + optind, argv - optind,
-			    argv[0], B_FALSE, B_FALSE);
-		}
-	}
-
-	zfs_close(snap);
-	zfs_close(parent);
-
-	return (ret != 0);
-}
-
-/*
- * zfs set property=value { fs | snap | vol } ...
- *
- * Sets the given property for all datasets specified on the command line.
- */
-typedef struct set_cbdata { /* state handed to set_callback() */
- char *cb_propname; /* property name passed to zfs_prop_set() */
- char *cb_value; /* value passed to zfs_prop_set() */
- boolean_t cb_any_successful; /* set once zfs_prop_set() returns 0 */
-} set_cbdata_t;
-
-static int
-set_callback(zfs_handle_t *zhp, void *data)
-{
-	set_cbdata_t *state = data;
-	int err;
-
-	if (zfs_prop_set(zhp, state->cb_propname, state->cb_value) == 0) {
-		state->cb_any_successful = B_TRUE;
-		return (0);
-	}
-
-	/*
-	 * For these two errors, add a hint that the property itself may
-	 * have been set.
-	 */
-	err = libzfs_errno(g_zfs);
-	if (err == EZFS_MOUNTFAILED) {
-		(void) fprintf(stderr, gettext("property may be set "
-		    "but unable to remount filesystem\n"));
-	} else if (err == EZFS_SHARENFSFAILED) {
-		(void) fprintf(stderr, gettext("property may be set "
-		    "but unable to reshare filesystem\n"));
-	}
-
-	return (1);
-}
-
-static int
-zfs_do_set(int argc, char **argv)
-{
-	set_cbdata_t cb;
-	char *eq;
-	int ret;
-
-	/* No options are accepted: reject anything that looks like one. */
-	if (argc > 1 && argv[1][0] == '-') {
-		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-		    argv[1][1]);
-		usage(B_FALSE);
-	}
-
-	/* argv[1] is property=value; argv[2] onwards are the datasets. */
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing property=value "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 3) {
-		(void) fprintf(stderr, gettext("missing dataset name\n"));
-		usage(B_FALSE);
-	}
-
-	/* Split argv[1] in place at the first '='. */
-	cb.cb_propname = argv[1];
-	eq = strchr(cb.cb_propname, '=');
-	if (eq == NULL) {
-		(void) fprintf(stderr, gettext("missing value in "
-		    "property=value argument\n"));
-		usage(B_FALSE);
-	}
-
-	*eq = '\0';
-	cb.cb_value = eq + 1;
-	cb.cb_any_successful = B_FALSE;
-
-	if (*cb.cb_propname == '\0') {
-		(void) fprintf(stderr,
-		    gettext("missing property in property=value argument\n"));
-		usage(B_FALSE);
-	}
-
-	ret = zfs_for_each(argc - 2, argv + 2, B_FALSE,
-	    ZFS_TYPE_ANY, NULL, NULL, set_callback, &cb, B_FALSE);
-
-	if (cb.cb_any_successful) {
-		/* Put the '=' back so the logged command line is intact. */
-		*eq = '=';
-		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
-	}
-
-	return (ret);
-}
-
-/*
- * zfs snapshot [-r] <fs@snap>
- *
- * Creates a snapshot with the given name.  While functionally equivalent to
- * 'zfs create', it is a separate command to differentiate intent.
- *
- * Returns 0 if zfs_snapshot() succeeded and 1 otherwise.  The command is
- * logged with zpool_log_history() only on success.
- */
-static int
-zfs_do_snapshot(int argc, char **argv)
-{
-	int recursive = B_FALSE;
-	int ret;
-	/*
-	 * getopt() returns an int.  Storing it in a plain char breaks the
-	 * comparison with -1 wherever char is unsigned, so the loop below
-	 * would never terminate.
-	 */
-	int c;
-
-	/* check options */
-	while ((c = getopt(argc, argv, ":r")) != -1) {
-		switch (c) {
-		case 'r':
-			recursive = B_TRUE;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	ret = zfs_snapshot(g_zfs, argv[0], recursive);
-	if (ret && recursive)
-		(void) fprintf(stderr, gettext("no snapshots were created\n"));
-	if (!ret) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
-		    B_FALSE, B_FALSE);
-	}
-	return (ret != 0);
-}
-
-/*
- * zfs send [-i <@snap>] <fs@snap>
- *
- * Send a backup stream to stdout.
- */
-static int
-zfs_do_send(int argc, char **argv)
-{
-	zfs_handle_t *snap;
-	char *fromname = NULL;
-	char *at;
-	int opt, err;
-
-	while ((opt = getopt(argc, argv, ":i:")) != -1) {
-		switch (opt) {
-		case 'i':
-			/* '-i' may be given only once. */
-			if (fromname != NULL)
-				usage(B_FALSE);
-			fromname = optarg;
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* Exactly one operand, the snapshot to send, is required. */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	if (isatty(STDOUT_FILENO)) {
-		(void) fprintf(stderr,
-		    gettext("Error: Stream can not be written to a terminal.\n"
-		    "You must redirect standard output.\n"));
-		return (1);
-	}
-
-	snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT);
-	if (snap == NULL)
-		return (1);
-
-	/*
-	 * The incremental source may be written as "@snap" or "fs@snap".
-	 * Reduce it to the bare snapshot name.  Any text before the '@',
-	 * together with the '@' itself, must be a prefix of the snapshot
-	 * being sent.
-	 */
-	at = (fromname != NULL) ? strchr(fromname, '@') : NULL;
-	if (at != NULL) {
-		if (at != fromname &&
-		    strncmp(argv[0], fromname, at - fromname + 1) != 0) {
-			(void) fprintf(stderr,
-			    gettext("incremental source must be "
-			    "in same filesystem\n"));
-			usage(B_FALSE);
-		}
-
-		fromname = at + 1;
-		if (strchr(fromname, '@') != NULL ||
-		    strchr(fromname, '/') != NULL) {
-			(void) fprintf(stderr,
-			    gettext("invalid incremental source\n"));
-			usage(B_FALSE);
-		}
-	}
-
-	err = zfs_send(snap, fromname, STDOUT_FILENO);
-	zfs_close(snap);
-
-	return (err != 0);
-}
-
-/*
- * zfs receive [-dnvF] <fs@snap>
- *
- * Restore a backup stream from stdin.
- */
-static int
-zfs_do_receive(int argc, char **argv)
-{
-	boolean_t isprefix = B_FALSE;
-	boolean_t dryrun = B_FALSE;
-	boolean_t verbose = B_FALSE;
-	boolean_t force = B_FALSE;
-	int opt, err;
-
-	/* Each flag simply switches on one argument of zfs_receive(). */
-	while ((opt = getopt(argc, argv, ":dnvF")) != -1) {
-		switch (opt) {
-		case 'F':
-			force = B_TRUE;
-			break;
-		case 'd':
-			isprefix = B_TRUE;
-			break;
-		case 'n':
-			dryrun = B_TRUE;
-			break;
-		case 'v':
-			verbose = B_TRUE;
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* Exactly one operand is required. */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	if (isatty(STDIN_FILENO)) {
-		(void) fprintf(stderr,
-		    gettext("Error: Backup stream can not be read "
-		    "from a terminal.\n"
-		    "You must redirect standard input.\n"));
-		return (1);
-	}
-
-	err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun, force,
-	    STDIN_FILENO);
-
-	/* Log the full command line only when the receive succeeded. */
-	if (err == 0) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
-		    B_FALSE, B_FALSE);
-	}
-
-	return (err != 0);
-}
-
-typedef struct get_all_cbdata { /* state handed to get_one_dataset() */
- zfs_handle_t **cb_handles; /* array of collected handles */
- size_t cb_alloc; /* number of entries cb_handles has room for */
- size_t cb_used; /* number of entries of cb_handles in use */
- uint_t cb_types; /* mask of dataset types to collect */
-} get_all_cbdata_t;
-
-/*
- * Iteration callback that collects dataset handles into a get_all_cbdata_t.
- * A handle that is stored stays open; every other handle is closed here.
- * Returns 1 if iterating over nested filesystems failed, 0 otherwise.
- */
-static int
-get_one_dataset(zfs_handle_t *zhp, void *data)
-{
-	get_all_cbdata_t *state = data;
-	zfs_type_t type = zfs_get_type(zhp);
-	zfs_handle_t **grown;
-
-	/* Descend into nested filesystems before dealing with this one. */
-	if (type == ZFS_TYPE_FILESYSTEM &&
-	    zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
-		zfs_close(zhp);
-		return (1);
-	}
-
-	/* Datasets of a type that was not asked for are dropped. */
-	if ((type & state->cb_types) == 0) {
-		zfs_close(zhp);
-		return (0);
-	}
-
-	/* Array full: start at 64 entries, then double each time. */
-	if (state->cb_used == state->cb_alloc) {
-		state->cb_alloc =
-		    (state->cb_alloc == 0) ? 64 : state->cb_alloc * 2;
-
-		grown = safe_malloc(state->cb_alloc * sizeof (void *));
-
-		if (state->cb_handles) {
-			bcopy(state->cb_handles, grown,
-			    state->cb_used * sizeof (void *));
-			free(state->cb_handles);
-		}
-
-		state->cb_handles = grown;
-	}
-
-	state->cb_handles[state->cb_used] = zhp;
-	state->cb_used++;
-
-	return (0);
-}
-
-/*
- * Collect handles for the datasets whose type is in 'types', starting from
- * zfs_iter_root().  The array and its length are returned through 'dslist'
- * and 'count'; the result of the iteration itself is ignored.
- */
-static void
-get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count)
-{
-	get_all_cbdata_t state;
-
-	bzero(&state, sizeof (state));
-	state.cb_types = types;
-
-	(void) zfs_iter_root(g_zfs, get_one_dataset, &state);
-
-	*count = state.cb_used;
-	*dslist = state.cb_handles;
-}
-
-/*
- * qsort() comparison function for an array of zfs_handle_t pointers.
- *
- * Filesystems sort before everything else and are ordered among themselves
- * by mountpoint.  Two non-filesystems are ordered by dataset name.
- */
-static int
-dataset_cmp(const void *a, const void *b)
-{
-	/* qsort() passes pointers to the array elements, not the elements. */
-	zfs_handle_t *const *za = a;
-	zfs_handle_t *const *zb = b;
-	char mounta[MAXPATHLEN];
-	char mountb[MAXPATHLEN];
-	boolean_t gota, gotb;
-
-	if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
-		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
-		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
-	if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
-		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
-		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
-
-	if (gota && gotb)
-		return (strcmp(mounta, mountb));
-
-	if (gota)
-		return (-1);
-	if (gotb)
-		return (1);
-
-	/*
-	 * Compare the names of the handles themselves.  The raw 'a' and 'b'
-	 * point at array slots, not at handles, so they must not be passed
-	 * to zfs_get_name().
-	 */
-	return (strcmp(zfs_get_name(*za), zfs_get_name(*zb)));
-}
-
-/*
- * Generic callback for sharing or mounting filesystems. Because the code is so
- * similar, we have a common function with an extra parameter to determine which
- * mode we are using.
- */
-#define OP_SHARE 0x1
-#define OP_MOUNT 0x2
-
-/*
- * Share or mount a dataset.
- *
- * 'op' is OP_SHARE or OP_MOUNT. 'explicit' is B_TRUE when the dataset was
- * named on the command line and B_FALSE when it is visited as part of '-a':
- * every check below that rejects the dataset returns 0 silently when
- * !explicit, and prints a diagnostic and returns 1 when explicit. 'flags'
- * and 'options' are handed to zfs_mount() for OP_MOUNT.
- *
- * Returns 0 on success or when the dataset is skipped, and 1 on failure.
- */
-static int
-share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
- const char *options)
-{
- char mountpoint[ZFS_MAXPROPLEN];
- char shareopts[ZFS_MAXPROPLEN];
- const char *cmdname = op == OP_SHARE ? "share" : "mount";
- struct mnttab mnt; /* only mnt_mntopts is set, for hasmntopt() below */
- uint64_t zoned, canmount;
- zfs_type_t type = zfs_get_type(zhp);
-
- assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME));
-
- if (type == ZFS_TYPE_FILESYSTEM) {
- /*
- * Check to make sure we can mount/share this dataset. If we
- * are in the global zone and the filesystem is exported to a
- * local zone, or if we are in a local zone and the
- * filesystem is not exported, then it is an error.
- */
- zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
-
- if (zoned && getzoneid() == GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "dataset is exported to a local zone\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
-
- } else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "permission denied\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
- }
-
- /*
- * Ignore any filesystems which don't apply to us. This
- * includes those with a legacy mountpoint, or those with
- * legacy share options.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
-
- if (op == OP_SHARE && strcmp(shareopts, "off") == 0) { /* sharenfs=off */
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "legacy share\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use share(1M) to "
- "share this filesystem\n"));
- return (1);
- }
-
- /*
- * We cannot share or mount legacy filesystems. If the
- * shareopts is non-legacy but the mountpoint is legacy, we
- * treat it as a legacy share.
- */
- if (strcmp(mountpoint, "legacy") == 0) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use %s to "
- "%s this filesystem\n"), op == OP_SHARE ?
- "share(1M)" : "mount(1M)", cmdname);
- return (1);
- }
-
- if (strcmp(mountpoint, "none") == 0) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot %s '%s': no "
- "mountpoint set\n"), cmdname, zfs_get_name(zhp));
- return (1);
- }
-
- if (!canmount) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "'canmount' property is set to 'off'\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
- }
-
- /*
- * At this point, we have verified that the mountpoint and/or
- * shareopts are appropriate for auto management. If the
- * filesystem is already mounted or shared, return (failing
- * for explicit requests); otherwise mount or share the
- * filesystem.
- */
- switch (op) {
- case OP_SHARE:
- if (zfs_is_shared_nfs(zhp, NULL)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': filesystem already shared\n"),
- zfs_get_name(zhp));
- return (1);
- }
-
- if (!zfs_is_mounted(zhp, NULL) && /* mount first if needed */
- zfs_mount(zhp, NULL, 0) != 0)
- return (1);
-
- if (zfs_share_nfs(zhp) != 0)
- return (1);
- break;
-
- case OP_MOUNT:
- if (options == NULL)
- mnt.mnt_mntopts = "";
- else
- mnt.mnt_mntopts = (char *)options;
-
- if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && /* remount may proceed */
- zfs_is_mounted(zhp, NULL)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot mount "
- "'%s': filesystem already mounted\n"),
- zfs_get_name(zhp));
- return (1);
- }
-
- if (zfs_mount(zhp, options, flags) != 0)
- return (1);
- break;
- }
- } else { /* not a filesystem: only sharing is supported */
- assert(op == OP_SHARE);
-
- /*
- * Ignore any volumes that aren't shared.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
-
- if (strcmp(shareopts, "off") == 0) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "'shareiscsi' property not set\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set 'shareiscsi' "
- "property or use iscsitadm(1M) to share this "
- "volume\n"));
- return (1);
- }
-
- if (zfs_is_shared_iscsi(zhp)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': volume already shared\n"),
- zfs_get_name(zhp));
- return (1);
- }
-
- if (zfs_share_iscsi(zhp) != 0)
- return (1);
- }
-
- return (0);
-}
-
-/*
- * Common implementation of 'zfs mount' and 'zfs share'; 'op' is OP_MOUNT or
- * OP_SHARE.  Three forms are handled:
- *
- *   -a            act on every dataset of the applicable types;
- *   no operands   (mount only) list the currently mounted ZFS filesystems;
- *   one operand   act on the named dataset.
- *
- * Returns 0 on success and 1 if any dataset could not be processed or the
- * mount list could not be read.
- */
-static int
-share_mount(int op, int argc, char **argv)
-{
-	int do_all = 0;
-	int c, ret = 0;
-	const char *options = NULL;
-	int types, flags = 0;
-
-	/* check options */
-	while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ao:O" : "a"))
-	    != -1) {
-		switch (c) {
-		case 'a':
-			do_all = 1;
-			break;
-		case 'o':
-			options = optarg;
-			break;
-		case 'O':
-			warnx("no overlay mounts support on FreeBSD, ignoring");
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* check number of arguments */
-	if (do_all) {
-		zfs_handle_t **dslist = NULL;
-		size_t i, count = 0;
-
-		/*
-		 * Mounting applies to filesystems only.  Sharing may be
-		 * narrowed to one protocol by an 'nfs' or 'iscsi' operand.
-		 */
-		if (op == OP_MOUNT) {
-			types = ZFS_TYPE_FILESYSTEM;
-		} else if (argc > 0) {
-			if (strcmp(argv[0], "nfs") == 0) {
-				types = ZFS_TYPE_FILESYSTEM;
-			} else if (strcmp(argv[0], "iscsi") == 0) {
-				types = ZFS_TYPE_VOLUME;
-			} else {
-				(void) fprintf(stderr, gettext("share type "
-				    "must be 'nfs' or 'iscsi'\n"));
-				usage(B_FALSE);
-			}
-
-			argc--;
-			argv++;
-		} else {
-			types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
-		}
-
-		if (argc != 0) {
-			(void) fprintf(stderr, gettext("too many arguments\n"));
-			usage(B_FALSE);
-		}
-
-		get_all_datasets(types, &dslist, &count);
-
-		if (count == 0)
-			return (0);
-
-		/* Process the datasets in dataset_cmp() order. */
-		qsort(dslist, count, sizeof (void *), dataset_cmp);
-
-		for (i = 0; i < count; i++) {
-			if (share_mount_one(dslist[i], op, flags, B_FALSE,
-			    options) != 0)
-				ret = 1;
-			zfs_close(dslist[i]);
-		}
-
-		free(dslist);
-	} else if (argc == 0) {
-		struct statfs *sfs;
-		int i, n;
-
-		if (op == OP_SHARE) {
-			(void) fprintf(stderr, gettext("missing filesystem "
-			    "argument\n"));
-			usage(B_FALSE);
-		}
-
-		/*
-		 * When mount is given no arguments, go through the list
-		 * returned by getmntinfo() and display any active ZFS
-		 * mounts.  We hide any snapshots, since they are controlled
-		 * automatically.
-		 */
-		if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
-			(void) fprintf(stderr, "getmntinfo(): %s\n",
-			    strerror(errno));
-			/* An error was reported, so do not claim success. */
-			return (1);
-		}
-		for (i = 0; i < n; i++) {
-			if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0 ||
-			    strchr(sfs[i].f_mntfromname, '@') != NULL)
-				continue;
-
-			(void) printf("%-30s %s\n", sfs[i].f_mntfromname,
-			    sfs[i].f_mntonname);
-		}
-
-	} else {
-		zfs_handle_t *zhp;
-
-		/* Volumes can be shared but not mounted. */
-		types = ZFS_TYPE_FILESYSTEM;
-		if (op == OP_SHARE)
-			types |= ZFS_TYPE_VOLUME;
-
-		if (argc > 1) {
-			(void) fprintf(stderr,
-			    gettext("too many arguments\n"));
-			usage(B_FALSE);
-		}
-
-		if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) {
-			ret = 1;
-		} else {
-			ret = share_mount_one(zhp, op, flags, B_TRUE,
-			    options);
-			zfs_close(zhp);
-		}
-	}
-
-	return (ret);
-}
-
-/*
- * zfs mount -a [nfs | iscsi]
- * zfs mount filesystem
- *
- * Mount all filesystems, or mount the given filesystem.
- */
-static int
-zfs_do_mount(int argc, char **argv)
-{
- return (share_mount(OP_MOUNT, argc, argv));
-}
-
-/*
- * zfs share -a [nfs | iscsi]
- * zfs share filesystem
- *
- * Share all filesystems, or share the given filesystem.
- */
-static int
-zfs_do_share(int argc, char **argv)
-{
- return (share_mount(OP_SHARE, argc, argv));
-}
-
-typedef struct unshare_unmount_node {
- zfs_handle_t *un_zhp;
- char *un_mountp;
- uu_avl_node_t un_avlnode;
-} unshare_unmount_node_t;
-
-/* ARGSUSED */
-static int
-unshare_unmount_compare(const void *larg, const void *rarg, void *unused)
-{
- const unshare_unmount_node_t *l = larg;
- const unshare_unmount_node_t *r = rarg;
-
- return (strcmp(l->un_mountp, r->un_mountp));
-}
-
-/*
- * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an
- * absolute path, find the entry /etc/mnttab, verify that its a ZFS filesystem,
- * and unmount it appropriately.
- */
-static int
-unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
-{
- zfs_handle_t *zhp;
- int ret;
- struct mnttab search = { 0 }, entry;
- const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
- char property[ZFS_MAXPROPLEN];
-
- /*
- * Search for the given (major,minor) pair in the mount table.
- */
- search.mnt_mountp = path;
- rewind(mnttab_file);
- if (getmntany(mnttab_file, &entry, &search) != 0) {
- (void) fprintf(stderr, gettext("cannot %s '%s': not "
- "currently mounted\n"), cmdname, path);
- return (1);
- }
-
- if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
- (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS "
- "filesystem\n"), cmdname, path);
- return (1);
- }
-
- if ((zhp = zfs_open(g_zfs, entry.mnt_special,
- ZFS_TYPE_FILESYSTEM)) == NULL)
- return (1);
-
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
-
- if (op == OP_SHARE) {
- if (strcmp(property, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot unshare "
- "'%s': legacy share\n"), path);
- (void) fprintf(stderr, gettext("use "
- "unshare(1M) to unshare this filesystem\n"));
- ret = 1;
- } else if (!zfs_is_shared_nfs(zhp, NULL)) {
- (void) fprintf(stderr, gettext("cannot unshare '%s': "
- "not currently shared\n"), path);
- ret = 1;
- } else {
- ret = zfs_unshareall_nfs(zhp);
- }
- } else {
- if (is_manual) {
- ret = zfs_unmount(zhp, NULL, flags);
- } else if (strcmp(property, "legacy") == 0) {
- (void) fprintf(stderr, gettext("cannot unmount "
- "'%s': legacy mountpoint\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use umount(1M) "
- "to unmount this filesystem\n"));
- ret = 1;
- } else {
- ret = zfs_unmountall(zhp, flags);
- }
- }
-
- zfs_close(zhp);
-
- return (ret != 0);
-}
-
-/*
- * Generic callback for unsharing or unmounting a filesystem.
- */
-static int
-unshare_unmount(int op, int argc, char **argv)
-{
- int do_all = 0;
- int flags = 0;
- int ret = 0;
- int types, c;
- zfs_handle_t *zhp;
- char property[ZFS_MAXPROPLEN];
-
- /* check options */
- while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) {
- switch (c) {
- case 'a':
- do_all = 1;
- break;
- case 'f':
- flags = MS_FORCE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- if (do_all) {
- /*
- * We could make use of zfs_for_each() to walk all datasets in
- * the system, but this would be very inefficient, especially
- * since we would have to linearly search /etc/mnttab for each
- * one. Instead, do one pass through /etc/mnttab looking for
- * zfs entries and call zfs_unmount() for each one.
- *
- * Things get a little tricky if the administrator has created
- * mountpoints beneath other ZFS filesystems. In this case, we
- * have to unmount the deepest filesystems first. To accomplish
- * this, we place all the mountpoints in an AVL tree sorted by
- * the special type (dataset name), and walk the result in
- * reverse to make sure to get any snapshots first.
- */
- uu_avl_pool_t *pool;
- uu_avl_t *tree;
- unshare_unmount_node_t *node;
- uu_avl_index_t idx;
- uu_avl_walk_t *walk;
- struct statfs *sfs;
- int i, n;
-
- if (argc != 0) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- if ((pool = uu_avl_pool_create("unmount_pool",
- sizeof (unshare_unmount_node_t),
- offsetof(unshare_unmount_node_t, un_avlnode),
- unshare_unmount_compare,
- UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
-
- if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
-
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "getmntinfo() failed\n"));
- exit(1);
- }
- for (i = 0; i < n; i++) {
-
- /* ignore non-ZFS entries */
- if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0)
- continue;
-
- /* ignore snapshots */
- if (strchr(sfs[i].f_mntfromname, '@') != NULL)
- continue;
-
- if ((zhp = zfs_open(g_zfs, sfs[i].f_mntfromname,
- ZFS_TYPE_FILESYSTEM)) == NULL) {
- ret = 1;
- continue;
- }
-
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
- property, sizeof (property), NULL, NULL,
- 0, B_FALSE) == 0);
-
- /* Ignore legacy mounts and shares */
- if ((op == OP_SHARE &&
- strcmp(property, "off") == 0) ||
- (op == OP_MOUNT &&
- strcmp(property, "legacy") == 0)) {
- zfs_close(zhp);
- continue;
- }
-
- node = safe_malloc(sizeof (unshare_unmount_node_t));
- node->un_zhp = zhp;
-
- if ((node->un_mountp = strdup(sfs[i].f_mntonname)) ==
- NULL) {
- (void) fprintf(stderr, gettext("internal error:"
- " out of memory\n"));
- exit(1);
- }
-
- uu_avl_node_init(node, &node->un_avlnode, pool);
-
- if (uu_avl_find(tree, node, NULL, &idx) == NULL) {
- uu_avl_insert(tree, node, idx);
- } else {
- zfs_close(node->un_zhp);
- free(node->un_mountp);
- free(node);
- }
- }
-
- /*
- * Walk the AVL tree in reverse, unmounting each filesystem and
- * removing it from the AVL tree in the process.
- */
- if ((walk = uu_avl_walk_start(tree,
- UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
-
- while ((node = uu_avl_walk_next(walk)) != NULL) {
- uu_avl_remove(tree, node);
-
- switch (op) {
- case OP_SHARE:
- if (zfs_unshare_nfs(node->un_zhp,
- node->un_mountp) != 0)
- ret = 1;
- break;
-
- case OP_MOUNT:
- if (zfs_unmount(node->un_zhp,
- node->un_mountp, flags) != 0)
- ret = 1;
- break;
- }
-
- zfs_close(node->un_zhp);
- free(node->un_mountp);
- free(node);
- }
-
- uu_avl_walk_end(walk);
- uu_avl_destroy(tree);
- uu_avl_pool_destroy(pool);
-
- if (op == OP_SHARE) {
- /*
- * Finally, unshare any volumes shared via iSCSI.
- */
- zfs_handle_t **dslist = NULL;
- size_t i, count = 0;
-
- get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count);
-
- if (count != 0) {
- qsort(dslist, count, sizeof (void *),
- dataset_cmp);
-
- for (i = 0; i < count; i++) {
- if (zfs_unshare_iscsi(dslist[i]) != 0)
- ret = 1;
- zfs_close(dslist[i]);
- }
-
- free(dslist);
- }
- }
- } else {
- if (argc != 1) {
- if (argc == 0)
- (void) fprintf(stderr,
- gettext("missing filesystem argument\n"));
- else
- (void) fprintf(stderr,
- gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- /*
- * We have an argument, but it may be a full path or a ZFS
- * filesystem. Pass full paths off to unmount_path() (shared by
- * manual_unmount), otherwise open the filesystem and pass to
- * zfs_unmount().
- */
- if (argv[0][0] == '/')
- return (unshare_unmount_path(op, argv[0],
- flags, B_FALSE));
-
- types = ZFS_TYPE_FILESYSTEM;
- if (op == OP_SHARE)
- types |= ZFS_TYPE_VOLUME;
-
- if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL)
- return (1);
-
- if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
-
- switch (op) {
- case OP_SHARE:
- if (strcmp(property, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': legacy share\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "unshare(1M) to unshare this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_shared_nfs(zhp, NULL)) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently "
- "shared\n"), zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unshareall_nfs(zhp) != 0) {
- ret = 1;
- }
- break;
-
- case OP_MOUNT:
- if (strcmp(property, "legacy") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': legacy "
- "mountpoint\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "umount(1M) to unmount this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_mounted(zhp, NULL)) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': not currently "
- "mounted\n"),
- zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unmountall(zhp, flags) != 0) {
- ret = 1;
- }
- break;
- }
- } else {
- assert(op == OP_SHARE);
-
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
-
- if (strcmp(property, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot unshare "
- "'%s': 'shareiscsi' property not set\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set "
- "'shareiscsi' property or use "
- "iscsitadm(1M) to share this volume\n"));
- ret = 1;
- } else if (!zfs_is_shared_iscsi(zhp)) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently shared\n"),
- zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unshare_iscsi(zhp) != 0) {
- ret = 1;
- }
- }
-
- zfs_close(zhp);
- }
-
- return (ret);
-}
-
-/*
- * zfs unmount -a
- * zfs unmount filesystem
- *
- * Unmount all filesystems, or a specific ZFS filesystem.
- */
-static int
-zfs_do_unmount(int argc, char **argv)
-{
- return (unshare_unmount(OP_MOUNT, argc, argv));
-}
-
-/*
- * zfs unshare -a
- * zfs unshare filesystem
- *
- * Unshare all filesystems, or a specific ZFS filesystem.
- */
-static int
-zfs_do_unshare(int argc, char **argv)
-{
- return (unshare_unmount(OP_SHARE, argc, argv));
-}
-
-/*
- * Attach/detach the given dataset to/from the given jail
- */
-/* ARGSUSED */
-static int
-do_jail(int argc, char **argv, int attach)
-{
- zfs_handle_t *zhp;
- int jailid, ret;
-
- /* check number of arguments */
- if (argc < 3) {
- (void) fprintf(stderr, gettext("missing argument(s)\n"));
- usage(B_FALSE);
- }
- if (argc > 3) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- jailid = atoi(argv[1]);
- if (jailid == 0) {
- (void) fprintf(stderr, gettext("invalid jailid\n"));
- usage(B_FALSE);
- }
-
- zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
- if (zhp == NULL)
- return (1);
-
- ret = (zfs_jail(zhp, jailid, attach) != 0);
-
- if (!ret)
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
-
- zfs_close(zhp);
- return (ret);
-}
-
-/*
- * zfs jail jailid filesystem
- *
- * Attach the given dataset to the given jail
- */
-/* ARGSUSED */
-static int
-zfs_do_jail(int argc, char **argv)
-{
-
- return (do_jail(argc, argv, 1));
-}
-
-/*
- * zfs unjail jailid filesystem
- *
- * Detach the given dataset from the given jail
- */
-/* ARGSUSED */
-static int
-zfs_do_unjail(int argc, char **argv)
-{
-
- return (do_jail(argc, argv, 0));
-}
-
-/*
- * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
- * 'legacy'. Otherwise, complain that use should be using 'zfs mount'.
- */
-static int
-manual_mount(int argc, char **argv)
-{
- zfs_handle_t *zhp;
- char mountpoint[ZFS_MAXPROPLEN];
- char mntopts[MNT_LINE_MAX] = { '\0' };
- int ret;
- int c;
- int flags = 0;
- char *dataset, *path;
-
- /* check options */
- while ((c = getopt(argc, argv, ":mo:O")) != -1) {
- switch (c) {
- case 'o':
- (void) strlcpy(mntopts, optarg, sizeof (mntopts));
- break;
- case 'O':
-#if 0 /* FreeBSD: No support for MS_OVERLAY. */
- flags |= MS_OVERLAY;
-#endif
- break;
- case 'm':
-#if 0 /* FreeBSD: No support for MS_NOMNTTAB. */
- flags |= MS_NOMNTTAB;
-#endif
- break;
- case ':':
- (void) fprintf(stderr, gettext("missing argument for "
- "'%c' option\n"), optopt);
- usage(B_FALSE);
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- (void) fprintf(stderr, gettext("usage: mount [-o opts] "
- "<path>\n"));
- return (2);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- /* check that we only have two arguments */
- if (argc != 2) {
- if (argc == 0)
- (void) fprintf(stderr, gettext("missing dataset "
- "argument\n"));
- else if (argc == 1)
- (void) fprintf(stderr,
- gettext("missing mountpoint argument\n"));
- else
- (void) fprintf(stderr, gettext("too many arguments\n"));
- (void) fprintf(stderr, "usage: mount <dataset> <mountpoint>\n");
- return (2);
- }
-
- dataset = argv[0];
- path = argv[1];
-
- /* try to open the dataset */
- if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
- return (1);
-
- (void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
-
- /* check for legacy mountpoint and complain appropriately */
- ret = 0;
- if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) {
- if (zmount(dataset, path, flags, MNTTYPE_ZFS,
- NULL, 0, mntopts, sizeof (mntopts)) != 0) {
- (void) fprintf(stderr, gettext("mount failed: %s\n"),
- strerror(errno));
- ret = 1;
- }
- } else {
- (void) fprintf(stderr, gettext("filesystem '%s' cannot be "
- "mounted using 'mount -F zfs'\n"), dataset);
- (void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' "
- "instead.\n"), path);
- (void) fprintf(stderr, gettext("If you must use 'mount -F zfs' "
- "or /etc/vfstab, use 'zfs set mountpoint=legacy'.\n"));
- (void) fprintf(stderr, gettext("See zfs(1M) for more "
- "information.\n"));
- ret = 1;
- }
-
- return (ret);
-}
-
-/*
- * Called when invoked as /etc/fs/zfs/umount. Unlike a manual mount, we allow
- * unmounts of non-legacy filesystems, as this is the dominant administrative
- * interface.
- */
-static int
-manual_unmount(int argc, char **argv)
-{
- int flags = 0;
- int c;
-
- /* check options */
- while ((c = getopt(argc, argv, "f")) != -1) {
- switch (c) {
- case 'f':
- flags = MS_FORCE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- (void) fprintf(stderr, gettext("usage: unmount [-f] "
- "<path>\n"));
- return (2);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- /* check arguments */
- if (argc != 1) {
- if (argc == 0)
- (void) fprintf(stderr, gettext("missing path "
- "argument\n"));
- else
- (void) fprintf(stderr, gettext("too many arguments\n"));
- (void) fprintf(stderr, gettext("usage: unmount [-f] <path>\n"));
- return (2);
- }
-
- return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
-}
-
-static int
-volcheck(zpool_handle_t *zhp, void *data)
-{
- boolean_t isinit = *((boolean_t *)data);
-
- if (isinit)
- return (zpool_create_zvol_links(zhp));
- else
- return (zpool_remove_zvol_links(zhp));
-}
-
-/*
- * Iterate over all pools in the system and either create or destroy /dev/zvol
- * links, depending on the value of 'isinit'.
- */
-static int
-do_volcheck(boolean_t isinit)
-{
- return (zpool_iter(g_zfs, volcheck, &isinit) ? 1 : 0);
-}
-
-int
-main(int argc, char **argv)
-{
- int ret;
- int i;
- char *progname;
- char *cmdname;
-
- (void) setlocale(LC_ALL, "");
- (void) textdomain(TEXT_DOMAIN);
-
- opterr = 0;
-
- if ((g_zfs = libzfs_init()) == NULL) {
- (void) fprintf(stderr, gettext("internal error: failed to "
- "initialize ZFS library\n"));
- return (1);
- }
-
- libzfs_print_on_error(g_zfs, B_TRUE);
-
- if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
- (void) fprintf(stderr, gettext("internal error: unable to "
- "open %s\n"), MNTTAB);
- return (1);
- }
-
- /*
- * This command also doubles as the /etc/fs mount and unmount program.
- * Determine if we should take this behavior based on argv[0].
- */
- progname = basename(argv[0]);
- if (strcmp(progname, "mount") == 0) {
- ret = manual_mount(argc, argv);
- } else if (strcmp(progname, "umount") == 0) {
- ret = manual_unmount(argc, argv);
- } else {
- /*
- * Make sure the user has specified some command.
- */
- if (argc < 2) {
- (void) fprintf(stderr, gettext("missing command\n"));
- usage(B_FALSE);
- }
-
- cmdname = argv[1];
-
- /*
- * The 'umount' command is an alias for 'unmount'
- */
- if (strcmp(cmdname, "umount") == 0)
- cmdname = "unmount";
-
- /*
- * The 'recv' command is an alias for 'receive'
- */
- if (strcmp(cmdname, "recv") == 0)
- cmdname = "receive";
-
- /*
- * Special case '-?'
- */
- if (strcmp(cmdname, "-?") == 0)
- usage(B_TRUE);
-
- /*
- * 'volinit' and 'volfini' do not appear in the usage message,
- * so we have to special case them here.
- */
- if (strcmp(cmdname, "volinit") == 0)
- return (do_volcheck(B_TRUE));
- else if (strcmp(cmdname, "volfini") == 0)
- return (do_volcheck(B_FALSE));
-
- /*
- * Run the appropriate command.
- */
- for (i = 0; i < NCOMMAND; i++) {
- if (command_table[i].name == NULL)
- continue;
-
- if (strcmp(cmdname, command_table[i].name) == 0) {
- current_command = &command_table[i];
- ret = command_table[i].func(argc - 1, argv + 1);
- break;
- }
- }
-
- if (i == NCOMMAND) {
- (void) fprintf(stderr, gettext("unrecognized "
- "command '%s'\n"), cmdname);
- usage(B_FALSE);
- }
- }
-
- (void) fclose(mnttab_file);
-
- libzfs_fini(g_zfs);
-
- /*
- * The 'ZFS_ABORT' environment variable causes us to dump core on exit
- * for the purposes of running ::findleaks.
- */
- if (getenv("ZFS_ABORT") != NULL) {
- (void) printf("dumping core by request\n");
- abort();
- }
-
- return (ret);
-}
diff --git a/contrib/opensolaris/cmd/zfs/zfs_util.h b/contrib/opensolaris/cmd/zfs/zfs_util.h
deleted file mode 100644
index c7f2f16..0000000
--- a/contrib/opensolaris/cmd/zfs/zfs_util.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZFS_UTIL_H
-#define _ZFS_UTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <libzfs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void * safe_malloc(size_t size);
-libzfs_handle_t *g_zfs;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZFS_UTIL_H */
diff --git a/contrib/opensolaris/cmd/zpool/zpool.8 b/contrib/opensolaris/cmd/zpool/zpool.8
deleted file mode 100644
index 95ae008..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool.8
+++ /dev/null
@@ -1,1140 +0,0 @@
-'\" te
-.\" CDDL HEADER START
-.\"
-.\" The contents of this file are subject to the terms of the
-.\" Common Development and Distribution License (the "License").
-.\" You may not use this file except in compliance with the License.
-.\"
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-.\" or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions
-.\" and limitations under the License.
-.\"
-.\" When distributing Covered Code, include this CDDL HEADER in each
-.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-.\" If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying
-.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.\"
-.\" CDDL HEADER END
-.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved.
-.TH zpool 1M "14 Nov 2006" "SunOS 5.11" "System Administration Commands"
-.SH NAME
-zpool \- configures ZFS storage pools
-.SH SYNOPSIS
-.LP
-.nf
-\fBzpool\fR [\fB-?\fR]
-.fi
-
-.LP
-.nf
-\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR
-.fi
-
-.LP
-.nf
-\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
-.fi
-
-.LP
-.nf
-\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR
-.fi
-
-.LP
-.nf
-\fBzpool remove\fR \fIpool\fR \fIvdev\fR
-.fi
-
-.LP
-.nf
-\fBzpool \fR \fBlist\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]*] [\fIpool\fR] ...
-.fi
-
-.LP
-.nf
-\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]
-.fi
-
-.LP
-.nf
-\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
-.fi
-
-.LP
-.nf
-\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
-.fi
-
-.LP
-.nf
-\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
-.fi
-
-.LP
-.nf
-\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...
-.fi
-
-.LP
-.nf
-\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
-.fi
-
-.LP
-.nf
-\fBzpool detach\fR \fIpool\fR \fIdevice\fR
-.fi
-
-.LP
-.nf
-\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
-.fi
-
-.LP
-.nf
-\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
-.fi
-
-.LP
-.nf
-\fBzpool export\fR [\fB-f\fR] \fIpool\fR
-.fi
-
-.LP
-.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
-.fi
-
-.LP
-.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o \fIopts\fR\fR] [\fB-R \fR\fIroot\fR] \fIpool\fR | \fIid\fR
- [\fInewpool\fR]
-.fi
-
-.LP
-.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]
-.fi
-
-.LP
-.nf
-\fBzpool upgrade\fR
-.fi
-
-.LP
-.nf
-\fBzpool upgrade\fR \fB-v\fR
-.fi
-
-.LP
-.nf
-\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]
-.fi
-
-.LP
-.nf
-\fBzpool history\fR [\fIpool\fR] ...
-.fi
-
-.SH DESCRIPTION
-.LP
-The \fBzpool\fR command configures \fBZFS\fR storage pools. A storage pool is a collection of devices that provides physical storage and data replication for \fBZFS\fR datasets.
-.LP
-All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
-.SS "Virtual Devices (vdevs)"
-.LP
-A "virtual device" describes a single device or a collection of devices organized according to certain performance and fault characteristics. The following virtual devices are supported:
-.sp
-.ne 2
-.mk
-.na
-\fBdisk\fR
-.ad
-.RS 10n
-.rt
-A block device, typically located under "/dev/dsk". \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion
-of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBfile\fR
-.ad
-.RS 10n
-.rt
-A regular file. The use of files as a backing store is strongly discouraged. It is designed primarily for experimental purposes, as the fault tolerance of a file is only as good as the file system of which it is a part. A file must be specified by a full path.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBmirror\fR
-.ad
-.RS 10n
-.rt
-A mirror of two or more devices. Data is replicated in an identical fashion across all components of a mirror. A mirror with \fIN\fR disks of size \fIX\fR can hold \fIX\fR bytes and can withstand (\fIN-1\fR)
-devices failing before data integrity is compromised.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBraidz\fR
-.ad
-.br
-.na
-\fBraidz1\fR
-.ad
-.br
-.na
-\fBraidz2\fR
-.ad
-.RS 10n
-.rt
-A variation on \fBRAID-5\fR that allows for better distribution of parity and eliminates the "\fBRAID-5\fR write hole" (in which data and parity become inconsistent after a power loss). Data and parity is striped across all disks within a \fBraidz\fR group.
-.sp
-A \fBraidz\fR group can have either single- or double-parity, meaning that the \fBraidz\fR group can sustain one or two failures respectively without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group
-and the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
-.sp
-A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand one device failing before
-data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fBspare\fR
-.ad
-.RS 10n
-.rt
-A special pseudo-\fBvdev\fR which keeps track of available hot spares for a pool. For more information, see the "Hot Spares" section.
-.RE
-
-.LP
-Virtual devices cannot be nested, so a mirror or \fBraidz\fR virtual device can only contain files or disks. Mirrors of mirrors (or other combinations) are not allowed.
-.LP
-A pool can have any number of virtual devices at the top of the configuration (known as "root vdevs"). Data is dynamically distributed across all top-level devices to balance data among devices. As new virtual devices are added, \fBZFS\fR automatically places data
-on the newly available devices.
-.LP
-Virtual devices are specified one at a time on the command line, separated by whitespace. The keywords "mirror" and "raidz" are used to distinguish where a group ends and another begins. For example, the following creates two root vdevs, each a mirror of two disks:
-.sp
-.in +2
-.nf
-\fB# zpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
-.fi
-.in -2
-.sp
-
-.SS "Device Failure and Recovery"
-.LP
-\fBZFS\fR supports a rich set of mechanisms for handling device failure and data corruption. All metadata and data is checksummed, and \fBZFS\fR automatically repairs bad data from a good copy when corruption is detected.
-.LP
-In order to take advantage of these features, a pool must make use of some form of redundancy, using either mirrored or \fBraidz\fR groups. While \fBZFS\fR supports running in a non-redundant configuration, where each root vdev is simply a disk or file, this is
-strongly discouraged. A single case of bit corruption can render some or all of your data unavailable.
-.LP
-A pool's health status is described by one of three states: online, degraded, or faulted. An online pool has all devices operating normally. A degraded pool is one in which one or more devices have failed, but the data is still available due to a redundant configuration. A faulted pool has
-one or more failed devices, and there is insufficient redundancy to replicate the missing data.
-.SS "Hot Spares"
-.LP
-\fBZFS\fR allows devices to be associated with pools as "hot spares". These devices are not actively used in the pool, but when an active device fails, it is automatically replaced by a hot spare. To create a pool with hot spares, specify a "spare" \fBvdev\fR with any number of devices. For example,
-.sp
-.in +2
-.nf
-# zpool create pool mirror c0d0 c1d0 spare c2d0 c3d0
-.fi
-.in -2
-.sp
-
-.LP
-Spares can be shared across multiple pools, and can be added with the "zpool add" command and removed with the "zpool remove" command. Once a spare replacement is initiated, a new "spare" \fBvdev\fR is created within the configuration that
-will remain there until the original device is replaced. At this point, the hot spare becomes available again if another device fails.
-.LP
-An in-progress spare replacement can be cancelled by detaching the hot spare. If the original faulted device is detached, then the hot spare assumes its place in the configuration, and is removed from the spare list of all active pools.
-.SS "Alternate Root Pools"
-.LP
-The "zpool create -R" and "zpool import -R" commands allow users to create and import a pool with a different root path. By default, whenever a pool is created or imported on a system, it is permanently added so that it is available whenever the system boots. For
-removable media, or when in recovery situations, this may not always be desirable. An alternate root pool does not persist on the system. Instead, it exists only until exported or the system is rebooted, at which point it will have to be imported again.
-.LP
-In addition, all mount points in the pool are prefixed with the given root, so a pool can be constrained to a particular area of the file system. This is most useful when importing unknown pools from removable media, as the mount points of any file systems cannot be trusted.
-.LP
-When creating an alternate root pool, the default mount point is "/", rather than the normal default "/\fIpool\fR".
-.SS "Subcommands"
-.LP
-All subcommands that modify state are logged persistently to the pool in their original form.
-.LP
-The \fBzpool\fR command provides subcommands to create and destroy storage pools, add capacity to storage pools, and provide information about the storage pools. The following subcommands are supported:
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool\fR \fB-?\fR\fR
-.ad
-.sp .6
-.RS 4n
-Displays a help message.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR\fR
-.ad
-.sp .6
-.RS 4n
-Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool
-names "mirror", "raidz", and "spare" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
-.sp
-The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by \fBZFS\fR. Other uses,
-such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
-.sp
-The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within
-a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
-.sp
-Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 17n
-.rt
-Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-n\fR\fR
-.ad
-.RS 17n
-.rt
-Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-R\fR \fIroot\fR\fR
-.ad
-.RS 17n
-.rt
-Creates the pool with an alternate \fIroot\fR. See the "Alternate Root Pools" section. The root dataset has its mount point set to "/" as part of this operation.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-m\fR \fImountpoint\fR\fR
-.ad
-.RS 17n
-.rt
-Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR". The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount
-points, see \fBzfs\fR(1M).
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
-.ad
-.sp .6
-.RS 4n
-Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces any active datasets contained within the pool to be unmounted.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev ...\fR\fR
-.ad
-.sp .6
-.RS 4n
-Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create"
-subcommand.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-n\fR\fR
-.ad
-.RS 6n
-.rt
-Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-Do not add a disk that is currently configured as a quorum device to a zpool. Once a disk is in a zpool, that disk can then be configured as a quorum device.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool remove\fR \fIpool\fR \fIvdev\fR\fR
-.ad
-.sp .6
-.RS 4n
-Removes the given \fBvdev\fR from the pool. This command currently only supports removing hot spares. Devices which are part of a mirror can be removed using the "zpool detach" command. \fBRaidz\fR and top-level \fBvdevs\fR cannot
-be removed from a pool.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield*\fR]] [\fIpool\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-H\fR\fR
-.ad
-.RS 12n
-.rt
-Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIfield\fR\fR
-.ad
-.RS 12n
-.rt
-Comma-separated list of fields to display. Each field must be one of:
-.sp
-.in +2
-.nf
-name Pool name
-size Total size
-used Amount of space used
-available Amount of space available
-capacity Percentage of pool space used
-health Health status
-.fi
-.in -2
-.sp
-
-The default is all fields.
-.RE
-
-This command reports actual physical space available to the storage pool. The physical space can be different from the total amount of space that any contained datasets can actually use. The amount of space used in a \fBraidz\fR configuration depends on the characteristics of
-the data being written. In addition, \fBZFS\fR reserves some space for internal accounting that the \fBzfs\fR(1M) command takes into account, but the \fBzpool\fR command does not. For non-full pools of a reasonable size, these effects should be invisible. For small pools, or pools that are close to being completely full, these discrepancies may become more noticeable.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]\fR
-.ad
-.sp .6
-.RS 4n
-Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for
-every pool in the system are shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed.
-.sp
-If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-x\fR\fR
-.ad
-.RS 6n
-.rt
-Only display status for pools that are exhibiting errors or are otherwise unavailable.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
-.sp
-This command is not applicable to spares.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-t\fR\fR
-.ad
-.RS 6n
-.rt
-Temporary. Upon reboot, the specified physical device reverts to its previous state.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Brings the specified physical device online.
-.sp
-This command is not applicable to spares.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices are specified, only those errors associated with the specified device or devices are cleared.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
-.ad
-.sp .6
-.RS 4n
-Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically
-transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
-.ad
-.sp .6
-.RS 4n
-Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
-.ad
-.sp .6
-.RS 4n
-Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
-.sp
-The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
-.sp
-If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path
-as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool
-status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
-.sp
-Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to
-discover silent errors due to hardware faults or disk failure.
-.sp
-Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR\fR
-.ad
-.RS 6n
-.rt
-Stop scrubbing.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
-.sp
-Before exporting the pool, all datasets within the pool are unmounted.
-.sp
-For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]\fR
-.ad
-.sp .6
-.RS 4n
-Lists pools available to import. If the \fB-d\fR option is not specified, this command searches for devices in "/dev/dsk". The \fB-d\fR option can be specified multiple times, and all directories are searched. If the device appears to be part of
-an exported pool, this command displays a summary of the pool with the name of the pool, a numeric identifier, as well as the \fIvdev\fR layout and current health of the device for each device or file. Destroyed pools, pools that were previously destroyed with the "\fBzpool destroy\fR" command, are not listed unless the \fB-D\fR option is specified.
-.sp
-The numeric identifier is unique, and can be used instead of the pool name when multiple exported pools of the same name are available.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-d\fR \fIdir\fR\fR
-.ad
-.RS 10n
-.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-D\fR\fR
-.ad
-.RS 10n
-.rt
-Lists destroyed pools only.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o\fR \fIopts\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR | \fIid\fR [\fInewpool\fR]\fR
-.ad
-.sp .6
-.RS 4n
-Imports a specific pool. A pool can be identified by its name or the numeric identifier. If \fInewpool\fR is specified, the pool is imported using the name \fInewpool\fR. Otherwise, it is imported with the same name as its exported name.
-.sp
-If a device is removed from a system without running "\fBzpool export\fR" first, the device appears as potentially active. It cannot be determined if this was a failed export, or whether the device is really in use from another host. To import a pool in this state,
-the \fB-f\fR option is required.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-d\fR \fIdir\fR\fR
-.ad
-.RS 11n
-.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-D\fR\fR
-.ad
-.RS 11n
-.rt
-Imports destroyed pool. The \fB-f\fR option is also required.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 11n
-.rt
-Forces import, even if the pool appears to be potentially active.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIopts\fR\fR
-.ad
-.RS 11n
-.rt
-Comma-separated list of mount options to use when mounting datasets within the pool. See \fBzfs\fR(1M) for a description of dataset properties and mount
-options.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-R\fR \fIroot\fR\fR
-.ad
-.RS 11n
-.rt
-Imports pool(s) with an alternate \fIroot\fR. See the "Alternate Root Pools" section.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]\fR
-.ad
-.sp .6
-.RS 4n
-Imports all pools found in the search directories. Identical to the previous command, except that all pools with a sufficient number of devices available are imported. Destroyed pools, pools that were previously destroyed with the "\fBzpool destroy\fR" command,
-will not be imported unless the \fB-D\fR option is specified.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-d\fR \fIdir\fR\fR
-.ad
-.RS 10n
-.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-D\fR\fR
-.ad
-.RS 10n
-.rt
-Imports destroyed pools only. The \fB-f\fR option is also required.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 10n
-.rt
-Forces import, even if the pool appears to be potentially active.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool upgrade\fR\fR
-.ad
-.sp .6
-.RS 4n
-Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with
-a more recent version are also displayed, although these pools will be inaccessible on the system.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool upgrade\fR \fB-v\fR\fR
-.ad
-.sp .6
-.RS 4n
-Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]\fR
-.ad
-.sp .6
-.RS 4n
-Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-a\fR\fR
-.ad
-.RS 6n
-.rt
-Upgrades all pools.
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool history\fR [\fIpool\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Displays the command history of the specified pools (or all pools if no pool is specified).
-.RE
-
-.SH EXAMPLES
-.LP
-\fBExample 1 \fRCreating a RAID-Z Storage Pool
-.LP
-The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR that consists of six disks.
-
-.sp
-.in +2
-.nf
-\fB# zpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 2 \fRCreating a Mirrored Storage Pool
-.LP
-The following command creates a pool with two mirrors, where each mirror contains two disks.
-
-.sp
-.in +2
-.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 3 \fRCreating a ZFS Storage Pool by Using Slices
-.LP
-The following command creates an unmirrored pool using two disk slices.
-
-.sp
-.in +2
-.nf
-\fB# zpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 4 \fRCreating a ZFS Storage Pool by Using Files
-.LP
-The following command creates an unmirrored pool using files. While not recommended, a pool based on files can be useful for experimental purposes.
-
-.sp
-.in +2
-.nf
-\fB# zpool create tank /path/to/file/a /path/to/file/b\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 5 \fRAdding a Mirror to a ZFS Storage Pool
-.LP
-The following command adds two mirrored disks to the pool "\fItank\fR", assuming the pool is already made up of two-way mirrors. The additional space is immediately available to any datasets within the pool.
-
-.sp
-.in +2
-.nf
-\fB# zpool add tank mirror c1t0d0 c1t1d0\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 6 \fRListing Available ZFS Storage Pools
-.LP
-The following command lists all available pools on the system. In this case, the pool \fIzion\fR is faulted due to a missing device.
-
-.LP
-The results from this command are similar to the following:
-
-.sp
-.in +2
-.nf
-\fB# zpool list\fR
- NAME SIZE USED AVAIL CAP HEALTH ALTROOT
- pool 67.5G 2.92M 67.5G 0% ONLINE -
- tank 67.5G 2.92M 67.5G 0% ONLINE -
- zion - - - 0% FAULTED -
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 7 \fRDestroying a ZFS Storage Pool
-.LP
-The following command destroys the pool "\fItank\fR" and any datasets contained within.
-
-.sp
-.in +2
-.nf
-\fB# zpool destroy -f tank\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 8 \fRExporting a ZFS Storage Pool
-.LP
-The following command exports the devices in pool \fItank\fR so that they can be relocated or later imported.
-
-.sp
-.in +2
-.nf
-\fB# zpool export tank\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 9 \fRImporting a ZFS Storage Pool
-.LP
-The following command displays available pools, and then imports the pool "tank" for use on the system.
-
-.LP
-The results from this command are similar to the following:
-
-.sp
-.in +2
-.nf
-\fB# zpool import\fR
- pool: tank
- id: 15451357997522795478
-state: ONLINE
-action: The pool can be imported using its name or numeric identifier.
-config:
-
- tank ONLINE
- mirror ONLINE
- c1t2d0 ONLINE
- c1t3d0 ONLINE
-
-\fB# zpool import tank\fR
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 10 \fRUpgrading All ZFS Storage Pools to the Current Version
-.LP
-The following command upgrades all ZFS Storage pools to the current version of the software.
-
-.sp
-.in +2
-.nf
-\fB# zpool upgrade -a\fR
-This system is currently running ZFS version 2.
-.fi
-.in -2
-.sp
-
-.LP
-\fBExample 11 \fRManaging Hot Spares
-.LP
-The following command creates a new pool with an available hot spare:
-
-.sp
-.in +2
-.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
-.fi
-.in -2
-.sp
-
-.LP
-If one of the disks were to fail, the pool would be reduced to the degraded state. The failed device can be replaced using the following command:
-
-.sp
-.in +2
-.nf
-\fB# zpool replace tank c0t0d0 c0t3d0\fR
-.fi
-.in -2
-.sp
-
-.LP
-Once the data has been resilvered, the spare is automatically removed and is made available should another device fail. The hot spare can be permanently removed from the pool using the following command:
-
-.sp
-.in +2
-.nf
-\fB# zpool remove tank c0t2d0\fR
-.fi
-.in -2
-.sp
-
-.SH EXIT STATUS
-.LP
-The following exit values are returned:
-.sp
-.ne 2
-.mk
-.na
-\fB\fB0\fR\fR
-.ad
-.RS 5n
-.rt
-Successful completion.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB1\fR\fR
-.ad
-.RS 5n
-.rt
-An error occurred.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB2\fR\fR
-.ad
-.RS 5n
-.rt
-Invalid command line options were specified.
-.RE
-
-.SH ATTRIBUTES
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-tab(:) box;
-cw(2.75i) |cw(2.75i)
-lw(2.75i) |lw(2.75i)
-.
-ATTRIBUTE TYPE:ATTRIBUTE VALUE
-_
-Availability:SUNWzfsu
-_
-Interface Stability:Evolving
-.TE
-
-.SH SEE ALSO
-.LP
-\fBzfs\fR(1M), \fBattributes\fR(5)
diff --git a/contrib/opensolaris/cmd/zpool/zpool_iter.c b/contrib/opensolaris/cmd/zpool/zpool_iter.c
deleted file mode 100644
index f724179..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool_iter.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <solaris.h>
-#include <libintl.h>
-#include <libuutil.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-
-#include <libzfs.h>
-
-#include "zpool_util.h"
-
-/*
- * Private interface for iterating over pools specified on the command line.
- * Most consumers will call for_each_pool, but in order to support iostat, we
- * allow fine-grained control through the zpool_list_t interface.
- */
-
-typedef struct zpool_node {
- zpool_handle_t *zn_handle; /* pool handle owned by this list entry */
- uu_avl_node_t zn_avlnode; /* linkage into the list's zl_avl tree */
- int zn_mark; /* not used anywhere in this file */
-} zpool_node_t;
-
-struct zpool_list {
- boolean_t zl_findall; /* B_TRUE when built with argc == 0 */
- uu_avl_t *zl_avl; /* tree of zpool_node_t, compared by pool name */
- uu_avl_pool_t *zl_pool; /* uu_avl pool that zl_avl is created from */
-};
-
-/* ARGSUSED */
-static int
-zpool_compare(const void *larg, const void *rarg, void *unused)
-{
- zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle;
- zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle;
- const char *lname = zpool_get_name(l);
- const char *rname = zpool_get_name(r);
-
- return (strcmp(lname, rname)); /* nodes compare by pool name */
-}
-
-/*
- * Passed to zpool_iter() and called by pool_list_get(). Adds zhp to the AVL
- * tree and returns 0; if the pool is already there, closes zhp and returns -1.
- */
-static int
-add_pool(zpool_handle_t *zhp, void *data)
-{
- zpool_list_t *zlp = data;
- zpool_node_t *node = safe_malloc(sizeof (zpool_node_t));
- uu_avl_index_t idx;
-
- node->zn_handle = zhp;
- uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
- if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
- uu_avl_insert(zlp->zl_avl, node, idx);
- } else {
- zpool_close(zhp);
- free(node);
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Create a list of pools based on the given arguments. If we're given no
- * arguments, then iterate over all pools in the system and add them to the AVL
- * tree. Otherwise, add only those pools named on the command line, and set
- * *err to B_TRUE (it is never cleared here) when one fails to open or add.
- */
-zpool_list_t *
-pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
-{
- zpool_list_t *zlp;
-
- zlp = safe_malloc(sizeof (zpool_list_t));
-
- zlp->zl_pool = uu_avl_pool_create("zfs_pool", sizeof (zpool_node_t),
- offsetof(zpool_node_t, zn_avlnode), zpool_compare, UU_DEFAULT);
-
- if (zlp->zl_pool == NULL)
- zpool_no_memory();
-
- if ((zlp->zl_avl = uu_avl_create(zlp->zl_pool, NULL,
- UU_DEFAULT)) == NULL)
- zpool_no_memory();
-
- if (argc == 0) {
- (void) zpool_iter(g_zfs, add_pool, zlp);
- zlp->zl_findall = B_TRUE;
- } else {
- int i;
-
- for (i = 0; i < argc; i++) {
- zpool_handle_t *zhp;
-
- if ((zhp = zpool_open_canfail(g_zfs,
- argv[i])) != NULL && add_pool(zhp, zlp) == 0) {
- if (proplist &&
- zpool_expand_proplist(zhp, proplist) != 0)
- *err = B_TRUE;
- } else
- *err = B_TRUE;
- }
- }
-
- return (zlp);
-}
-
-/*
- * Search for any new pools, adding them to the list. We only add pools when
- * the list was built with no pool arguments (zl_findall is set). Otherwise, we
- * keep the list fixed as those that were explicitly specified.
- */
-void
-pool_list_update(zpool_list_t *zlp)
-{
- if (zlp->zl_findall)
- (void) zpool_iter(g_zfs, add_pool, zlp);
-}
-
-/*
- * Call func on each listed pool (UNAVAIL ones only if unavail); ORs results.
- */
-int
-pool_list_iter(zpool_list_t *zlp, int unavail, zpool_iter_f func,
- void *data)
-{
- zpool_node_t *node, *next_node;
- int ret = 0;
-
- for (node = uu_avl_first(zlp->zl_avl); node != NULL; node = next_node) {
- next_node = uu_avl_next(zlp->zl_avl, node); /* taken before func runs */
- if (zpool_get_state(node->zn_handle) != POOL_STATE_UNAVAIL ||
- unavail)
- ret |= func(node->zn_handle, data);
- }
-
- return (ret);
-}
-
-/*
- * Remove the given pool from the list, closing the handle held by its node and
- * freeing the node. When running iostat, we drop pools that no longer exist.
- */
-void
-pool_list_remove(zpool_list_t *zlp, zpool_handle_t *zhp)
-{
- zpool_node_t search, *node;
-
- search.zn_handle = zhp;
- if ((node = uu_avl_find(zlp->zl_avl, &search, NULL, NULL)) != NULL) {
- uu_avl_remove(zlp->zl_avl, node);
- zpool_close(node->zn_handle);
- free(node);
- }
-}
-
-/*
- * Close and free every node in the list, then destroy the tree, pool, and zlp.
- */
-void
-pool_list_free(zpool_list_t *zlp)
-{
- uu_avl_walk_t *walk;
- zpool_node_t *node;
-
- if ((walk = uu_avl_walk_start(zlp->zl_avl, UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
-
- while ((node = uu_avl_walk_next(walk)) != NULL) {
- uu_avl_remove(zlp->zl_avl, node);
- zpool_close(node->zn_handle);
- free(node);
- }
-
- uu_avl_walk_end(walk);
- uu_avl_destroy(zlp->zl_avl);
- uu_avl_pool_destroy(zlp->zl_pool);
-
- free(zlp);
-}
-
-/*
- * Returns the number of nodes (pools) currently in the list's AVL tree.
- */
-int
-pool_list_count(zpool_list_t *zlp)
-{
- return (uu_avl_numnodes(zlp->zl_avl));
-}
-
-/*
- * Run func on each pool named on the command line (all pools if none), via the
- * pool_list_* interfaces. Returns nonzero if a pool or func reported failure.
- */
-int
-for_each_pool(int argc, char **argv, boolean_t unavail,
- zpool_proplist_t **proplist, zpool_iter_f func, void *data)
-{
- zpool_list_t *list;
- int ret = 0;
-
- if ((list = pool_list_get(argc, argv, proplist, &ret)) == NULL)
- return (1);
-
- if (pool_list_iter(list, unavail, func, data) != 0)
- ret = 1;
-
- pool_list_free(list);
-
- return (ret);
-}
diff --git a/contrib/opensolaris/cmd/zpool/zpool_main.c b/contrib/opensolaris/cmd/zpool/zpool_main.c
deleted file mode 100644
index 5b1d856..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ /dev/null
@@ -1,3602 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <solaris.h>
-#include <assert.h>
-#include <ctype.h>
-#include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libgen.h>
-#include <libintl.h>
-#include <libuutil.h>
-#include <locale.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <unistd.h>
-#include <priv.h>
-#include <sys/time.h>
-#include <sys/fs/zfs.h>
-
-#include <sys/stat.h>
-
-#include <libzfs.h>
-
-#include "zpool_util.h"
-
-/*
- * Sub-command handlers.  They are declared in the same order in which they
- * are listed in command_table; each one takes an (argc, argv) pair and
- * returns an int status.
- */
-static int zpool_do_create(int argc, char **argv);
-static int zpool_do_destroy(int argc, char **argv);
-
-static int zpool_do_add(int argc, char **argv);
-static int zpool_do_remove(int argc, char **argv);
-
-static int zpool_do_list(int argc, char **argv);
-static int zpool_do_iostat(int argc, char **argv);
-static int zpool_do_status(int argc, char **argv);
-
-static int zpool_do_online(int argc, char **argv);
-static int zpool_do_offline(int argc, char **argv);
-static int zpool_do_clear(int argc, char **argv);
-
-static int zpool_do_attach(int argc, char **argv);
-static int zpool_do_detach(int argc, char **argv);
-static int zpool_do_replace(int argc, char **argv);
-
-static int zpool_do_scrub(int argc, char **argv);
-
-static int zpool_do_import(int argc, char **argv);
-static int zpool_do_export(int argc, char **argv);
-
-static int zpool_do_upgrade(int argc, char **argv);
-
-static int zpool_do_history(int argc, char **argv);
-
-static int zpool_do_get(int argc, char **argv);
-static int zpool_do_set(int argc, char **argv);
-
-/*
- * libumem hook: supplies the default for the allocator's debugging
- * facilities.  Together with _umem_logging_init() it provides a reasonable
- * set of defaults for the allocator.
- */
-const char *
-_umem_debug_init(void)
-{
-	static const char debug_settings[] = "default,verbose";
-
-	return (debug_settings);	/* $UMEM_DEBUG setting */
-}
-
-/*
- * libumem hook: supplies the default for the allocator's logging
- * facilities.
- */
-const char *
-_umem_logging_init(void)
-{
-	static const char logging_settings[] = "fail,contents";
-
-	return (logging_settings);	/* $UMEM_LOGGING setting */
-}
-
-typedef enum { /* selects one usage string in get_usage(); one value per sub-command */
- HELP_ADD,
- HELP_ATTACH,
- HELP_CLEAR,
- HELP_CREATE,
- HELP_DESTROY,
- HELP_DETACH,
- HELP_EXPORT,
- HELP_HISTORY,
- HELP_IMPORT,
- HELP_IOSTAT,
- HELP_LIST,
- HELP_OFFLINE,
- HELP_ONLINE,
- HELP_REPLACE,
- HELP_REMOVE,
- HELP_SCRUB,
- HELP_STATUS,
- HELP_UPGRADE,
- HELP_GET,
- HELP_SET
-} zpool_help_t; /* stored in zpool_command_t.usage */
-
-
-typedef struct zpool_command { /* one row of command_table */
- const char *name; /* sub-command name; NULL marks a blank separator row */
- int (*func)(int, char **); /* handler taking (argc, argv) */
- zpool_help_t usage; /* selector handed to get_usage() */
-} zpool_command_t;
-
-/*
- * Master command table. Each ZFS command has a name, associated function, and
- * usage message. The usage messages need to be internationalized, so we have
- * to have a function to return the usage message based on a command index.
- *
- * These commands are organized according to how they are displayed in the usage
- * message. An empty command (one with a NULL name) indicates an empty line in
- * the generic usage message.
- *
- * Rows written as { NULL } leave 'func' and 'usage' zero-initialised; usage()
- * tests 'name' for NULL before it reads 'usage' from a row.
- */
-static zpool_command_t command_table[] = {
- { "create", zpool_do_create, HELP_CREATE },
- { "destroy", zpool_do_destroy, HELP_DESTROY },
- { NULL },
- { "add", zpool_do_add, HELP_ADD },
- { "remove", zpool_do_remove, HELP_REMOVE },
- { NULL },
- { "list", zpool_do_list, HELP_LIST },
- { "iostat", zpool_do_iostat, HELP_IOSTAT },
- { "status", zpool_do_status, HELP_STATUS },
- { NULL },
- { "online", zpool_do_online, HELP_ONLINE },
- { "offline", zpool_do_offline, HELP_OFFLINE },
- { "clear", zpool_do_clear, HELP_CLEAR },
- { NULL },
- { "attach", zpool_do_attach, HELP_ATTACH },
- { "detach", zpool_do_detach, HELP_DETACH },
- { "replace", zpool_do_replace, HELP_REPLACE },
- { NULL },
- { "scrub", zpool_do_scrub, HELP_SCRUB },
- { NULL },
- { "import", zpool_do_import, HELP_IMPORT },
- { "export", zpool_do_export, HELP_EXPORT },
- { "upgrade", zpool_do_upgrade, HELP_UPGRADE },
- { NULL },
- { "history", zpool_do_history, HELP_HISTORY },
- { "get", zpool_do_get, HELP_GET },
- { "set", zpool_do_set, HELP_SET },
-};
-
-#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) /* number of rows, separator rows included */
-
-zpool_command_t *current_command; /* read by usage(): NULL selects the full command listing, otherwise only this command's usage is printed */
-
-static const char * /* returns the gettext()-translated usage text for one sub-command */
-get_usage(zpool_help_t idx) {
- switch (idx) { /* no default case: a value matching no label reaches abort() below */
- case HELP_ADD:
- return (gettext("\tadd [-fn] <pool> <vdev> ...\n"));
- case HELP_ATTACH:
- return (gettext("\tattach [-f] <pool> <device> "
- "<new_device>\n"));
- case HELP_CLEAR:
- return (gettext("\tclear <pool> [device]\n"));
- case HELP_CREATE:
- return (gettext("\tcreate [-fn] [-R root] [-m mountpoint] "
- "<pool> <vdev> ...\n"));
- case HELP_DESTROY:
- return (gettext("\tdestroy [-f] <pool>\n"));
- case HELP_DETACH:
- return (gettext("\tdetach <pool> <device>\n"));
- case HELP_EXPORT:
- return (gettext("\texport [-f] <pool> ...\n"));
- case HELP_HISTORY:
- return (gettext("\thistory [<pool>]\n"));
- case HELP_IMPORT: /* three alternative invocations, one per line */
- return (gettext("\timport [-d dir] [-D]\n"
- "\timport [-d dir] [-D] [-f] [-o opts] [-R root] -a\n"
- "\timport [-d dir] [-D] [-f] [-o opts] [-R root ]"
- " <pool | id> [newpool]\n"));
- case HELP_IOSTAT:
- return (gettext("\tiostat [-v] [pool] ... [interval "
- "[count]]\n"));
- case HELP_LIST:
- return (gettext("\tlist [-H] [-o field[,field]*] "
- "[pool] ...\n"));
- case HELP_OFFLINE:
- return (gettext("\toffline [-t] <pool> <device> ...\n"));
- case HELP_ONLINE:
- return (gettext("\tonline <pool> <device> ...\n"));
- case HELP_REPLACE:
- return (gettext("\treplace [-f] <pool> <device> "
- "[new_device]\n"));
- case HELP_REMOVE:
- return (gettext("\tremove <pool> <device>\n"));
- case HELP_SCRUB:
- return (gettext("\tscrub [-s] <pool> ...\n"));
- case HELP_STATUS:
- return (gettext("\tstatus [-vx] [pool] ...\n"));
- case HELP_UPGRADE: /* three alternative invocations, one per line */
- return (gettext("\tupgrade\n"
- "\tupgrade -v\n"
- "\tupgrade <-a | pool>\n"));
- case HELP_GET:
- return (gettext("\tget <all | property[,property]...> "
- "<pool> ...\n"));
- case HELP_SET:
- return (gettext("\tset <property=value> <pool> \n"));
- }
-
- abort(); /* only reached when idx matched none of the cases above */
- /* NOTREACHED */
-}
-
-/*
- * Fields available for 'zpool list'.
- *
- * The enumerators are declared in the same order as the rows of column_table
- * and the entries of column_subopts (name, size, used, available, capacity,
- * health, root), presumably so that an enumerator can index either array --
- * TODO confirm against the 'list' implementation.
- */
-typedef enum {
- ZPOOL_FIELD_NAME,
- ZPOOL_FIELD_SIZE,
- ZPOOL_FIELD_USED,
- ZPOOL_FIELD_AVAILABLE,
- ZPOOL_FIELD_CAPACITY,
- ZPOOL_FIELD_HEALTH,
- ZPOOL_FIELD_ROOT
-} zpool_field_t;
-
-#define MAX_FIELDS 10 /* NOTE(review): presumably the cap on fields selectable for 'zpool list'; not used in this part of the file -- confirm at the use site */
-
-typedef struct column_def { /* layout of one output column */
- const char *cd_title; /* heading text */
- size_t cd_width; /* column width in characters */
- enum {
- left_justify,
- right_justify
- } cd_justify; /* alignment of the column contents */
-} column_def_t;
-
-static column_def_t column_table[] = { /* one row per zpool_field_t enumerator, in declaration order */
- { "NAME", 20, left_justify },
- { "SIZE", 6, right_justify },
- { "USED", 6, right_justify },
- { "AVAIL", 6, right_justify },
- { "CAP", 5, right_justify },
- { "HEALTH", 9, left_justify },
- { "ALTROOT", 15, left_justify }
-};
-
-static char *column_subopts[] = { /* field names printed by usage() for 'list'; same order as column_table */
- "name",
- "size",
- "used",
- "available",
- "capacity",
- "health",
- "root",
- NULL /* terminator: usage() stops at the first NULL entry */
-};
-
-/*
- * Property-iteration callback.  Writes one line for 'prop' to the stream
- * passed in 'cb': the property name, followed by the string returned by
- * zpool_prop_values(), or "-" when that function returns NULL.  Always
- * returns ZFS_PROP_CONT.
- */
-static zpool_prop_t
-print_prop_cb(zpool_prop_t prop, void *cb)
-{
-	FILE *out = cb;
-
-	(void) fprintf(out, "\t%-13s ", zpool_prop_to_name(prop));
-
-	if (zpool_prop_values(prop) != NULL)
-		(void) fprintf(out, "%s\n", zpool_prop_values(prop));
-	else
-		(void) fprintf(out, "-\n");
-
-	return (ZFS_PROP_CONT);
-}
-
-/*
- * Print a usage message and terminate the process.
- *
- * With no sub-command selected (current_command == NULL) the whole command
- * table is listed, NULL-named rows producing blank lines.  Otherwise only the
- * current sub-command's usage is shown, followed by the list of field names
- * for 'list', or by the table of supported properties for 'set' and 'get'.
- *
- * When the help was 'requested' the text goes to stdout and the exit status
- * is 0; otherwise it goes to stderr and the exit status is 2.  If ZFS_ABORT
- * is set in the environment, abort() is called instead of exiting.
- */
-void
-usage(boolean_t requested)
-{
-	FILE *out;
-	int idx;
-
-	if (requested)
-		out = stdout;
-	else
-		out = stderr;
-
-	if (current_command != NULL) {
-		const char *cmd = current_command->name;
-
-		(void) fprintf(out, gettext("usage:\n"));
-		(void) fprintf(out, "%s", get_usage(current_command->usage));
-
-		if (strcmp(cmd, "list") == 0) {
-			(void) fprintf(out, gettext("\nwhere 'field' is one "
-			    "of the following:\n\n"));
-
-			for (idx = 0; column_subopts[idx] != NULL; idx++)
-				(void) fprintf(out, "\t%s\n",
-				    column_subopts[idx]);
-		}
-
-		if (strcmp(cmd, "set") == 0 || strcmp(cmd, "get") == 0) {
-			(void) fprintf(out,
-			    gettext("\nthe following properties are "
-			    "supported:\n"));
-
-			(void) fprintf(out, "\n\t%-13s %s\n\n",
-			    "PROPERTY", "VALUES");
-
-			/* Iterate over all properties */
-			(void) zpool_prop_iter(print_prop_cb, out, B_FALSE);
-		}
-	} else {
-		(void) fprintf(out, gettext("usage: zpool command args ...\n"));
-		(void) fprintf(out,
-		    gettext("where 'command' is one of the following:\n\n"));
-
-		for (idx = 0; idx < NCOMMAND; idx++) {
-			if (command_table[idx].name != NULL)
-				(void) fprintf(out, "%s",
-				    get_usage(command_table[idx].usage));
-			else
-				(void) fprintf(out, "\n");
-		}
-	}
-
-	/*
-	 * See comments at end of main().
-	 */
-	if (getenv("ZFS_ABORT") != NULL) {
-		(void) printf("dumping core by request\n");
-		abort();
-	}
-
-	exit(requested ? 0 : 2);
-}
-
-/*
- * Translate a vdev state into a coarse health string: "ONLINE" for a healthy
- * vdev, "DEGRADED" for a degraded one, "FAULTED" when it is closed, cannot
- * be opened or is offline, and "UNKNOWN" for any other value.  The strings
- * are looked up with dgettext() in TEXT_DOMAIN.
- */
-const char *
-state_to_health(int vs_state)
-{
-	if (vs_state == VDEV_STATE_HEALTHY)
-		return (dgettext(TEXT_DOMAIN, "ONLINE"));
-
-	if (vs_state == VDEV_STATE_DEGRADED)
-		return (dgettext(TEXT_DOMAIN, "DEGRADED"));
-
-	if (vs_state == VDEV_STATE_CLOSED ||
-	    vs_state == VDEV_STATE_CANT_OPEN ||
-	    vs_state == VDEV_STATE_OFFLINE)
-		return (dgettext(TEXT_DOMAIN, "FAULTED"));
-
-	return (dgettext(TEXT_DOMAIN, "UNKNOWN"));
-}
-
-/*
- * Translate a vdev's state into the name shown to the user.  A closed or
- * unopenable vdev is "FAULTED" when its aux state is VDEV_AUX_CORRUPT_DATA
- * and "UNAVAIL" otherwise; the offline, degraded and healthy states map to
- * "OFFLINE", "DEGRADED" and "ONLINE"; anything else is "UNKNOWN".
- */
-const char *
-state_to_name(vdev_stat_t *vs)
-{
-	if (vs->vs_state == VDEV_STATE_CLOSED ||
-	    vs->vs_state == VDEV_STATE_CANT_OPEN) {
-		if (vs->vs_aux != VDEV_AUX_CORRUPT_DATA)
-			return (gettext("UNAVAIL"));
-		return (gettext("FAULTED"));
-	}
-
-	if (vs->vs_state == VDEV_STATE_OFFLINE)
-		return (gettext("OFFLINE"));
-
-	if (vs->vs_state == VDEV_STATE_DEGRADED)
-		return (gettext("DEGRADED"));
-
-	if (vs->vs_state == VDEV_STATE_HEALTHY)
-		return (gettext("ONLINE"));
-
-	return (gettext("UNKNOWN"));
-}
-
-/*
- * Print 'name' (when it is not NULL) after a tab and 'indent' columns of
- * padding, then recurse into every entry of the ZPOOL_CONFIG_CHILDREN array
- * of 'nv', indenting each level by two more columns.  A vdev without a
- * children array ends the recursion.
- */
-void
-print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
-{
-	nvlist_t **kids;
-	uint_t nkids;
-	uint_t idx;
-
-	if (name != NULL)
-		(void) printf("\t%*s%s\n", indent, "", name);
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &kids, &nkids) != 0)
-		return;
-
-	for (idx = 0; idx < nkids; idx++) {
-		/* zpool_vdev_name() hands back a string we must free. */
-		char *label = zpool_vdev_name(g_zfs, zhp, kids[idx]);
-
-		print_vdev_tree(zhp, label, kids[idx], indent + 2);
-		free(label);
-	}
-}
-
-/*
- * zpool add [-fn] <pool> <vdev> ...
- *
- * -f Force addition of devices, even if they appear in use
- * -n Do not add the devices, but display the resulting layout if
- * they were to be added.
- *
- * Adds the given vdevs to 'pool'. As with create, the bulk of this work is
- * handled by get_vdev_spec(), which constructs the nvlist needed to pass to
- * libzfs.
- *
- * NOTE(review): the function called below is make_root_vdev(); get_vdev_spec()
- * is not referenced directly in this function.
- *
- * Returns 0 after a dry run or a successful zpool_add(), and 1 when the pool
- * cannot be opened, has no configuration, the vdev specification is rejected
- * or zpool_add() fails. Usage errors go through usage(B_FALSE), which exits.
- */
-int
-zpool_do_add(int argc, char **argv)
-{
- boolean_t force = B_FALSE;
- boolean_t dryrun = B_FALSE;
- int c;
- nvlist_t *nvroot;
- char *poolname;
- int ret;
- zpool_handle_t *zhp;
- nvlist_t *config;
-
- /* check options */
- while ((c = getopt(argc, argv, "fn")) != -1) {
- switch (c) {
- case 'f':
- force = B_TRUE;
- break;
- case 'n':
- dryrun = B_TRUE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind; /* skip everything getopt() consumed */
- argv += optind;
-
- /* get pool name and check number of arguments */
- if (argc < 1) {
- (void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(B_FALSE);
- }
- if (argc < 2) {
- (void) fprintf(stderr, gettext("missing vdev specification\n"));
- usage(B_FALSE);
- }
-
- poolname = argv[0];
-
- argc--; /* step past the pool name: argv[0] is now the first vdev argument */
- argv++;
-
- if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
- return (1);
-
- if ((config = zpool_get_config(zhp, NULL)) == NULL) {
- (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
- poolname);
- zpool_close(zhp);
- return (1);
- }
-
- /* pass off to make_root_vdev() for processing */
- nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv);
- if (nvroot == NULL) {
- zpool_close(zhp);
- return (1);
- }
-
- if (dryrun) {
- nvlist_t *poolnvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &poolnvroot) == 0);
-
- (void) printf(gettext("would update '%s' to the following "
- "configuration:\n"), zpool_get_name(zhp));
-
- print_vdev_tree(zhp, poolname, poolnvroot, 0); /* the pool's existing vdev tree */
- print_vdev_tree(zhp, NULL, nvroot, 0); /* followed by the vdevs that would be added */
-
- ret = 0;
- } else {
- ret = (zpool_add(zhp, nvroot) != 0);
- if (!ret) {
- zpool_log_history(g_zfs, argc + 1 + optind, /* undo both shifts applied above (pool name, then optind) */
- argv - 1 - optind, poolname, B_TRUE, B_FALSE);
- }
- }
-
- nvlist_free(nvroot);
- zpool_close(zhp);
-
- return (ret);
-}
-
-/*
- * zpool remove <pool> <vdev>
- *
- * Removes the given vdev from the pool.  Currently, this only supports removing
- * spares from the pool.  Eventually, we'll want to support removing leaf vdevs
- * (as an alias for 'detach') as well as toplevel vdevs.
- *
- * No options are parsed here; argv[0] is skipped by hand, after which argv[0]
- * is the pool name and argv[1] the device.  Usage errors are reported through
- * usage(B_FALSE), which exits.
- *
- * Returns 0 when zpool_vdev_remove() succeeds and 1 when the pool cannot be
- * opened or the removal fails.  The pool handle is closed before returning.
- */
-int
-zpool_do_remove(int argc, char **argv)
-{
-	char *poolname;
-	int ret;
-	zpool_handle_t *zhp;
-
-	argc--;
-	argv++;
-
-	/* get pool name and check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing device\n"));
-		usage(B_FALSE);
-	}
-
-	poolname = argv[0];
-
-	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
-		return (1);
-
-	ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
-	if (!ret) {
-		/* Undo the shift above so the full command line is logged. */
-		zpool_log_history(g_zfs, ++argc, --argv, poolname, B_TRUE,
-		    B_FALSE);
-	}
-
-	/* Release the handle; the other handlers here do the same. */
-	zpool_close(zhp);
-
-	return (ret);
-}
-
-/*
- * zpool create [-fn] [-R root] [-m mountpoint] <pool> <dev> ...
- *
- *	-f	Force creation, even if devices appear in use
- *	-n	Do not create the pool, but display the resulting layout if it
- *		were to be created.
- *	-R	Create a pool under an alternate root
- *	-m	Set default mountpoint for the root dataset.  By default it's
- *		'/<pool>'
- *
- * Creates the named pool according to the given vdev specification.  The
- * vdev arguments are turned into an nvlist by make_root_vdev(); that nvlist
- * is then either printed (if '-n' was specified) or passed to libzfs to do
- * the creation.
- *
- * Returns 0 after a dry run.  Otherwise returns the result of
- * zfs_share_nfs() once the pool has been created and its root dataset opened
- * and mounted, and 1 on any earlier failure.  Usage errors are reported
- * through usage(B_FALSE), which exits.  The vdev nvlist is freed on every
- * path that leaves the function after it has been built.
- */
-int
-zpool_do_create(int argc, char **argv)
-{
-	boolean_t force = B_FALSE;
-	boolean_t dryrun = B_FALSE;
-	int c;
-	nvlist_t *nvroot;
-	char *poolname;
-	int ret;
-	char *altroot = NULL;
-	char *mountpoint = NULL;
-	nvlist_t **child;
-	uint_t children;
-
-	/* check options */
-	while ((c = getopt(argc, argv, ":fnR:m:")) != -1) {
-		switch (c) {
-		case 'f':
-			force = B_TRUE;
-			break;
-		case 'n':
-			dryrun = B_TRUE;
-			break;
-		case 'R':
-			altroot = optarg;
-			break;
-		case 'm':
-			mountpoint = optarg;
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* get pool name and check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing vdev specification\n"));
-		usage(B_FALSE);
-	}
-
-	poolname = argv[0];
-
-	/*
-	 * As a special case, check for use of '/' in the name, and direct the
-	 * user to use 'zfs create' instead.
-	 */
-	if (strchr(poolname, '/') != NULL) {
-		(void) fprintf(stderr, gettext("cannot create '%s': invalid "
-		    "character '/' in pool name\n"), poolname);
-		(void) fprintf(stderr, gettext("use 'zfs create' to "
-		    "create a dataset\n"));
-		return (1);
-	}
-
-	/* pass off to make_root_vdev() for bulk processing */
-	nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1,
-	    argv + 1);
-	if (nvroot == NULL)
-		return (1);
-
-	/* make_root_vdev() allows 0 toplevel children if there are spares */
-	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0);
-	if (children == 0) {
-		(void) fprintf(stderr, gettext("invalid vdev "
-		    "specification: at least one toplevel vdev must be "
-		    "specified\n"));
-		/* nvroot is ours by now; free it like the later error paths */
-		nvlist_free(nvroot);
-		return (1);
-	}
-
-
-	if (altroot != NULL && altroot[0] != '/') {
-		(void) fprintf(stderr, gettext("invalid alternate root '%s': "
-		    "must be an absolute path\n"), altroot);
-		nvlist_free(nvroot);
-		return (1);
-	}
-
-	/*
-	 * Check the validity of the mountpoint and direct the user to use the
-	 * '-m' mountpoint option if it looks like it's in use.
-	 */
-	if (mountpoint == NULL ||
-	    (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
-	    strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
-		char buf[MAXPATHLEN];
-		struct stat64 statbuf;
-
-		if (mountpoint && mountpoint[0] != '/') {
-			(void) fprintf(stderr, gettext("invalid mountpoint "
-			    "'%s': must be an absolute path, 'legacy', or "
-			    "'none'\n"), mountpoint);
-			nvlist_free(nvroot);
-			return (1);
-		}
-
-		/* Build the path the root dataset would be mounted on. */
-		if (mountpoint == NULL) {
-			if (altroot != NULL)
-				(void) snprintf(buf, sizeof (buf), "%s/%s",
-				    altroot, poolname);
-			else
-				(void) snprintf(buf, sizeof (buf), "/%s",
-				    poolname);
-		} else {
-			if (altroot != NULL)
-				(void) snprintf(buf, sizeof (buf), "%s%s",
-				    altroot, mountpoint);
-			else
-				(void) snprintf(buf, sizeof (buf), "%s",
-				    mountpoint);
-		}
-
-		/* A link count other than 2 is taken to mean "not empty". */
-		if (stat64(buf, &statbuf) == 0 &&
-		    statbuf.st_nlink != 2) {
-			if (mountpoint == NULL)
-				(void) fprintf(stderr, gettext("default "
-				    "mountpoint '%s' exists and is not "
-				    "empty\n"), buf);
-			else
-				(void) fprintf(stderr, gettext("mountpoint "
-				    "'%s' exists and is not empty\n"), buf);
-			(void) fprintf(stderr, gettext("use '-m' "
-			    "option to provide a different default\n"));
-			nvlist_free(nvroot);
-			return (1);
-		}
-	}
-
-
-	if (dryrun) {
-		/*
-		 * For a dry run invocation, print out a basic message and run
-		 * through all the vdevs in the list and print out in an
-		 * appropriate hierarchy.
-		 */
-		(void) printf(gettext("would create '%s' with the "
-		    "following layout:\n\n"), poolname);
-
-		print_vdev_tree(NULL, poolname, nvroot, 0);
-
-		ret = 0;
-	} else {
-		ret = 1;
-		/*
-		 * Hand off to libzfs.
-		 */
-		if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) {
-			zfs_handle_t *pool = zfs_open(g_zfs, poolname,
-			    ZFS_TYPE_FILESYSTEM);
-			if (pool != NULL) {
-				if (mountpoint != NULL)
-					verify(zfs_prop_set(pool,
-					    zfs_prop_to_name(
-					    ZFS_PROP_MOUNTPOINT),
-					    mountpoint) == 0);
-				if (zfs_mount(pool, NULL, 0) == 0)
-					ret = zfs_share_nfs(pool);
-				zfs_close(pool);
-			}
-			/* Undo the optind shift to log the full command. */
-			zpool_log_history(g_zfs, argc + optind, argv - optind,
-			    poolname, B_TRUE, B_TRUE);
-		} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
-			(void) fprintf(stderr, gettext("pool name may have "
-			    "been omitted\n"));
-		}
-	}
-
-	nvlist_free(nvroot);
-
-	return (ret);
-}
-
-/*
- * zpool destroy <pool>
- *
- *	-f	Forcefully unmount any datasets
- *
- * Destroy the given pool.  Automatically unmounts any datasets in the pool.
- *
- * Returns 0 when zpool_destroy() succeeds and 1 when the pool cannot be
- * opened, its datasets cannot be disabled or the destroy itself fails.  Usage
- * errors are reported through usage(B_FALSE), which exits.  The pool handle
- * is closed on every path once it has been opened.
- */
-int
-zpool_do_destroy(int argc, char **argv)
-{
-	boolean_t force = B_FALSE;
-	int c;
-	char *pool;
-	zpool_handle_t *zhp;
-	int ret;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "f")) != -1) {
-		switch (c) {
-		case 'f':
-			force = B_TRUE;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* check arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool argument\n"));
-		usage(B_FALSE);
-	}
-	if (argc > 1) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	pool = argv[0];
-
-	if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
-		/*
-		 * As a special case, check for use of '/' in the name, and
-		 * direct the user to use 'zfs destroy' instead.
-		 */
-		if (strchr(pool, '/') != NULL)
-			(void) fprintf(stderr, gettext("use 'zfs destroy' to "
-			    "destroy a dataset\n"));
-		return (1);
-	}
-
-	if (zpool_disable_datasets(zhp, force) != 0) {
-		(void) fprintf(stderr, gettext("could not destroy '%s': "
-		    "could not unmount datasets\n"), zpool_get_name(zhp));
-		/* Do not leak the handle on this failure path. */
-		zpool_close(zhp);
-		return (1);
-	}
-
-	/* The history entry is written before the pool is destroyed. */
-	zpool_log_history(g_zfs, argc + optind, argv - optind, pool, B_TRUE,
-	    B_FALSE);
-
-	ret = (zpool_destroy(zhp) != 0);
-
-	zpool_close(zhp);
-
-	return (ret);
-}
-
-/*
- * zpool export [-f] <pool> ...
- *
- *	-f	Forcefully unmount datasets
- *
- * Export each of the named pools in turn.  Before a pool is exported its
- * datasets are disabled via zpool_disable_datasets(), which is passed the
- * '-f' setting.  A pool that cannot be opened, unmounted or exported does
- * not stop the remaining pools from being processed.
- *
- * Returns 0 when every pool was exported and 1 if any of them failed.
- */
-int
-zpool_do_export(int argc, char **argv)
-{
-	boolean_t force = B_FALSE;
-	zpool_handle_t *zhp;
-	int opt;
-	int status = 0;
-	int idx;
-
-	/* check options */
-	while ((opt = getopt(argc, argv, "f")) != -1) {
-		if (opt == 'f') {
-			force = B_TRUE;
-		} else if (opt == '?') {
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* check arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool argument\n"));
-		usage(B_FALSE);
-	}
-
-	for (idx = 0; idx < argc; idx++) {
-		zhp = zpool_open_canfail(g_zfs, argv[idx]);
-		if (zhp == NULL) {
-			status = 1;
-			continue;
-		}
-
-		if (zpool_disable_datasets(zhp, force) == 0) {
-			/* Log with the original, unshifted command line. */
-			zpool_log_history(g_zfs, argc + optind, argv - optind,
-			    argv[idx], B_TRUE, B_FALSE);
-
-			if (zpool_export(zhp) != 0)
-				status = 1;
-		} else {
-			status = 1;
-		}
-
-		zpool_close(zhp);
-	}
-
-	return (status);
-}
-
-/*
- * Work out how wide the device-name column has to be for the vdev tree
- * rooted at 'nv'.  A vdev found 'depth' columns in needs strlen(name) + depth
- * columns; the spares and then the children of 'nv' are visited recursively
- * with the depth increased by two.  Returns the larger of 'max' and the
- * widest entry found.
- */
-static int
-max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
-{
-	static const char *const groups[] = {
-		ZPOOL_CONFIG_SPARES,
-		ZPOOL_CONFIG_CHILDREN
-	};
-	char *label;
-	size_t needed;
-	nvlist_t **child;
-	uint_t c, children;
-	int g, width;
-
-	label = zpool_vdev_name(g_zfs, zhp, nv);
-	needed = strlen(label) + depth;
-	if (needed > max)
-		max = needed;
-	free(label);
-
-	/* Spares first, then regular children. */
-	for (g = 0; g < 2; g++) {
-		if (nvlist_lookup_nvlist_array(nv, groups[g],
-		    &child, &children) != 0)
-			continue;
-
-		for (c = 0; c < children; c++) {
-			width = max_width(zhp, child[c], depth + 2, max);
-			if (width > max)
-				max = width;
-		}
-	}
-
-	return (max);
-}
-
-
-/*
- * Print the configuration of an exported pool. Iterate over all vdevs in the
- * pool, printing out the name and status for each one.
- *
- * 'name' is printed 'depth' columns in and padded so that name plus indent
- * fill 'namewidth' columns; the state (and, when the aux state is non-zero, a
- * short reason) follows. Children are printed recursively with the depth
- * increased by two; spares of 'nv' are listed by name only.
- */
-void
-print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
-{
- nvlist_t **child;
- uint_t c, children;
- vdev_stat_t *vs;
- char *type, *vname;
-
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
- if (strcmp(type, VDEV_TYPE_MISSING) == 0)
- return; /* vdevs of type VDEV_TYPE_MISSING are not listed */
-
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0); /* the uint64 stats array is viewed as a vdev_stat_t; 'c' only receives the element count */
-
- (void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
-
- if (vs->vs_aux != 0) {
- (void) printf(" %-8s ", state_to_name(vs));
-
- switch (vs->vs_aux) {
- case VDEV_AUX_OPEN_FAILED:
- (void) printf(gettext("cannot open"));
- break;
-
- case VDEV_AUX_BAD_GUID_SUM:
- (void) printf(gettext("missing device"));
- break;
-
- case VDEV_AUX_NO_REPLICAS:
- (void) printf(gettext("insufficient replicas"));
- break;
-
- case VDEV_AUX_VERSION_NEWER:
- (void) printf(gettext("newer version"));
- break;
-
- default: /* every other non-zero aux state is reported as corruption */
- (void) printf(gettext("corrupted data"));
- break;
- }
- } else {
- (void) printf(" %s", state_to_name(vs));
- }
- (void) printf("\n");
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
- return; /* NOTE(review): without a children array the spares lookup below is never reached */
-
- for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
- print_import_config(vname, child[c],
- namewidth, depth + 2);
- free(vname);
- }
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
- &child, &children) != 0)
- return;
-
- (void) printf(gettext("\tspares\n"));
- for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
- (void) printf("\t %s\n", vname);
- free(vname);
- }
-}
-
-/*
- * Display the status for the given pool.
- *
- * 'config' is the configuration of a pool that is a candidate for import.
- * The output consists of the pool name, GUID and health, a "status:" line
- * chosen from the result of zpool_import_status(), an "action:" line chosen
- * from the state of the root vdev, an optional "see:" reference when a
- * message ID was returned, and finally the vdev tree as printed by
- * print_import_config().
- */
-static void
-show_import(nvlist_t *config)
-{
-	uint64_t pool_state;
-	vdev_stat_t *vs;
-	char *name;
-	uint64_t guid;
-	char *msgid;
-	nvlist_t *nvroot;
-	int reason;
-	const char *health;
-	uint_t vsc;
-	int namewidth;
-
-	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
-	    &name) == 0);
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
-	    &guid) == 0);
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-	    &pool_state) == 0);
-	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
-
-	/* The root vdev's stats decide the overall health shown. */
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &vsc) == 0);
-	health = state_to_health(vs->vs_state);
-
-	reason = zpool_import_status(config, &msgid);
-
-	(void) printf(gettext(" pool: %s\n"), name);
-	(void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid);
-	(void) printf(gettext(" state: %s"), health);
-	if (pool_state == POOL_STATE_DESTROYED)
-		(void) printf(gettext(" (DESTROYED)"));
-	(void) printf("\n");
-
-	switch (reason) {
-	case ZPOOL_STATUS_MISSING_DEV_R:
-	case ZPOOL_STATUS_MISSING_DEV_NR:
-	case ZPOOL_STATUS_BAD_GUID_SUM:
-		(void) printf(gettext("status: One or more devices are missing "
-		    "from the system.\n"));
-		break;
-
-	case ZPOOL_STATUS_CORRUPT_LABEL_R:
-	case ZPOOL_STATUS_CORRUPT_LABEL_NR:
-		(void) printf(gettext("status: One or more devices contains "
-		    "corrupted data.\n"));
-		break;
-
-	case ZPOOL_STATUS_CORRUPT_DATA:
-		(void) printf(gettext("status: The pool data is corrupted.\n"));
-		break;
-
-	case ZPOOL_STATUS_OFFLINE_DEV:
-		(void) printf(gettext("status: One or more devices "
-		    "are offlined.\n"));
-		break;
-
-	case ZPOOL_STATUS_CORRUPT_POOL:
-		(void) printf(gettext("status: The pool metadata is "
-		    "corrupted.\n"));
-		break;
-
-	case ZPOOL_STATUS_VERSION_OLDER:
-		(void) printf(gettext("status: The pool is formatted using an "
-		    "older on-disk version.\n"));
-		break;
-
-	case ZPOOL_STATUS_VERSION_NEWER:
-		(void) printf(gettext("status: The pool is formatted using an "
-		    "incompatible version.\n"));
-		break;
-
-	case ZPOOL_STATUS_HOSTID_MISMATCH:
-		(void) printf(gettext("status: The pool was last accessed by "
-		    "another system.\n"));
-		break;
-	default:
-		/*
-		 * No other status can be seen when importing pools.
-		 */
-		assert(reason == ZPOOL_STATUS_OK);
-	}
-
-	/*
-	 * Print out an action according to the overall state of the pool.
-	 */
-	if (vs->vs_state == VDEV_STATE_HEALTHY) {
-		if (reason == ZPOOL_STATUS_VERSION_OLDER)
-			(void) printf(gettext("action: The pool can be "
-			    "imported using its name or numeric identifier, "
-			    "though\n\tsome features will not be available "
-			    "without an explicit 'zpool upgrade'.\n"));
-		else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH)
-			(void) printf(gettext("action: The pool can be "
-			    "imported using its name or numeric "
-			    "identifier and\n\tthe '-f' flag.\n"));
-		else
-			(void) printf(gettext("action: The pool can be "
-			    "imported using its name or numeric "
-			    "identifier.\n"));
-	} else if (vs->vs_state == VDEV_STATE_DEGRADED) {
-		(void) printf(gettext("action: The pool can be imported "
-		    "despite missing or damaged devices. The\n\tfault "
-		    "tolerance of the pool may be compromised if imported.\n"));
-	} else {
-		switch (reason) {
-		case ZPOOL_STATUS_VERSION_NEWER:
-			(void) printf(gettext("action: The pool cannot be "
-			    "imported. Access the pool on a system running "
-			    "newer\n\tsoftware, or recreate the pool from "
-			    "backup.\n"));
-			break;
-		case ZPOOL_STATUS_MISSING_DEV_R:
-		case ZPOOL_STATUS_MISSING_DEV_NR:
-		case ZPOOL_STATUS_BAD_GUID_SUM:
-			(void) printf(gettext("action: The pool cannot be "
-			    "imported. Attach the missing\n\tdevices and try "
-			    "again.\n"));
-			break;
-		default:
-			(void) printf(gettext("action: The pool cannot be "
-			    "imported due to damaged devices or data.\n"));
-		}
-	}
-
-	/*
-	 * If the state is "closed" or "can't open", and the aux state
-	 * is "corrupt data":
-	 */
-	if (((vs->vs_state == VDEV_STATE_CLOSED) ||
-	    (vs->vs_state == VDEV_STATE_CANT_OPEN)) &&
-	    (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) {
-		if (pool_state == POOL_STATE_DESTROYED)
-			(void) printf(gettext("\tThe pool was destroyed, "
-			    "but can be imported using the '-Df' flags.\n"));
-		else if (pool_state != POOL_STATE_EXPORTED)
-			(void) printf(gettext("\tThe pool may be active on "
-			    "another system, but can be imported using\n\t"
-			    "the '-f' flag.\n"));
-	}
-
-	if (msgid != NULL)
-		(void) printf(gettext(" see: http://www.sun.com/msg/%s\n"),
-		    msgid);
-
-	(void) printf(gettext("config:\n\n"));
-
-	/* The name column is at least ten characters wide. */
-	namewidth = max_width(NULL, nvroot, 0, 0);
-	if (namewidth < 10)
-		namewidth = 10;
-	print_import_config(name, nvroot, namewidth, 0);
-
-	if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
-		(void) printf(gettext("\n\tAdditional devices are known to "
-		    "be part of this pool, though their\n\texact "
-		    "configuration cannot be determined.\n"));
-	}
-}
-
-/*
- * Perform the import for the given configuration. This passes the heavy
- * lifting off to zpool_import(), and then mounts the datasets contained within
- * the pool.
- *
- * The import is refused when the pool's version is greater than ZFS_VERSION,
- * or when the pool state is not POOL_STATE_EXPORTED and 'force' is not set
- * -- unless the configuration records a hostid equal to gethostid().
- * 'newname', when not NULL, is passed to zpool_import() and is the name used
- * afterwards for the history entry and for reopening the pool. argc/argv are
- * only handed to zpool_log_history().
- *
- * Returns 0 on success and 1 when the import is refused, zpool_import() fails
- * or the datasets cannot be enabled.
- */
-static int
-do_import(nvlist_t *config, const char *newname, const char *mntopts,
- const char *altroot, int force, int argc, char **argv)
-{
- zpool_handle_t *zhp;
- char *name;
- uint64_t state;
- uint64_t version;
-
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
-
- verify(nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_POOL_STATE, &state) == 0);
- verify(nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_VERSION, &version) == 0);
- if (version > ZFS_VERSION) {
- (void) fprintf(stderr, gettext("cannot import '%s': pool "
- "is formatted using a newer ZFS version\n"), name);
- return (1);
- } else if (state != POOL_STATE_EXPORTED && !force) {
- uint64_t hostid;
-
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
- &hostid) == 0) {
- if ((unsigned long)hostid != gethostid()) { /* a matching hostid falls through to the import below */
- char *hostname;
- uint64_t timestamp;
- time_t t;
-
- verify(nvlist_lookup_string(config,
- ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
- verify(nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_TIMESTAMP, &timestamp) == 0);
- t = timestamp;
- (void) fprintf(stderr, gettext("cannot import "
- "'%s': pool may be in use from other "
- "system, it was last accessed by %s "
- "(hostid: 0x%lx) on %s"), name, hostname,
- (unsigned long)hostid,
- asctime(localtime(&t))); /* the message has no "\n" of its own; asctime()'s string ends in one */
- (void) fprintf(stderr, gettext("use '-f' to "
- "import anyway\n"));
- return (1);
- }
- } else { /* no hostid recorded in the config: refuse unless forced */
- (void) fprintf(stderr, gettext("cannot import '%s': "
- "pool may be in use from other system\n"), name);
- (void) fprintf(stderr, gettext("use '-f' to import "
- "anyway\n"));
- return (1);
- }
- }
-
- if (zpool_import(g_zfs, config, newname, altroot) != 0)
- return (1);
-
- if (newname != NULL)
- name = (char *)newname; /* from here on the pool is referred to by its new name */
-
- zpool_log_history(g_zfs, argc, argv, name, B_TRUE, B_FALSE);
-
- verify((zhp = zpool_open(g_zfs, name)) != NULL);
-
- if (zpool_enable_datasets(zhp, mntopts, 0) != 0) {
- zpool_close(zhp);
- return (1);
- }
-
- zpool_close(zhp);
- return (0);
-}
-
-/*
- * zpool import [-d dir] [-D]
- * import [-o opts] [-R root] [-D] [-d dir] [-f] -a
- * import [-o opts] [-R root] [-D] [-d dir] [-f] <pool | id> [newpool]
- *
- * -d Scan in a specific directory instead of the default (the code
- * below falls back to "/dev" when no -d is given). More than
- * one directory can be specified using multiple '-d' options.
- *
- * -D Scan for previously destroyed pools or import all or only
- * specified destroyed pools.
- *
- * -o Mount options string, handed unchanged to do_import().
- *
- * -R Temporarily import the pool, with all mountpoints relative to
- * the given root. The pool will remain exported when the machine
- * is rebooted.
- *
- * -f Force import, even if it appears that the pool is active.
- *
- * -a Import all pools found.
- *
- * The import command scans for pools to import, and imports pools based on
- * pool name and GUID. The pool can also be renamed as part of the import
- * process.
- *
- * Returns 0 on success and 1 if any step failed.
- */
-int
-zpool_do_import(int argc, char **argv)
-{
- char **searchdirs = NULL;
- int nsearch = 0;
- int c;
- int err;
- nvlist_t *pools;
- boolean_t do_all = B_FALSE;
- boolean_t do_destroyed = B_FALSE;
- char *altroot = NULL;
- char *mntopts = NULL;
- boolean_t do_force = B_FALSE;
- nvpair_t *elem;
- nvlist_t *config;
- uint64_t searchguid;
- char *searchname;
- nvlist_t *found_config;
- boolean_t first;
- uint64_t pool_state;
-
- /* check options */
- while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) {
- switch (c) {
- case 'a':
- do_all = B_TRUE;
- break;
- case 'd':
- /* grow the search-directory array by one slot per -d */
- if (searchdirs == NULL) {
- searchdirs = safe_malloc(sizeof (char *));
- } else {
- char **tmp = safe_malloc((nsearch + 1) *
- sizeof (char *));
- bcopy(searchdirs, tmp, nsearch *
- sizeof (char *));
- free(searchdirs);
- searchdirs = tmp;
- }
- searchdirs[nsearch++] = optarg;
- break;
- case 'D':
- do_destroyed = B_TRUE;
- break;
- case 'f':
- do_force = B_TRUE;
- break;
- case 'o':
- mntopts = optarg;
- break;
- case 'R':
- altroot = optarg;
- break;
- case ':':
- (void) fprintf(stderr, gettext("missing argument for "
- "'%c' option\n"), optopt);
- usage(B_FALSE);
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- /* no -d given: search the single default directory */
- if (searchdirs == NULL) {
- searchdirs = safe_malloc(sizeof (char *));
- searchdirs[0] = "/dev";
- nsearch = 1;
- }
-
- /* check argument count */
- if (do_all) {
- if (argc != 0) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
- } else {
- if (argc > 2) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- /*
- * Check for the SYS_CONFIG privilege. We do this explicitly
- * here because otherwise any attempt to discover pools will
- * silently fail.
- */
- if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
- (void) fprintf(stderr, gettext("cannot "
- "discover pools: permission denied\n"));
- free(searchdirs);
- return (1);
- }
- }
-
- if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) {
- free(searchdirs);
- return (1);
- }
-
- /*
- * We now have a list of all available pools in the given directories.
- * Depending on the arguments given, we do one of the following:
- *
- * <none> Iterate through all pools and display information about
- * each one.
- *
- * -a Iterate through all pools and try to import each one.
- *
- * <id> Find the pool that corresponds to the given GUID/pool
- * name and import that one.
- *
- * -D Above options applies only to destroyed pools.
- */
- if (argc != 0) {
- char *endptr;
-
- /*
- * An argument that parses completely as a decimal number is
- * treated as a pool GUID (searchname == NULL); anything else
- * is treated as a pool name.
- */
- errno = 0;
- searchguid = strtoull(argv[0], &endptr, 10);
- if (errno != 0 || *endptr != '\0')
- searchname = argv[0];
- else
- searchname = NULL;
- found_config = NULL;
- }
-
- err = 0;
- elem = NULL;
- first = B_TRUE;
- while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
-
- verify(nvpair_value_nvlist(elem, &config) == 0);
-
- /* -D selects only destroyed pools; without it they are skipped */
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- &pool_state) == 0);
- if (!do_destroyed && pool_state == POOL_STATE_DESTROYED)
- continue;
- if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
- continue;
-
- if (argc == 0) {
- if (first)
- first = B_FALSE;
- else if (!do_all)
- (void) printf("\n");
-
- /*
- * argc/argv were advanced past the options above; undo
- * that shift so do_import() receives the command line as
- * this function received it.
- */
- if (do_all)
- err |= do_import(config, NULL, mntopts,
- altroot, do_force, argc + optind,
- argv - optind);
- else
- show_import(config);
- } else if (searchname != NULL) {
- char *name;
-
- /*
- * We are searching for a pool based on name.
- */
- verify(nvlist_lookup_string(config,
- ZPOOL_CONFIG_POOL_NAME, &name) == 0);
-
- if (strcmp(name, searchname) == 0) {
- if (found_config != NULL) {
- (void) fprintf(stderr, gettext(
- "cannot import '%s': more than "
- "one matching pool\n"), searchname);
- (void) fprintf(stderr, gettext(
- "import by numeric ID instead\n"));
- err = B_TRUE;
- }
- found_config = config;
- }
- } else {
- uint64_t guid;
-
- /*
- * Search for a pool by guid.
- */
- verify(nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
-
- if (guid == searchguid)
- found_config = config;
- }
- }
-
- /*
- * If we were searching for a specific pool, verify that we found a
- * pool, and then do the import.
- */
- if (argc != 0 && err == 0) {
- if (found_config == NULL) {
- (void) fprintf(stderr, gettext("cannot import '%s': "
- "no such pool available\n"), argv[0]);
- err = B_TRUE;
- } else {
- /* the optional second argument is the new pool name */
- err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, altroot, do_force, argc + optind,
- argv - optind);
- }
- }
-
- /*
- * If we were just looking for pools, report an error if none were
- * found.
- */
- if (argc == 0 && first)
- (void) fprintf(stderr,
- gettext("no pools available to import\n"));
-
- nvlist_free(pools);
- free(searchdirs);
-
- return (err ? 1 : 0);
-}
-
-/*
- * State shared by the iostat callbacks (refresh_iostat, get_namewidth,
- * print_iostat) and the header/separator printers.
- */
-typedef struct iostat_cbdata {
- zpool_list_t *cb_list; /* pool list; vanished pools are removed from it */
- int cb_verbose; /* nonzero: also print per-vdev statistics */
- int cb_iteration; /* pass counter; on pass 1 old stats are ignored */
- int cb_namewidth; /* width of the name column, kept within [10,38] */
-} iostat_cbdata_t;
-
-/*
- * Emit the rule printed beneath the iostat headings: one dash for every
- * column of the name field, followed by the fixed rules for the stat columns.
- */
-static void
-print_iostat_separator(iostat_cbdata_t *cb)
-{
-	int remaining = cb->cb_namewidth;
-
-	while (remaining-- > 0)
-		(void) printf("-");
-	(void) printf(" ----- ----- ----- ----- ----- -----\n");
-}
-
-/*
- * Print the two heading lines of the iostat table, padded to the current
- * name-column width, and then the separator rule.
- */
-static void
-print_iostat_header(iostat_cbdata_t *cb)
-{
-	const int width = cb->cb_namewidth;
-
-	/* first line: group titles above an empty name column */
-	(void) printf("%*s capacity operations bandwidth\n", width, "");
-
-	/* second line: individual column titles, name column left-justified */
-	(void) printf("%-*s used avail read write read write\n", width,
-	    "pool");
-
-	print_iostat_separator(cb);
-}
-
-/*
- * Print one statistic column: the value is rendered by zfs_nicenum() and
- * right-aligned in a five-character field preceded by a space.
- */
-void
-print_one_stat(uint64_t value)
-{
-	char text[64];
-
-	zfs_nicenum(value, text, sizeof (text));
-	(void) printf(" %5s", text);
-}
-
-/*
- * Print out all the statistics for the given vdev.  This can either be the
- * toplevel configuration, or called recursively for the children of a vdev
- * when cb->cb_verbose is set.
- *
- *	zhp	pool handle, used to derive the names of child vdevs
- *	name	label for the name column; must not be NULL (its length is
- *		taken unconditionally)
- *	oldnv	previous snapshot of this vdev, or NULL to report the new
- *		counters against an all-zero baseline
- *	newnv	current snapshot of this vdev
- *	cb	iostat state supplying the column width and verbosity
- *	depth	number of columns by which the name is indented
- */
-void
-print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
-    nvlist_t *newnv, iostat_cbdata_t *cb, int depth)
-{
-	nvlist_t **oldchild = NULL, **newchild;
-	uint_t c, children, oldchildren = 0;
-	vdev_stat_t *oldvs, *newvs;
-	vdev_stat_t zerovs = { 0 };
-	uint64_t tdelta;
-	double scale;
-	char *vname;
-
-	if (oldnv != NULL) {
-		verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS,
-		    (uint64_t **)&oldvs, &c) == 0);
-	} else {
-		oldvs = &zerovs;
-	}
-
-	verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&newvs, &c) == 0);
-
-	/* pad the name out to the column width unless it already overflows */
-	if (strlen(name) + depth > cb->cb_namewidth)
-		(void) printf("%*s%s", depth, "", name);
-	else
-		(void) printf("%*s%s%*s", depth, "", name,
-		    (int)(cb->cb_namewidth - strlen(name) - depth), "");
-
-	tdelta = newvs->vs_timestamp - oldvs->vs_timestamp;
-
-	/* scale counter deltas to per-second rates; no scaling if no time passed */
-	if (tdelta == 0)
-		scale = 1.0;
-	else
-		scale = (double)NANOSEC / tdelta;
-
-	/* only toplevel vdevs have capacity stats */
-	if (newvs->vs_space == 0) {
-		(void) printf(" - -");
-	} else {
-		print_one_stat(newvs->vs_alloc);
-		print_one_stat(newvs->vs_space - newvs->vs_alloc);
-	}
-
-	print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] -
-	    oldvs->vs_ops[ZIO_TYPE_READ])));
-
-	print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] -
-	    oldvs->vs_ops[ZIO_TYPE_WRITE])));
-
-	print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] -
-	    oldvs->vs_bytes[ZIO_TYPE_READ])));
-
-	print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] -
-	    oldvs->vs_bytes[ZIO_TYPE_WRITE])));
-
-	(void) printf("\n");
-
-	if (!cb->cb_verbose)
-		return;
-
-	if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN,
-	    &newchild, &children) != 0)
-		return;
-
-	if (oldnv != NULL && nvlist_lookup_nvlist_array(oldnv,
-	    ZPOOL_CONFIG_CHILDREN, &oldchild, &oldchildren) != 0)
-		return;
-
-	for (c = 0; c < children; c++) {
-		/*
-		 * The old snapshot may hold fewer children than the new one
-		 * (a vdev was added in between).  Such a child has no old
-		 * entry, so report it against the zero baseline rather than
-		 * reading past the end of the old array.
-		 */
-		nvlist_t *oldc = (oldnv != NULL && c < oldchildren) ?
-		    oldchild[c] : NULL;
-
-		vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
-		print_vdev_stats(zhp, vname, oldc, newchild[c], cb,
-		    depth + 2);
-		free(vname);
-	}
-}
-
-/*
- * pool_list_iter() callback: refresh the statistics of one pool.  Returns -1
- * if the refresh itself fails and 0 otherwise.
- */
-static int
-refresh_iostat(zpool_handle_t *zhp, void *data)
-{
-	iostat_cbdata_t *state = data;
-	boolean_t gone;
-
-	if (zpool_refresh_stats(zhp, &gone) != 0)
-		return (-1);
-
-	/*
-	 * A pool that has disappeared is dropped from the list so that the
-	 * remaining passes carry on without it.
-	 */
-	if (gone)
-		pool_list_remove(state->cb_list, zhp);
-
-	return (0);
-}
-
-/*
- * pool_list_iter() callback that prints the iostat line(s) for one pool.
- * On the first pass no previous snapshot is used, so the figures printed are
- * relative to a zero baseline.  Always returns 0.
- */
-int
-print_iostat(zpool_handle_t *zhp, void *data)
-{
-	iostat_cbdata_t *state = data;
-	nvlist_t *prev_cfg, *cur_cfg;
-	nvlist_t *prev_root = NULL;
-	nvlist_t *cur_root;
-
-	cur_cfg = zpool_get_config(zhp, &prev_cfg);
-
-	verify(nvlist_lookup_nvlist(cur_cfg, ZPOOL_CONFIG_VDEV_TREE,
-	    &cur_root) == 0);
-
-	/* an old vdev tree is only consulted after the first pass */
-	if (state->cb_iteration != 1 && prev_cfg != NULL) {
-		verify(nvlist_lookup_nvlist(prev_cfg, ZPOOL_CONFIG_VDEV_TREE,
-		    &prev_root) == 0);
-	}
-
-	/* the pool line, plus its vdevs when running verbose */
-	print_vdev_stats(zhp, zpool_get_name(zhp), prev_root, cur_root,
-	    state, 0);
-
-	if (state->cb_verbose)
-		print_iostat_separator(state);
-
-	return (0);
-}
-
-/*
- * pool_list_iter() callback that widens cb->cb_namewidth so that it can hold
- * this pool's name (or, in verbose mode, whatever max_width() reports for
- * its vdev tree).  The caller resets cb_namewidth to 0 before iterating, so
- * after the iteration it holds the maximum over all pools, kept within
- * [10,38].  Always returns 0.
- */
-int
-get_namewidth(zpool_handle_t *zhp, void *data)
-{
-	iostat_cbdata_t *cb = data;
-	nvlist_t *config, *nvroot;
-	int width = 0;
-
-	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
-		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-		    &nvroot) == 0);
-		if (!cb->cb_verbose)
-			width = strlen(zpool_get_name(zhp));
-		else
-			width = max_width(zhp, nvroot, 0, 0);
-	}
-
-	/*
-	 * Keep the widest value seen so far.  Assigning unconditionally
-	 * would size the column for whichever pool happens to be visited
-	 * last instead of for the widest one.
-	 */
-	if (width > cb->cb_namewidth)
-		cb->cb_namewidth = width;
-
-	/*
-	 * The width must fall into the range [10,38].  The upper limit is the
-	 * maximum we can have and still fit in 80 columns.
-	 */
-	if (cb->cb_namewidth < 10)
-		cb->cb_namewidth = 10;
-	if (cb->cb_namewidth > 38)
-		cb->cb_namewidth = 38;
-
-	return (0);
-}
-
-/*
- * zpool iostat [-v] [pool] ... [interval [count]]
- *
- *	-v	Display statistics for individual vdevs
- *
- * This command can be tricky because we want to be able to deal with pool
- * creation/destruction as well as vdev configuration changes.  The bulk of this
- * processing is handled by the pool_list_* routines in zpool_iter.c.  We rely
- * on pool_list_update() to detect the addition of new pools.  Configuration
- * changes are all handled within libzfs.
- *
- * Returns 1 if the pool list cannot be built or is empty when it must not
- * be; otherwise returns the status reported by pool_list_get().
- */
-int
-zpool_do_iostat(int argc, char **argv)
-{
-	int c;
-	int ret;
-	int npools;
-	unsigned long interval = 0, count = 0;
-	zpool_list_t *list;
-	boolean_t verbose = B_FALSE;
-	iostat_cbdata_t cb;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "v")) != -1) {
-		switch (c) {
-		case 'v':
-			verbose = B_TRUE;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/*
-	 * Determine if the last argument is an integer or a pool name.  The
-	 * character is converted to unsigned char first because isdigit() is
-	 * undefined for negative values, which a plain char can hold.
-	 */
-	if (argc > 0 && isdigit((unsigned char)argv[argc - 1][0])) {
-		char *end;
-
-		errno = 0;
-		interval = strtoul(argv[argc - 1], &end, 10);
-
-		if (*end == '\0' && errno == 0) {
-			if (interval == 0) {
-				(void) fprintf(stderr, gettext("interval "
-				    "cannot be zero\n"));
-				usage(B_FALSE);
-			}
-
-			/*
-			 * Ignore the last parameter
-			 */
-			argc--;
-		} else {
-			/*
-			 * If this is not a valid number, just plow on.  The
-			 * user will get a more informative error message later
-			 * on.
-			 */
-			interval = 0;
-		}
-	}
-
-	/*
-	 * If the last remaining argument is also an integer, then we have
-	 * both a count and an interval: the number parsed above was really
-	 * the count, and this one is the interval.
-	 */
-	if (argc > 0 && isdigit((unsigned char)argv[argc - 1][0])) {
-		char *end;
-
-		errno = 0;
-		count = interval;
-		interval = strtoul(argv[argc - 1], &end, 10);
-
-		if (*end == '\0' && errno == 0) {
-			if (interval == 0) {
-				(void) fprintf(stderr, gettext("interval "
-				    "cannot be zero\n"));
-				usage(B_FALSE);
-			}
-
-			/*
-			 * Ignore the last parameter
-			 */
-			argc--;
-		} else {
-			interval = 0;
-		}
-	}
-
-	/*
-	 * Construct the list of all interesting pools.
-	 */
-	ret = 0;
-	if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL)
-		return (1);
-
-	if (pool_list_count(list) == 0 && argc != 0) {
-		pool_list_free(list);
-		return (1);
-	}
-
-	if (pool_list_count(list) == 0 && interval == 0) {
-		pool_list_free(list);
-		(void) fprintf(stderr, gettext("no pools available\n"));
-		return (1);
-	}
-
-	/*
-	 * Enter the main iostat loop.
-	 */
-	cb.cb_list = list;
-	cb.cb_verbose = verbose;
-	cb.cb_iteration = 0;
-	cb.cb_namewidth = 0;
-
-	for (;;) {
-		pool_list_update(list);
-
-		if ((npools = pool_list_count(list)) == 0)
-			break;
-
-		/*
-		 * Refresh all statistics.  This is done as an explicit step
-		 * before calculating the maximum name width, so that any
-		 * configuration changes are properly accounted for.
-		 */
-		(void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb);
-
-		/*
-		 * Iterate over all pools to determine the maximum width
-		 * for the pool / device name column across all pools.
-		 */
-		cb.cb_namewidth = 0;
-		(void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
-
-		/*
-		 * If it's the first time, or verbose mode, print the header.
-		 */
-		if (++cb.cb_iteration == 1 || verbose)
-			print_iostat_header(&cb);
-
-		(void) pool_list_iter(list, B_FALSE, print_iostat, &cb);
-
-		/*
-		 * If there's more than one pool, and we're not in verbose mode
-		 * (which prints a separator for us), then print a separator.
-		 */
-		if (npools > 1 && !verbose)
-			print_iostat_separator(&cb);
-
-		if (verbose)
-			(void) printf("\n");
-
-		/*
-		 * Flush the output so that redirection to a file isn't buffered
-		 * indefinitely.
-		 */
-		(void) fflush(stdout);
-
-		if (interval == 0)
-			break;
-
-		if (count != 0 && --count == 0)
-			break;
-
-		(void) sleep(interval);
-	}
-
-	pool_list_free(list);
-
-	return (ret);
-}
-
-/*
- * State handed to list_callback() for every pool being listed.
- */
-typedef struct list_cbdata {
- boolean_t cb_scripted; /* no header; fields separated by a tab */
- boolean_t cb_first; /* still true until the first pool is visited */
- int cb_fields[MAX_FIELDS]; /* selected columns, as column_table indices */
- int cb_fieldcount; /* number of valid entries in cb_fields */
-} list_cbdata_t;
-
-/*
- * Given a list of columns to display, output appropriate headers for each one.
- *
- *	fields	indices into column_table, one per column
- *	count	number of entries in 'fields'
- *
- * Columns are separated by blanks.  Every column except the last is padded
- * to its table width; the last is printed at its natural length so that the
- * line carries no trailing padding.
- */
-void
-print_header(int *fields, size_t count)
-{
-	size_t i;
-	column_def_t *col;
-	const char *fmt;
-	int width;
-
-	for (i = 0; i < count; i++) {
-		col = &column_table[fields[i]];
-		if (i != 0)
-			(void) printf(" ");
-		if (col->cd_justify == left_justify)
-			fmt = "%-*s";
-		else
-			fmt = "%*s";
-
-		/*
-		 * A '*' field width consumes an int argument.  Passing the
-		 * size_t that strlen() returns is undefined, so settle on an
-		 * int before the call.
-		 */
-		if (i == count - 1)
-			width = (int)strlen(col->cd_title);
-		else
-			width = (int)col->cd_width;
-
-		(void) printf(fmt, width, col->cd_title);
-	}
-
-	(void) printf("\n");
-}
-
-/*
- * for_each_pool() callback that prints one line of 'zpool list' output.
- *
- * The header is printed before the first pool unless scripted mode is on.
- * For a pool in the POOL_STATE_UNAVAIL state the space and root columns show
- * "-" and the health column shows "FAULTED".  Always returns 0.
- */
-int
-list_callback(zpool_handle_t *zhp, void *data)
-{
-	list_cbdata_t *cbp = data;
-	nvlist_t *config;
-	int i;
-	char buf[ZPOOL_MAXNAMELEN];
-	uint64_t total = 0;
-	uint64_t used = 0;
-	const char *fmt;
-	column_def_t *col;
-	int width;
-
-	if (cbp->cb_first) {
-		if (!cbp->cb_scripted)
-			print_header(cbp->cb_fields, cbp->cb_fieldcount);
-		cbp->cb_first = B_FALSE;
-	}
-
-	/* config == NULL marks an unavailable pool for the switch below */
-	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
-		config = NULL;
-	} else {
-		config = zpool_get_config(zhp, NULL);
-		total = zpool_get_space_total(zhp);
-		used = zpool_get_space_used(zhp);
-	}
-
-	for (i = 0; i < cbp->cb_fieldcount; i++) {
-		if (i != 0) {
-			if (cbp->cb_scripted)
-				(void) printf("\t");
-			else
-				(void) printf(" ");
-		}
-
-		col = &column_table[cbp->cb_fields[i]];
-
-		switch (cbp->cb_fields[i]) {
-		case ZPOOL_FIELD_NAME:
-			(void) strlcpy(buf, zpool_get_name(zhp), sizeof (buf));
-			break;
-
-		case ZPOOL_FIELD_SIZE:
-			if (config == NULL)
-				(void) strlcpy(buf, "-", sizeof (buf));
-			else
-				zfs_nicenum(total, buf, sizeof (buf));
-			break;
-
-		case ZPOOL_FIELD_USED:
-			if (config == NULL)
-				(void) strlcpy(buf, "-", sizeof (buf));
-			else
-				zfs_nicenum(used, buf, sizeof (buf));
-			break;
-
-		case ZPOOL_FIELD_AVAILABLE:
-			if (config == NULL)
-				(void) strlcpy(buf, "-", sizeof (buf));
-			else
-				zfs_nicenum(total - used, buf, sizeof (buf));
-			break;
-
-		case ZPOOL_FIELD_CAPACITY:
-			if (config == NULL) {
-				(void) strlcpy(buf, "-", sizeof (buf));
-			} else {
-				/* percentage used; avoid dividing by zero */
-				uint64_t capacity = (total == 0 ? 0 :
-				    (used * 100 / total));
-				(void) snprintf(buf, sizeof (buf), "%llu%%",
-				    (u_longlong_t)capacity);
-			}
-			break;
-
-		case ZPOOL_FIELD_HEALTH:
-			if (config == NULL) {
-				(void) strlcpy(buf, "FAULTED", sizeof (buf));
-			} else {
-				nvlist_t *nvroot;
-				vdev_stat_t *vs;
-				uint_t vsc;
-
-				verify(nvlist_lookup_nvlist(config,
-				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-				verify(nvlist_lookup_uint64_array(nvroot,
-				    ZPOOL_CONFIG_STATS, (uint64_t **)&vs,
-				    &vsc) == 0);
-				(void) strlcpy(buf, state_to_name(vs),
-				    sizeof (buf));
-			}
-			break;
-
-		case ZPOOL_FIELD_ROOT:
-			if (config == NULL)
-				(void) strlcpy(buf, "-", sizeof (buf));
-			else if (zpool_get_root(zhp, buf, sizeof (buf)) != 0)
-				(void) strlcpy(buf, "-", sizeof (buf));
-			break;
-
-		default:
-			/* never print an uninitialized buffer */
-			(void) strlcpy(buf, "-", sizeof (buf));
-			break;
-		}
-
-		if (cbp->cb_scripted)
-			(void) printf("%s", buf);
-		else {
-			if (col->cd_justify == left_justify)
-				fmt = "%-*s";
-			else
-				fmt = "%*s";
-
-			/*
-			 * A '*' field width consumes an int argument, so the
-			 * size_t from strlen() must be converted first.  The
-			 * last column is not padded.
-			 */
-			if (i == cbp->cb_fieldcount - 1)
-				width = (int)strlen(buf);
-			else
-				width = (int)col->cd_width;
-
-			(void) printf(fmt, width, buf);
-		}
-	}
-
-	(void) printf("\n");
-
-	return (0);
-}
-
-/*
- * zpool list [-H] [-o field[,field]*] [pool] ...
- *
- *	-H	Scripted mode.  Don't display headers, and separate fields by
- *		a single tab.
- *	-o	List of fields to display.  Defaults to all fields, or
- *		"name,size,used,available,capacity,health,root"
- *
- * Lists the requested pools (all pools when none are named), printing the
- * selected columns for each.  Returns the status of for_each_pool(), or 0
- * when no pool was named and none exists.
- */
-int
-zpool_do_list(int argc, char **argv)
-{
-	static char default_fields[] =
-	    "name,size,used,available,capacity,health,root";
-	list_cbdata_t cb = { 0 };
-	char *fields = default_fields;
-	char *subval;
-	int opt;
-	int idx;
-	int rc;
-
-	/* check options */
-	while ((opt = getopt(argc, argv, ":Ho:")) != -1) {
-		switch (opt) {
-		case 'H':
-			cb.cb_scripted = B_TRUE;
-			break;
-		case 'o':
-			fields = optarg;
-			break;
-		case ':':
-			(void) fprintf(stderr, gettext("missing argument for "
-			    "'%c' option\n"), optopt);
-			usage(B_FALSE);
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* turn the comma-separated field list into column_table indices */
-	for (; *fields != '\0'; cb.cb_fieldcount++) {
-		if (cb.cb_fieldcount == MAX_FIELDS) {
-			(void) fprintf(stderr, gettext("too many "
-			    "properties given to -o option\n"));
-			usage(B_FALSE);
-		}
-
-		idx = getsubopt(&fields, column_subopts, &subval);
-		cb.cb_fields[cb.cb_fieldcount] = idx;
-		if (idx == -1) {
-			(void) fprintf(stderr, gettext("invalid property "
-			    "'%s'\n"), subval);
-			usage(B_FALSE);
-		}
-	}
-
-	cb.cb_first = B_TRUE;
-
-	rc = for_each_pool(argc, argv, B_TRUE, NULL, list_callback, &cb);
-
-	/* cb_first still set means the callback never ran */
-	if (argc != 0 || !cb.cb_first)
-		return (rc);
-
-	(void) printf(gettext("no pools available\n"));
-	return (0);
-}
-
-/*
- * Depth-first search of the vdev tree rooted at 'nv' for a leaf whose path
- * equals 'name'.  A leading _PATH_DEV is stripped from both strings before
- * they are compared.  Returns the matching leaf nvlist, or NULL if no leaf
- * matches.
- */
-static nvlist_t *
-zpool_get_vdev_by_name(nvlist_t *nv, char *name)
-{
-	const size_t devlen = sizeof (_PATH_DEV) - 1;
-	nvlist_t **kids;
-	nvlist_t *hit;
-	uint_t nkids;
-	uint_t i;
-	char *path;
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &kids, &nkids) == 0) {
-		/* interior vdev: return the first match among the children */
-		for (i = 0; i < nkids; i++) {
-			hit = zpool_get_vdev_by_name(kids[i], name);
-			if (hit != NULL)
-				return (hit);
-		}
-		return (NULL);
-	}
-
-	/* leaf vdev: compare device paths */
-	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-	if (strncmp(name, _PATH_DEV, devlen) == 0)
-		name += devlen;
-	if (strncmp(path, _PATH_DEV, devlen) == 0)
-		path += devlen;
-
-	return (strcmp(name, path) == 0 ? nv : NULL);
-}
-
-/*
- * Shared implementation of 'zpool attach' and 'zpool replace'.
- *
- * argc/argv are the subcommand's arguments; 'replacing' is nonzero for
- * replace and zero for attach. When replacing, <new_device> may be omitted,
- * in which case <device> is used as its own replacement.
- *
- * Returns 1 if the pool cannot be opened, is unavailable, or the new vdev
- * cannot be built; otherwise returns the result of zpool_vdev_attach().
- */
-static int
-zpool_do_attach_or_replace(int argc, char **argv, int replacing)
-{
- boolean_t force = B_FALSE;
- int c;
- nvlist_t *nvroot;
- char *poolname, *old_disk, *new_disk;
- zpool_handle_t *zhp;
- nvlist_t *config;
- int ret;
- int log_argc;
- char **log_argv;
-
- /* check options */
- while ((c = getopt(argc, argv, "f")) != -1) {
- switch (c) {
- case 'f':
- force = B_TRUE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- /* remember the unshifted command line for the history log below */
- log_argc = argc;
- log_argv = argv;
- argc -= optind;
- argv += optind;
-
- /* get pool name and check number of arguments */
- if (argc < 1) {
- (void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(B_FALSE);
- }
-
- poolname = argv[0];
-
- if (argc < 2) {
- (void) fprintf(stderr,
- gettext("missing <device> specification\n"));
- usage(B_FALSE);
- }
-
- old_disk = argv[1];
-
- /*
- * Advance argc/argv so that argv[0] is the new device; that single
- * remaining argument is what make_root_vdev() is given below.
- */
- if (argc < 3) {
- if (!replacing) {
- (void) fprintf(stderr,
- gettext("missing <new_device> specification\n"));
- usage(B_FALSE);
- }
- new_disk = old_disk;
- argc -= 1;
- argv += 1;
- } else {
- new_disk = argv[2];
- argc -= 2;
- argv += 2;
- }
-
- if (argc > 1) {
- (void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
- }
-
- if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
- return (1);
-
- if ((config = zpool_get_config(zhp, NULL)) == NULL) {
- (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
- poolname);
- zpool_close(zhp);
- return (1);
- }
-
- nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv);
- if (nvroot == NULL) {
- zpool_close(zhp);
- return (1);
- }
-
- ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
-
- /* only a successful operation is recorded in the pool history */
- if (!ret) {
- zpool_log_history(g_zfs, log_argc, log_argv, poolname, B_TRUE,
- B_FALSE);
- }
-
- nvlist_free(nvroot);
- zpool_close(zhp);
-
- return (ret);
-}
-
-/*
- * zpool replace [-f] <pool> <device> [new_device]
- *
- * -f Force attach, even if <new_device> appears to be in use.
- *
- * Replace <device> with <new_device>. If <new_device> is omitted, <device>
- * is used as its own replacement.
- *
- * Returns the result of zpool_do_attach_or_replace().
- */
-/* ARGSUSED */
-int
-zpool_do_replace(int argc, char **argv)
-{
- return (zpool_do_attach_or_replace(argc, argv, B_TRUE));
-}
-
-/*
- * zpool attach [-f] <pool> <device> <new_device>
- *
- * -f Force attach, even if <new_device> appears to be in use.
- *
- * Attach <new_device> to the mirror containing <device>. If <device> is not
- * part of a mirror, then <device> will be transformed into a mirror of
- * <device> and <new_device>. In either case, <new_device> will begin life
- * with a DTL of [0, now], and will immediately begin to resilver itself.
- *
- * Unlike 'zpool replace', <new_device> is mandatory here. Returns the result
- * of zpool_do_attach_or_replace().
- */
-int
-zpool_do_attach(int argc, char **argv)
-{
- return (zpool_do_attach_or_replace(argc, argv, B_FALSE));
-}
-
-/*
- * zpool detach [-f] <pool> <device>
- *
- *	-f	Force detach of <device>, even if DTLs argue against it
- *		(not supported yet)
- *
- * Detach a device from a mirror.  The operation will be refused if <device>
- * is the last device in the mirror, or if the DTLs indicate that this device
- * has the only valid copy of some data.
- *
- * Returns 1 if the pool cannot be opened, otherwise the result of
- * zpool_vdev_detach().
- */
-/* ARGSUSED */
-int
-zpool_do_detach(int argc, char **argv)
-{
-	int c;
-	char *poolname, *path;
-	zpool_handle_t *zhp;
-	int ret;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "f")) != -1) {
-		switch (c) {
-		case 'f':
-		case '?':
-			/*
-			 * getopt() sets optopt only for options it does not
-			 * recognize.  '-f' is recognized (but unsupported), so
-			 * in that case the option letter is in 'c' itself.
-			 */
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    c == '?' ? optopt : c);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* get pool name and check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(B_FALSE);
-	}
-
-	if (argc < 2) {
-		(void) fprintf(stderr,
-		    gettext("missing <device> specification\n"));
-		usage(B_FALSE);
-	}
-
-	poolname = argv[0];
-	path = argv[1];
-
-	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
-		return (1);
-
-	ret = zpool_vdev_detach(zhp, path);
-
-	/* log the unshifted command line, and only on success */
-	if (!ret) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
-		    B_TRUE, B_FALSE);
-	}
-	zpool_close(zhp);
-
-	return (ret);
-}
-
-/*
- * zpool online <pool> <device> ...
- *
- * Bring each named device online.  '-t' is parsed but rejected as invalid.
- * Returns 0 if every device was brought online and 1 otherwise; the command
- * is recorded in the pool history only when all devices succeeded.
- */
-/* ARGSUSED */
-int
-zpool_do_online(int argc, char **argv)
-{
-	int c, i;
-	char *poolname;
-	zpool_handle_t *zhp;
-	int ret = 0;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "t")) != -1) {
-		switch (c) {
-		case 't':
-		case '?':
-			/*
-			 * getopt() sets optopt only for options it does not
-			 * recognize.  '-t' is recognized (but rejected), so in
-			 * that case the option letter is in 'c' itself.
-			 */
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    c == '?' ? optopt : c);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* get pool name and check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing device name\n"));
-		usage(B_FALSE);
-	}
-
-	poolname = argv[0];
-
-	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
-		return (1);
-
-	/* keep going after a failure so every device is attempted */
-	for (i = 1; i < argc; i++) {
-		if (zpool_vdev_online(zhp, argv[i]) == 0)
-			(void) printf(gettext("Bringing device %s online\n"),
-			    argv[i]);
-		else
-			ret = 1;
-	}
-
-	if (!ret) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
-		    B_TRUE, B_FALSE);
-	}
-	zpool_close(zhp);
-
-	return (ret);
-}
-
-/*
- * zpool offline [-ft] <pool> <device> ...
- *
- *	-f	Force the device into the offline state, even if doing
- *		so would appear to compromise pool availability.
- *		(not supported yet)
- *
- *	-t	Only take the device off-line temporarily.  The offline
- *		state will not be persistent across reboots.
- *
- * Returns 0 if every device was taken offline and 1 otherwise; the command
- * is recorded in the pool history only when all devices succeeded.
- */
-/* ARGSUSED */
-int
-zpool_do_offline(int argc, char **argv)
-{
-	int c, i;
-	char *poolname;
-	zpool_handle_t *zhp;
-	int ret = 0;
-	boolean_t istmp = B_FALSE;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "ft")) != -1) {
-		switch (c) {
-		case 't':
-			istmp = B_TRUE;
-			break;
-		case 'f':
-		case '?':
-			/*
-			 * getopt() sets optopt only for options it does not
-			 * recognize.  '-f' is recognized (but unsupported), so
-			 * in that case the option letter is in 'c' itself.
-			 */
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    c == '?' ? optopt : c);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	/* get pool name and check number of arguments */
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(B_FALSE);
-	}
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing device name\n"));
-		usage(B_FALSE);
-	}
-
-	poolname = argv[0];
-
-	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
-		return (1);
-
-	/* keep going after a failure so every device is attempted */
-	for (i = 1; i < argc; i++) {
-		if (zpool_vdev_offline(zhp, argv[i], istmp) == 0)
-			(void) printf(gettext("Bringing device %s offline\n"),
-			    argv[i]);
-		else
-			ret = 1;
-	}
-
-	if (!ret) {
-		zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
-		    B_TRUE, B_FALSE);
-	}
-	zpool_close(zhp);
-
-	return (ret);
-}
-
-/*
- * zpool clear <pool> [device]
- *
- * Clear the errors recorded against a whole pool, or against one device of
- * it when a device is named.  argv[0] is the subcommand itself, so the pool
- * is argv[1] and the optional device argv[2].  Returns 0 on success and 1 on
- * failure; a successful clear is recorded in the pool history.
- */
-int
-zpool_do_clear(int argc, char **argv)
-{
-	zpool_handle_t *zhp;
-	char *pool;
-	char *device = NULL;
-	int failed;
-
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(B_FALSE);
-	}
-
-	if (argc > 3) {
-		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(B_FALSE);
-	}
-
-	pool = argv[1];
-	if (argc == 3)
-		device = argv[2];
-
-	zhp = zpool_open(g_zfs, pool);
-	if (zhp == NULL)
-		return (1);
-
-	failed = (zpool_clear(zhp, device) != 0) ? 1 : 0;
-	if (failed == 0)
-		zpool_log_history(g_zfs, argc, argv, pool, B_TRUE, B_FALSE);
-
-	zpool_close(zhp);
-
-	return (failed);
-}
-
-/*
- * State handed to scrub_callback() for every pool being scrubbed.
- */
-typedef struct scrub_cbdata {
- int cb_type; /* scrub type passed to zpool_scrub() */
- int cb_argc; /* original argument count, for the history log */
- char **cb_argv; /* original argument vector, for the history log */
-} scrub_cbdata_t;
-
-/*
- * for_each_pool() callback: start (or stop, depending on cb_type) a scrub of
- * one pool.  Unavailable pools are reported and skipped.  Returns 0 on
- * success and 1 on any failure; success is recorded in the pool history.
- */
-int
-scrub_callback(zpool_handle_t *zhp, void *data)
-{
-	scrub_cbdata_t *state = data;
-
-	/* a pool that is unavailable cannot be scrubbed */
-	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
-		(void) fprintf(stderr, gettext("cannot scrub '%s': pool is "
-		    "currently unavailable\n"), zpool_get_name(zhp));
-		return (1);
-	}
-
-	if (zpool_scrub(zhp, state->cb_type) != 0)
-		return (1);
-
-	zpool_log_history(g_zfs, state->cb_argc, state->cb_argv,
-	    zpool_get_name(zhp), B_TRUE, B_FALSE);
-
-	return (0);
-}
-
-/*
- * zpool scrub [-s] <pool> ...
- *
- *	-s	Stop.  Stops any in-progress scrub.
- *
- * Runs scrub_callback() on every named pool and returns the status of
- * for_each_pool().  At least one pool name is required.
- */
-int
-zpool_do_scrub(int argc, char **argv)
-{
-	scrub_cbdata_t cb;
-	int opt;
-
-	/* a full scrub unless -s asks for the current one to be stopped */
-	cb.cb_type = POOL_SCRUB_EVERYTHING;
-
-	/* check options */
-	while ((opt = getopt(argc, argv, "s")) != -1) {
-		if (opt == 's') {
-			cb.cb_type = POOL_SCRUB_NONE;
-		} else if (opt == '?') {
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	/* capture the command line before the options are skipped */
-	cb.cb_argc = argc;
-	cb.cb_argv = argv;
-
-	argv += optind;
-	argc -= optind;
-
-	if (argc < 1) {
-		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(B_FALSE);
-	}
-
-	return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
-}
-
-/*
- * Callback state whose consumer is not part of this section of the file.
- * NOTE(review): judging by the name it belongs to the status command's
- * callback -- confirm against that code before relying on field meanings.
- */
-typedef struct status_cbdata {
- int cb_count;
- boolean_t cb_allpools;
- boolean_t cb_verbose;
- boolean_t cb_explain;
- boolean_t cb_first;
-} status_cbdata_t;
-
-/*
- * Print a one-line summary of the scrub/resilver state recorded in the
- * statistics of the given root vdev: "none requested", a completion or stop
- * notice with the error count and end time, or a progress estimate.
- */
-void
-print_scrub_status(nvlist_t *nvroot)
-{
-	vdev_stat_t *stats;
-	uint_t nstats;
-	time_t began, finished, current;
-	uint64_t done, whole, mins;
-	double frac;
-	char *what;
-
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&stats, &nstats) == 0);
-
-	/* no end time and no scrub type: nothing was ever requested */
-	if (stats->vs_scrub_end == 0 &&
-	    stats->vs_scrub_type == POOL_SCRUB_NONE) {
-		(void) printf(gettext("none requested\n"));
-		return;
-	}
-
-	if (stats->vs_scrub_type == POOL_SCRUB_RESILVER)
-		what = "resilver";
-	else
-		what = "scrub";
-
-	began = stats->vs_scrub_start;
-	finished = stats->vs_scrub_end;
-	current = time(NULL);
-	done = stats->vs_scrub_examined;
-	whole = stats->vs_alloc;
-
-	/* a nonzero end time means the operation is over */
-	if (finished != 0) {
-		(void) printf(gettext("%s %s with %llu errors on %s"),
-		    what, stats->vs_scrub_complete ? "completed" : "stopped",
-		    (u_longlong_t)stats->vs_scrub_errors, ctime(&finished));
-		return;
-	}
-
-	/* keep the fraction finite and no greater than one */
-	if (done == 0)
-		done = 1;
-	if (done > whole)
-		whole = done;
-
-	frac = (double)done / whole;
-	mins = (uint64_t)((current - began) * (1 - frac) / frac / 60);
-
-	(void) printf(gettext("%s in progress, %.2f%% done, %lluh%um to go\n"),
-	    what, 100 * frac, (u_longlong_t)(mins / 60), (uint_t)(mins % 60));
-}
-
-/*
- * Search state for find_spare(): the guid to look for and, once found, the
- * handle of the pool that contains it.
- */
-typedef struct spare_cbdata {
- uint64_t cb_guid; /* vdev guid being searched for */
- zpool_handle_t *cb_zhp; /* set by find_spare() to the matching pool */
-} spare_cbdata_t;
-
-/*
- * Return B_TRUE if the vdev 'nv', or any vdev beneath it, carries the guid
- * 'search'; B_FALSE otherwise.
- */
-static boolean_t
-find_vdev(nvlist_t *nv, uint64_t search)
-{
-	nvlist_t **kids;
-	uint64_t mine;
-	uint_t nkids;
-	uint_t i;
-
-	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &mine) == 0 &&
-	    mine == search)
-		return (B_TRUE);
-
-	/* no children to descend into */
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &kids, &nkids) != 0)
-		return (B_FALSE);
-
-	for (i = 0; i < nkids; i++) {
-		if (find_vdev(kids[i], search))
-			return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * zpool_iter() callback: check whether this pool's vdev tree contains the
- * guid in the callback data.  On a match the handle is stored in cb_zhp and
- * left open for the caller, and 1 is returned.  Otherwise the handle is
- * closed here and 0 is returned.
- */
-static int
-find_spare(zpool_handle_t *zhp, void *data)
-{
-	spare_cbdata_t *search = data;
-	nvlist_t *cfg;
-	nvlist_t *root;
-
-	cfg = zpool_get_config(zhp, NULL);
-	verify(nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE,
-	    &root) == 0);
-
-	if (!find_vdev(root, search->cb_guid)) {
-		zpool_close(zhp);
-		return (0);
-	}
-
-	search->cb_zhp = zhp;
-	return (1);
-}
-
-/*
- * Print out configuration state as requested by status_callback.
- *
- * Prints one line for the vdev 'nv' (name, state, and for non-spares the
- * read/write/checksum error counts), followed by an optional annotation, and
- * then recurses into its children with the indentation increased by two.
- *
- * zhp pool the vdev belongs to
- * name label printed for this vdev
- * nv the vdev's nvlist
- * namewidth total width of the name column, including indentation
- * depth indentation of this vdev's name
- * isspare B_TRUE when printing the hot-spare list
- */
-void
-print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
- int namewidth, int depth, boolean_t isspare)
-{
- nvlist_t **child;
- uint_t c, children;
- vdev_stat_t *vs;
- char rbuf[6], wbuf[6], cbuf[6], repaired[7];
- char *vname;
- uint64_t notpresent;
- spare_cbdata_t cb;
- const char *state;
-
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0);
-
- /* a vdev without a children array is treated as a leaf */
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
- children = 0;
-
- state = state_to_name(vs);
- if (isspare) {
- /*
- * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
- * online drives.
- */
- if (vs->vs_aux == VDEV_AUX_SPARED)
- state = "INUSE";
- else if (vs->vs_state == VDEV_STATE_HEALTHY)
- state = "AVAIL";
- }
-
- (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
- name, state);
-
- /* error counters are not shown for spares */
- if (!isspare) {
- zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
- zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
- zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
- (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
- }
-
- /*
- * Trailing annotation, first match wins: the former path of a device
- * that is not present, an explanation of vs_aux, or the amount of data
- * repaired on a leaf.
- */
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
- &notpresent) == 0) {
- char *path;
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
- (void) printf(" was %s", path);
- } else if (vs->vs_aux != 0) {
- (void) printf(" ");
-
- switch (vs->vs_aux) {
- case VDEV_AUX_OPEN_FAILED:
- (void) printf(gettext("cannot open"));
- break;
-
- case VDEV_AUX_BAD_GUID_SUM:
- (void) printf(gettext("missing device"));
- break;
-
- case VDEV_AUX_NO_REPLICAS:
- (void) printf(gettext("insufficient replicas"));
- break;
-
- case VDEV_AUX_VERSION_NEWER:
- (void) printf(gettext("newer version"));
- break;
-
- case VDEV_AUX_SPARED:
- /*
- * Look through all pools for the one whose vdev tree
- * contains this guid. find_spare() returns 1 and leaves
- * the matching handle open in cb.cb_zhp, so it is closed
- * here once its name has been printed.
- */
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
- &cb.cb_guid) == 0);
- if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
- if (strcmp(zpool_get_name(cb.cb_zhp),
- zpool_get_name(zhp)) == 0)
- (void) printf(gettext("currently in "
- "use"));
- else
- (void) printf(gettext("in use by "
- "pool '%s'"),
- zpool_get_name(cb.cb_zhp));
- zpool_close(cb.cb_zhp);
- } else {
- (void) printf(gettext("currently in use"));
- }
- break;
-
- default:
- (void) printf(gettext("corrupted data"));
- break;
- }
- } else if (vs->vs_scrub_repaired != 0 && children == 0) {
- /*
- * Report bytes resilvered/repaired on leaf devices.
- */
- zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
- (void) printf(gettext(" %s %s"), repaired,
- (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
- "resilvered" : "repaired");
- }
-
- (void) printf("\n");
-
- /* zpool_vdev_name() returns a string that is freed after each child */
- for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, zhp, child[c]);
- print_status_config(zhp, vname, child[c],
- namewidth, depth + 2, isspare);
- free(vname);
- }
-}
-
-/*
- * Print the list of files affected by permanent errors in the given pool.
- * If the error log cannot be fetched, a one-line explanation is printed
- * instead.  Each log entry is an nvlist holding a dataset object number and
- * an object number, which zpool_obj_to_path() turns into a printable path.
- */
-static void
-print_error_log(zpool_handle_t *zhp)
-{
-	size_t pathlen = MAXPATHLEN * 2;
-	nvlist_t *errors;
-	nvpair_t *pair;
-	char *path;
-
-	if (zpool_get_errlog(zhp, &errors) != 0) {
-		(void) printf("errors: List of errors unavailable "
-		    "(insufficient privileges)\n");
-		return;
-	}
-
-	(void) printf("errors: Permanent errors have been "
-	    "detected in the following files:\n\n");
-
-	/* One scratch buffer is reused for every entry in the log. */
-	path = safe_malloc(pathlen);
-
-	for (pair = nvlist_next_nvpair(errors, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(errors, pair)) {
-		nvlist_t *entry;
-		uint64_t dataset, object;
-
-		verify(nvpair_value_nvlist(pair, &entry) == 0);
-		verify(nvlist_lookup_uint64(entry, ZPOOL_ERR_DATASET,
-		    &dataset) == 0);
-		verify(nvlist_lookup_uint64(entry, ZPOOL_ERR_OBJECT,
-		    &object) == 0);
-		zpool_obj_to_path(zhp, dataset, object, path, pathlen);
-		(void) printf("%7s %s\n", "", path);
-	}
-
-	free(path);
-	nvlist_free(errors);
-}
-
-/*
- * Print the "spares" section of the status output: a heading followed by
- * one print_status_config() entry per hot spare.  Prints nothing when the
- * pool has no spares.
- */
-static void
-print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
-    int namewidth)
-{
-	uint_t idx = 0;
-
-	if (nspares == 0)
-		return;
-
-	(void) printf(gettext("\tspares\n"));
-
-	while (idx < nspares) {
-		/* zpool_vdev_name() hands back a string we must free. */
-		char *label = zpool_vdev_name(g_zfs, zhp, spares[idx]);
-
-		print_status_config(zhp, label, spares[idx],
-		    namewidth, 2, B_TRUE);
-		free(label);
-		idx++;
-	}
-}
-
-/*
- * Display a summary of pool status. The sections are printed in this order:
- *
- * pool: tank
- * state: DEGRADED
- * status: One or more devices ...
- * action: ...
- * see: http://www.sun.com/msg/ZFS-xxxx-01
- * scrub: ...
- * config:
- * NAME STATE READ WRITE CKSUM
- * mirror DEGRADED ...
- * errors: ...
- *
- * 'data' points to a status_cbdata_t. cb_count is incremented for every pool
- * visited. When cb_explain is set, pools whose status is ZPOOL_STATUS_OK are
- * not described (a one-line "healthy" message is printed instead unless
- * cb_allpools is set). When cb_verbose is set, the full error log is printed
- * rather than just an error count. Always returns 0.
- */
-int
-status_callback(zpool_handle_t *zhp, void *data)
-{
- status_cbdata_t *cbp = data;
- nvlist_t *config, *nvroot;
- char *msgid;
- int reason;
- const char *health;
- uint_t c;
- vdev_stat_t *vs;
-
- config = zpool_get_config(zhp, NULL);
- reason = zpool_get_status(zhp, &msgid);
-
- cbp->cb_count++;
-
- /*
- * If we were given 'zpool status -x', only report those pools with
- * problems.
- */
- if (reason == ZPOOL_STATUS_OK && cbp->cb_explain) {
- if (!cbp->cb_allpools) {
- (void) printf(gettext("pool '%s' is healthy\n"),
- zpool_get_name(zhp));
- if (cbp->cb_first)
- cbp->cb_first = B_FALSE;
- }
- return (0);
- }
-
- /*
- * Separate consecutive pool reports with a blank line; cb_first stays
- * set until the first report has been printed.
- */
- if (cbp->cb_first)
- cbp->cb_first = B_FALSE;
- else
- (void) printf("\n");
-
- /*
- * NOTE(review): 'config' is handed to nvlist_lookup_nvlist() here, but is
- * only compared against NULL further down. If zpool_get_config() can
- * return NULL, this verify() would presumably fire before the
- * "configuration cannot be determined" branch is reached -- confirm.
- */
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0);
- health = state_to_name(vs);
-
- (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp));
- (void) printf(gettext(" state: %s\n"), health);
-
- /*
- * Each known status code gets a "status:" explanation and a suggested
- * "action:" line.
- */
- switch (reason) {
- case ZPOOL_STATUS_MISSING_DEV_R:
- (void) printf(gettext("status: One or more devices could not "
- "be opened. Sufficient replicas exist for\n\tthe pool to "
- "continue functioning in a degraded state.\n"));
- (void) printf(gettext("action: Attach the missing device and "
- "online it using 'zpool online'.\n"));
- break;
-
- case ZPOOL_STATUS_MISSING_DEV_NR:
- (void) printf(gettext("status: One or more devices could not "
- "be opened. There are insufficient\n\treplicas for the "
- "pool to continue functioning.\n"));
- (void) printf(gettext("action: Attach the missing device and "
- "online it using 'zpool online'.\n"));
- break;
-
- case ZPOOL_STATUS_CORRUPT_LABEL_R:
- (void) printf(gettext("status: One or more devices could not "
- "be used because the label is missing or\n\tinvalid. "
- "Sufficient replicas exist for the pool to continue\n\t"
- "functioning in a degraded state.\n"));
- (void) printf(gettext("action: Replace the device using "
- "'zpool replace'.\n"));
- break;
-
- case ZPOOL_STATUS_CORRUPT_LABEL_NR:
- (void) printf(gettext("status: One or more devices could not "
- "be used because the label is missing \n\tor invalid. "
- "There are insufficient replicas for the pool to "
- "continue\n\tfunctioning.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
- break;
-
- case ZPOOL_STATUS_FAILING_DEV:
- (void) printf(gettext("status: One or more devices has "
- "experienced an unrecoverable error. An\n\tattempt was "
- "made to correct the error. Applications are "
- "unaffected.\n"));
- (void) printf(gettext("action: Determine if the device needs "
- "to be replaced, and clear the errors\n\tusing "
- "'zpool clear' or replace the device with 'zpool "
- "replace'.\n"));
- break;
-
- case ZPOOL_STATUS_OFFLINE_DEV:
- (void) printf(gettext("status: One or more devices has "
- "been taken offline by the administrator.\n\tSufficient "
- "replicas exist for the pool to continue functioning in "
- "a\n\tdegraded state.\n"));
- (void) printf(gettext("action: Online the device using "
- "'zpool online' or replace the device with\n\t'zpool "
- "replace'.\n"));
- break;
-
- case ZPOOL_STATUS_RESILVERING:
- (void) printf(gettext("status: One or more devices is "
- "currently being resilvered. The pool will\n\tcontinue "
- "to function, possibly in a degraded state.\n"));
- (void) printf(gettext("action: Wait for the resilver to "
- "complete.\n"));
- break;
-
- case ZPOOL_STATUS_CORRUPT_DATA:
- (void) printf(gettext("status: One or more devices has "
- "experienced an error resulting in data\n\tcorruption. "
- "Applications may be affected.\n"));
- (void) printf(gettext("action: Restore the file in question "
- "if possible. Otherwise restore the\n\tentire pool from "
- "backup.\n"));
- break;
-
- case ZPOOL_STATUS_CORRUPT_POOL:
- (void) printf(gettext("status: The pool metadata is corrupted "
- "and the pool cannot be opened.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
- break;
-
- case ZPOOL_STATUS_VERSION_OLDER:
- (void) printf(gettext("status: The pool is formatted using an "
- "older on-disk format. The pool can\n\tstill be used, but "
- "some features are unavailable.\n"));
- (void) printf(gettext("action: Upgrade the pool using 'zpool "
- "upgrade'. Once this is done, the\n\tpool will no longer "
- "be accessible on older software versions.\n"));
- break;
-
- case ZPOOL_STATUS_VERSION_NEWER:
- (void) printf(gettext("status: The pool has been upgraded to a "
- "newer, incompatible on-disk version.\n\tThe pool cannot "
- "be accessed on this system.\n"));
- (void) printf(gettext("action: Access the pool from a system "
- "running more recent software, or\n\trestore the pool from "
- "backup.\n"));
- break;
-
- default:
- /*
- * The remaining errors can't actually be generated, yet.
- */
- assert(reason == ZPOOL_STATUS_OK);
- }
-
- if (msgid != NULL)
- (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"),
- msgid);
-
- if (config != NULL) {
- int namewidth;
- uint64_t nerr;
- nvlist_t **spares;
- uint_t nspares;
-
-
- (void) printf(gettext(" scrub: "));
- print_scrub_status(nvroot);
-
- /* The NAME column is at least 10 characters wide. */
- namewidth = max_width(zhp, nvroot, 0, 0);
- if (namewidth < 10)
- namewidth = 10;
-
- (void) printf(gettext("config:\n\n"));
- (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth,
- "NAME", "STATE", "READ", "WRITE", "CKSUM");
- print_status_config(zhp, zpool_get_name(zhp), nvroot,
- namewidth, 0, B_FALSE);
-
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0)
- print_spares(zhp, spares, nspares, namewidth);
-
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
- &nerr) == 0) {
- nvlist_t *nverrlist = NULL;
-
- /*
- * If the approximate error count is small, get a
- * precise count by fetching the entire log and
- * uniquifying the results.
- */
- if (nerr < 100 && !cbp->cb_verbose &&
- zpool_get_errlog(zhp, &nverrlist) == 0) {
- nvpair_t *elem;
-
- elem = NULL;
- nerr = 0;
- while ((elem = nvlist_next_nvpair(nverrlist,
- elem)) != NULL) {
- nerr++;
- }
- }
- /* nverrlist is still NULL here if the log was not fetched. */
- nvlist_free(nverrlist);
-
- (void) printf("\n");
-
- if (nerr == 0)
- (void) printf(gettext("errors: No known data "
- "errors\n"));
- else if (!cbp->cb_verbose)
- (void) printf(gettext("errors: %llu data "
- "errors, use '-v' for a list\n"),
- (u_longlong_t)nerr);
- else
- print_error_log(zhp);
- }
- } else {
- (void) printf(gettext("config: The configuration cannot be "
- "determined.\n"));
- }
-
- return (0);
-}
-
-/*
- * zpool status [-vx] [pool] ...
- *
- *	-v	Display complete error logs
- *	-x	Display only pools with potential problems
- *
- * Describes the health status of all pools or some subset.  With no pool
- * arguments every pool is visited, and a summary line is printed when
- * there are no pools at all or (with -x) when nothing needed reporting.
- */
-int
-zpool_do_status(int argc, char **argv)
-{
-	status_cbdata_t state = { 0 };
-	int opt;
-	int status;
-
-	/* parse flags */
-	while ((opt = getopt(argc, argv, "vx")) != -1) {
-		if (opt == 'v') {
-			state.cb_verbose = B_TRUE;
-		} else if (opt == 'x') {
-			state.cb_explain = B_TRUE;
-		} else if (opt == '?') {
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argv += optind;
-	argc -= optind;
-
-	state.cb_first = B_TRUE;
-
-	/* No pool names on the command line means "report on all pools". */
-	if (argc == 0)
-		state.cb_allpools = B_TRUE;
-
-	status = for_each_pool(argc, argv, B_TRUE, NULL, status_callback,
-	    &state);
-
-	if (argc == 0 && state.cb_count == 0) {
-		(void) printf(gettext("no pools available\n"));
-	} else if (state.cb_explain && state.cb_first && state.cb_allpools) {
-		/* cb_first still set: the callback never printed a report. */
-		(void) printf(gettext("all pools are healthy\n"));
-	}
-
-	return (status);
-}
-
-/*
- * State shared between zpool_do_upgrade() and its per-pool callbacks
- * (upgrade_cb() and upgrade_one()).
- */
-typedef struct upgrade_cbdata {
- /* set by the '-a' option: upgrade_cb() upgrades pools instead of listing */
- int cb_all;
- /* true until upgrade_cb() handles its first matching pool (header not yet printed) */
- int cb_first;
- /* when set, upgrade_cb() lists pools whose version is above ZFS_VERSION */
- int cb_newer;
- /* argument vector as received by zpool_do_upgrade(), passed to zpool_log_history() */
- int cb_argc;
- char **cb_argv;
-} upgrade_cbdata_t;
-
-/*
- * zpool_iter() callback used by 'zpool upgrade' when no pool is named.
- *
- * In the first pass (cb_newer clear) it handles pools older than
- * ZFS_VERSION: they are listed in a table or, when cb_all is set, upgraded
- * in place.  In the second pass (cb_newer set) it lists pools newer than
- * ZFS_VERSION.  cb_first is cleared as soon as one matching pool is seen.
- * The pool handle is always closed before returning.
- *
- * Returns the result of zpool_upgrade() when an upgrade was attempted,
- * otherwise 0.
- */
-static int
-upgrade_cb(zpool_handle_t *zhp, void *arg)
-{
-	upgrade_cbdata_t *state = arg;
-	nvlist_t *config = zpool_get_config(zhp, NULL);
-	uint64_t version;
-	int error = 0;
-	int is_older;
-
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
-	    &version) == 0);
-
-	is_older = (!state->cb_newer && version < ZFS_VERSION);
-
-	if (is_older && !state->cb_all) {
-		/* Listing mode: print the table header once, then a row. */
-		if (state->cb_first) {
-			(void) printf(gettext("The following pools are "
-			    "out of date, and can be upgraded. After "
-			    "being\nupgraded, these pools will no "
-			    "longer be accessible by older software "
-			    "versions.\n\n"));
-			(void) printf(gettext("VER POOL\n"));
-			(void) printf(gettext("--- ------------\n"));
-			state->cb_first = B_FALSE;
-		}
-
-		(void) printf("%2llu %s\n", (u_longlong_t)version,
-		    zpool_get_name(zhp));
-	} else if (is_older) {
-		/* '-a' mode: upgrade and record the command in the history. */
-		state->cb_first = B_FALSE;
-		error = zpool_upgrade(zhp);
-		if (error == 0) {
-			zpool_log_history(g_zfs, state->cb_argc,
-			    state->cb_argv, zpool_get_name(zhp), B_TRUE,
-			    B_FALSE);
-			(void) printf(gettext("Successfully upgraded "
-			    "'%s'\n"), zpool_get_name(zhp));
-		}
-	} else if (state->cb_newer && version > ZFS_VERSION) {
-		assert(!state->cb_all);
-
-		if (state->cb_first) {
-			(void) printf(gettext("The following pools are "
-			    "formatted using a newer software version and\n"
-			    "cannot be accessed on the current system.\n\n"));
-			(void) printf(gettext("VER POOL\n"));
-			(void) printf(gettext("--- ------------\n"));
-			state->cb_first = B_FALSE;
-		}
-
-		(void) printf("%2llu %s\n", (u_longlong_t)version,
-		    zpool_get_name(zhp));
-	}
-
-	zpool_close(zhp);
-	return (error);
-}
-
-/*
- * for_each_pool() callback that upgrades a single named pool.  'data' is
- * the upgrade_cbdata_t holding the original command line, which is written
- * to the pool history after a successful upgrade.
- *
- * Returns 0 if the pool is already at ZFS_VERSION or was upgraded, and 1
- * if zpool_upgrade() failed.
- */
-static int
-upgrade_one(zpool_handle_t *zhp, void *data)
-{
-	upgrade_cbdata_t *state = data;
-	nvlist_t *config = zpool_get_config(zhp, NULL);
-	uint64_t version;
-
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
-	    &version) == 0);
-
-	if (version == ZFS_VERSION) {
-		(void) printf(gettext("Pool '%s' is already formatted "
-		    "using the current version.\n"), zpool_get_name(zhp));
-		return (0);
-	}
-
-	if (zpool_upgrade(zhp) != 0)
-		return (1);
-
-	zpool_log_history(g_zfs, state->cb_argc, state->cb_argv,
-	    zpool_get_name(zhp), B_TRUE, B_FALSE);
-	(void) printf(gettext("Successfully upgraded '%s' "
-	    "from version %llu to version %llu\n"), zpool_get_name(zhp),
-	    (u_longlong_t)version, (u_longlong_t)ZFS_VERSION);
-
-	return (0);
-}
-
-/*
- * zpool upgrade
- * zpool upgrade -v
- * zpool upgrade <-a | pool>
- *
- * With no arguments, display downrev'd ZFS pool available for upgrade.
- * Individual pools can be upgraded by specifying the pool, and '-a' will
- * upgrade all pools.  '-v' lists the supported on-disk versions and cannot
- * be combined with '-a' or pool names; '-a' cannot be combined with pool
- * names.
- *
- * Returns the result of the pool iteration (0 when only '-v' was given).
- */
-int
-zpool_do_upgrade(int argc, char **argv)
-{
-	int c;
-	upgrade_cbdata_t cb = { 0 };
-	int ret = 0;
-	boolean_t showversions = B_FALSE;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "av")) != -1) {
-		switch (c) {
-		case 'a':
-			cb.cb_all = B_TRUE;
-			break;
-		case 'v':
-			showversions = B_TRUE;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	/*
-	 * Remember the complete command line before stripping the options;
-	 * the callbacks hand it to zpool_log_history().
-	 */
-	cb.cb_argc = argc;
-	cb.cb_argv = argv;
-	argc -= optind;
-	argv += optind;
-
-	if (showversions) {
-		if (cb.cb_all || argc != 0) {
-			(void) fprintf(stderr, gettext("-v option is "
-			    "incompatible with other arguments\n"));
-			usage(B_FALSE);
-		}
-	} else if (cb.cb_all) {
-		if (argc != 0) {
-			(void) fprintf(stderr, gettext("-a option is "
-			    "incompatible with other arguments\n"));
-			usage(B_FALSE);
-		}
-	}
-
-	/*
-	 * Cast explicitly so the argument always matches '%llu', as is done
-	 * for the same constant in upgrade_one().
-	 */
-	(void) printf(gettext("This system is currently running ZFS version "
-	    "%llu.\n\n"), (u_longlong_t)ZFS_VERSION);
-	cb.cb_first = B_TRUE;
-	if (showversions) {
-		(void) printf(gettext("The following versions are "
-		    "supported:\n\n"));
-		(void) printf(gettext("VER DESCRIPTION\n"));
-		(void) printf("--- -----------------------------------------"
-		    "---------------\n");
-		(void) printf(gettext(" 1 Initial ZFS version\n"));
-		(void) printf(gettext(" 2 Ditto blocks "
-		    "(replicated metadata)\n"));
-		(void) printf(gettext(" 3 Hot spares and double parity "
-		    "RAID-Z\n"));
-		(void) printf(gettext(" 4 zpool history\n"));
-		(void) printf(gettext(" 5 Compression using the gzip "
-		    "algorithm\n"));
-		(void) printf(gettext(" 6 bootfs pool property "));
-		(void) printf(gettext("\nFor more information on a particular "
-		    "version, including supported releases, see:\n\n"));
-		(void) printf("http://www.opensolaris.org/os/community/zfs/"
-		    "version/N\n\n");
-		(void) printf(gettext("Where 'N' is the version number.\n"));
-	} else if (argc == 0) {
-		int notfound;
-
-		/* First pass: pools older than the running version. */
-		ret = zpool_iter(g_zfs, upgrade_cb, &cb);
-		notfound = cb.cb_first;
-
-		/*
-		 * Second pass (listing mode only): pools newer than the
-		 * running version.  cb_first is reset so that upgrade_cb()
-		 * prints a fresh table header.
-		 */
-		if (!cb.cb_all && ret == 0) {
-			if (!cb.cb_first)
-				(void) printf("\n");
-			cb.cb_first = B_TRUE;
-			cb.cb_newer = B_TRUE;
-			ret = zpool_iter(g_zfs, upgrade_cb, &cb);
-			if (!cb.cb_first) {
-				notfound = B_FALSE;
-				(void) printf("\n");
-			}
-		}
-
-		if (ret == 0) {
-			if (notfound)
-				(void) printf(gettext("All pools are formatted "
-				    "using this version.\n"));
-			else if (!cb.cb_all)
-				(void) printf(gettext("Use 'zpool upgrade -v' "
-				    "for a list of available versions and "
-				    "their associated\nfeatures.\n"));
-		}
-	} else {
-		ret = for_each_pool(argc, argv, B_FALSE, NULL,
-		    upgrade_one, &cb);
-	}
-
-	return (ret);
-}
-
-/*
- * Print out the command history for a specific pool.
- *
- * 'data' points to a boolean_t that is cleared to record that at least one
- * pool was visited.  Every history record that carries a timestamp is
- * printed as "<date>.<time> <command>".  If the timestamp cannot be
- * converted or formatted, the raw seconds value is printed instead.
- *
- * Returns 0 on success, or the error from zpool_get_history().
- */
-static int
-get_history_one(zpool_handle_t *zhp, void *data)
-{
-	nvlist_t *nvhis;
-	nvlist_t **records;
-	uint_t numrecords;
-	uint_t i;	/* unsigned, to match numrecords */
-	char *cmdstr;
-	uint64_t dst_time;
-	time_t tsec;
-	struct tm t;
-	char tbuf[30];
-	int ret;
-
-	*(boolean_t *)data = B_FALSE;
-
-	(void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp));
-
-	if ((ret = zpool_get_history(zhp, &nvhis)) != 0)
-		return (ret);
-
-	verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
-	    &records, &numrecords) == 0);
-	for (i = 0; i < numrecords; i++) {
-		/* Records without a timestamp are skipped. */
-		if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
-		    &dst_time) != 0)
-			continue;
-
-		verify(nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
-		    &cmdstr) == 0);
-		tsec = dst_time;
-
-		/*
-		 * localtime_r() returns NULL on failure and strftime()
-		 * returns 0 when the result does not fit; in either case
-		 * tbuf must not be printed.
-		 */
-		if (localtime_r(&tsec, &t) != NULL &&
-		    strftime(tbuf, sizeof (tbuf), "%F.%T", &t) != 0) {
-			(void) printf("%s %s\n", tbuf, cmdstr);
-		} else {
-			(void) printf("%llu %s\n", (u_longlong_t)dst_time,
-			    cmdstr);
-		}
-	}
-	(void) printf("\n");
-	nvlist_free(nvhis);
-
-	return (ret);
-}
-
-/*
- * zpool history <pool>
- *
- * Displays the history of commands that modified pools.  When no pool is
- * named and the callback never ran, "no pools available" is printed and 0
- * is returned; otherwise the result of the iteration is returned.
- */
-int
-zpool_do_history(int argc, char **argv)
-{
-	boolean_t nothing_seen = B_TRUE;
-	int status;
-
-	argv += optind;
-	argc -= optind;
-
-	/* get_history_one() clears the flag for every pool it visits. */
-	status = for_each_pool(argc, argv, B_FALSE, NULL, get_history_one,
-	    &nothing_seen);
-
-	if (argc != 0 || nothing_seen != B_TRUE)
-		return (status);
-
-	(void) printf(gettext("no pools available\n"));
-	return (0);
-}
-
-/*
- * for_each_pool() callback for 'zpool get': print every requested property
- * of one pool.  'data' is the libzfs_get_cbdata_t set up by zpool_do_get().
- * Properties whose value cannot be retrieved are silently skipped.
- * Always returns 0.
- */
-static int
-get_callback(zpool_handle_t *zhp, void *data)
-{
-	libzfs_get_cbdata_t *state = (libzfs_get_cbdata_t *)data;
-	zpool_proplist_t *entry = state->cb_proplist;
-	zfs_source_t source;
-	char buf[MAXNAMELEN];
-
-	while (entry != NULL) {
-		/*
-		 * The head of the list may be a fake "name" entry that only
-		 * exists for column layout; it has no value to print.
-		 */
-		int placeholder = (entry->pl_prop == ZFS_PROP_NAME &&
-		    entry == state->cb_proplist);
-
-		if (!placeholder &&
-		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
-		    &source) == 0) {
-			libzfs_print_one_property(zpool_get_name(zhp), state,
-			    zpool_prop_to_name(entry->pl_prop), buf, source,
-			    NULL);
-		}
-
-		entry = entry->pl_next;
-	}
-
-	return (0);
-}
-
-/*
- * Implementation of the 'get' subcommand: argv[1] is the property list and
- * argv[2..] name the pools.  Prints the NAME, PROPERTY, VALUE and SOURCE
- * columns for each requested property of each pool.  Returns the result of
- * the pool iteration.
- */
-int
-zpool_do_get(int argc, char **argv)
-{
-	libzfs_get_cbdata_t state = { 0 };
-	zpool_proplist_t placeholder = { 0 };
-	zpool_proplist_t *to_free;
-	int status;
-
-	if (argc < 3)
-		usage(B_FALSE);
-
-	state.cb_first = B_TRUE;
-	state.cb_sources = ZFS_SRC_ALL;
-	state.cb_columns[0] = GET_COL_NAME;
-	state.cb_columns[1] = GET_COL_PROPERTY;
-	state.cb_columns[2] = GET_COL_VALUE;
-	state.cb_columns[3] = GET_COL_SOURCE;
-
-	if (zpool_get_proplist(g_zfs, argv[1], &state.cb_proplist) != 0)
-		usage(B_FALSE);
-
-	/*
-	 * Put a stack-allocated "name" entry in front of the parsed list;
-	 * get_callback() recognizes and skips it.
-	 */
-	if (state.cb_proplist != NULL) {
-		placeholder.pl_prop = ZFS_PROP_NAME;
-		placeholder.pl_width = strlen(gettext("NAME"));
-		placeholder.pl_next = state.cb_proplist;
-		state.cb_proplist = &placeholder;
-	}
-
-	status = for_each_pool(argc - 2, argv + 2, B_TRUE, &state.cb_proplist,
-	    get_callback, &state);
-
-	/* The placeholder lives on the stack; free only what follows it. */
-	to_free = (state.cb_proplist == &placeholder) ?
-	    placeholder.pl_next : state.cb_proplist;
-	zfs_free_proplist(to_free);
-
-	return (status);
-}
-
-/*
- * State handed from zpool_do_set() to set_callback().
- */
-typedef struct set_cbdata {
- /* property name: the part of the "property=value" argument before '=' */
- char *cb_propname;
- /* property value: the part of the same argument after '=' */
- char *cb_value;
- /* set by set_callback() once zpool_set_prop() has succeeded for a pool */
- boolean_t cb_any_successful;
-} set_cbdata_t;
-
-/*
- * for_each_pool() callback for 'zpool set': apply the property/value pair
- * held in the set_cbdata_t to one pool and note any success.  Returns the
- * result of zpool_set_prop().
- */
-int
-set_callback(zpool_handle_t *zhp, void *data)
-{
-	set_cbdata_t *state = (set_cbdata_t *)data;
-	int rc = zpool_set_prop(zhp, state->cb_propname, state->cb_value);
-
-	if (rc == 0)
-		state->cb_any_successful = B_TRUE;
-
-	return (rc);
-}
-
-/*
- * Implementation of the 'set' subcommand.  Expects exactly two arguments
- * after the command name: a "property=value" string and a pool name.  The
- * argument is split in place at the '=' for the duration of the call and
- * restored before the command is written to the pool history.  Returns the
- * result of the pool iteration.
- */
-int
-zpool_do_set(int argc, char **argv)
-{
-	set_cbdata_t state = { 0 };
-	char *eq;
-	int status;
-
-	if (argc > 1 && argv[1][0] == '-') {
-		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-		    argv[1][1]);
-		usage(B_FALSE);
-	}
-
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing property=value "
-		    "argument\n"));
-		usage(B_FALSE);
-	}
-
-	if (argc < 3) {
-		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(B_FALSE);
-	}
-
-	if (argc > 3) {
-		(void) fprintf(stderr, gettext("too many pool names\n"));
-		usage(B_FALSE);
-	}
-
-	state.cb_propname = argv[1];
-	eq = strchr(state.cb_propname, '=');
-	state.cb_value = eq;
-	if (eq == NULL) {
-		(void) fprintf(stderr, gettext("missing value in "
-		    "property=value argument\n"));
-		usage(B_FALSE);
-	}
-
-	/* Split "name=value" into two NUL-terminated strings. */
-	*eq = '\0';
-	state.cb_value = eq + 1;
-
-	status = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL,
-	    set_callback, &state);
-
-	if (state.cb_any_successful) {
-		/* Undo the split so the history shows the original text. */
-		*(state.cb_value - 1) = '=';
-		zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
-	}
-
-	return (status);
-}
-
-/*
- * Look up 'command' in command_table.  On a match, store the table index
- * in *idx and return 0; otherwise return 1 and leave *idx untouched.
- * Table slots whose name is NULL are ignored.
- */
-static int
-find_command_idx(char *command, int *idx)
-{
-	int slot = 0;
-
-	while (slot < NCOMMAND) {
-		if (command_table[slot].name != NULL &&
-		    strcmp(command, command_table[slot].name) == 0) {
-			*idx = slot;
-			return (0);
-		}
-		slot++;
-	}
-
-	return (1);
-}
-
-/*
- * Entry point for the zpool command.  Initializes libzfs, looks up the
- * subcommand named by argv[1] in command_table and runs it with the
- * remaining arguments.  Returns the subcommand's result, or 1 if the
- * library cannot be initialized.
- */
-int
-main(int argc, char **argv)
-{
-	int ret = 0;
-	int i;
-	char *cmdname;
-	int found = 0;
-
-	(void) setlocale(LC_ALL, "");
-	(void) textdomain(TEXT_DOMAIN);
-
-	if ((g_zfs = libzfs_init()) == NULL) {
-		(void) fprintf(stderr, gettext("internal error: failed to "
-		    "initialize ZFS library\n"));
-		return (1);
-	}
-
-	libzfs_print_on_error(g_zfs, B_TRUE);
-
-	/* Subcommands print their own diagnostics for bad options. */
-	opterr = 0;
-
-	/*
-	 * Make sure the user has specified some command.
-	 */
-	if (argc < 2) {
-		(void) fprintf(stderr, gettext("missing command\n"));
-		usage(B_FALSE);
-	}
-
-	cmdname = argv[1];
-
-	/*
-	 * Special case '-?'
-	 */
-	if (strcmp(cmdname, "-?") == 0)
-		usage(B_TRUE);
-
-	/*
-	 * Run the appropriate command.
-	 */
-	if (find_command_idx(cmdname, &i) == 0) {
-		current_command = &command_table[i];
-		ret = command_table[i].func(argc - 1, argv + 1);
-		found++;
-	}
-
-	/*
-	 * 'freeze' is a vile debugging abomination, so we treat it as such.
-	 */
-	if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
-		char buf[16384];
-		int fd;
-
-		/* Refuse names that would overflow the ioctl buffer. */
-		if (strlen(argv[2]) >= sizeof (buf)) {
-			(void) fprintf(stderr, gettext("pool name is too "
-			    "long\n"));
-			return (1);
-		}
-
-		if ((fd = open(ZFS_DEV, O_RDWR)) < 0) {
-			(void) fprintf(stderr, gettext("cannot open '%s'\n"),
-			    ZFS_DEV);
-			return (1);
-		}
-
-		(void) strcpy(buf, argv[2]);
-		return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf));
-	}
-
-	if (!found) {
-		(void) fprintf(stderr, gettext("unrecognized "
-		    "command '%s'\n"), cmdname);
-		usage(B_FALSE);
-	}
-
-	libzfs_fini(g_zfs);
-
-	/*
-	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
-	 * for the purposes of running ::findleaks.
-	 */
-	if (getenv("ZFS_ABORT") != NULL) {
-		(void) printf("dumping core by request\n");
-		abort();
-	}
-
-	return (ret);
-}
diff --git a/contrib/opensolaris/cmd/zpool/zpool_util.c b/contrib/opensolaris/cmd/zpool/zpool_util.c
deleted file mode 100644
index 8eb9c81..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool_util.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <errno.h>
-#include <libgen.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-
-#include "zpool_util.h"
-
-/*
- * Allocate 'size' bytes of zero-filled memory.  Never returns NULL: if the
- * allocation fails, an error is printed to stderr and the process exits
- * with status 1.
- */
-void *
-safe_malloc(size_t size)
-{
-	void *buf = calloc(1, size);
-
-	if (buf != NULL)
-		return (buf);
-
-	(void) fprintf(stderr, "internal error: out of memory\n");
-	exit(1);
-}
-
-/*
- * Return a newly allocated copy of 'str'.  Never returns NULL: if the copy
- * cannot be allocated, an error is printed to stderr and the process exits
- * with status 1.
- */
-char *
-safe_strdup(const char *str)
-{
-	char *copy = strdup(str);
-
-	if (copy != NULL)
-		return (copy);
-
-	(void) fprintf(stderr, "internal error: out of memory\n");
-	exit(1);
-}
-
-/*
- * Report an out-of-memory condition on stderr and terminate the program
- * with exit status 1.  Callers are expected to invoke this only after a
- * failure that left errno set to ENOMEM; that expectation is asserted.
- */
-void
-zpool_no_memory(void)
-{
-	assert(errno == ENOMEM);
-
-	(void) fprintf(stderr, gettext("internal error: out of memory\n"));
-
-	exit(1);
-}
diff --git a/contrib/opensolaris/cmd/zpool/zpool_util.h b/contrib/opensolaris/cmd/zpool/zpool_util.h
deleted file mode 100644
index cb05bda..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool_util.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef ZPOOL_UTIL_H
-#define ZPOOL_UTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <libnvpair.h>
-#include <libzfs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Basic utility functions
- *
- * safe_malloc() and safe_strdup() never return NULL; they print an error
- * and exit(1) when memory cannot be allocated. zpool_no_memory() prints
- * the same kind of error and exits.
- */
-void *safe_malloc(size_t);
-char *safe_strdup(const char *);
-void zpool_no_memory(void);
-
-/*
- * Virtual device functions
- */
-nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- boolean_t isreplace, int argc, char **argv);
-
-/*
- * Pool list functions
- *
- * for_each_pool() runs the given zpool_iter_f callback, with the supplied
- * data pointer, over the pools named by the argument vector. Callers in
- * zpool_main.c pass a count of zero when no pools were named -- presumably
- * meaning "all pools"; confirm against the implementation.
- */
-int for_each_pool(int, char **, boolean_t unavail, zpool_proplist_t **,
- zpool_iter_f, void *);
-
-typedef struct zpool_list zpool_list_t;
-
-zpool_list_t *pool_list_get(int, char **, zpool_proplist_t **, int *);
-void pool_list_update(zpool_list_t *);
-int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
-void pool_list_free(zpool_list_t *);
-int pool_list_count(zpool_list_t *);
-void pool_list_remove(zpool_list_t *, zpool_handle_t *);
-
-/*
- * libzfs handle used throughout the zpool sources.
- * NOTE(review): this has no 'extern', so every file that includes this
- * header gets its own tentative definition. Consider declaring it 'extern'
- * here and defining it once in a .c file.
- */
-libzfs_handle_t *g_zfs;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ZPOOL_UTIL_H */
diff --git a/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/contrib/opensolaris/cmd/zpool/zpool_vdev.c
deleted file mode 100644
index cfed1f0..0000000
--- a/contrib/opensolaris/cmd/zpool/zpool_vdev.c
+++ /dev/null
@@ -1,883 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Functions to convert between a list of vdevs and an nvlist representing the
- * configuration. Each entry in the list can be one of:
- *
- * Device vdevs
- * disk=(path=..., devid=...)
- * file=(path=...)
- *
- * Group vdevs
- * raidz[1|2]=(...)
- * mirror=(...)
- *
- * Hot spares
- *
- * While the underlying implementation supports it, group vdevs cannot contain
- * other group vdevs. All userland verification of devices is contained within
- * this file. If successful, the nvlist returned can be passed directly to the
- * kernel; we've done as much verification as possible in userland.
- *
- * Hot spares are a special case, and passed down as an array of disk vdevs, at
- * the same level as the root of the vdev tree.
- *
- * The only function exported by this file is 'get_vdev_spec'. The function
- * performs several passes:
- *
- * 1. Construct the vdev specification. Performs syntax validation and
- * makes sure each device is valid.
- * 2. Check for devices in use. Using libdiskmgt, makes sure that no
- * devices are also in use. Some can be overridden using the 'force'
- * flag, others cannot.
- * 3. Check for replication errors if the 'force' flag is not specified.
- * validates that the replication level is consistent across the
- * entire pool.
- */
-
-#include <assert.h>
-#include <devid.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libintl.h>
-#include <libnvpair.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <paths.h>
-#include <sys/stat.h>
-#include <sys/disk.h>
-#include <sys/mntent.h>
-#include <libgeom.h>
-
-#include <libzfs.h>
-
-#include "zpool_util.h"
-
-/*
- * For any given vdev specification, we can have multiple errors. The
- * vdev_error() function keeps track of whether we have seen an error yet, and
- * prints out a header if its the first error we've seen.
- */
-boolean_t error_seen;
-boolean_t is_force;
-
-/*PRINTFLIKE1*/
-static void
-vdev_error(const char *fmt, ...)
-{
- va_list ap;
-
- if (!error_seen) {
- (void) fprintf(stderr, gettext("invalid vdev specification\n"));
- if (!is_force)
- (void) fprintf(stderr, gettext("use '-f' to override "
- "the following errors:\n"));
- else
- (void) fprintf(stderr, gettext("the following errors "
- "must be manually repaired:\n"));
- error_seen = B_TRUE;
- }
-
- va_start(ap, fmt);
- (void) vfprintf(stderr, fmt, ap);
- va_end(ap);
-}
-
-/*
- * Validate a GEOM provider.
- */
-static int
-check_provider(const char *name, boolean_t force, boolean_t isspare)
-{
- struct gmesh mesh;
- struct gclass *mp;
- struct ggeom *gp;
- struct gprovider *pp;
- int rv;
-
- /* XXX: What to do with isspare? */
-
- if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- name += sizeof(_PATH_DEV) - 1;
-
- rv = geom_gettree(&mesh);
- assert(rv == 0);
-
- pp = NULL;
- LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
- LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
- LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
- if (strcmp(pp->lg_name, name) == 0)
- goto out;
- }
- }
- }
-out:
- rv = -1;
- if (pp == NULL)
- vdev_error("no such provider %s\n", name);
- else {
- int acr, acw, ace;
-
- VERIFY(sscanf(pp->lg_mode, "r%dw%de%d", &acr, &acw, &ace) == 3);
- if (acw == 0 && ace == 0)
- rv = 0;
- else
- vdev_error("%s is in use (%s)\n", name, pp->lg_mode);
- }
- geom_deletetree(&mesh);
- return (rv);
-}
-
-static boolean_t
-is_provider(const char *name)
-{
- int fd;
-
- fd = g_open(name, 0);
- if (fd >= 0) {
- g_close(fd);
- return (B_TRUE);
- }
- return (B_FALSE);
-
-}
-/*
- * Create a leaf vdev. Determine if this is a GEOM provider.
- * Valid forms for a leaf vdev are:
- *
- * /dev/xxx Complete path to a GEOM provider
- * xxx Shorthand for /dev/xxx
- */
-nvlist_t *
-make_leaf_vdev(const char *arg)
-{
- char ident[DISK_IDENT_SIZE], path[MAXPATHLEN];
- struct stat64 statbuf;
- nvlist_t *vdev = NULL;
- char *type = NULL;
- boolean_t wholedisk = B_FALSE;
-
- if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- strlcpy(path, arg, sizeof (path));
- else
- snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
-
- if (is_provider(path))
- type = VDEV_TYPE_DISK;
- else {
- (void) fprintf(stderr, gettext("cannot use '%s': must be a "
- "GEOM provider\n"), path);
- return (NULL);
- }
-
- /*
- * Finally, we have the complete device or file, and we know that it is
- * acceptable to use. Construct the nvlist to describe this vdev. All
- * vdevs have a 'path' element, and devices also have a 'devid' element.
- */
- verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
- verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
- verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
- if (strcmp(type, VDEV_TYPE_DISK) == 0)
- verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
- (uint64_t)B_FALSE) == 0);
-
- /*
- * For a whole disk, defer getting its devid until after labeling it.
- */
- if (1 || (S_ISBLK(statbuf.st_mode) && !wholedisk)) {
- /*
- * Get the devid for the device.
- */
- int fd;
- ddi_devid_t devid;
- char *minor = NULL, *devid_str = NULL;
-
- if ((fd = open(path, O_RDONLY)) < 0) {
- (void) fprintf(stderr, gettext("cannot open '%s': "
- "%s\n"), path, strerror(errno));
- nvlist_free(vdev);
- return (NULL);
- }
-
- if (devid_get(fd, &devid) == 0) {
- if (devid_get_minor_name(fd, &minor) == 0 &&
- (devid_str = devid_str_encode(devid, minor)) !=
- NULL) {
- verify(nvlist_add_string(vdev,
- ZPOOL_CONFIG_DEVID, devid_str) == 0);
- }
- if (devid_str != NULL)
- devid_str_free(devid_str);
- if (minor != NULL)
- devid_str_free(minor);
- devid_free(devid);
- }
-
- (void) close(fd);
- }
-
- return (vdev);
-}
-
-/*
- * Go through and verify the replication level of the pool is consistent.
- * Performs the following checks:
- *
- * For the new spec, verifies that devices in mirrors and raidz are the
- * same size.
- *
- * If the current configuration already has inconsistent replication
- * levels, ignore any other potential problems in the new spec.
- *
- * Otherwise, make sure that the current spec (if there is one) and the new
- * spec have consistent replication levels.
- */
-typedef struct replication_level {
- char *zprl_type;
- uint64_t zprl_children;
- uint64_t zprl_parity;
-} replication_level_t;
-
-/*
- * Given a list of toplevel vdevs, return the current replication level. If
- * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
- * an error message will be displayed for each self-inconsistent vdev.
- */
-replication_level_t *
-get_replication(nvlist_t *nvroot, boolean_t fatal)
-{
- nvlist_t **top;
- uint_t t, toplevels;
- nvlist_t **child;
- uint_t c, children;
- nvlist_t *nv;
- char *type;
- replication_level_t lastrep, rep, *ret;
- boolean_t dontreport;
-
- ret = safe_malloc(sizeof (replication_level_t));
-
- verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- &top, &toplevels) == 0);
-
- lastrep.zprl_type = NULL;
- for (t = 0; t < toplevels; t++) {
- nv = top[t];
-
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0) {
- /*
- * This is a 'file' or 'disk' vdev.
- */
- rep.zprl_type = type;
- rep.zprl_children = 1;
- rep.zprl_parity = 0;
- } else {
- uint64_t vdev_size;
-
- /*
- * This is a mirror or RAID-Z vdev. Go through and make
- * sure the contents are all the same (files vs. disks),
- * keeping track of the number of elements in the
- * process.
- *
- * We also check that the size of each vdev (if it can
- * be determined) is the same.
- */
- rep.zprl_type = type;
- rep.zprl_children = 0;
-
- if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
- verify(nvlist_lookup_uint64(nv,
- ZPOOL_CONFIG_NPARITY,
- &rep.zprl_parity) == 0);
- assert(rep.zprl_parity != 0);
- } else {
- rep.zprl_parity = 0;
- }
-
- /*
- * The 'dontreport' variable indicatest that we've
- * already reported an error for this spec, so don't
- * bother doing it again.
- */
- type = NULL;
- dontreport = 0;
- vdev_size = -1ULL;
- for (c = 0; c < children; c++) {
- nvlist_t *cnv = child[c];
- char *path;
- struct stat64 statbuf;
- uint64_t size = -1ULL;
- char *childtype;
- int fd, err;
-
- rep.zprl_children++;
-
- verify(nvlist_lookup_string(cnv,
- ZPOOL_CONFIG_TYPE, &childtype) == 0);
-
- /*
- * If this is a a replacing or spare vdev, then
- * get the real first child of the vdev.
- */
- if (strcmp(childtype,
- VDEV_TYPE_REPLACING) == 0 ||
- strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
- nvlist_t **rchild;
- uint_t rchildren;
-
- verify(nvlist_lookup_nvlist_array(cnv,
- ZPOOL_CONFIG_CHILDREN, &rchild,
- &rchildren) == 0);
- assert(rchildren == 2);
- cnv = rchild[0];
-
- verify(nvlist_lookup_string(cnv,
- ZPOOL_CONFIG_TYPE,
- &childtype) == 0);
- }
-
- verify(nvlist_lookup_string(cnv,
- ZPOOL_CONFIG_PATH, &path) == 0);
-
- /*
- * If we have a raidz/mirror that combines disks
- * with files, report it as an error.
- */
- if (!dontreport && type != NULL &&
- strcmp(type, childtype) != 0) {
- if (ret != NULL)
- free(ret);
- ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "mismatched replication "
- "level: %s contains both "
- "files and devices\n"),
- rep.zprl_type);
- else
- return (NULL);
- dontreport = B_TRUE;
- }
-
- /*
- * According to stat(2), the value of 'st_size'
- * is undefined for block devices and character
- * devices. But there is no effective way to
- * determine the real size in userland.
- *
- * Instead, we'll take advantage of an
- * implementation detail of spec_size(). If the
- * device is currently open, then we (should)
- * return a valid size.
- *
- * If we still don't get a valid size (indicated
- * by a size of 0 or MAXOFFSET_T), then ignore
- * this device altogether.
- */
- if ((fd = open(path, O_RDONLY)) >= 0) {
- err = fstat64(fd, &statbuf);
- (void) close(fd);
- } else {
- err = stat64(path, &statbuf);
- }
-
- if (err != 0 || statbuf.st_size == 0)
- continue;
-
- size = statbuf.st_size;
-
- /*
- * Also check the size of each device. If they
- * differ, then report an error.
- */
- if (!dontreport && vdev_size != -1ULL &&
- size != vdev_size) {
- if (ret != NULL)
- free(ret);
- ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "%s contains devices of "
- "different sizes\n"),
- rep.zprl_type);
- else
- return (NULL);
- dontreport = B_TRUE;
- }
-
- type = childtype;
- vdev_size = size;
- }
- }
-
- /*
- * At this point, we have the replication of the last toplevel
- * vdev in 'rep'. Compare it to 'lastrep' to see if its
- * different.
- */
- if (lastrep.zprl_type != NULL) {
- if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
- if (ret != NULL)
- free(ret);
- ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "mismatched replication level: "
- "both %s and %s vdevs are "
- "present\n"),
- lastrep.zprl_type, rep.zprl_type);
- else
- return (NULL);
- } else if (lastrep.zprl_parity != rep.zprl_parity) {
- if (ret)
- free(ret);
- ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "mismatched replication level: "
- "both %llu and %llu device parity "
- "%s vdevs are present\n"),
- lastrep.zprl_parity,
- rep.zprl_parity,
- rep.zprl_type);
- else
- return (NULL);
- } else if (lastrep.zprl_children != rep.zprl_children) {
- if (ret)
- free(ret);
- ret = NULL;
- if (fatal)
- vdev_error(gettext(
- "mismatched replication level: "
- "both %llu-way and %llu-way %s "
- "vdevs are present\n"),
- lastrep.zprl_children,
- rep.zprl_children,
- rep.zprl_type);
- else
- return (NULL);
- }
- }
- lastrep = rep;
- }
-
- if (ret != NULL)
- *ret = rep;
-
- return (ret);
-}
-
-/*
- * Check the replication level of the vdev spec against the current pool. Calls
- * get_replication() to make sure the new spec is self-consistent. If the pool
- * has a consistent replication level, then we ignore any errors. Otherwise,
- * report any difference between the two.
- */
-int
-check_replication(nvlist_t *config, nvlist_t *newroot)
-{
- replication_level_t *current = NULL, *new;
- int ret;
-
- /*
- * If we have a current pool configuration, check to see if it's
- * self-consistent. If not, simply return success.
- */
- if (config != NULL) {
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if ((current = get_replication(nvroot, B_FALSE)) == NULL)
- return (0);
- }
-
- /*
- * Get the replication level of the new vdev spec, reporting any
- * inconsistencies found.
- */
- if ((new = get_replication(newroot, B_TRUE)) == NULL) {
- free(current);
- return (-1);
- }
-
- /*
- * Check to see if the new vdev spec matches the replication level of
- * the current pool.
- */
- ret = 0;
- if (current != NULL) {
- if (strcmp(current->zprl_type, new->zprl_type) != 0) {
- vdev_error(gettext(
- "mismatched replication level: pool uses %s "
- "and new vdev is %s\n"),
- current->zprl_type, new->zprl_type);
- ret = -1;
- } else if (current->zprl_parity != new->zprl_parity) {
- vdev_error(gettext(
- "mismatched replication level: pool uses %llu "
- "device parity and new vdev uses %llu\n"),
- current->zprl_parity, new->zprl_parity);
- ret = -1;
- } else if (current->zprl_children != new->zprl_children) {
- vdev_error(gettext(
- "mismatched replication level: pool uses %llu-way "
- "%s and new vdev uses %llu-way %s\n"),
- current->zprl_children, current->zprl_type,
- new->zprl_children, new->zprl_type);
- ret = -1;
- }
- }
-
- free(new);
- if (current != NULL)
- free(current);
-
- return (ret);
-}
-
-/*
- * Determine if the given path is a hot spare within the given configuration.
- */
-static boolean_t
-is_spare(nvlist_t *config, const char *path)
-{
- int fd;
- pool_state_t state;
- char *name = NULL;
- nvlist_t *label;
- uint64_t guid, spareguid;
- nvlist_t *nvroot;
- nvlist_t **spares;
- uint_t i, nspares;
- boolean_t inuse;
-
- if ((fd = open(path, O_RDONLY)) < 0)
- return (B_FALSE);
-
- if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
- !inuse ||
- state != POOL_STATE_SPARE ||
- zpool_read_label(fd, &label) != 0) {
- free(name);
- (void) close(fd);
- return (B_FALSE);
- }
- free(name);
-
- (void) close(fd);
- verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
- nvlist_free(label);
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- for (i = 0; i < nspares; i++) {
- verify(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &spareguid) == 0);
- if (spareguid == guid)
- return (B_TRUE);
- }
- }
-
- return (B_FALSE);
-}
-
-/*
- * Go through and find any devices that are in use. We rely on libdiskmgt for
- * the majority of this task.
- */
-int
-check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
- int isspare)
-{
- nvlist_t **child;
- uint_t c, children;
- char *type, *path;
- int ret;
- char buf[MAXPATHLEN];
- uint64_t wholedisk;
-
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0) {
-
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-
- /*
- * As a generic check, we look to see if this is a replace of a
- * hot spare within the same pool. If so, we allow it
- * regardless of what libdiskmgt or zpool_in_use() says.
- */
- if (isreplacing) {
- (void) strlcpy(buf, path, sizeof (buf));
- if (is_spare(config, buf))
- return (0);
- }
-
- if (strcmp(type, VDEV_TYPE_DISK) == 0)
- ret = check_provider(path, force, isspare);
-
- return (ret);
- }
-
- for (c = 0; c < children; c++)
- if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_FALSE)) != 0)
- return (ret);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
- &child, &children) == 0)
- for (c = 0; c < children; c++)
- if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_TRUE)) != 0)
- return (ret);
-
- return (0);
-}
-
-const char *
-is_grouping(const char *type, int *mindev)
-{
- if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
- if (mindev != NULL)
- *mindev = 2;
- return (VDEV_TYPE_RAIDZ);
- }
-
- if (strcmp(type, "raidz2") == 0) {
- if (mindev != NULL)
- *mindev = 3;
- return (VDEV_TYPE_RAIDZ);
- }
-
- if (strcmp(type, "mirror") == 0) {
- if (mindev != NULL)
- *mindev = 2;
- return (VDEV_TYPE_MIRROR);
- }
-
- if (strcmp(type, "spare") == 0) {
- if (mindev != NULL)
- *mindev = 1;
- return (VDEV_TYPE_SPARE);
- }
-
- return (NULL);
-}
-
-/*
- * Construct a syntactically valid vdev specification,
- * and ensure that all devices and files exist and can be opened.
- * Note: we don't bother freeing anything in the error paths
- * because the program is just going to exit anyway.
- */
-nvlist_t *
-construct_spec(int argc, char **argv)
-{
- nvlist_t *nvroot, *nv, **top, **spares;
- int t, toplevels, mindev, nspares;
- const char *type;
-
- top = NULL;
- toplevels = 0;
- spares = NULL;
- nspares = 0;
-
- while (argc > 0) {
- nv = NULL;
-
- /*
- * If it's a mirror or raidz, the subsequent arguments are
- * its leaves -- until we encounter the next mirror or raidz.
- */
- if ((type = is_grouping(argv[0], &mindev)) != NULL) {
- nvlist_t **child = NULL;
- int c, children = 0;
-
- if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
- spares != NULL) {
- (void) fprintf(stderr, gettext("invalid vdev "
- "specification: 'spare' can be "
- "specified only once\n"));
- return (NULL);
- }
-
- for (c = 1; c < argc; c++) {
- if (is_grouping(argv[c], NULL) != NULL)
- break;
- children++;
- child = realloc(child,
- children * sizeof (nvlist_t *));
- if (child == NULL)
- zpool_no_memory();
- if ((nv = make_leaf_vdev(argv[c])) == NULL)
- return (NULL);
- child[children - 1] = nv;
- }
-
- if (children < mindev) {
- (void) fprintf(stderr, gettext("invalid vdev "
- "specification: %s requires at least %d "
- "devices\n"), argv[0], mindev);
- return (NULL);
- }
-
- argc -= c;
- argv += c;
-
- if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
- spares = child;
- nspares = children;
- continue;
- } else {
- verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
- 0) == 0);
- verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
- type) == 0);
- if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
- verify(nvlist_add_uint64(nv,
- ZPOOL_CONFIG_NPARITY,
- mindev - 1) == 0);
- }
- verify(nvlist_add_nvlist_array(nv,
- ZPOOL_CONFIG_CHILDREN, child,
- children) == 0);
-
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
- }
- } else {
- /*
- * We have a device. Pass off to make_leaf_vdev() to
- * construct the appropriate nvlist describing the vdev.
- */
- if ((nv = make_leaf_vdev(argv[0])) == NULL)
- return (NULL);
- argc--;
- argv++;
- }
-
- toplevels++;
- top = realloc(top, toplevels * sizeof (nvlist_t *));
- if (top == NULL)
- zpool_no_memory();
- top[toplevels - 1] = nv;
- }
-
- if (toplevels == 0 && nspares == 0) {
- (void) fprintf(stderr, gettext("invalid vdev "
- "specification: at least one toplevel vdev must be "
- "specified\n"));
- return (NULL);
- }
-
- /*
- * Finally, create nvroot and add all top-level vdevs to it.
- */
- verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
- verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_ROOT) == 0);
- verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- top, toplevels) == 0);
- if (nspares != 0)
- verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- spares, nspares) == 0);
-
- for (t = 0; t < toplevels; t++)
- nvlist_free(top[t]);
- for (t = 0; t < nspares; t++)
- nvlist_free(spares[t]);
- if (spares)
- free(spares);
- free(top);
-
- return (nvroot);
-}
-
-/*
- * Get and validate the contents of the given vdev specification. This ensures
- * that the nvlist returned is well-formed, that all the devices exist, and that
- * they are not currently in use by any other known consumer. The 'poolconfig'
- * parameter is the current configuration of the pool when adding devices
- * existing pool, and is used to perform additional checks, such as changing the
- * replication level of the pool. It can be 'NULL' to indicate that this is a
- * new pool. The 'force' flag controls whether devices should be forcefully
- * added, even if they appear in use.
- */
-nvlist_t *
-make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- boolean_t isreplacing, int argc, char **argv)
-{
- nvlist_t *newroot;
-
- is_force = force;
-
- /*
- * Construct the vdev specification. If this is successful, we know
- * that we have a valid specification, and that all devices can be
- * opened.
- */
- if ((newroot = construct_spec(argc, argv)) == NULL)
- return (NULL);
-
- /*
- * Validate each device to make sure that its not shared with another
- * subsystem. We do this even if 'force' is set, because there are some
- * uses (such as a dedicated dump device) that even '-f' cannot
- * override.
- */
- if (check_in_use(poolconfig, newroot, force, isreplacing,
- B_FALSE) != 0) {
- nvlist_free(newroot);
- return (NULL);
- }
-
- /*
- * Check the replication level of the given vdevs and report any errors
- * found. We include the existing pool spec, if any, as we need to
- * catch changes against the existing replication level.
- */
- if (check_rep && check_replication(poolconfig, newroot) != 0) {
- nvlist_free(newroot);
- return (NULL);
- }
-
- return (newroot);
-}
diff --git a/contrib/opensolaris/cmd/ztest/ztest.c b/contrib/opensolaris/cmd/ztest/ztest.c
deleted file mode 100644
index 5d9f028..0000000
--- a/contrib/opensolaris/cmd/ztest/ztest.c
+++ /dev/null
@@ -1,3495 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * The objective of this program is to provide a DMU/ZAP/SPA stress test
- * that runs entirely in userland, is easy to use, and easy to extend.
- *
- * The overall design of the ztest program is as follows:
- *
- * (1) For each major functional area (e.g. adding vdevs to a pool,
- * creating and destroying datasets, reading and writing objects, etc)
- * we have a simple routine to test that functionality. These
- * individual routines do not have to do anything "stressful".
- *
- * (2) We turn these simple functionality tests into a stress test by
- * running them all in parallel, with as many threads as desired,
- * and spread across as many datasets, objects, and vdevs as desired.
- *
- * (3) While all this is happening, we inject faults into the pool to
- * verify that self-healing data really works.
- *
- * (4) Every time we open a dataset, we change its checksum and compression
- * functions. Thus even individual objects vary from block to block
- * in which checksum they use and whether they're compressed.
- *
- * (5) To verify that we never lose on-disk consistency after a crash,
- * we run the entire test in a child of the main process.
- * At random times, the child self-immolates with a SIGKILL.
- * This is the software equivalent of pulling the power cord.
- * The parent then runs the test again, using the existing
- * storage pool, as many times as desired.
- *
- * (6) To verify that we don't have future leaks or temporal incursions,
- * many of the functional tests record the transaction group number
- * as part of their data. When reading old data, they verify that
- * the transaction group number is less than the current, open txg.
- * If you add a new test, please do this if applicable.
- *
- * When run with no arguments, ztest runs for about five minutes and
- * produces no output if successful. To get a little bit of information,
- * specify -V. To get more information, specify -VV, and so on.
- *
- * To turn this into an overnight stress test, use -T to specify run time.
- *
- * You can ask more more vdevs [-v], datasets [-d], or threads [-t]
- * to increase the pool capacity, fanout, and overall stress level.
- *
- * The -N(okill) option will suppress kills, so each child runs to completion.
- * This can be useful when you're trying to distinguish temporal incursions
- * from plain old race conditions.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/txg.h>
-#include <sys/zap.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dmu_objset.h>
-#include <sys/poll.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/resource.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
-#include <sys/zil.h>
-#include <sys/vdev_impl.h>
-#include <sys/spa_impl.h>
-#include <sys/dsl_prop.h>
-#include <sys/refcount.h>
-#include <stdio.h>
-#include <stdio_ext.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <umem.h>
-#include <dlfcn.h>
-#include <ctype.h>
-#include <math.h>
-#include <errno.h>
-#include <sys/fs/zfs.h>
-
-static char cmdname[] = "ztest";
-static char *zopt_pool = cmdname;
-static char *progname;
-
-static uint64_t zopt_vdevs = 5;
-static uint64_t zopt_vdevtime;
-static int zopt_ashift = SPA_MINBLOCKSHIFT;
-static int zopt_mirrors = 2;
-static int zopt_raidz = 4;
-static int zopt_raidz_parity = 1;
-static size_t zopt_vdev_size = SPA_MINDEVSIZE;
-static int zopt_datasets = 7;
-static int zopt_threads = 23;
-static uint64_t zopt_passtime = 60; /* 60 seconds */
-static uint64_t zopt_killrate = 70; /* 70% kill rate */
-static int zopt_verbose = 0;
-static int zopt_init = 1;
-static char *zopt_dir = "/tmp";
-static uint64_t zopt_time = 300; /* 5 minutes */
-static int zopt_maxfaults;
-
-typedef struct ztest_args {
- char *za_pool;
- objset_t *za_os;
- zilog_t *za_zilog;
- thread_t za_thread;
- uint64_t za_instance;
- uint64_t za_random;
- uint64_t za_diroff;
- uint64_t za_diroff_shared;
- uint64_t za_zil_seq;
- hrtime_t za_start;
- hrtime_t za_stop;
- hrtime_t za_kill;
- traverse_handle_t *za_th;
-} ztest_args_t;
-
-typedef void ztest_func_t(ztest_args_t *);
-
-/*
- * Note: these aren't static because we want dladdr() to work.
- */
-ztest_func_t ztest_dmu_read_write;
-ztest_func_t ztest_dmu_write_parallel;
-ztest_func_t ztest_dmu_object_alloc_free;
-ztest_func_t ztest_zap;
-ztest_func_t ztest_zap_parallel;
-ztest_func_t ztest_traverse;
-ztest_func_t ztest_dsl_prop_get_set;
-ztest_func_t ztest_dmu_objset_create_destroy;
-ztest_func_t ztest_dmu_snapshot_create_destroy;
-ztest_func_t ztest_spa_create_destroy;
-ztest_func_t ztest_fault_inject;
-ztest_func_t ztest_vdev_attach_detach;
-ztest_func_t ztest_vdev_LUN_growth;
-ztest_func_t ztest_vdev_add_remove;
-ztest_func_t ztest_scrub;
-ztest_func_t ztest_spa_rename;
-
-typedef struct ztest_info {
- ztest_func_t *zi_func; /* test function */
- uint64_t *zi_interval; /* execute every <interval> seconds */
- uint64_t zi_calls; /* per-pass count */
- uint64_t zi_call_time; /* per-pass time */
- uint64_t zi_call_total; /* cumulative total */
- uint64_t zi_call_target; /* target cumulative total */
-} ztest_info_t;
-
-uint64_t zopt_always = 0; /* all the time */
-uint64_t zopt_often = 1; /* every second */
-uint64_t zopt_sometimes = 10; /* every 10 seconds */
-uint64_t zopt_rarely = 60; /* every 60 seconds */
-
-ztest_info_t ztest_info[] = {
- { ztest_dmu_read_write, &zopt_always },
- { ztest_dmu_write_parallel, &zopt_always },
- { ztest_dmu_object_alloc_free, &zopt_always },
- { ztest_zap, &zopt_always },
- { ztest_zap_parallel, &zopt_always },
- { ztest_traverse, &zopt_often },
- { ztest_dsl_prop_get_set, &zopt_sometimes },
- { ztest_dmu_objset_create_destroy, &zopt_sometimes },
- { ztest_dmu_snapshot_create_destroy, &zopt_rarely },
- { ztest_spa_create_destroy, &zopt_sometimes },
- { ztest_fault_inject, &zopt_sometimes },
- { ztest_spa_rename, &zopt_rarely },
- { ztest_vdev_attach_detach, &zopt_rarely },
- { ztest_vdev_LUN_growth, &zopt_rarely },
- { ztest_vdev_add_remove, &zopt_vdevtime },
- { ztest_scrub, &zopt_vdevtime },
-};
-
-#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
-
-#define ZTEST_SYNC_LOCKS 16
-
-/*
- * Stuff we need to share writably between parent and child.
- */
-typedef struct ztest_shared {
- mutex_t zs_vdev_lock;
- rwlock_t zs_name_lock;
- uint64_t zs_vdev_primaries;
- uint64_t zs_enospc_count;
- hrtime_t zs_start_time;
- hrtime_t zs_stop_time;
- uint64_t zs_alloc;
- uint64_t zs_space;
- uint64_t zs_txg;
- ztest_info_t zs_info[ZTEST_FUNCS];
- mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
- uint64_t zs_seq[ZTEST_SYNC_LOCKS];
-} ztest_shared_t;
-
-typedef struct ztest_block_tag {
- uint64_t bt_objset;
- uint64_t bt_object;
- uint64_t bt_offset;
- uint64_t bt_txg;
- uint64_t bt_thread;
- uint64_t bt_seq;
-} ztest_block_tag_t;
-
-static char ztest_dev_template[] = "%s/%s.%llua";
-static ztest_shared_t *ztest_shared;
-
-static int ztest_random_fd;
-static int ztest_dump_core = 1;
-
-extern uint64_t zio_gang_bang;
-extern uint16_t zio_zil_fail_shift;
-
-#define ZTEST_DIROBJ 1
-#define ZTEST_MICROZAP_OBJ 2
-#define ZTEST_FATZAP_OBJ 3
-
-#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10)
-#define ZTEST_DIRSIZE 256
-
-static void usage(boolean_t) __NORETURN;
-
-/*
- * These libumem hooks provide a reasonable set of defaults for the allocator's
- * debugging facilities.
- */
-const char *
-_umem_debug_init()
-{
- return ("default,verbose"); /* $UMEM_DEBUG setting */
-}
-
-const char *
-_umem_logging_init(void)
-{
- return ("fail,contents"); /* $UMEM_LOGGING setting */
-}
-
-#define FATAL_MSG_SZ 1024
-
-char *fatal_msg;
-
-static void
-fatal(int do_perror, char *message, ...)
-{
- va_list args;
- int save_errno = errno;
- char buf[FATAL_MSG_SZ];
-
- (void) fflush(stdout);
-
- va_start(args, message);
- (void) sprintf(buf, "ztest: ");
- /* LINTED */
- (void) vsprintf(buf + strlen(buf), message, args);
- va_end(args);
- if (do_perror) {
- (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf),
- ": %s", strerror(save_errno));
- }
- (void) fprintf(stderr, "%s\n", buf);
- fatal_msg = buf; /* to ease debugging */
- if (ztest_dump_core)
- abort();
- exit(3);
-}
-
-/*
- * Convert a size suffix into a power-of-two shift count.
- *
- * An empty string yields 0.  Otherwise the first character must be one of
- * B, K, M, G, T, P, E, Z (case-insensitive), optionally followed by a single
- * 'B' (e.g. "K" or "KB"); the result is 10 times the letter's position in
- * that list (B = 0, K = 10, M = 20, ...).  Anything else prints an error and
- * calls usage(B_FALSE), which does not return.
- */
-static int
-str2shift(const char *buf)
-{
-	const char *ends = "BKMGTPEZ";
-	size_t nends = strlen(ends);
-	size_t i;
-
-	if (buf[0] == '\0')
-		return (0);
-	/* toupper() requires a value representable as unsigned char. */
-	for (i = 0; i < nends; i++) {
-		if (toupper((unsigned char)buf[0]) == ends[i])
-			break;
-	}
-	if (i == nends) {
-		(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
-		    buf);
-		usage(B_FALSE);
-	}
-	if (buf[1] == '\0' ||
-	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0')) {
-		return ((int)(10 * i));
-	}
-	(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
-	usage(B_FALSE);
-	/* NOTREACHED */
-}
-
-/*
- * Parse a human-friendly number such as "512", "0x200", "64K" or "1.5G"
- * into a uint64_t.
- *
- * The integer part is read with strtoull() (base auto-detected).  If it is
- * followed by '.', the whole string is re-read with strtod() and scaled by
- * 2^shift; otherwise the integer is shifted left.  The shift comes from
- * str2shift() applied to whatever follows the number.
- *
- * Input that is not numeric, is negative in the fractional form, or does
- * not fit in 64 bits prints a diagnostic and calls usage(B_FALSE), which
- * does not return.
- */
-static uint64_t
-nicenumtoull(const char *buf)
-{
-	char *end;
-	uint64_t val;
-	int overflowed;
-
-	errno = 0;
-	val = strtoull(buf, &end, 0);
-	/* Latch the range error now; later calls may change errno. */
-	overflowed = (errno == ERANGE);
-
-	if (end == buf) {
-		(void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
-		usage(B_FALSE);
-	} else if (end[0] == '.') {
-		double fval = strtod(buf, &end);
-		fval *= pow(2, str2shift(end));
-		if (fval < 0) {
-			/* A negative double cannot be converted to uint64_t. */
-			(void) fprintf(stderr,
-			    "ztest: bad numeric value: %s\n", buf);
-			usage(B_FALSE);
-		}
-		/*
-		 * (double)UINT64_MAX rounds up to 2^64, which itself does
-		 * not fit in a uint64_t, so the bound must be inclusive.
-		 */
-		if (fval >= (double)UINT64_MAX) {
-			(void) fprintf(stderr, "ztest: value too large: %s\n",
-			    buf);
-			usage(B_FALSE);
-		}
-		val = (uint64_t)fval;
-	} else {
-		int shift = str2shift(end);
-		if (overflowed || shift >= 64 ||
-		    (val << shift) >> shift != val) {
-			(void) fprintf(stderr, "ztest: value too large: %s\n",
-			    buf);
-			usage(B_FALSE);
-		}
-		val <<= shift;
-	}
-	return (val);
-}
-
-/*
- * Print the option summary, including the current default of every tunable,
- * and exit.  A requested help screen (-h) goes to stdout with exit status 0;
- * otherwise the text goes to stderr and the exit status is 1.
- */
-static void
-usage(boolean_t requested)
-{
-	char vdev_size_str[10];
-	char gang_bang_str[10];
-	FILE *out;
-	int status;
-
-	if (requested) {
-		out = stdout;
-		status = 0;
-	} else {
-		out = stderr;
-		status = 1;
-	}
-
-	/* Render the two byte-count defaults in human-readable form. */
-	nicenum(zopt_vdev_size, vdev_size_str);
-	nicenum(zio_gang_bang, gang_bang_str);
-
-	(void) fprintf(out, "Usage: %s\n"
-	    "\t[-v vdevs (default: %llu)]\n"
-	    "\t[-s size_of_each_vdev (default: %s)]\n"
-	    "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
-	    "\t[-m mirror_copies (default: %d)]\n"
-	    "\t[-r raidz_disks (default: %d)]\n"
-	    "\t[-R raidz_parity (default: %d)]\n"
-	    "\t[-d datasets (default: %d)]\n"
-	    "\t[-t threads (default: %d)]\n"
-	    "\t[-g gang_block_threshold (default: %s)]\n"
-	    "\t[-i initialize pool i times (default: %d)]\n"
-	    "\t[-k kill percentage (default: %llu%%)]\n"
-	    "\t[-p pool_name (default: %s)]\n"
-	    "\t[-f file directory for vdev files (default: %s)]\n"
-	    "\t[-V(erbose)] (use multiple times for ever more blather)\n"
-	    "\t[-E(xisting)] (use existing pool instead of creating new one)\n"
-	    "\t[-T time] total run time (default: %llu sec)\n"
-	    "\t[-P passtime] time per pass (default: %llu sec)\n"
-	    "\t[-z zil failure rate (default: fail every 2^%llu allocs)]\n"
-	    "\t[-h] (print help)\n",
-	    cmdname,
-	    (u_longlong_t)zopt_vdevs,			/* -v */
-	    vdev_size_str,				/* -s */
-	    zopt_ashift,				/* -a */
-	    zopt_mirrors,				/* -m */
-	    zopt_raidz,					/* -r */
-	    zopt_raidz_parity,				/* -R */
-	    zopt_datasets,				/* -d */
-	    zopt_threads,				/* -t */
-	    gang_bang_str,				/* -g */
-	    zopt_init,					/* -i */
-	    (u_longlong_t)zopt_killrate,		/* -k */
-	    zopt_pool,					/* -p */
-	    zopt_dir,					/* -f */
-	    (u_longlong_t)zopt_time,			/* -T */
-	    (u_longlong_t)zopt_passtime,		/* -P */
-	    (u_longlong_t)zio_zil_fail_shift);		/* -z */
-
-	exit(status);
-}
-
-/*
- * Return a random value in [0, range), or 0 when range is 0.  Eight bytes
- * are read from ztest_random_fd for every non-zero request; a short read
- * is fatal.
- */
-static uint64_t
-ztest_random(uint64_t range)
-{
-	uint64_t raw;
-
-	if (range != 0) {
-		if (read(ztest_random_fd, &raw, sizeof (raw)) !=
-		    sizeof (raw))
-			fatal(1, "short read from /dev/urandom");
-		return (raw % range);
-	}
-
-	return (0);
-}
-
-/*
- * Note that an operation ran out of space: emit a debug message naming the
- * operation ("<unknown>" when s is NULL) and bump the shared ENOSPC counter.
- */
-static void
-ztest_record_enospc(char *s)
-{
-	char *what = s;
-
-	if (what == NULL)
-		what = "<unknown>";
-
-	dprintf("ENOSPC doing: %s\n", what);
-	ztest_shared->zs_enospc_count++;
-}
-
-/*
- * Parse the command line into the zopt_* and zio_* tunables.
- *
- * Options that take a number are first converted with nicenumtoull() (so
- * suffixes such as "64K" are accepted) and then clamped where a minimum or
- * maximum applies.  -p and -f keep a private copy of their string argument.
- * -h prints the help text and exits 0; an unknown option prints it and
- * exits 1.
- *
- * After parsing, the raidz parity is limited to one less than the number of
- * raidz disks, and zopt_vdevtime and zopt_maxfaults are derived from the
- * final settings.
- */
-static void
-process_options(int argc, char **argv)
-{
-	int opt;
-	uint64_t value;
-
-	/* Remember program name. */
-	progname = argv[0];
-
-	/* By default, test gang blocks for blocks 32K and greater */
-	zio_gang_bang = 32 << 10;
-
-	/* Default value, fail every 32nd allocation */
-	zio_zil_fail_shift = 5;
-
-	/* getopt() reports the end of the options by returning -1. */
-	while ((opt = getopt(argc, argv,
-	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:z:h")) != -1) {
-		value = 0;
-		/* First pass: convert the argument of numeric options. */
-		switch (opt) {
-		case 'v':
-		case 's':
-		case 'a':
-		case 'm':
-		case 'r':
-		case 'R':
-		case 'd':
-		case 't':
-		case 'g':
-		case 'i':
-		case 'k':
-		case 'T':
-		case 'P':
-		case 'z':
-			value = nicenumtoull(optarg);
-			break;
-		default:
-			break;
-		}
-		/* Second pass: store the option. */
-		switch (opt) {
-		case 'v':
-			zopt_vdevs = value;
-			break;
-		case 's':
-			zopt_vdev_size = MAX(SPA_MINDEVSIZE, value);
-			break;
-		case 'a':
-			zopt_ashift = value;
-			break;
-		case 'm':
-			zopt_mirrors = value;
-			break;
-		case 'r':
-			zopt_raidz = MAX(1, value);
-			break;
-		case 'R':
-			zopt_raidz_parity = MIN(MAX(value, 1), 2);
-			break;
-		case 'd':
-			zopt_datasets = MAX(1, value);
-			break;
-		case 't':
-			zopt_threads = MAX(1, value);
-			break;
-		case 'g':
-			zio_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value);
-			break;
-		case 'i':
-			zopt_init = value;
-			break;
-		case 'k':
-			zopt_killrate = value;
-			break;
-		case 'p':
-			zopt_pool = strdup(optarg);
-			if (zopt_pool == NULL)
-				fatal(1, "can't copy pool name");
-			break;
-		case 'f':
-			zopt_dir = strdup(optarg);
-			if (zopt_dir == NULL)
-				fatal(1, "can't copy directory name");
-			break;
-		case 'V':
-			zopt_verbose++;
-			break;
-		case 'E':
-			zopt_init = 0;
-			break;
-		case 'T':
-			zopt_time = value;
-			break;
-		case 'P':
-			zopt_passtime = MAX(1, value);
-			break;
-		case 'z':
-			zio_zil_fail_shift = MIN(value, 16);
-			break;
-		case 'h':
-			usage(B_TRUE);
-			break;
-		case '?':
-		default:
-			usage(B_FALSE);
-			break;
-		}
-	}
-
-	/* Parity can never reach the number of disks in a raidz group. */
-	zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
-
-	zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
-	zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
-}
-
-/*
- * Return the ashift to use for a new vdev: the configured zopt_ashift, or,
- * when that is 0, SPA_MINBLOCKSHIFT plus a random amount in [0, 2].
- */
-static uint64_t
-ztest_get_ashift(void)
-{
-	uint64_t ashift = zopt_ashift;
-
-	if (ashift == 0)
-		ashift = SPA_MINBLOCKSHIFT + ztest_random(3);
-
-	return (ashift);
-}
-
-/*
- * Build the nvlist describing one file vdev.
- *
- * With size == 0 no file is created and the vdev path is "/dev/bogus".
- * Otherwise the next number from ztest_shared->zs_vdev_primaries is used to
- * generate a path from ztest_dev_template, and that file is created (or
- * truncated) and sized to 'size' bytes; failure to do so is fatal.
- *
- * The returned nvlist carries the type (file), the path and an ashift from
- * ztest_get_ashift().  The caller is responsible for freeing it.
- */
-static nvlist_t *
-make_vdev_file(size_t size)
-{
-	char dev_name[MAXPATHLEN];
-	uint64_t vdev;
-	uint64_t ashift = ztest_get_ashift();
-	int fd;
-	nvlist_t *file;
-
-	if (size == 0) {
-		(void) snprintf(dev_name, sizeof (dev_name), "%s",
-		    "/dev/bogus");
-	} else {
-		vdev = ztest_shared->zs_vdev_primaries++;
-		/* Bounded, like the other users of ztest_dev_template. */
-		(void) snprintf(dev_name, sizeof (dev_name),
-		    ztest_dev_template, zopt_dir, zopt_pool,
-		    (u_longlong_t)vdev);
-
-		fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
-		if (fd == -1)
-			fatal(1, "can't open %s", dev_name);
-		if (ftruncate(fd, size) != 0)
-			fatal(1, "can't ftruncate %s", dev_name);
-		(void) close(fd);
-	}
-
-	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
-	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
-
-	return (file);
-}
-
-/*
- * Build the nvlist for a raidz vdev with r file children of 'size' bytes
- * each and parity zopt_raidz_parity.  When r is less than 2 no raidz layer
- * is created and a single file vdev is returned instead.
- */
-static nvlist_t *
-make_vdev_raidz(size_t size, int r)
-{
-	nvlist_t *raidz, **child;
-	size_t child_bytes;
-	int i;
-
-	if (r < 2)
-		return (make_vdev_file(size));
-
-	child_bytes = r * sizeof (nvlist_t *);
-	child = umem_alloc(child_bytes, UMEM_NOFAIL);
-
-	i = 0;
-	while (i < r) {
-		child[i] = make_vdev_file(size);
-		i++;
-	}
-
-	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_RAIDZ) == 0);
-	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
-	    zopt_raidz_parity) == 0);
-	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
-	    child, r) == 0);
-
-	/* The children were added to raidz above; release ours. */
-	i = 0;
-	while (i < r) {
-		nvlist_free(child[i]);
-		i++;
-	}
-
-	umem_free(child, child_bytes);
-
-	return (raidz);
-}
-
-/*
- * Build the nvlist for an m-way mirror whose children each come from
- * make_vdev_raidz(size, r).  When m is less than 1 no mirror layer is
- * created and the result of make_vdev_raidz() is returned directly.
- */
-static nvlist_t *
-make_vdev_mirror(size_t size, int r, int m)
-{
-	nvlist_t *mirror, **child;
-	size_t child_bytes;
-	int i;
-
-	if (m < 1)
-		return (make_vdev_raidz(size, r));
-
-	child_bytes = m * sizeof (nvlist_t *);
-	child = umem_alloc(child_bytes, UMEM_NOFAIL);
-
-	i = 0;
-	while (i < m) {
-		child[i] = make_vdev_raidz(size, r);
-		i++;
-	}
-
-	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
-	    VDEV_TYPE_MIRROR) == 0);
-	VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
-	    child, m) == 0);
-
-	/* The children were added to mirror above; release ours. */
-	i = 0;
-	while (i < m) {
-		nvlist_free(child[i]);
-		i++;
-	}
-
-	umem_free(child, child_bytes);
-
-	return (mirror);
-}
-
-/*
- * Build the nvlist for a root vdev with t top-level children, each produced
- * by make_vdev_mirror(size, r, m).  t is expected to be positive.
- */
-static nvlist_t *
-make_vdev_root(size_t size, int r, int m, int t)
-{
-	nvlist_t *root, **child;
-	size_t child_bytes;
-	int i;
-
-	ASSERT(t > 0);
-
-	child_bytes = t * sizeof (nvlist_t *);
-	child = umem_alloc(child_bytes, UMEM_NOFAIL);
-
-	i = 0;
-	while (i < t) {
-		child[i] = make_vdev_mirror(size, r, m);
-		i++;
-	}
-
-	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
-	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
-	    child, t) == 0);
-
-	/* The children were added to root above; release ours. */
-	i = 0;
-	while (i < t) {
-		nvlist_free(child[i]);
-		i++;
-	}
-
-	umem_free(child, child_bytes);
-
-	return (root);
-}
-
-/*
- * Give 'object' a randomly chosen data block size (a power of two between
- * 2^SPA_MINBLOCKSHIFT and 2^SPA_MAXBLOCKSHIFT) and a random indirect block
- * shift (DN_MIN_INDBLKSHIFT..DN_MAX_INDBLKSHIFT) as part of transaction tx.
- * Any error from dmu_object_set_blocksize() is fatal.
- */
-static void
-ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
-{
-	int bs = SPA_MINBLOCKSHIFT +
-	    ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
-	int ibs = DN_MIN_INDBLKSHIFT +
-	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
-	int error;
-
-	error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
-	if (error) {
-		char osname[300];
-		dmu_objset_name(os, osname);
-		/* %llu needs an unsigned long long, not a bare uint64_t. */
-		fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
-		    osname, (u_longlong_t)object, 1 << bs, ibs, error);
-	}
-}
-
-/*
- * Pick a random checksum function.  Entries of zio_checksum_table with
- * ci_zbt set are skipped (another one is drawn), and ZIO_CHECKSUM_OFF is
- * replaced by ZIO_CHECKSUM_ON.
- */
-static uint8_t
-ztest_random_checksum(void)
-{
-	uint8_t checksum;
-
-	for (;;) {
-		checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
-		if (!zio_checksum_table[checksum].ci_zbt)
-			break;
-	}
-
-	return (checksum == ZIO_CHECKSUM_OFF ? ZIO_CHECKSUM_ON : checksum);
-}
-
-/*
- * Pick a random compression function index in
- * [0, ZIO_COMPRESS_FUNCTIONS).
- */
-static uint8_t
-ztest_random_compress(void)
-{
-	uint64_t pick = ztest_random(ZIO_COMPRESS_FUNCTIONS);
-
-	return ((uint8_t)pick);
-}
-
-/*
- * Context handed to the ztest_replay_*() callbacks.
- */
-typedef struct ztest_replay {
- objset_t *zr_os; /* objset the replayed records apply to */
- uint64_t zr_assign; /* passed as the second argument of dmu_tx_assign() */
-} ztest_replay_t;
-
-/*
- * Replay callback for TX_CREATE records: claim object lr->lr_doid in the
- * replay objset, using lr->lr_mode as the object type.
- *
- * The record is byte-swapped in place first when 'byteswap' is set.  If the
- * transaction cannot be assigned, it is aborted and that error is returned;
- * otherwise the result of dmu_object_claim() is returned (and asserted to
- * be 0).  At verbosity 5 and above the outcome is printed.
- */
-static int
-ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
-{
-	objset_t *objset = zr->zr_os;
-	dmu_tx_t *txn;
-	char osname[MAXNAMELEN];
-	int error;
-
-	if (byteswap)
-		byteswap_uint64_array(lr, sizeof (*lr));
-
-	txn = dmu_tx_create(objset);
-	dmu_tx_hold_bonus(txn, DMU_NEW_OBJECT);
-	error = dmu_tx_assign(txn, zr->zr_assign);
-	if (error) {
-		dmu_tx_abort(txn);
-		return (error);
-	}
-
-	error = dmu_object_claim(objset, lr->lr_doid, lr->lr_mode, 0,
-	    DMU_OT_NONE, 0, txn);
-	ASSERT3U(error, ==, 0);
-	dmu_tx_commit(txn);
-
-	if (zopt_verbose < 5)
-		return (error);
-
-	dmu_objset_name(objset, osname);
-	(void) printf("replay create of %s object %llu"
-	    " in txg %llu = %d\n",
-	    osname, (u_longlong_t)lr->lr_doid,
-	    (u_longlong_t)zr->zr_assign, error);
-
-	return (error);
-}
-
-/*
- * Replay callback for TX_REMOVE records: free object lr->lr_doid in the
- * replay objset.
- *
- * The record is byte-swapped in place first when 'byteswap' is set.
- * Returns the dmu_tx_assign() error if the transaction could not be
- * assigned (after aborting it), otherwise the result of dmu_object_free().
- */
-static int
-ztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap)
-{
-	objset_t *objset = zr->zr_os;
-	dmu_tx_t *txn;
-	int error;
-
-	if (byteswap)
-		byteswap_uint64_array(lr, sizeof (*lr));
-
-	txn = dmu_tx_create(objset);
-	dmu_tx_hold_free(txn, lr->lr_doid, 0, DMU_OBJECT_END);
-
-	error = dmu_tx_assign(txn, zr->zr_assign);
-	if (error == 0) {
-		error = dmu_object_free(objset, lr->lr_doid, txn);
-		dmu_tx_commit(txn);
-	} else {
-		dmu_tx_abort(txn);
-	}
-
-	return (error);
-}
-
-/*
- * Replay dispatch table handed to zil_replay(), indexed by transaction
- * type.  Only TX_CREATE and TX_REMOVE have handlers here; every other slot
- * is NULL.
- */
-zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
- NULL, /* 0 no such transaction type */
- ztest_replay_create, /* TX_CREATE */
- NULL, /* TX_MKDIR */
- NULL, /* TX_MKXATTR */
- NULL, /* TX_SYMLINK */
- ztest_replay_remove, /* TX_REMOVE */
- NULL, /* TX_RMDIR */
- NULL, /* TX_LINK */
- NULL, /* TX_RENAME */
- NULL, /* TX_WRITE */
- NULL, /* TX_TRUNCATE */
- NULL, /* TX_SETATTR */
- NULL, /* TX_ACL */
-};
-
-/*
- * Verify that we can't destroy an active pool, create an existing pool,
- * or create a pool with a bad vdev spec.
- *
- * Each check is fatal if the call returns anything other than the one
- * error expected for it (ENOENT, EEXIST or EBUSY respectively).
- */
-void
-ztest_spa_create_destroy(ztest_args_t *za)
-{
- int error;
- spa_t *spa;
- nvlist_t *nvroot;
-
- /*
- * Attempt to create using a bad file.
- * (size 0 makes make_vdev_file() use the path "/dev/bogus".)
- */
- nvroot = make_vdev_root(0, 0, 0, 1);
- error = spa_create("ztest_bad_file", nvroot, NULL);
- nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_file) = %d", error);
-
- /*
- * Attempt to create using a bad mirror.
- */
- nvroot = make_vdev_root(0, 0, 2, 1);
- error = spa_create("ztest_bad_mirror", nvroot, NULL);
- nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_mirror) = %d", error);
-
- /*
- * Attempt to create an existing pool. It shouldn't matter
- * what's in the nvroot; we should fail with EEXIST.
- * The name lock is held as reader from here to the end of the function.
- */
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- nvroot = make_vdev_root(0, 0, 0, 1);
- error = spa_create(za->za_pool, nvroot, NULL);
- nvlist_free(nvroot);
- if (error != EEXIST)
- fatal(0, "spa_create(whatever) = %d", error);
-
- /* Hold the pool open so that destroying it must fail with EBUSY. */
- error = spa_open(za->za_pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
- error = spa_destroy(za->za_pool);
- if (error != EBUSY)
- fatal(0, "spa_destroy() = %d", error);
-
- spa_close(spa, FTAG);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
-}
-
-/*
- * Verify that spa_vdev_add() works as expected: build one new top-level
- * vdev with the configured size, raidz width and mirror count and add it to
- * the pool.  ENOSPC is recorded and tolerated; any other error is fatal.
- * Nothing is removed here despite the function's name.
- */
-void
-ztest_vdev_add_remove(ztest_args_t *za)
-{
- spa_t *spa = dmu_objset_spa(za->za_os);
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; /* leaves per top-level vdev */
- nvlist_t *nvroot;
- int error;
-
- if (zopt_verbose >= 6)
- (void) printf("adding vdev\n");
-
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
-
- spa_config_enter(spa, RW_READER, FTAG);
-
- /*
- * Reset the file-number counter to the count of leaves implied by the
- * current number of top-level vdevs; make_vdev_file() numbers the new
- * files starting from this value.
- */
- ztest_shared->zs_vdev_primaries =
- spa->spa_root_vdev->vdev_children * leaves;
-
- spa_config_exit(spa, FTAG);
-
- nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
- error = spa_vdev_add(spa, nvroot);
- nvlist_free(nvroot);
-
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
-
- if (error == ENOSPC)
- ztest_record_enospc("spa_vdev_add");
- else if (error != 0)
- fatal(0, "spa_vdev_add() = %d", error);
-
- if (zopt_verbose >= 6)
- (void) printf("spa_vdev_add = %d, as expected\n", error);
-}
-
-/*
- * Depth-first search of the vdev tree rooted at vd for a vdev whose path
- * matches 'path'.  Returns the first match found (vd itself is checked
- * before its children), or NULL if there is none.
- */
-static vdev_t *
-vdev_lookup_by_path(vdev_t *vd, const char *path)
-{
-	const char *vpath = vd->vdev_path;
-	int i;
-
-	if (vpath != NULL) {
-		if (vd->vdev_wholedisk != 1) {
-			if (strcmp(path, vpath) == 0)
-				return (vd);
-		} else {
-			/*
-			 * For whole disks, the internal path has 's0', but the
-			 * path passed in by the user doesn't.
-			 */
-			size_t plen = strlen(path);
-
-			if (plen == strlen(vpath) - 2 &&
-			    strncmp(path, vpath, plen) == 0)
-				return (vd);
-		}
-	}
-
-	for (i = 0; i < vd->vdev_children; i++) {
-		vdev_t *found = vdev_lookup_by_path(vd->vdev_child[i], path);
-
-		if (found != NULL)
-			return (found);
-	}
-
-	return (NULL);
-}
-
-/*
- * Verify that we can attach and detach devices.
- *
- * A random leaf of a random top-level vdev is chosen, a partner file of
- * slightly different size is prepared, and spa_vdev_attach() is called as
- * either an attach or a replace.  The error it returns must match the one
- * predicted from the pool state, otherwise the test is fatal.  This function
- * itself only issues the attach/replace; it makes no detach call.
- */
-void
-ztest_vdev_attach_detach(ztest_args_t *za)
-{
- spa_t *spa = dmu_objset_spa(za->za_os);
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *oldvd, *newvd, *pvd;
- nvlist_t *root, *file;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; /* leaves per top-level vdev */
- uint64_t leaf, top;
- uint64_t ashift = ztest_get_ashift();
- size_t oldsize, newsize;
- char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
- int replacing;
- int error, expected_error;
- int fd;
-
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
-
- spa_config_enter(spa, RW_READER, FTAG);
-
- /*
- * Decide whether to do an attach or a replace.
- */
- replacing = ztest_random(2);
-
- /*
- * Pick a random top-level vdev.
- */
- top = ztest_random(rvd->vdev_children);
-
- /*
- * Pick a random leaf within it.
- */
- leaf = ztest_random(leaves);
-
- /*
- * Generate the path to this leaf. The filename will end with 'a'.
- * We'll alternate replacements with a filename that ends with 'b'.
- */
- (void) snprintf(oldpath, sizeof (oldpath),
- ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
-
- bcopy(oldpath, newpath, MAXPATHLEN);
-
- /*
- * If the 'a' file isn't part of the pool, the 'b' file must be.
- */
- if (vdev_lookup_by_path(rvd, oldpath) == NULL)
- oldpath[strlen(oldpath) - 1] = 'b';
- else
- newpath[strlen(newpath) - 1] = 'b';
-
- /*
- * Now oldpath represents something that's already in the pool,
- * and newpath is the thing we'll try to attach.
- */
- oldvd = vdev_lookup_by_path(rvd, oldpath);
- newvd = vdev_lookup_by_path(rvd, newpath);
- ASSERT(oldvd != NULL);
- pvd = oldvd->vdev_parent;
-
- /*
- * Make newsize a little bigger or smaller than oldsize.
- * If it's smaller, the attach should fail.
- * If it's larger, and we're doing a replace,
- * we should get dynamic LUN growth when we're done.
- * (The divisor is 9, 10 or 11, i.e. roughly +11%, 0% or -9%.)
- */
- oldsize = vdev_get_rsize(oldvd);
- newsize = 10 * oldsize / (9 + ztest_random(3));
-
- /*
- * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
- * unless it's a replace; in that case any non-replacing parent is OK.
- *
- * If newvd is already part of the pool, it should fail with EBUSY.
- *
- * If newvd is too small, it should fail with EOVERFLOW.
- *
- * If the new device's ashift exceeds that of the top-level vdev,
- * it should fail with EDOM.
- */
- if (newvd != NULL)
- expected_error = EBUSY;
- else if (pvd->vdev_ops != &vdev_mirror_ops &&
- pvd->vdev_ops != &vdev_root_ops &&
- (!replacing || pvd->vdev_ops == &vdev_replacing_ops))
- expected_error = ENOTSUP;
- else if (newsize < oldsize)
- expected_error = EOVERFLOW;
- else if (ashift > oldvd->vdev_top->vdev_ashift)
- expected_error = EDOM;
- else
- expected_error = 0;
-
- /*
- * If newvd isn't already part of the pool, create it.
- */
- if (newvd == NULL) {
- fd = open(newpath, O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (fd == -1)
- fatal(1, "can't open %s", newpath);
- if (ftruncate(fd, newsize) != 0)
- fatal(1, "can't ftruncate %s", newpath);
- (void) close(fd);
- }
-
- spa_config_exit(spa, FTAG);
-
- /*
- * Build the nvlist describing newpath.
- */
- VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, newpath) == 0);
- VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
-
- VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
- VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
- &file, 1) == 0);
-
- /* NOTE(review): oldvd is dereferenced after spa_config_exit() -- confirm this is safe. */
- error = spa_vdev_attach(spa, oldvd->vdev_guid, root, replacing);
-
- nvlist_free(file);
- nvlist_free(root);
-
- /*
- * If our parent was the replacing vdev, but the replace completed,
- * then instead of failing with ENOTSUP we may either succeed,
- * fail with ENODEV, or fail with EOVERFLOW.
- */
- if (expected_error == ENOTSUP &&
- (error == 0 || error == ENODEV || error == EOVERFLOW))
- expected_error = error;
-
- /*
- * If someone grew the LUN, the replacement may be too small.
- */
- if (error == EOVERFLOW)
- expected_error = error;
-
- if (error != expected_error) {
- fatal(0, "attach (%s, %s, %d) returned %d, expected %d",
- oldpath, newpath, replacing, error, expected_error);
- }
-
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
-}
-
-/*
- * Verify that dynamic LUN growth works as expected.
- *
- * A random leaf number is chosen and the matching 'a' backing file is
- * opened; if it does not exist nothing happens.  When the file is smaller
- * than twice zopt_vdev_size it is extended by a random amount below 1/32 of
- * its current size.  The result of ftruncate() is deliberately ignored.
- * The whole operation runs under zs_vdev_lock.
- */
-/* ARGSUSED */
-void
-ztest_vdev_LUN_growth(ztest_args_t *za)
-{
-	spa_t *spa = dmu_objset_spa(za->za_os);
-	char dev_name[MAXPATHLEN];
-	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
-	uint64_t vdev;
-	size_t fsize;
-	int fd;
-
-	(void) mutex_lock(&ztest_shared->zs_vdev_lock);
-
-	/*
-	 * Pick a random leaf vdev.
-	 */
-	spa_config_enter(spa, RW_READER, FTAG);
-	vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
-	spa_config_exit(spa, FTAG);
-
-	/* Bounded, like the other users of ztest_dev_template. */
-	(void) snprintf(dev_name, sizeof (dev_name), ztest_dev_template,
-	    zopt_dir, zopt_pool, (u_longlong_t)vdev);
-
-	if ((fd = open(dev_name, O_RDWR)) != -1) {
-		/*
-		 * Determine the size.
-		 */
-		fsize = lseek(fd, 0, SEEK_END);
-
-		/*
-		 * If it's less than 2x the original size, grow by around 3%.
-		 */
-		if (fsize < 2 * zopt_vdev_size) {
-			size_t newsize = fsize + ztest_random(fsize / 32);
-			(void) ftruncate(fd, newsize);
-			if (zopt_verbose >= 6) {
-				(void) printf("%s grew from %lu to %lu bytes\n",
-				    dev_name, (ulong_t)fsize, (ulong_t)newsize);
-			}
-		}
-		(void) close(fd);
-	}
-
-	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
-}
-
-/*
- * Creation callback passed to dmu_objset_create(): claim the three fixed
- * objects (directory, micro-ZAP and fat-ZAP object numbers) in the new
- * objset as part of tx.  'arg' is unused.  Each claim must succeed.
- */
-/* ARGSUSED */
-static void
-ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
-{
- /*
- * Create the directory object.
- * Its bonus buffer is sized to hold one ztest_block_tag_t.
- */
- VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
- DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
- DMU_OT_UINT64_OTHER, sizeof (ztest_block_tag_t), tx) == 0);
-
- /* Claim the two ZAP objects at their fixed object numbers. */
- VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
- DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
-
- VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ,
- DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
-}
-
-/*
- * Callback for dmu_objset_find(): sanity-check the named dataset and then
- * destroy it.  'arg' is unused.  Always returns 0; failures are caught by
- * the assertions.
- */
-/* ARGSUSED */
-static int
-ztest_destroy_cb(char *name, void *arg)
-{
- objset_t *os;
- dmu_object_info_t doi;
- int error;
-
- /*
- * Verify that the dataset contains a directory object.
- */
- error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- ASSERT3U(error, ==, 0);
- error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
- if (error != ENOENT) {
- /* We could have crashed in the middle of destroying it */
- ASSERT3U(error, ==, 0);
- ASSERT3U(doi.doi_type, ==, DMU_OT_UINT64_OTHER);
- ASSERT3S(doi.doi_physical_blks, >=, 0);
- }
- dmu_objset_close(os);
-
- /*
- * Destroy the dataset.
- */
- error = dmu_objset_destroy(name);
- ASSERT3U(error, ==, 0);
- return (0);
-}
-
-/*
- * Log a TX_CREATE intent-log record for 'object' in transaction tx.
- *
- * The record carries the name "ZOBJ_<object>" after the fixed lr_create_t
- * fields, followed by a random amount (below ZIL_MAX_BLKSZ) of extra space;
- * everything after the fixed part is zeroed before the name is copied in.
- * 'mode' is stored in lr_mode.  Returns the value of zil_itx_assign().
- */
-static uint64_t
-ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode)
-{
-	itx_t *itx;
-	lr_create_t *lr;
-	size_t namesize;
-	/* "ZOBJ_" (5) + up to 20 digits of a 64-bit value + NUL = 26. */
-	char name[32];
-
-	(void) snprintf(name, sizeof (name), "ZOBJ_%llu",
-	    (u_longlong_t)object);
-	namesize = strlen(name) + 1;
-
-	itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize +
-	    ztest_random(ZIL_MAX_BLKSZ));
-	lr = (lr_create_t *)&itx->itx_lr;
-	bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr));
-	lr->lr_doid = object;
-	lr->lr_foid = 0;
-	lr->lr_mode = mode;
-	lr->lr_uid = 0;
-	lr->lr_gid = 0;
-	lr->lr_gen = dmu_tx_get_txg(tx);
-	lr->lr_crtime[0] = time(NULL);
-	lr->lr_crtime[1] = 0;
-	lr->lr_rdev = 0;
-	bcopy(name, (char *)(lr + 1), namesize);
-
-	return (zil_itx_assign(zilog, itx, tx));
-}
-
-/*
- * Verify that dmu_objset_{create,destroy,open,close} work as expected.
- *
- * Works on the per-instance dataset "<pool>/<pool>_temp_<instance>" while
- * holding zs_name_lock as reader: optionally replay the log of a leftover
- * copy, destroy any leftover copy, create the dataset afresh, fill it with
- * a random number of logged objects, check that re-creating it fails with
- * EEXIST, check which additional open modes are permitted, and finally
- * destroy it again.  ENOSPC on creation is recorded and ends the test
- * early; every other unexpected result is fatal.
- */
-void
-ztest_dmu_objset_create_destroy(ztest_args_t *za)
-{
-	int error;
-	objset_t *os;
-	char name[100];
-	int mode, basemode, expected_error;
-	zilog_t *zilog;
-	uint64_t seq;
-	uint64_t objects;
-	ztest_replay_t zr;
-
-	(void) rw_rdlock(&ztest_shared->zs_name_lock);
-	(void) snprintf(name, sizeof (name), "%s/%s_temp_%llu", za->za_pool,
-	    za->za_pool, (u_longlong_t)za->za_instance);
-
-	basemode = DS_MODE_LEVEL(za->za_instance);
-	if (basemode == DS_MODE_NONE)
-		basemode++;
-
-	/*
-	 * If this dataset exists from a previous run, process its replay log
-	 * half of the time.  If we don't replay it, then dmu_objset_destroy()
-	 * (invoked from ztest_destroy_cb() below) should just throw it away.
-	 */
-	if (ztest_random(2) == 0 &&
-	    dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_PRIMARY, &os) == 0) {
-		zr.zr_os = os;
-		zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector);
-		dmu_objset_close(os);
-	}
-
-	/*
-	 * There may be an old instance of the dataset we're about to
-	 * create lying around from a previous run.  If so, destroy it
-	 * and all of its snapshots.
-	 */
-	(void) dmu_objset_find(name, ztest_destroy_cb, NULL,
-	    DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
-
-	/*
-	 * Verify that the destroyed dataset is no longer in the namespace.
-	 * os is cleared first so the diagnostic below never prints an
-	 * indeterminate pointer.
-	 */
-	os = NULL;
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
-	if (error != ENOENT)
-		fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
-		    name, os);
-
-	/*
-	 * Verify that we can create a new dataset.
-	 */
-	error = dmu_objset_create(name, DMU_OST_OTHER, NULL, ztest_create_cb,
-	    NULL);
-	if (error) {
-		if (error == ENOSPC) {
-			ztest_record_enospc("dmu_objset_create");
-			(void) rw_unlock(&ztest_shared->zs_name_lock);
-			return;
-		}
-		fatal(0, "dmu_objset_create(%s) = %d", name, error);
-	}
-
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
-	if (error) {
-		fatal(0, "dmu_objset_open(%s) = %d", name, error);
-	}
-
-	/*
-	 * Open the intent log for it.
-	 */
-	zilog = zil_open(os, NULL);
-
-	/*
-	 * Put a random number of objects in there.
-	 */
-	objects = ztest_random(20);
-	seq = 0;
-	while (objects-- != 0) {
-		/*
-		 * Start from 0 so zil_commit() below never reads an
-		 * indeterminate value when the transaction could not be
-		 * assigned and no object was allocated.
-		 */
-		uint64_t object = 0;
-		dmu_tx_t *tx = dmu_tx_create(os);
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name));
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			dmu_tx_abort(tx);
-		} else {
-			object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
-			    DMU_OT_NONE, 0, tx);
-			ztest_set_random_blocksize(os, object, tx);
-			seq = ztest_log_create(zilog, tx, object,
-			    DMU_OT_UINT64_OTHER);
-			dmu_write(os, object, 0, sizeof (name), name, tx);
-			dmu_tx_commit(tx);
-		}
-		if (ztest_random(5) == 0) {
-			zil_commit(zilog, seq, object);
-		}
-		if (ztest_random(100) == 0) {
-			error = zil_suspend(zilog);
-			if (error == 0) {
-				zil_resume(zilog);
-			}
-		}
-	}
-
-	/*
-	 * Verify that we cannot create an existing dataset.
-	 */
-	error = dmu_objset_create(name, DMU_OST_OTHER, NULL, NULL, NULL);
-	if (error != EEXIST)
-		fatal(0, "created existing dataset, error = %d", error);
-
-	/*
-	 * Verify that multiple dataset opens are allowed, but only when
-	 * the new access mode is compatible with the base mode.
-	 * We use a mixture of typed and typeless opens, and when the
-	 * open succeeds, verify that the discovered type is correct.
-	 */
-	for (mode = DS_MODE_STANDARD; mode < DS_MODE_LEVELS; mode++) {
-		objset_t *os2;
-		error = dmu_objset_open(name, DMU_OST_OTHER, mode, &os2);
-		expected_error = (basemode + mode < DS_MODE_LEVELS) ? 0 : EBUSY;
-		if (error != expected_error)
-			fatal(0, "dmu_objset_open('%s') = %d, expected %d",
-			    name, error, expected_error);
-		if (error == 0)
-			dmu_objset_close(os2);
-	}
-
-	zil_close(zilog);
-	dmu_objset_close(os);
-
-	error = dmu_objset_destroy(name);
-	if (error)
-		fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
-
-	(void) rw_unlock(&ztest_shared->zs_name_lock);
-}
-
-/*
- * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
- *
- * Under zs_name_lock (reader), destroy the snapshot "<objset>@<instance>"
- * if it exists and then take it again.  ENOENT on destroy and EEXIST on
- * create are tolerated, ENOSPC on create is recorded, and any other error
- * is fatal.
- */
-void
-ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
-{
-	int error;
-	objset_t *os = za->za_os;
-	/*
-	 * Room for the full objset name, '@', a 64-bit decimal number and
-	 * the terminator, so the '@' can never be lost to truncation and
-	 * the strchr() below always finds it.
-	 */
-	char snapname[MAXNAMELEN + 32];
-	char osname[MAXNAMELEN];
-
-	(void) rw_rdlock(&ztest_shared->zs_name_lock);
-	dmu_objset_name(os, osname);
-	(void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname,
-	    (u_longlong_t)za->za_instance);
-
-	error = dmu_objset_destroy(snapname);
-	if (error != 0 && error != ENOENT)
-		fatal(0, "dmu_objset_destroy() = %d", error);
-	/* The snapshot call takes only the part after the '@'. */
-	error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE);
-	if (error == ENOSPC)
-		ztest_record_enospc("dmu_take_snapshot");
-	else if (error != 0 && error != EEXIST)
-		fatal(0, "dmu_take_snapshot() = %d", error);
-	(void) rw_unlock(&ztest_shared->zs_name_lock);
-}
-
-/* NOTE(review): not referenced in the code visible here -- confirm it is used. */
-#define ZTEST_TRAVERSE_BLOCKS 1000
-
-/*
- * Per-block callback registered with traverse_init() by ztest_traverse();
- * 'arg' is the ztest_args_t of the calling test.
- *
- * It sanity-checks each visited block through assertions and returns 0,
- * except that it returns ERESTART when the cache entry carries an error
- * and, for odd instance numbers, occasionally returns EINTR.
- */
-static int
-ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
- ztest_args_t *za = arg;
- zbookmark_t *zb = &bc->bc_bookmark;
- blkptr_t *bp = &bc->bc_blkptr;
- dnode_phys_t *dnp = bc->bc_dnode;
- traverse_handle_t *th = za->za_th;
- uint64_t size = BP_GET_LSIZE(bp);
-
- /*
- * Level -1 indicates the objset_phys_t or something in its intent log.
- */
- if (zb->zb_level == -1) {
- if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
- ASSERT3U(zb->zb_object, ==, 0);
- ASSERT3U(zb->zb_blkid, ==, 0);
- ASSERT3U(size, ==, sizeof (objset_phys_t));
- za->za_zil_seq = 0;
- } else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
- /* Intent-log block ids must be strictly increasing. */
- ASSERT3U(zb->zb_object, ==, 0);
- ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
- za->za_zil_seq = zb->zb_blkid;
- } else {
- ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */
- }
-
- return (0);
- }
-
- ASSERT(dnp != NULL);
-
- if (bc->bc_errno)
- return (ERESTART);
-
- /*
- * Once in a while, abort the traverse. We only do this to odd
- * instance numbers to ensure that even ones can run to completion.
- */
- if ((za->za_instance & 1) && ztest_random(10000) == 0)
- return (EINTR);
-
- /* A birth txg of 0 is only expected when holes were requested. */
- if (bp->blk_birth == 0) {
- ASSERT(th->th_advance & ADVANCE_HOLES);
- return (0);
- }
-
- /* Without ADVANCE_DATA, this level-0 cache slot carries no data. */
- if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) &&
- bc == &th->th_cache[ZB_DN_CACHE][0]) {
- ASSERT(bc->bc_data == NULL);
- return (0);
- }
-
- ASSERT(bc->bc_data != NULL);
-
- /*
- * This is an expensive question, so don't ask it too often.
- * (Roughly one callback in 256: compare the cached data with a
- * fresh arc_tryread() of the same block.)
- */
- if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) {
- void *xbuf = umem_alloc(size, UMEM_NOFAIL);
- if (arc_tryread(spa, bp, xbuf) == 0) {
- ASSERT(bcmp(bc->bc_data, xbuf, size) == 0);
- }
- umem_free(xbuf, size);
- }
-
- /* Indirect blocks must match the dnode's indirect block size. */
- if (zb->zb_level > 0) {
- ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift);
- return (0);
- }
-
- /* Data blocks must match the dnode's data block size. */
- ASSERT(zb->zb_level == 0);
- ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
-
- return (0);
-}
-
-/*
- * Verify that live pool traversal works.
- *
- * The traversal handle is kept in za->za_th between calls.  When there is
- * none, a new one is created with a random combination of ADVANCE_* flags
- * and pointed at the whole pool.  Each call then advances the traversal
- * until traverse_more() stops returning EAGAIN or the callback budget (100
- * with ADVANCE_DATA, otherwise 1000) is used up.  When the traversal ends
- * (done or EINTR) the handle is released; any other result except EAGAIN
- * is fatal.
- */
-void
-ztest_traverse(ztest_args_t *za)
-{
-	spa_t *spa = dmu_objset_spa(za->za_os);
-	traverse_handle_t *th = za->za_th;
-	int rc, advance;
-	uint64_t cbstart, cblimit;
-
-	if (th == NULL) {
-		advance = 0;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_PRE;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_PRUNE;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_DATA;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_HOLES;
-
-		if (ztest_random(2) == 0)
-			advance |= ADVANCE_ZIL;
-
-		th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
-		    ZIO_FLAG_CANFAIL);
-
-		traverse_add_pool(th, 0, -1ULL);
-	}
-
-	advance = th->th_advance;
-	cbstart = th->th_callbacks;
-	cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000);
-
-	while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit)
-		continue;
-
-	/*
-	 * zb_level may be -1 (see ztest_blk_cb()), so it is passed to %lld
-	 * as a signed value.
-	 */
-	if (zopt_verbose >= 5)
-		(void) printf("traverse %s%s%s%s %llu blocks to "
-		    "<%llu, %llu, %lld, %llx>%s\n",
-		    (advance & ADVANCE_PRE) ? "pre" : "post",
-		    (advance & ADVANCE_PRUNE) ? "|prune" : "",
-		    (advance & ADVANCE_DATA) ? "|data" : "",
-		    (advance & ADVANCE_HOLES) ? "|holes" : "",
-		    (u_longlong_t)(th->th_callbacks - cbstart),
-		    (u_longlong_t)th->th_lastcb.zb_objset,
-		    (u_longlong_t)th->th_lastcb.zb_object,
-		    (long long)th->th_lastcb.zb_level,
-		    (u_longlong_t)th->th_lastcb.zb_blkid,
-		    rc == 0 ? " [done]" :
-		    rc == EINTR ? " [aborted]" :
-		    rc == EAGAIN ? "" :
-		    strerror(rc));
-
-	if (rc != EAGAIN) {
-		if (rc != 0 && rc != EINTR)
-			fatal(0, "traverse_more(%p) = %d", th, rc);
-		traverse_fini(th);
-		za->za_th = NULL;
-	}
-}
-
-/*
- * Verify that dmu_object_{alloc,free} work as expected.
- */
-void
-ztest_dmu_object_alloc_free(ztest_args_t *za)
-{
- objset_t *os = za->za_os;
- dmu_buf_t *db;
- dmu_tx_t *tx;
- uint64_t batchobj, object, batchsize, endoff, temp;
- int b, c, error, bonuslen;
- dmu_object_info_t doi;
- char osname[MAXNAMELEN];
-
- dmu_objset_name(os, osname);
-
- endoff = -8ULL;
- batchsize = 2;
-
- /*
- * Create a batch object if necessary, and record it in the directory.
- */
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj));
- if (batchobj == 0) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create a batch object");
- dmu_tx_abort(tx);
- return;
- }
- batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, batchobj, tx);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj, tx);
- dmu_tx_commit(tx);
- }
-
- /*
- * Destroy the previous batch of objects.
- */
- for (b = 0; b < batchsize; b++) {
- VERIFY(0 == dmu_read(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object));
- if (object == 0)
- continue;
- /*
- * Read and validate contents.
- * We expect the nth byte of the bonus buffer to be n.
- */
- VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
-
- dmu_object_info_from_db(db, &doi);
- ASSERT(doi.doi_type == DMU_OT_UINT64_OTHER);
- ASSERT(doi.doi_bonus_type == DMU_OT_PLAIN_OTHER);
- ASSERT3S(doi.doi_physical_blks, >=, 0);
-
- bonuslen = db->db_size;
-
- for (c = 0; c < bonuslen; c++) {
- if (((uint8_t *)db->db_data)[c] !=
- (uint8_t)(c + bonuslen)) {
- fatal(0,
- "bad bonus: %s, obj %llu, off %d: %u != %u",
- osname, object, c,
- ((uint8_t *)db->db_data)[c],
- (uint8_t)(c + bonuslen));
- }
- }
-
- dmu_buf_rele(db, FTAG);
-
- /*
- * We expect the word at endoff to be our object number.
- */
- VERIFY(0 == dmu_read(os, object, endoff,
- sizeof (uint64_t), &temp));
-
- if (temp != object) {
- fatal(0, "bad data in %s, got %llu, expected %llu",
- osname, temp, object);
- }
-
- /*
- * Destroy old object and clear batch entry.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj,
- b * sizeof (uint64_t), sizeof (uint64_t));
- dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("free object");
- dmu_tx_abort(tx);
- return;
- }
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- object = 0;
-
- dmu_object_set_checksum(os, batchobj,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, batchobj,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- dmu_tx_commit(tx);
- }
-
- /*
- * Before creating the new batch of objects, generate a bunch of churn.
- */
- for (b = ztest_random(100); b > 0; b--) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("churn objects");
- dmu_tx_abort(tx);
- return;
- }
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, object, tx);
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- dmu_tx_commit(tx);
- }
-
- /*
- * Create a new batch of objects with randomly chosen
- * blocksizes and record them in the batch directory.
- */
- for (b = 0; b < batchsize; b++) {
- uint32_t va_blksize;
- u_longlong_t va_nblocks;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff,
- sizeof (uint64_t));
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create batchobj");
- dmu_tx_abort(tx);
- return;
- }
- bonuslen = (int)ztest_random(dmu_bonus_max()) + 1;
-
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_PLAIN_OTHER, bonuslen, tx);
-
- ztest_set_random_blocksize(os, object, tx);
-
- dmu_object_set_checksum(os, object,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, object,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- /*
- * Write to both the bonus buffer and the regular data.
- */
- VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
- ASSERT3U(bonuslen, ==, db->db_size);
-
- dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
- ASSERT3S(va_nblocks, >=, 0);
-
- dmu_buf_will_dirty(db, tx);
-
- /*
- * See comments above regarding the contents of
- * the bonus buffer and the word at endoff.
- */
- for (c = 0; c < db->db_size; c++)
- ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
-
- dmu_buf_rele(db, FTAG);
-
- /*
- * Write to a large offset to increase indirection.
- */
- dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx);
-
- dmu_tx_commit(tx);
- }
-}
-
-/*
- * Verify that dmu_{read,write} work as expected.
- *
- * A bufwad is the fixed-size record that ztest_dmu_read_write() stores
- * in both of its test objects and later compares between them.
- */
-typedef struct bufwad {
- uint64_t bw_index; /* record index; the writer stores n + i here */
- uint64_t bw_txg; /* txg of the tx that last wrote this record */
- uint64_t bw_data; /* writer stores 1 + ztest_random(-2ULL) */
-} bufwad_t;
-
-typedef struct dmu_read_write_dir { /* read from ZTEST_DIROBJ at za_diroff */
- uint64_t dd_packobj; /* object number of the dense "pack" object */
- uint64_t dd_bigobj; /* object number of the sparse "big" object */
- uint64_t dd_chunk; /* chunk size in bytes; 0 means not set up yet */
-} dmu_read_write_dir_t;
-
-void
-ztest_dmu_read_write(ztest_args_t *za)
-{
- objset_t *os = za->za_os;
- dmu_read_write_dir_t dd;
- dmu_tx_t *tx;
- int i, freeit, error;
- uint64_t n, s, txg;
- bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
- uint64_t packoff, packsize, bigoff, bigsize;
- uint64_t regions = 997; /* n = random(regions) * stride + random(width) */
- uint64_t stride = 123456789ULL;
- uint64_t width = 40; /* s = 1 + ztest_random(width - 1) */
- int free_percent = 5;
-
- /*
- * Exercise dmu_read(), dmu_write() and dmu_free_range() on the
- * objset in za->za_os by cross-checking two objects against each
- * other. The function returns early, after calling
- * ztest_record_enospc(), whenever dmu_tx_assign() fails.
- *
- * This test uses two objects, packobj and bigobj, that are always
- * updated together (i.e. in the same tx) so that their contents are
- * in sync and can be compared. Their contents relate to each other
- * in a simple way: packobj is a dense array of 'bufwad' structures,
- * while bigobj is a sparse array of the same bufwads. Specifically,
- * for any index n, there are three bufwads that should be identical:
- *
- * packobj, at offset n * sizeof (bufwad_t)
- * bigobj, at the head of the nth chunk
- * bigobj, at the tail of the nth chunk
- *
- * The chunk size is arbitrary. It doesn't have to be a power of two,
- * and it doesn't have any relation to the object blocksize.
- * The only requirement is that it can hold at least two bufwads.
- *
- * Normally, we write the bufwad to each of these locations.
- * However, free_percent of the time we instead write zeroes to
- * packobj and perform a dmu_free_range() on bigobj. By comparing
- * bigobj to packobj, we can verify that the DMU is correctly
- * tracking which parts of an object are allocated and free,
- * and that the contents of the allocated blocks are correct.
- */
-
- /*
- * Read the directory info. If it's the first time, set things up.
- * "First time" is detected by dd_chunk == 0; both objects are then
- * allocated and the directory entry written in a single tx.
- */
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd));
- if (dd.dd_chunk == 0) {
- ASSERT(dd.dd_packobj == 0);
- ASSERT(dd.dd_bigobj == 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create r/w directory");
- dmu_tx_abort(tx);
- return;
- }
-
- dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t);
-
- ztest_set_random_blocksize(os, dd.dd_packobj, tx);
- ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
-
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
- tx);
- dmu_tx_commit(tx);
- }
-
- /*
- * Prefetch a random chunk of the big object.
- * Our aim here is to get some async reads in flight
- * for blocks that we may free below; the DMU should
- * handle this race correctly.
- */
- n = ztest_random(regions) * stride + ztest_random(width);
- s = 1 + ztest_random(2 * width - 1);
- dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk);
-
- /*
- * Pick a random index and compute the offsets into packobj and bigobj.
- * n and s are re-drawn here, so the range operated on below is
- * independent of the range that was just prefetched.
- */
- n = ztest_random(regions) * stride + ztest_random(width);
- s = 1 + ztest_random(width - 1);
-
- packoff = n * sizeof (bufwad_t);
- packsize = s * sizeof (bufwad_t);
-
- bigoff = n * dd.dd_chunk;
- bigsize = s * dd.dd_chunk;
-
- packbuf = umem_alloc(packsize, UMEM_NOFAIL);
- bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
-
- /*
- * free_percent of the time, free a range of bigobj rather than
- * overwriting it.
- */
- freeit = (ztest_random(100) < free_percent);
-
- /*
- * Read the current contents of our objects.
- */
- error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
- ASSERT3U(error, ==, 0);
- error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
- ASSERT3U(error, ==, 0);
-
- /*
- * Get a tx for the mods to both packobj and bigobj.
- * On failure both scratch buffers are released before returning.
- */
- tx = dmu_tx_create(os);
-
- dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
-
- if (freeit)
- dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize);
- else
- dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
-
- error = dmu_tx_assign(tx, TXG_WAIT);
-
- if (error) {
- ztest_record_enospc("dmu r/w range");
- dmu_tx_abort(tx);
- umem_free(packbuf, packsize);
- umem_free(bigbuf, bigsize);
- return;
- }
-
- txg = dmu_tx_get_txg(tx);
-
- /*
- * For each index from n to n + s, verify that the existing bufwad
- * in packobj matches the bufwads at the head and tail of the
- * corresponding chunk in bigobj. Then update all three bufwads
- * with the new values we want to write out.
- */
- for (i = 0; i < s; i++) {
- /* LINTED */
- pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
- /* LINTED */
- bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
- /* LINTED */
- bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1; /* last bufwad-sized slot of chunk i */
-
- ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
- ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
-
- if (pack->bw_txg > txg)
- fatal(0, "future leak: got %llx, open txg is %llx",
- pack->bw_txg, txg);
-
- if (pack->bw_data != 0 && pack->bw_index != n + i) /* index is only checked when bw_data is nonzero */
- fatal(0, "wrong index: got %llx, wanted %llx+%llx",
- pack->bw_index, n, i);
-
- if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
- fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
-
- if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
- fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
-
- if (freeit) {
- bzero(pack, sizeof (bufwad_t));
- } else {
- pack->bw_index = n + i;
- pack->bw_txg = txg;
- pack->bw_data = 1 + ztest_random(-2ULL);
- }
- *bigH = *pack; /* keep head and tail copies identical to pack */
- *bigT = *pack;
- }
-
- /*
- * We've verified all the old bufwads, and made new ones.
- * Now write them out.
- */
- dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
-
- if (freeit) {
- if (zopt_verbose >= 6) {
- (void) printf("freeing offset %llx size %llx"
- " txg %llx\n",
- (u_longlong_t)bigoff,
- (u_longlong_t)bigsize,
- (u_longlong_t)txg);
- }
- VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
- bigsize, tx));
- } else {
- if (zopt_verbose >= 6) {
- (void) printf("writing offset %llx size %llx"
- " txg %llx\n",
- (u_longlong_t)bigoff,
- (u_longlong_t)bigsize,
- (u_longlong_t)txg);
- }
- dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx);
- }
-
- dmu_tx_commit(tx);
-
- /*
- * Sanity check the stuff we just wrote.
- * Both ranges are read back into fresh buffers and compared
- * byte-for-byte with what was handed to the DMU above.
- */
- {
- void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
- void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
-
- VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
- packsize, packcheck));
- VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
- bigsize, bigcheck));
-
- ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
- ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
-
- umem_free(packcheck, packsize);
- umem_free(bigcheck, bigsize);
- }
-
- umem_free(packbuf, packsize);
- umem_free(bigbuf, bigsize);
-}
-
-/*
- * Check the block tag kept in the bonus buffer of ZTEST_DIROBJ.
- *
- * If a tag has been written there (bt_objset != 0), its bt_txg must not
- * be greater than 'txg'; otherwise the test aborts via fatal().
- */
-void
-ztest_dmu_check_future_leak(objset_t *os, uint64_t txg)
-{
-	char osname[MAXNAMELEN];
-	ztest_block_tag_t rbt;
-	dmu_buf_t *db;
-
-	if (zopt_verbose >= 3) {
-		dmu_objset_name(os, osname);
-		(void) printf("checking %s for future leaks in txg %lld...\n",
-		    osname, (u_longlong_t)txg);
-	}
-
-	/*
-	 * Make sure that, if there is a write record in the bonus buffer
-	 * of the ZTEST_DIROBJ, that the txg for this record is <= the
-	 * last synced txg of the pool.
-	 */
-	VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db));
-	ASSERT3U(db->db_size, ==, sizeof (rbt));
-	bcopy(db->db_data, &rbt, db->db_size);
-
-	/* No tag has been written yet: nothing to validate. */
-	if (rbt.bt_objset == 0) {
-		dmu_buf_rele(db, FTAG);
-		return;
-	}
-
-	ASSERT3U(rbt.bt_objset, ==, dmu_objset_id(os));
-	ASSERT3U(rbt.bt_object, ==, ZTEST_DIROBJ);
-	ASSERT3U(rbt.bt_offset, ==, -1ULL);
-
-	if (rbt.bt_txg > txg) {
-		fatal(0,
-		    "future leak: got %llx, last synced txg is %llx",
-		    rbt.bt_txg, txg);
-	}
-
-	dmu_buf_rele(db, FTAG);
-}
-
-void
-ztest_dmu_write_parallel(ztest_args_t *za)
-{
- objset_t *os = za->za_os;
- dmu_tx_t *tx;
- dmu_buf_t *db;
- int i, b, error, do_free, bs;
- uint64_t off, txg_how, txg;
- mutex_t *lp;
- char osname[MAXNAMELEN];
- char iobuf[SPA_MAXBLOCKSIZE]; /* target of the zio_read() check below */
- ztest_block_tag_t rbt, wbt; /* tag read back / tag written */
-
- dmu_objset_name(os, osname);
- bs = ZTEST_DIROBJ_BLOCKSIZE;
-
- /*
- * Have multiple threads write to large offsets in ZTEST_DIROBJ
- * to verify that having multiple threads writing to the same object
- * in parallel doesn't cause any trouble.
- * Also do parallel writes to the bonus buffer on occasion.
- *
- * Each of the 50 iterations does exactly one of:
- * - free the block at 'off' (do_free),
- * - write a block tag into the bonus buffer (off == -1ULL),
- * - write a block tag at 'off'; when ztest_random(2) == 0 that
- * block is then pushed out with dmu_sync() and read back.
- * The function returns early if dmu_tx_assign() fails with anything
- * other than ERESTART.
- */
- for (i = 0; i < 50; i++) {
- b = ztest_random(ZTEST_SYNC_LOCKS);
- lp = &ztest_shared->zs_sync_lock[b];
-
- do_free = (ztest_random(4) == 0);
-
- off = za->za_diroff_shared + ((uint64_t)b << SPA_MAXBLOCKSHIFT);
-
- if (ztest_random(4) == 0) {
- /*
- * Do the bonus buffer instead of a regular block.
- */
- do_free = 0;
- off = -1ULL; /* sentinel tested by the code below */
- }
-
- tx = dmu_tx_create(os);
-
- if (off == -1ULL)
- dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
- else if (do_free)
- dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs);
- else
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs);
-
- txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
- error = dmu_tx_assign(tx, txg_how);
- if (error) {
- if (error == ERESTART) {
- ASSERT(txg_how == TXG_NOWAIT);
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- continue; /* this iteration is skipped, not retried */
- }
- dmu_tx_abort(tx);
- ztest_record_enospc("dmu write parallel");
- return;
- }
- txg = dmu_tx_get_txg(tx);
-
- if (do_free) {
- (void) mutex_lock(lp); /* lp is zs_sync_lock[b], chosen above */
- VERIFY(0 == dmu_free_range(os, ZTEST_DIROBJ, off,
- bs, tx));
- (void) mutex_unlock(lp);
- dmu_tx_commit(tx);
- continue;
- }
-
- wbt.bt_objset = dmu_objset_id(os);
- wbt.bt_object = ZTEST_DIROBJ;
- wbt.bt_offset = off;
- wbt.bt_txg = txg;
- wbt.bt_thread = za->za_instance;
-
- if (off == -1ULL) {
- wbt.bt_seq = 0;
- VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ,
- FTAG, &db));
- ASSERT3U(db->db_size, ==, sizeof (wbt));
- bcopy(db->db_data, &rbt, db->db_size);
- if (rbt.bt_objset != 0) { /* a tag was written here before */
- ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
- ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
- ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
- ASSERT3U(rbt.bt_txg, <=, wbt.bt_txg);
- }
- dmu_buf_will_dirty(db, tx);
- bcopy(&wbt, db->db_data, db->db_size);
- dmu_buf_rele(db, FTAG);
- dmu_tx_commit(tx);
- continue;
- }
-
- (void) mutex_lock(lp);
-
- wbt.bt_seq = ztest_shared->zs_seq[b]++; /* incremented while lp is held */
-
- dmu_write(os, ZTEST_DIROBJ, off, sizeof (wbt), &wbt, tx);
-
- (void) mutex_unlock(lp);
-
- if (ztest_random(100) == 0)
- (void) poll(NULL, 0, 1); /* open dn_notxholds window */
-
- dmu_tx_commit(tx);
-
- if (ztest_random(1000) == 0)
- txg_wait_synced(dmu_objset_pool(os), txg);
-
- if (ztest_random(2) == 0) {
- blkptr_t blk = { 0 };
- uint64_t blkoff;
- zbookmark_t zb;
-
- (void) mutex_lock(lp);
- blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
- error = dmu_buf_hold(os,
- ZTEST_DIROBJ, blkoff, FTAG, &db);
- if (error) {
- dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, blkoff, error);
- (void) mutex_unlock(lp);
- continue;
- }
- blkoff = off - blkoff; /* now the offset of our tag within the block */
- error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
- dmu_buf_rele(db, FTAG);
- (void) mutex_unlock(lp);
- if (error) {
- dprintf("dmu_sync(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, off, error);
- continue;
- }
-
- if (blk.blk_birth == 0) { /* concurrent free */
- continue;
- }
- txg_suspend(dmu_objset_pool(os));
-
- ASSERT(blk.blk_fill == 1);
- ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
- ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
- ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
-
- /*
- * Read the block that dmu_sync() returned to
- * make sure its contents match what we wrote.
- * We do this while still txg_suspend()ed to ensure
- * that the block can't be reused before we read it.
- */
- zb.zb_objset = dmu_objset_id(os);
- zb.zb_object = ZTEST_DIROBJ;
- zb.zb_level = 0;
- zb.zb_blkid = off / bs;
- error = zio_wait(zio_read(NULL, dmu_objset_spa(os),
- &blk, iobuf, bs, NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
- ASSERT(error == 0);
-
- txg_resume(dmu_objset_pool(os));
-
- bcopy(&iobuf[blkoff], &rbt, sizeof (rbt));
-
- if (rbt.bt_objset == 0) /* concurrent free */
- continue;
-
- ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
- ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
- ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
-
- /*
- * The semantic of dmu_sync() is that we always
- * push the most recent version of the data,
- * so in the face of concurrent updates we may
- * see a newer version of the block. That's OK.
- * A tag from our own thread must carry our sequence
- * number; a tag from another thread must carry a
- * later one.
- */
- ASSERT3U(rbt.bt_txg, >=, wbt.bt_txg);
- if (rbt.bt_thread == wbt.bt_thread)
- ASSERT3U(rbt.bt_seq, ==, wbt.bt_seq);
- else
- ASSERT3U(rbt.bt_seq, >, wbt.bt_seq);
- }
- }
-}
-
-/*
- * Verify that zap_{create,destroy,add,remove,update} work as expected.
- */
-#define ZTEST_ZAP_MIN_INTS 1
-#define ZTEST_ZAP_MAX_INTS 4
-#define ZTEST_ZAP_MAX_PROPS 1000
-
-/*
- * Exercise the ZAP entry points against a zap object whose object number
- * is stored in ZTEST_DIROBJ at za->za_diroff.
- *
- * The object is created on first use and seeded with a known hash
- * collision.  Then 100 rounds of validate / update / remove are run on
- * randomly chosen "txg_N" / "prop_N" entry pairs.  Finally, when
- * ztest_random(100) returns 0, the object is destroyed and its directory
- * slot is cleared.
- *
- * Any dmu_tx_assign() failure is recorded with ztest_record_enospc() and
- * ends the function early; unexpected ZAP errors abort via fatal().
- */
-void
-ztest_zap(ztest_args_t *za)
-{
-	objset_t *os = za->za_os;
-	uint64_t object;
-	uint64_t txg, last_txg;
-	uint64_t value[ZTEST_ZAP_MAX_INTS];
-	uint64_t zl_ints, zl_intsize, prop;
-	int i, ints;
-	int iters = 100;
-	dmu_tx_t *tx;
-	char propname[100], txgname[100];
-	int error;
-	char osname[MAXNAMELEN];
-	char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
-
-	dmu_objset_name(os, osname);
-
-	/*
-	 * Create a new object if necessary, and record it in the directory.
-	 */
-	VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
-	    sizeof (uint64_t), &object));
-
-	if (object == 0) {
-		tx = dmu_tx_create(os);
-		dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
-		    sizeof (uint64_t));
-		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			ztest_record_enospc("create zap test obj");
-			dmu_tx_abort(tx);
-			return;
-		}
-		/*
-		 * zap_create() hands back the new object number rather than
-		 * an error code, so the only thing to check is that the
-		 * number is valid.  (The old code re-tested the stale result
-		 * of dmu_tx_assign() here, which could never be nonzero.)
-		 */
-		object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
-		ASSERT(object != 0);
-		dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
-		    sizeof (uint64_t), &object, tx);
-		/*
-		 * Generate a known hash collision, and verify that
-		 * we can lookup and remove both entries.
-		 */
-		for (i = 0; i < 2; i++) {
-			value[i] = i;
-			error = zap_add(os, object, hc[i], sizeof (uint64_t),
-			    1, &value[i], tx);
-			ASSERT3U(error, ==, 0);
-		}
-		for (i = 0; i < 2; i++) {
-			error = zap_add(os, object, hc[i], sizeof (uint64_t),
-			    1, &value[i], tx);
-			ASSERT3U(error, ==, EEXIST);
-			error = zap_length(os, object, hc[i],
-			    &zl_intsize, &zl_ints);
-			ASSERT3U(error, ==, 0);
-			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
-			ASSERT3U(zl_ints, ==, 1);
-		}
-		for (i = 0; i < 2; i++) {
-			error = zap_remove(os, object, hc[i], tx);
-			ASSERT3U(error, ==, 0);
-		}
-
-		dmu_tx_commit(tx);
-	}
-
-	/* Number of integers stored per "prop_N" entry for this object. */
-	ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
-
-	while (--iters >= 0) {
-		prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
-		(void) snprintf(propname, sizeof (propname), "prop_%llu",
-		    (u_longlong_t)prop);
-		(void) snprintf(txgname, sizeof (txgname), "txg_%llu",
-		    (u_longlong_t)prop);
-		bzero(value, sizeof (value));
-		last_txg = 0;
-
-		/*
-		 * If these zap entries already exist, validate their contents.
-		 */
-		error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
-		if (error == 0) {
-			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
-			ASSERT3U(zl_ints, ==, 1);
-
-			error = zap_lookup(os, object, txgname, zl_intsize,
-			    zl_ints, &last_txg);
-
-			ASSERT3U(error, ==, 0);
-
-			error = zap_length(os, object, propname, &zl_intsize,
-			    &zl_ints);
-
-			ASSERT3U(error, ==, 0);
-			ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
-			ASSERT3U(zl_ints, ==, ints);
-
-			error = zap_lookup(os, object, propname, zl_intsize,
-			    zl_ints, value);
-
-			ASSERT3U(error, ==, 0);
-
-			for (i = 0; i < ints; i++) {
-				ASSERT3U(value[i], ==, last_txg + object + i);
-			}
-		} else {
-			ASSERT3U(error, ==, ENOENT);
-		}
-
-		/*
-		 * Atomically update two entries in our zap object.
-		 * The first is named txg_%llu, and contains the txg
-		 * in which the property was last updated.  The second
-		 * is named prop_%llu, and the nth element of its value
-		 * should be txg + object + n.
-		 */
-		tx = dmu_tx_create(os);
-		dmu_tx_hold_zap(tx, object, TRUE, NULL);
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			ztest_record_enospc("create zap entry");
-			dmu_tx_abort(tx);
-			return;
-		}
-		txg = dmu_tx_get_txg(tx);
-
-		if (last_txg > txg)
-			fatal(0, "zap future leak: old %llu new %llu",
-			    (u_longlong_t)last_txg, (u_longlong_t)txg);
-
-		for (i = 0; i < ints; i++)
-			value[i] = txg + object + i;
-
-		error = zap_update(os, object, txgname, sizeof (uint64_t),
-		    1, &txg, tx);
-		if (error)
-			fatal(0, "zap_update('%s', %llu, '%s') = %d",
-			    osname, (u_longlong_t)object, txgname, error);
-
-		error = zap_update(os, object, propname, sizeof (uint64_t),
-		    ints, value, tx);
-		if (error)
-			fatal(0, "zap_update('%s', %llu, '%s') = %d",
-			    osname, (u_longlong_t)object, propname, error);
-
-		dmu_tx_commit(tx);
-
-		/*
-		 * Remove a random pair of entries.
-		 */
-		prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
-		(void) snprintf(propname, sizeof (propname), "prop_%llu",
-		    (u_longlong_t)prop);
-		(void) snprintf(txgname, sizeof (txgname), "txg_%llu",
-		    (u_longlong_t)prop);
-
-		error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
-
-		if (error == ENOENT)
-			continue;
-
-		ASSERT3U(error, ==, 0);
-
-		tx = dmu_tx_create(os);
-		dmu_tx_hold_zap(tx, object, TRUE, NULL);
-		error = dmu_tx_assign(tx, TXG_WAIT);
-		if (error) {
-			ztest_record_enospc("remove zap entry");
-			dmu_tx_abort(tx);
-			return;
-		}
-		error = zap_remove(os, object, txgname, tx);
-		if (error)
-			fatal(0, "zap_remove('%s', %llu, '%s') = %d",
-			    osname, (u_longlong_t)object, txgname, error);
-
-		error = zap_remove(os, object, propname, tx);
-		if (error)
-			fatal(0, "zap_remove('%s', %llu, '%s') = %d",
-			    osname, (u_longlong_t)object, propname, error);
-
-		dmu_tx_commit(tx);
-	}
-
-	/*
-	 * Once in a while, destroy the object.
-	 */
-	if (ztest_random(100) != 0)
-		return;
-
-	tx = dmu_tx_create(os);
-	dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
-	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
-	error = dmu_tx_assign(tx, TXG_WAIT);
-	if (error) {
-		ztest_record_enospc("destroy zap object");
-		dmu_tx_abort(tx);
-		return;
-	}
-	error = zap_destroy(os, object, tx);
-	if (error)
-		fatal(0, "zap_destroy('%s', %llu) = %d",
-		    osname, (u_longlong_t)object, error);
-	/* Clear the directory slot so the next call creates a new object. */
-	object = 0;
-	dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
-	    &object, tx);
-	dmu_tx_commit(tx);
-}
-
-/*
- * Run 100 randomly chosen ZAP operations (length, lookup, add, update,
- * remove) against ZTEST_MICROZAP_OBJ or ZTEST_FATZAP_OBJ, using randomly
- * generated entry names.  Returns early if a tx cannot be assigned.
- */
-void
-ztest_zap_parallel(ztest_args_t *za)
-{
-	objset_t *os = za->za_os;
-	uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
-	dmu_tx_t *tx;
-	int round, pos, op, namelen, error;
-	char name[20], string_value[20];
-	void *data;
-
-	for (round = 0; round < 100; round++) {
-		/*
-		 * Generate a random name of the form 'xxx.....' where each
-		 * x is a random printable character and the dots are dots.
-		 * There are 94 such characters, and the name length goes from
-		 * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
-		 */
-		namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
-
-		pos = 0;
-		while (pos < 3) {
-			name[pos] = '!' + ztest_random('~' - '!' + 1);
-			pos++;
-		}
-		while (pos < namelen - 1) {
-			name[pos] = '.';
-			pos++;
-		}
-		name[pos] = '\0';
-
-		object = (ztest_random(2) == 0) ?
-		    ZTEST_MICROZAP_OBJ : ZTEST_FATZAP_OBJ;
-
-		/*
-		 * Even-length names in the fatzap store the name itself as
-		 * a byte string; everything else stores a single txg.
-		 */
-		if (!(namelen & 1) && object != ZTEST_MICROZAP_OBJ) {
-			wsize = 1;
-			wc = namelen;
-			data = string_value;
-		} else {
-			wsize = sizeof (txg);
-			wc = 1;
-			data = &txg;
-		}
-
-		count = -1ULL;
-		VERIFY(zap_count(os, object, &count) == 0);
-		ASSERT(count != -1ULL);
-
-		/*
-		 * Select an operation: length, lookup, add, update, remove.
-		 * Operations 0 and 1 only read and therefore need no tx.
-		 */
-		op = ztest_random(5);
-
-		if (op < 2) {
-			tx = NULL;
-			txg = 0;
-			bzero(string_value, namelen);
-		} else {
-			tx = dmu_tx_create(os);
-			dmu_tx_hold_zap(tx, object, TRUE, NULL);
-			error = dmu_tx_assign(tx, TXG_WAIT);
-			if (error) {
-				ztest_record_enospc("zap parallel");
-				dmu_tx_abort(tx);
-				return;
-			}
-			txg = dmu_tx_get_txg(tx);
-			bcopy(name, string_value, namelen);
-		}
-
-		if (op == 0) {
-			error = zap_length(os, object, name, &zl_wsize, &zl_wc);
-			if (error == 0) {
-				ASSERT3U(wsize, ==, zl_wsize);
-				ASSERT3U(wc, ==, zl_wc);
-			} else {
-				ASSERT3U(error, ==, ENOENT);
-			}
-		} else if (op == 1) {
-			error = zap_lookup(os, object, name, wsize, wc, data);
-			if (error == 0) {
-				if (data == string_value &&
-				    bcmp(name, data, namelen) != 0)
-					fatal(0, "name '%s' != val '%s' len %d",
-					    name, data, namelen);
-			} else {
-				ASSERT3U(error, ==, ENOENT);
-			}
-		} else if (op == 2) {
-			error = zap_add(os, object, name, wsize, wc, data, tx);
-			ASSERT(error == 0 || error == EEXIST);
-		} else if (op == 3) {
-			VERIFY(zap_update(os, object, name, wsize, wc,
-			    data, tx) == 0);
-		} else if (op == 4) {
-			error = zap_remove(os, object, name, tx);
-			ASSERT(error == 0 || error == ENOENT);
-		}
-
-		if (tx != NULL)
-			dmu_tx_commit(tx);
-	}
-}
-
-/*
- * Set the "checksum" and then the "compression" property of the objset
- * in za->za_os to a random value, and read each one back.
- *
- * Runs with zs_name_lock held as reader.  An ENOSPC from dsl_prop_set()
- * is recorded and ends the loop; the lock is released on every path.
- */
-void
-ztest_dsl_prop_get_set(ztest_args_t *za)
-{
-	objset_t *os = za->za_os;
-	char osname[MAXNAMELEN];
-	char setpoint[MAXPATHLEN];
-	const char *prop, *valname;
-	uint64_t value;
-	int pass, inherit, error;
-
-	(void) rw_rdlock(&ztest_shared->zs_name_lock);
-
-	dmu_objset_name(os, osname);
-
-	/* pass 0 handles "checksum", pass 1 handles "compression" */
-	for (pass = 0; pass < 2; pass++) {
-		if (pass != 0) {
-			prop = "compression";
-			value = ztest_random_compress();
-			inherit = (value == ZIO_COMPRESS_INHERIT);
-		} else {
-			prop = "checksum";
-			value = ztest_random_checksum();
-			inherit = (value == ZIO_CHECKSUM_INHERIT);
-		}
-
-		error = dsl_prop_set(osname, prop, sizeof (value),
-		    !inherit, &value);
-
-		if (error == ENOSPC) {
-			ztest_record_enospc("dsl_prop_set");
-			break;
-		}
-
-		ASSERT3U(error, ==, 0);
-
-		VERIFY3U(dsl_prop_get(osname, prop, sizeof (value),
-		    1, &value, setpoint), ==, 0);
-
-		valname = (pass == 0) ?
-		    zio_checksum_table[value].ci_name :
-		    zio_compress_table[value].ci_name;
-
-		if (zopt_verbose < 6)
-			continue;
-
-		(void) printf("%s %s = %s for '%s'\n",
-		    osname, prop, valname, setpoint);
-	}
-
-	(void) rw_unlock(&ztest_shared->zs_name_lock);
-}
-
-/*
- * Recursively apply the fault-injection settings (mode, mask, arg) to
- * vd's subtree.  Children are visited first; the settings are stored
- * only on vdevs whose vdev_path is non-NULL.
- */
-static void
-ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg)
-{
-	int child = 0;
-
-	while (child < vd->vdev_children) {
-		ztest_error_setup(vd->vdev_child[child], mode, mask, arg);
-		child++;
-	}
-
-	if (vd->vdev_path == NULL)
-		return;
-
-	vd->vdev_fault_mode = mode;
-	vd->vdev_fault_mask = mask;
-	vd->vdev_fault_arg = arg;
-}
-
-/*
- * Inject random faults into the on-disk data.
- *
- * Picks a random top-level vdev and a random leaf below it.  When
- * zopt_maxfaults >= 2, leaf 0 of that vdev is additionally set up to
- * fail I/O (via ztest_error_setup()) and is randomly taken offline or
- * brought online.  Then up to 999 copies of a recognizable bad word are
- * written directly into the backing file of the random leaf.
- *
- * Does nothing when zopt_maxfaults == 0, and returns quietly when the
- * backing file cannot be opened.
- */
-void
-ztest_fault_inject(ztest_args_t *za)
-{
-	int fd;
-	uint64_t offset;
-	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
-	uint64_t bad = 0x1990c0ffeedecadeULL;
-	uint64_t top, leaf;
-	char path0[MAXPATHLEN];
-	char pathrand[MAXPATHLEN];
-	off_t end;
-	uint64_t fsize;
-	spa_t *spa = dmu_objset_spa(za->za_os);
-	int bshift = SPA_MAXBLOCKSHIFT + 2;	/* don't scrog all labels */
-	int iters = 1000;
-	vdev_t *vd0;
-	uint64_t guid0 = 0;
-
-	/*
-	 * We can't inject faults when we have no fault tolerance.
-	 */
-	if (zopt_maxfaults == 0)
-		return;
-
-	ASSERT(leaves >= 2);
-
-	/*
-	 * Pick a random top-level vdev.
-	 */
-	spa_config_enter(spa, RW_READER, FTAG);
-	top = ztest_random(spa->spa_root_vdev->vdev_children);
-	spa_config_exit(spa, FTAG);
-
-	/*
-	 * Pick a random leaf.
-	 */
-	leaf = ztest_random(leaves);
-
-	/*
-	 * Generate paths to leaf 0 of this top-level vdev and to the
-	 * random leaf we selected.  We'll induce transient I/O errors
-	 * and random online/offline activity on leaf 0, and we'll write
-	 * random garbage to the randomly chosen leaf.
-	 */
-	(void) snprintf(path0, sizeof (path0),
-	    ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 0);
-	(void) snprintf(pathrand, sizeof (pathrand),
-	    ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
-
-	dprintf("damaging %s and %s\n", path0, pathrand);
-
-	spa_config_enter(spa, RW_READER, FTAG);
-
-	/*
-	 * If we can tolerate two or more faults, make vd0 fail randomly.
-	 */
-	vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
-	if (vd0 != NULL && zopt_maxfaults >= 2) {
-		guid0 = vd0->vdev_guid;
-		ztest_error_setup(vd0, VDEV_FAULT_COUNT,
-		    (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE), 100);
-	}
-
-	spa_config_exit(spa, FTAG);
-
-	/*
-	 * If we can tolerate two or more faults, randomly online/offline vd0.
-	 */
-	if (zopt_maxfaults >= 2 && guid0 != 0) {
-		if (ztest_random(10) < 6)
-			(void) vdev_offline(spa, guid0, B_TRUE);
-		else
-			(void) vdev_online(spa, guid0);
-	}
-
-	/*
-	 * We have at least single-fault tolerance, so inject data corruption.
-	 */
-	fd = open(pathrand, O_RDWR);
-
-	if (fd == -1)	/* we hit a gap in the device namespace */
-		return;
-
-	/*
-	 * lseek() reports failure as -1.  Check for that before the value
-	 * is used as a size, and keep the size in 64 bits so that it is
-	 * not truncated where size_t is narrower than off_t.
-	 */
-	end = lseek(fd, 0, SEEK_END);
-	if (end == -1)
-		fatal(1, "can't determine size of %s", pathrand);
-	fsize = (uint64_t)end;
-
-	while (--iters != 0) {
-		/*
-		 * Pick an 8-byte-aligned offset inside one of this leaf's
-		 * (1 << bshift)-sized slots.
-		 */
-		offset = ztest_random(fsize / (leaves << bshift)) *
-		    (leaves << bshift) + (leaf << bshift) +
-		    (ztest_random(1ULL << (bshift - 1)) & -8ULL);
-
-		if (offset >= fsize)
-			continue;
-
-		if (zopt_verbose >= 6)
-			(void) printf("injecting bad word into %s,"
-			    " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
-
-		if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
-			fatal(1, "can't inject bad word at 0x%llx in %s",
-			    (u_longlong_t)offset, pathrand);
-	}
-
-	(void) close(fd);
-}
-
-/*
- * Scrub the pool.
- *
- * A scrub of everything is requested twice, one second apart, so that
- * the second request restarts the first.  Return values are ignored.
- */
-void
-ztest_scrub(ztest_args_t *za)
-{
-	spa_t *pool = dmu_objset_spa(za->za_os);
-	int pass;
-
-	for (pass = 0; pass < 2; pass++) {
-		(void) spa_scrub(pool, POOL_SCRUB_EVERYTHING, B_FALSE);
-		/* wait a second, then force a restart */
-		if (pass == 0)
-			(void) poll(NULL, 0, 1000);
-	}
-}
-
-/*
- * Rename the pool to a different name and then rename it back.
- *
- * The temporary name is za->za_pool with "_tmp" appended.  After each
- * rename the pool is reopened to confirm it is reachable only under the
- * current name and is still the same spa_t.  The whole sequence runs
- * with zs_name_lock held as writer; any unexpected result is fatal().
- */
-void
-ztest_spa_rename(ztest_args_t *za)
-{
-	char *oldname, *newname;
-	size_t newsize;
-	spa_t *spa;
-	int error;
-
-	(void) rw_wrlock(&ztest_shared->zs_name_lock);
-
-	oldname = za->za_pool;
-	newsize = strlen(oldname) + 5;	/* "_tmp" plus terminating NUL */
-	newname = umem_alloc(newsize, UMEM_NOFAIL);
-	(void) snprintf(newname, newsize, "%s_tmp", oldname);
-
-	/*
-	 * Do the rename
-	 */
-	if ((error = spa_rename(oldname, newname)) != 0)
-		fatal(0, "spa_rename('%s', '%s') = %d", oldname,
-		    newname, error);
-
-	/*
-	 * Try to open it under the old name, which shouldn't exist
-	 */
-	if ((error = spa_open(oldname, &spa, FTAG)) != ENOENT)
-		fatal(0, "spa_open('%s') = %d", oldname, error);
-
-	/*
-	 * Open it under the new name and make sure it's still the same spa_t.
-	 */
-	if ((error = spa_open(newname, &spa, FTAG)) != 0)
-		fatal(0, "spa_open('%s') = %d", newname, error);
-
-	ASSERT(spa == dmu_objset_spa(za->za_os));
-	spa_close(spa, FTAG);
-
-	/*
-	 * Rename it back to the original
-	 */
-	if ((error = spa_rename(newname, oldname)) != 0)
-		fatal(0, "spa_rename('%s', '%s') = %d", newname,
-		    oldname, error);
-
-	/*
-	 * Make sure it can still be opened
-	 */
-	if ((error = spa_open(oldname, &spa, FTAG)) != 0)
-		fatal(0, "spa_open('%s') = %d", oldname, error);
-
-	ASSERT(spa == dmu_objset_spa(za->za_os));
-	spa_close(spa, FTAG);
-
-	/* newsize equals strlen(newname) + 1, the size that was allocated */
-	umem_free(newname, newsize);
-
-	(void) rw_unlock(&ztest_shared->zs_name_lock);
-}
-
-
-/*
- * Completely obliterate one disk.
- *
- * The backing file of device number 'vdev' is renamed to "<name>.old"
- * and replaced by a new file of the same size created with ftruncate().
- * Does nothing unless zopt_maxfaults >= 2.  Failures are fatal.
- */
-static void
-ztest_obliterate_one_disk(uint64_t vdev)
-{
-	int fd;
-	char dev_name[MAXPATHLEN], copy_name[MAXPATHLEN];
-	off_t fsize;
-
-	if (zopt_maxfaults < 2)
-		return;
-
-	/* Bounded formatting, as in ztest_fault_inject(). */
-	(void) snprintf(dev_name, sizeof (dev_name), ztest_dev_template,
-	    zopt_dir, zopt_pool, vdev);
-	(void) snprintf(copy_name, sizeof (copy_name), "%s.old", dev_name);
-
-	fd = open(dev_name, O_RDWR);
-
-	if (fd == -1)
-		fatal(1, "can't open %s", dev_name);
-
-	/*
-	 * Determine the size.  lseek() returns -1 on failure, which must
-	 * not be passed on to ftruncate() as a length.
-	 */
-	fsize = lseek(fd, 0, SEEK_END);
-	if (fsize == -1)
-		fatal(1, "can't determine size of %s", dev_name);
-
-	(void) close(fd);
-
-	/*
-	 * Rename the old device to dev_name.old (useful for debugging).
-	 */
-	VERIFY(rename(dev_name, copy_name) == 0);
-
-	/*
-	 * Create a new one.
-	 */
-	VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0);
-	VERIFY(ftruncate(fd, fsize) == 0);
-	(void) close(fd);
-}
-
-/*
- * Ask the pool to replace device number 'vdev' with itself in place.
- *
- * An nvlist describing a one-child root with a file vdev at the device's
- * path is passed to spa_vdev_attach().  The guid of the vdev currently
- * at that path is used as the target (0 if none is found).  The errors
- * EBUSY, ENOTSUP, ENODEV and EDOM are tolerated; anything else is fatal.
- */
-static void
-ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
-{
-	char dev_name[MAXPATHLEN];
-	nvlist_t *file, *root;
-	int error;
-	uint64_t guid;
-	uint64_t ashift = ztest_get_ashift();
-	vdev_t *vd;
-
-	/* Bounded formatting, as in ztest_fault_inject(). */
-	(void) snprintf(dev_name, sizeof (dev_name), ztest_dev_template,
-	    zopt_dir, zopt_pool, vdev);
-
-	/*
-	 * Build the nvlist describing dev_name.
-	 */
-	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
-	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
-	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
-
-	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
-	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
-	VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
-	    &file, 1) == 0);
-
-	/* Look up the guid of the vdev currently at dev_name, if any. */
-	spa_config_enter(spa, RW_READER, FTAG);
-	if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
-		guid = 0;
-	else
-		guid = vd->vdev_guid;
-	spa_config_exit(spa, FTAG);
-	error = spa_vdev_attach(spa, guid, root, B_TRUE);
-	if (error != 0 &&
-	    error != EBUSY &&
-	    error != ENOTSUP &&
-	    error != ENODEV &&
-	    error != EDOM)
-		fatal(0, "spa_vdev_attach(in-place) = %d", error);
-
-	nvlist_free(file);
-	nvlist_free(root);
-}
-
-/*
- * Run zdb against 'pool' and fail the test if zdb does not exit cleanly.
- *
- * The command line is built in place on top of the resolved path of this
- * program: the "/usr/bin/..." suffix (or the whole path, if that prefix
- * is not found) is overwritten with "/usr/sbin/zdb -bc...".  zdb's output
- * is echoed when zopt_verbose >= 3.  A nonzero wait status clears
- * ztest_dump_core and is reported via fatal().
- */
-static void
-ztest_verify_blocks(char *pool)
-{
-	int status;
-	char zdb[MAXPATHLEN + MAXNAMELEN + 20];
-	char zbuf[1024];
-	char *bin;
-	size_t room;
-	FILE *fp;
-
-	if (realpath(progname, zdb) == NULL)
-		assert(!"realpath() failed");
-
-	/* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
-	bin = strstr(zdb, "/usr/bin/");
-	if (bin == NULL)
-		bin = zdb;
-
-	/*
-	 * 'bin' may point into the middle of zdb[], so only the remaining
-	 * space may be written; bound the formatting accordingly.
-	 */
-	room = sizeof (zdb) - (size_t)(bin - zdb);
-	/* LINTED */
-	(void) snprintf(bin, room, "/usr/sbin/zdb -bc%s%s -U -O %s %s",
-	    zopt_verbose >= 3 ? "s" : "",
-	    zopt_verbose >= 4 ? "v" : "",
-	    ztest_random(2) == 0 ? "pre" : "post", pool);
-
-	if (zopt_verbose >= 5)
-		(void) printf("Executing %s\n", strstr(zdb, "zdb "));
-
-	fp = popen(zdb, "r");
-	assert(fp != NULL);
-
-	/* Drain zdb's output so it can run to completion. */
-	while (fgets(zbuf, sizeof (zbuf), fp) != NULL)
-		if (zopt_verbose >= 3)
-			(void) printf("%s", zbuf);
-
-	status = pclose(fp);
-
-	if (status == 0)
-		return;
-
-	ztest_dump_core = 0;
-	if (WIFEXITED(status))
-		fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status));
-	else
-		fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status));
-}
-
-/*
- * Walk every pool returned by spa_next() while holding
- * spa_namespace_lock.  When zopt_verbose >= 6, 'header' is printed
- * first, followed by one indented line per pool name.
- */
-static void
-ztest_walk_pool_directory(char *header)
-{
-	spa_t *spa;
-
-	if (zopt_verbose >= 6)
-		(void) printf("%s\n", header);
-
-	mutex_enter(&spa_namespace_lock);
-	for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) {
-		if (zopt_verbose >= 6)
-			(void) printf("\t%s\n", spa_name(spa));
-	}
-	mutex_exit(&spa_namespace_lock);
-}
-
-static void
-ztest_spa_import_export(char *oldname, char *newname)
-{
- nvlist_t *config;
- uint64_t pool_guid;
- spa_t *spa;
- int error;
-
- if (zopt_verbose >= 4) {
- (void) printf("import/export: old = %s, new = %s\n",
- oldname, newname);
- }
-
- /*
- * Clean up from previous runs.
- */
- (void) spa_destroy(newname);
-
- /*
- * Get the pool's configuration and guid.
- */
- error = spa_open(oldname, &spa, FTAG);
- if (error)
- fatal(0, "spa_open('%s') = %d", oldname, error);
-
- pool_guid = spa_guid(spa);
- spa_close(spa, FTAG);
-
- ztest_walk_pool_directory("pools before export");
-
- /*
- * Export it.
- */
- error = spa_export(oldname, &config);
- if (error)
- fatal(0, "spa_export('%s') = %d", oldname, error);
-
- ztest_walk_pool_directory("pools after export");
-
- /*
- * Import it under the new name.
- */
- error = spa_import(newname, config, NULL);
- if (error)
- fatal(0, "spa_import('%s') = %d", newname, error);
-
- ztest_walk_pool_directory("pools after import");
-
- /*
- * Try to import it again -- should fail with EEXIST.
- */
- error = spa_import(newname, config, NULL);
- if (error != EEXIST)
- fatal(0, "spa_import('%s') twice", newname);
-
- /*
- * Try to import it under a different name -- should fail with EEXIST.
- */
- error = spa_import(oldname, config, NULL);
- if (error != EEXIST)
- fatal(0, "spa_import('%s') under multiple names", newname);
-
- /*
- * Verify that the pool is no longer visible under the old name.
- */
- error = spa_open(oldname, &spa, FTAG);
- if (error != ENOENT)
- fatal(0, "spa_open('%s') = %d", newname, error);
-
- /*
- * Verify that we can open and close the pool using the new name.
- */
- error = spa_open(newname, &spa, FTAG);
- if (error)
- fatal(0, "spa_open('%s') = %d", newname, error);
- ASSERT(pool_guid == spa_guid(spa));
- spa_close(spa, FTAG);
-
- nvlist_free(config);
-}
-
-static void *
-ztest_thread(void *arg)
-{
- ztest_args_t *za = arg;
- ztest_shared_t *zs = ztest_shared;
- hrtime_t now, functime;
- ztest_info_t *zi;
- int f;
-
- while ((now = gethrtime()) < za->za_stop) {
- /*
- * See if it's time to force a crash.
- */
- if (now > za->za_kill) {
- dmu_tx_t *tx;
- uint64_t txg;
-
- mutex_enter(&spa_namespace_lock);
- tx = dmu_tx_create(za->za_os);
- VERIFY(0 == dmu_tx_assign(tx, TXG_NOWAIT));
- txg = dmu_tx_get_txg(tx);
- dmu_tx_commit(tx);
- zs->zs_txg = txg;
- if (zopt_verbose >= 3)
- (void) printf(
- "killing process after txg %lld\n",
- (u_longlong_t)txg);
- txg_wait_synced(dmu_objset_pool(za->za_os), txg);
- zs->zs_alloc = spa_get_alloc(dmu_objset_spa(za->za_os));
- zs->zs_space = spa_get_space(dmu_objset_spa(za->za_os));
- (void) kill(getpid(), SIGKILL);
- }
-
- /*
- * Pick a random function.
- */
- f = ztest_random(ZTEST_FUNCS);
- zi = &zs->zs_info[f];
-
- /*
- * Decide whether to call it, based on the requested frequency.
- */
- if (zi->zi_call_target == 0 ||
- (double)zi->zi_call_total / zi->zi_call_target >
- (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC))
- continue;
-
- atomic_add_64(&zi->zi_calls, 1);
- atomic_add_64(&zi->zi_call_total, 1);
-
- za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) *
- ZTEST_DIRSIZE;
- za->za_diroff_shared = (1ULL << 63);
-
- ztest_dmu_write_parallel(za);
-
- zi->zi_func(za);
-
- functime = gethrtime() - now;
-
- atomic_add_64(&zi->zi_call_time, functime);
-
- if (zopt_verbose >= 4) {
- Dl_info dli;
- (void) dladdr((void *)zi->zi_func, &dli);
- (void) printf("%6.2f sec in %s\n",
- (double)functime / NANOSEC, dli.dli_sname);
- }
-
- /*
- * If we're getting ENOSPC with some regularity, stop.
- */
- if (zs->zs_enospc_count > 10)
- break;
- }
-
- return (NULL);
-}
-
-/*
- * Kick off threads to run tests on all datasets in parallel.
- */
-static void
-ztest_run(char *pool)
-{
- int t, d, error;
- ztest_shared_t *zs = ztest_shared;
- ztest_args_t *za;
- spa_t *spa;
- char name[100];
-
- (void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL);
- (void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL);
-
- for (t = 0; t < ZTEST_SYNC_LOCKS; t++)
- (void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL);
-
- /*
- * Destroy one disk before we even start.
- * It's mirrored, so everything should work just fine.
- * This makes us exercise fault handling very early in spa_load().
- */
- ztest_obliterate_one_disk(0);
-
- /*
- * Verify that the sum of the sizes of all blocks in the pool
- * equals the SPA's allocated space total.
- */
- ztest_verify_blocks(pool);
-
- /*
- * Kick off a replacement of the disk we just obliterated.
- */
- kernel_init(FREAD | FWRITE);
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open(%s) = %d", pool, error);
- ztest_replace_one_disk(spa, 0);
- if (zopt_verbose >= 5)
- show_pool_stats(spa);
- spa_close(spa, FTAG);
- kernel_fini();
-
- kernel_init(FREAD | FWRITE);
-
- /*
- * Verify that we can export the pool and reimport it under a
- * different name.
- */
- if (ztest_random(2) == 0) {
- (void) snprintf(name, 100, "%s_import", pool);
- ztest_spa_import_export(pool, name);
- ztest_spa_import_export(name, pool);
- }
-
- /*
- * Verify that we can loop over all pools.
- */
- mutex_enter(&spa_namespace_lock);
- for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) {
- if (zopt_verbose > 3) {
- (void) printf("spa_next: found %s\n", spa_name(spa));
- }
- }
- mutex_exit(&spa_namespace_lock);
-
- /*
- * Open our pool.
- */
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
- /*
- * Verify that we can safely inquire about about any object,
- * whether it's allocated or not. To make it interesting,
- * we probe a 5-wide window around each power of two.
- * This hits all edge cases, including zero and the max.
- */
- for (t = 0; t < 64; t++) {
- for (d = -5; d <= 5; d++) {
- error = dmu_object_info(spa->spa_meta_objset,
- (1ULL << t) + d, NULL);
- ASSERT(error == 0 || error == ENOENT ||
- error == EINVAL);
- }
- }
-
- /*
- * Now kick off all the tests that run in parallel.
- */
- zs->zs_enospc_count = 0;
-
- za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL);
-
- if (zopt_verbose >= 4)
- (void) printf("starting main threads...\n");
-
- za[0].za_start = gethrtime();
- za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC;
- za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time);
- za[0].za_kill = za[0].za_stop;
- if (ztest_random(100) < zopt_killrate)
- za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC);
-
- for (t = 0; t < zopt_threads; t++) {
- d = t % zopt_datasets;
- if (t < zopt_datasets) {
- ztest_replay_t zr;
- int test_future = FALSE;
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL,
- ztest_create_cb, NULL);
- if (error == EEXIST) {
- test_future = TRUE;
- } else if (error != 0) {
- if (error == ENOSPC) {
- zs->zs_enospc_count++;
- (void) rw_unlock(
- &ztest_shared->zs_name_lock);
- break;
- }
- fatal(0, "dmu_objset_create(%s) = %d",
- name, error);
- }
- error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_STANDARD, &za[d].za_os);
- if (error)
- fatal(0, "dmu_objset_open('%s') = %d",
- name, error);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
- if (test_future && ztest_shared->zs_txg > 0)
- ztest_dmu_check_future_leak(za[d].za_os,
- ztest_shared->zs_txg);
- zr.zr_os = za[d].za_os;
- zil_replay(zr.zr_os, &zr, &zr.zr_assign,
- ztest_replay_vector);
- za[d].za_zilog = zil_open(za[d].za_os, NULL);
- }
- za[t].za_pool = spa_strdup(pool);
- za[t].za_os = za[d].za_os;
- za[t].za_zilog = za[d].za_zilog;
- za[t].za_instance = t;
- za[t].za_random = ztest_random(-1ULL);
- za[t].za_start = za[0].za_start;
- za[t].za_stop = za[0].za_stop;
- za[t].za_kill = za[0].za_kill;
-
- error = thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
- &za[t].za_thread);
- if (error)
- fatal(0, "can't create thread %d: error %d",
- t, error);
- }
- ztest_shared->zs_txg = 0;
-
- while (--t >= 0) {
- error = thr_join(za[t].za_thread, NULL, NULL);
- if (error)
- fatal(0, "thr_join(%d) = %d", t, error);
- if (za[t].za_th)
- traverse_fini(za[t].za_th);
- if (t < zopt_datasets) {
- zil_close(za[t].za_zilog);
- dmu_objset_close(za[t].za_os);
- }
- spa_strfree(za[t].za_pool);
- }
-
- umem_free(za, zopt_threads * sizeof (ztest_args_t));
-
- if (zopt_verbose >= 3)
- show_pool_stats(spa);
-
- txg_wait_synced(spa_get_dsl(spa), 0);
-
- zs->zs_alloc = spa_get_alloc(spa);
- zs->zs_space = spa_get_space(spa);
-
- /*
- * Did we have out-of-space errors? If so, destroy a random objset.
- */
- if (zs->zs_enospc_count != 0) {
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_%d", pool, pool,
- (int)ztest_random(zopt_datasets));
- if (zopt_verbose >= 3)
- (void) printf("Destroying %s to free up space\n", name);
- (void) dmu_objset_find(name, ztest_destroy_cb, NULL,
- DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
- }
-
- txg_wait_synced(spa_get_dsl(spa), 0);
-
- /*
- * Right before closing the pool, kick off a bunch of async I/O;
- * spa_close() should wait for it to complete.
- */
- for (t = 1; t < 50; t++)
- dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15);
-
- spa_close(spa, FTAG);
-
- kernel_fini();
-}
-
-void
-print_time(hrtime_t t, char *timebuf)
-{
- hrtime_t s = t / NANOSEC;
- hrtime_t m = s / 60;
- hrtime_t h = m / 60;
- hrtime_t d = h / 24;
-
- s -= m * 60;
- m -= h * 60;
- h -= d * 24;
-
- timebuf[0] = '\0';
-
- if (d)
- (void) sprintf(timebuf,
- "%llud%02lluh%02llum%02llus", d, h, m, s);
- else if (h)
- (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
- else if (m)
- (void) sprintf(timebuf, "%llum%02llus", m, s);
- else
- (void) sprintf(timebuf, "%llus", s);
-}
-
-/*
- * Create a storage pool with the given name and initial vdev size.
- * Then create the specified number of datasets in the pool.
- */
-static void
-ztest_init(char *pool)
-{
- spa_t *spa;
- int error;
- nvlist_t *nvroot;
-
- kernel_init(FREAD | FWRITE);
-
- /*
- * Create the storage pool.
- */
- (void) spa_destroy(pool);
- ztest_shared->zs_vdev_primaries = 0;
- nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
- error = spa_create(pool, nvroot, NULL);
- nvlist_free(nvroot);
-
- if (error)
- fatal(0, "spa_create() = %d", error);
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
- if (zopt_verbose >= 3)
- show_pool_stats(spa);
-
- spa_close(spa, FTAG);
-
- kernel_fini();
-}
-
-int
-main(int argc, char **argv)
-{
- int kills = 0;
- int iters = 0;
- int i, f;
- ztest_shared_t *zs;
- ztest_info_t *zi;
- char timebuf[100];
- char numbuf[6];
-
- (void) setvbuf(stdout, NULL, _IOLBF, 0);
-
- /* Override location of zpool.cache */
- spa_config_dir = "/tmp";
-
- ztest_random_fd = open("/dev/urandom", O_RDONLY);
-
- process_options(argc, argv);
-
- argc -= optind;
- argv += optind;
-
- dprintf_setup(&argc, argv);
-
- /*
- * Blow away any existing copy of zpool.cache
- */
- if (zopt_init != 0)
- (void) remove("/tmp/zpool.cache");
-
- zs = ztest_shared = (void *)mmap(0,
- P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
-
- if (zopt_verbose >= 1) {
- (void) printf("%llu vdevs, %d datasets, %d threads,"
- " %llu seconds...\n",
- (u_longlong_t)zopt_vdevs, zopt_datasets, zopt_threads,
- (u_longlong_t)zopt_time);
- }
-
- /*
- * Create and initialize our storage pool.
- */
- for (i = 1; i <= zopt_init; i++) {
- bzero(zs, sizeof (ztest_shared_t));
- if (zopt_verbose >= 3 && zopt_init != 1)
- (void) printf("ztest_init(), pass %d\n", i);
- ztest_init(zopt_pool);
- }
-
- /*
- * Initialize the call targets for each function.
- */
- for (f = 0; f < ZTEST_FUNCS; f++) {
- zi = &zs->zs_info[f];
-
- *zi = ztest_info[f];
-
- if (*zi->zi_interval == 0)
- zi->zi_call_target = UINT64_MAX;
- else
- zi->zi_call_target = zopt_time / *zi->zi_interval;
- }
-
- zs->zs_start_time = gethrtime();
- zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC;
-
- /*
- * Run the tests in a loop. These tests include fault injection
- * to verify that self-healing data works, and forced crashes
- * to verify that we never lose on-disk consistency.
- */
- while (gethrtime() < zs->zs_stop_time) {
- int status;
- pid_t pid;
- char *tmp;
-
- /*
- * Initialize the workload counters for each function.
- */
- for (f = 0; f < ZTEST_FUNCS; f++) {
- zi = &zs->zs_info[f];
- zi->zi_calls = 0;
- zi->zi_call_time = 0;
- }
-
- pid = fork();
-
- if (pid == -1)
- fatal(1, "fork failed");
-
- if (pid == 0) { /* child */
- struct rlimit rl = { 1024, 1024 };
- (void) setrlimit(RLIMIT_NOFILE, &rl);
- (void) enable_extended_FILE_stdio(-1, -1);
- ztest_run(zopt_pool);
- exit(0);
- }
-
- while (waitpid(pid, &status, 0) != pid)
- continue;
-
- if (WIFEXITED(status)) {
- if (WEXITSTATUS(status) != 0) {
- (void) fprintf(stderr,
- "child exited with code %d\n",
- WEXITSTATUS(status));
- exit(2);
- }
- } else if (WIFSIGNALED(status)) {
- if (WTERMSIG(status) != SIGKILL) {
- (void) fprintf(stderr,
- "child died with signal %d\n",
- WTERMSIG(status));
- exit(3);
- }
- kills++;
- } else {
- (void) fprintf(stderr, "something strange happened "
- "to child\n");
- exit(4);
- }
-
- iters++;
-
- if (zopt_verbose >= 1) {
- hrtime_t now = gethrtime();
-
- now = MIN(now, zs->zs_stop_time);
- print_time(zs->zs_stop_time - now, timebuf);
- nicenum(zs->zs_space, numbuf);
-
- (void) printf("Pass %3d, %8s, %3llu ENOSPC, "
- "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
- iters,
- WIFEXITED(status) ? "Complete" : "SIGKILL",
- (u_longlong_t)zs->zs_enospc_count,
- 100.0 * zs->zs_alloc / zs->zs_space,
- numbuf,
- 100.0 * (now - zs->zs_start_time) /
- (zopt_time * NANOSEC), timebuf);
- }
-
- if (zopt_verbose >= 2) {
- (void) printf("\nWorkload summary:\n\n");
- (void) printf("%7s %9s %s\n",
- "Calls", "Time", "Function");
- (void) printf("%7s %9s %s\n",
- "-----", "----", "--------");
- for (f = 0; f < ZTEST_FUNCS; f++) {
- Dl_info dli;
-
- zi = &zs->zs_info[f];
- print_time(zi->zi_call_time, timebuf);
- (void) dladdr((void *)zi->zi_func, &dli);
- (void) printf("%7llu %9s %s\n",
- (u_longlong_t)zi->zi_calls, timebuf,
- dli.dli_sname);
- }
- (void) printf("\n");
- }
-
- /*
- * It's possible that we killed a child during a rename test, in
- * which case we'll have a 'ztest_tmp' pool lying around instead
- * of 'ztest'. Do a blind rename in case this happened.
- */
- tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL);
- (void) strcpy(tmp, zopt_pool);
- (void) strcat(tmp, "_tmp");
- kernel_init(FREAD | FWRITE);
- (void) spa_rename(tmp, zopt_pool);
- kernel_fini();
- umem_free(tmp, strlen(tmp) + 1);
- }
-
- ztest_verify_blocks(zopt_pool);
-
- if (zopt_verbose >= 1) {
- (void) printf("%d killed, %d completed, %.0f%% kill rate\n",
- kills, iters - kills, (100.0 * kills) / MAX(1, iters));
- }
-
- return (0);
-}
diff --git a/contrib/opensolaris/head/assert.h b/contrib/opensolaris/head/assert.h
deleted file mode 100644
index b088033..0000000
--- a/contrib/opensolaris/head/assert.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1988 AT&T */
-/* All Rights Reserved */
-
-
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ASSERT_H
-#define _ASSERT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.6.1.4 */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__STDC__)
-#if __STDC_VERSION__ - 0 >= 199901L
-extern void __assert_c99(const char *, const char *, int, const char *);
-#else
-extern void __assert(const char *, const char *, int);
-#endif /* __STDC_VERSION__ - 0 >= 199901L */
-#else
-extern void _assert();
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ASSERT_H */
-
-/*
- * Note that the ANSI C Standard requires all headers to be idempotent except
- * <assert.h> which is explicitly required not to be idempotent (section 4.1.2).
- * Therefore, it is by intent that the header guards (#ifndef _ASSERT_H) do
- * not span this entire file.
- */
-
-#undef assert
-
-#ifdef NDEBUG
-
-#define assert(EX) ((void)0)
-
-#else
-
-#if defined(__STDC__)
-#if __STDC_VERSION__ - 0 >= 199901L
-#define assert(EX) (void)((EX) || \
- (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
-#else
-#define assert(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
-#endif /* __STDC_VERSION__ - 0 >= 199901L */
-#else
-#define assert(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
-#endif /* __STDC__ */
-
-#endif /* NDEBUG */
diff --git a/contrib/opensolaris/head/atomic.h b/contrib/opensolaris/head/atomic.h
deleted file mode 100644
index 00c9476..0000000
--- a/contrib/opensolaris/head/atomic.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ATOMIC_H
-#define _ATOMIC_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/atomic.h>
-
-#endif /* _ATOMIC_H */
diff --git a/contrib/opensolaris/head/libintl.h b/contrib/opensolaris/head/libintl.h
deleted file mode 100644
index 94b4d03..0000000
--- a/contrib/opensolaris/head/libintl.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Libintl is a library of advanced internationalization functions. */
-
-#ifndef _LIBINTL_H
-#define _LIBINTL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/isa_defs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * wchar_t is a built-in type in standard C++ and as such is not
- * defined here when using standard C++. However, the GNU compiler
- * fixincludes utility nonetheless creates its own version of this
- * header for use by gcc and g++. In that version it adds a redundant
- * guard for __cplusplus. To avoid the creation of a gcc/g++ specific
- * header we need to include the following magic comment:
- *
- * we must use the C++ compiler's type
- *
- * The above comment should not be removed or changed until GNU
- * gcc/fixinc/inclhack.def is updated to bypass this header.
- */
-#if !defined(__cplusplus) || (__cplusplus < 199711L && !defined(__GNUG__))
-#ifndef _WCHAR_T
-#define _WCHAR_T
-#if defined(_LP64)
-typedef int wchar_t;
-#else
-typedef long wchar_t;
-#endif
-#endif /* !_WCHAR_T */
-#endif /* !defined(__cplusplus) ... */
-
-#define TEXTDOMAINMAX 256
-
-#ifdef __STDC__
-extern char *dcgettext(const char *, const char *, const int);
-extern char *dgettext(const char *, const char *);
-extern char *gettext(const char *);
-extern char *textdomain(const char *);
-extern char *bindtextdomain(const char *, const char *);
-
-/*
- * LI18NUX 2000 Globalization Specification Version 1.0
- * with Amendment 2
- */
-extern char *dcngettext(const char *, const char *,
- const char *, unsigned long int, int);
-extern char *dngettext(const char *, const char *,
- const char *, unsigned long int);
-extern char *ngettext(const char *, const char *, unsigned long int);
-extern char *bind_textdomain_codeset(const char *, const char *);
-
-/* Word handling functions --- requires dynamic linking */
-/* Warning: these are experimental and subject to change. */
-extern int wdinit(void);
-extern int wdchkind(wchar_t);
-extern int wdbindf(wchar_t, wchar_t, int);
-extern wchar_t *wddelim(wchar_t, wchar_t, int);
-extern wchar_t mcfiller(void);
-extern int mcwrap(void);
-
-#else
-extern char *dcgettext();
-extern char *dgettext();
-extern char *gettext();
-extern char *textdomain();
-extern char *bindtextdomain();
-
-/*
- * LI18NUX 2000 Globalization Specification Version 1.0
- * with Amendment 2
- */
-extern char *dcngettext();
-extern char *dngettext();
-extern char *ngettext();
-extern char *bind_textdomain_codeset();
-
-/* Word handling functions --- requires dynamic linking */
-/* Warning: these are experimental and subject to change. */
-extern int wdinit();
-extern int wdchkind();
-extern int wdbindf();
-extern wchar_t *wddelim();
-extern wchar_t mcfiller();
-extern int mcwrap();
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBINTL_H */
diff --git a/contrib/opensolaris/head/stdio_ext.h b/contrib/opensolaris/head/stdio_ext.h
deleted file mode 100644
index 839e05f..0000000
--- a/contrib/opensolaris/head/stdio_ext.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _STDIO_EXT_H
-#define _STDIO_EXT_H
-
-#define enable_extended_FILE_stdio(x,y) (0)
-
-#endif
diff --git a/contrib/opensolaris/head/synch.h b/contrib/opensolaris/head/synch.h
deleted file mode 100644
index 8d825d5..0000000
--- a/contrib/opensolaris/head/synch.h
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYNCH_H
-#define _SYNCH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * synch.h:
- * definitions needed to use the thread synchronization interface
- */
-
-#ifndef _ASM
-#include <sys/machlock.h>
-#include <sys/time_impl.h>
-#include <sys/synch.h>
-#endif /* _ASM */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef _ASM
-
-/*
- * Semaphores
- */
-typedef struct _sema {
- /* this structure must be the same as sem_t in <semaphore.h> */
- uint32_t count; /* semaphore count */
- uint16_t type;
- uint16_t magic;
- upad64_t pad1[3]; /* reserved for a mutex_t */
- upad64_t pad2[2]; /* reserved for a cond_t */
-} sema_t;
-
-/*
- * POSIX.1c Note:
- * POSIX.1c requires that <pthread.h> define the structures pthread_mutex_t
- * and pthread_cond_t. These structures are identical to mutex_t (lwp_mutex_t)
- * and cond_t (lwp_cond_t) which are defined in <synch.h>. A nested included
- * of <synch.h> (to allow a "#typedef mutex_t pthread_mutex_t") would pull in
- * non-posix symbols/constants violating the namespace restrictions. Hence,
- * pthread_mutex_t/pthread_cond_t have been redefined in <pthread.h> (actually
- * in <sys/types.h>). Any modifications done to mutex_t/lwp_mutex_t or
- * cond_t/lwp_cond_t should also be done to pthread_mutex_t/pthread_cond_t.
- */
-typedef lwp_mutex_t mutex_t;
-typedef lwp_cond_t cond_t;
-
-/*
- * Readers/writer locks
- *
- * NOTE: The layout of this structure should be kept in sync with the layout
- * of the correponding structure of pthread_rwlock_t in sys/types.h.
- * Also, there is an identical structure for lwp_rwlock_t in <sys/synch.h>.
- * Because we have to deal with C++, we cannot redefine this one as that one.
- */
-typedef struct _rwlock {
- int32_t readers; /* -1 == writer else # of readers */
- uint16_t type;
- uint16_t magic;
- mutex_t mutex; /* used to indicate ownership */
- cond_t readercv; /* unused */
- cond_t writercv; /* unused */
-} rwlock_t;
-
-#ifdef __STDC__
-int _lwp_mutex_lock(lwp_mutex_t *);
-int _lwp_mutex_unlock(lwp_mutex_t *);
-int _lwp_mutex_trylock(lwp_mutex_t *);
-int _lwp_cond_wait(lwp_cond_t *, lwp_mutex_t *);
-int _lwp_cond_timedwait(lwp_cond_t *, lwp_mutex_t *, timespec_t *);
-int _lwp_cond_reltimedwait(lwp_cond_t *, lwp_mutex_t *, timespec_t *);
-int _lwp_cond_signal(lwp_cond_t *);
-int _lwp_cond_broadcast(lwp_cond_t *);
-int _lwp_sema_init(lwp_sema_t *, int);
-int _lwp_sema_wait(lwp_sema_t *);
-int _lwp_sema_trywait(lwp_sema_t *);
-int _lwp_sema_post(lwp_sema_t *);
-int cond_init(cond_t *, int, void *);
-int cond_destroy(cond_t *);
-int cond_wait(cond_t *, mutex_t *);
-int cond_timedwait(cond_t *, mutex_t *, const timespec_t *);
-int cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *);
-int cond_signal(cond_t *);
-int cond_broadcast(cond_t *);
-int mutex_init(mutex_t *, int, void *);
-int mutex_destroy(mutex_t *);
-int mutex_lock(mutex_t *);
-int mutex_trylock(mutex_t *);
-int mutex_unlock(mutex_t *);
-int rwlock_init(rwlock_t *, int, void *);
-int rwlock_destroy(rwlock_t *);
-int rw_rdlock(rwlock_t *);
-int rw_wrlock(rwlock_t *);
-int rw_unlock(rwlock_t *);
-int rw_tryrdlock(rwlock_t *);
-int rw_trywrlock(rwlock_t *);
-int sema_init(sema_t *, unsigned int, int, void *);
-int sema_destroy(sema_t *);
-int sema_wait(sema_t *);
-int sema_timedwait(sema_t *, const timespec_t *);
-int sema_reltimedwait(sema_t *, const timespec_t *);
-int sema_post(sema_t *);
-int sema_trywait(sema_t *);
-
-#else /* __STDC__ */
-
-int _lwp_mutex_lock();
-int _lwp_mutex_unlock();
-int _lwp_mutex_trylock();
-int _lwp_cond_wait();
-int _lwp_cond_timedwait();
-int _lwp_cond_reltimedwait();
-int _lwp_cond_signal();
-int _lwp_cond_broadcast();
-int _lwp_sema_init();
-int _lwp_sema_wait();
-int _lwp_sema_trywait();
-int _lwp_sema_post();
-int cond_init();
-int cond_destroy();
-int cond_wait();
-int cond_timedwait();
-int cond_reltimedwait();
-int cond_signal();
-int cond_broadcast();
-int mutex_init();
-int mutex_destroy();
-int mutex_lock();
-int mutex_trylock();
-int mutex_unlock();
-int rwlock_init();
-int rwlock_destroy();
-int rw_rdlock();
-int rw_wrlock();
-int rw_unlock();
-int rw_tryrdlock();
-int rw_trywrlock();
-int sema_init();
-int sema_destroy();
-int sema_wait();
-int sema_timedwait();
-int sema_reltimedwait();
-int sema_post();
-int sema_trywait();
-
-#endif /* __STDC__ */
-
-#endif /* _ASM */
-
-/* "Magic numbers" tagging synchronization object types */
-#define MUTEX_MAGIC _MUTEX_MAGIC
-#define SEMA_MAGIC _SEMA_MAGIC
-#define COND_MAGIC _COND_MAGIC
-#define RWL_MAGIC _RWL_MAGIC
-
-/*
- * POSIX.1c Note:
- * DEFAULTMUTEX is defined same as PTHREAD_MUTEX_INITIALIZER in <pthread.h>.
- * DEFAULTCV is defined same as PTHREAD_COND_INITIALIZER in <pthread.h>.
- * DEFAULTRWLOCK is defined same as PTHREAD_RWLOCK_INITIALIZER in <pthread.h>.
- * Any changes to these macros should be reflected in <pthread.h>
- */
-#define DEFAULTMUTEX \
- {{0, 0, 0, {USYNC_THREAD}, MUTEX_MAGIC}, \
- {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
-#define SHAREDMUTEX \
- {{0, 0, 0, {USYNC_PROCESS}, MUTEX_MAGIC}, \
- {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
-#define RECURSIVEMUTEX \
- {{0, 0, 0, {USYNC_THREAD|LOCK_RECURSIVE}, MUTEX_MAGIC}, \
- {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
-#define ERRORCHECKMUTEX \
- {{0, 0, 0, {USYNC_THREAD|LOCK_ERRORCHECK}, MUTEX_MAGIC}, \
- {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
-#define RECURSIVE_ERRORCHECKMUTEX \
- {{0, 0, 0, {USYNC_THREAD|LOCK_RECURSIVE|LOCK_ERRORCHECK}, \
- MUTEX_MAGIC}, {{{0, 0, 0, 0, 0, 0, 0, 0}}}, 0}
-#define DEFAULTCV \
- {{{0, 0, 0, 0}, USYNC_THREAD, COND_MAGIC}, 0}
-#define SHAREDCV \
- {{{0, 0, 0, 0}, USYNC_PROCESS, COND_MAGIC}, 0}
-#define DEFAULTSEMA \
- {0, USYNC_THREAD, SEMA_MAGIC, {0, 0, 0}, {0, 0}}
-#define SHAREDSEMA \
- {0, USYNC_PROCESS, SEMA_MAGIC, {0, 0, 0}, {0, 0}}
-#define DEFAULTRWLOCK \
- {0, USYNC_THREAD, RWL_MAGIC, DEFAULTMUTEX, DEFAULTCV, DEFAULTCV}
-#define SHAREDRWLOCK \
- {0, USYNC_PROCESS, RWL_MAGIC, SHAREDMUTEX, SHAREDCV, SHAREDCV}
-
-/*
- * Tests on lock states.
- */
-#define SEMA_HELD(x) _sema_held(x)
-#define RW_READ_HELD(x) _rw_read_held(x)
-#define RW_WRITE_HELD(x) _rw_write_held(x)
-#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x))
-#define MUTEX_HELD(x) _mutex_held(x)
-
-/*
- * The following definitions are for assertions which can be checked
- * statically by tools like lock_lint. You can also define your own
- * run-time test for each. If you don't, we define them to 1 so that
- * such assertions simply pass.
- */
-#ifndef NO_LOCKS_HELD
-#define NO_LOCKS_HELD 1
-#endif
-#ifndef NO_COMPETING_THREADS
-#define NO_COMPETING_THREADS 1
-#endif
-
-#ifndef _ASM
-
-#ifdef __STDC__
-
-int _sema_held(sema_t *);
-int _rw_read_held(rwlock_t *);
-int _rw_write_held(rwlock_t *);
-int _mutex_held(mutex_t *);
-
-#else /* __STDC__ */
-
-int _sema_held();
-int _rw_read_held();
-int _rw_write_held();
-int _mutex_held();
-
-#endif /* __STDC__ */
-
-#endif /* _ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYNCH_H */
diff --git a/contrib/opensolaris/head/thread.h b/contrib/opensolaris/head/thread.h
deleted file mode 100644
index b9ed952..0000000
--- a/contrib/opensolaris/head/thread.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _THREAD_H
-#define _THREAD_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <pthread.h>
-#include <assert.h>
-
-/*
- * Compatibility thread stuff needed for Solaris -> Linux port
- */
-
-typedef pthread_t thread_t;
-typedef pthread_mutex_t mutex_t;
-typedef pthread_cond_t cond_t;
-typedef pthread_rwlock_t rwlock_t;
-
-#define USYNC_THREAD 0
-
-#define thr_self() (unsigned long)pthread_self()
-#define thr_equal(a,b) pthread_equal(a,b)
-#define thr_join(t,d,s) pthread_join(t,s)
-#define thr_exit(r) pthread_exit(r)
-#define _mutex_init(l,f,a) pthread_mutex_init(l,NULL)
-#define _mutex_destroy(l) pthread_mutex_destroy(l)
-#define mutex_lock(l) pthread_mutex_lock(l)
-#define mutex_trylock(l) pthread_mutex_trylock(l)
-#define mutex_unlock(l) pthread_mutex_unlock(l)
-#define rwlock_init(l,f,a) pthread_rwlock_init(l,NULL)
-#define rwlock_destroy(l) pthread_rwlock_destroy(l)
-#define rw_rdlock(l) pthread_rwlock_rdlock(l)
-#define rw_wrlock(l) pthread_rwlock_wrlock(l)
-#define rw_tryrdlock(l) pthread_rwlock_tryrdlock(l)
-#define rw_trywrlock(l) pthread_rwlock_trywrlock(l)
-#define rw_unlock(l) pthread_rwlock_unlock(l)
-#define cond_init(l,f,a) pthread_cond_init(l,NULL)
-#define cond_destroy(l) pthread_cond_destroy(l)
-#define cond_wait(l,m) pthread_cond_wait(l,m)
-#define cond_signal(l) pthread_cond_signal(l)
-#define cond_broadcast(l) pthread_cond_broadcast(l)
-
-#define THR_BOUND 0x00000001 /* = PTHREAD_SCOPE_SYSTEM */
-#define THR_NEW_LWP 0x00000002
-#define THR_DETACHED 0x00000040 /* = PTHREAD_CREATE_DETACHED */
-#define THR_SUSPENDED 0x00000080
-#define THR_DAEMON 0x00000100
-
-static __inline int
-thr_create(void *stack_base, size_t stack_size, void *(*start_func) (void*),
- void *arg, long flags, thread_t *new_thread_ID)
-{
- int ret;
-
- assert(stack_base == NULL);
- assert(stack_size == 0);
- assert((flags & ~THR_BOUND & ~THR_DETACHED) == 0);
-
- pthread_attr_t attr;
- pthread_attr_init(&attr);
-
- if(flags & THR_DETACHED)
- pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-
- /* This function ignores the THR_BOUND flag, since NPTL doesn't seem to support PTHREAD_SCOPE_PROCESS */
-
- ret = pthread_create(new_thread_ID, &attr, start_func, arg);
-
- pthread_attr_destroy(&attr);
-
- return (ret);
-}
-
-#endif /* _THREAD_H */
diff --git a/contrib/opensolaris/lib/libnvpair/libnvpair.c b/contrib/opensolaris/lib/libnvpair/libnvpair.c
deleted file mode 100644
index 58a47f1..0000000
--- a/contrib/opensolaris/lib/libnvpair/libnvpair.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <unistd.h>
-#include <strings.h>
-#include "libnvpair.h"
-
-/*
- * libnvpair - A tools library for manipulating <name, value> pairs.
- *
- * This library provides routines packing an unpacking nv pairs
- * for transporting data across process boundaries, transporting
- * between kernel and userland, and possibly saving onto disk files.
- */
-
-static void
-indent(FILE *fp, int depth)
-{
- while (depth-- > 0)
- (void) fprintf(fp, "\t");
-}
-
-/*
- * nvlist_print - Prints elements in an event buffer
- */
-static
-void
-nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
-{
- int i;
- char *name;
- uint_t nelem;
- nvpair_t *nvp;
-
- if (nvl == NULL)
- return;
-
- indent(fp, depth);
- (void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl));
-
- nvp = nvlist_next_nvpair(nvl, NULL);
-
- while (nvp) {
- data_type_t type = nvpair_type(nvp);
-
- indent(fp, depth);
- name = nvpair_name(nvp);
- (void) fprintf(fp, "\t%s =", name);
- nelem = 0;
- switch (type) {
- case DATA_TYPE_BOOLEAN: {
- (void) fprintf(fp, " 1");
- break;
- }
- case DATA_TYPE_BOOLEAN_VALUE: {
- boolean_t val;
- (void) nvpair_value_boolean_value(nvp, &val);
- (void) fprintf(fp, " %d", val);
- break;
- }
- case DATA_TYPE_BYTE: {
- uchar_t val;
- (void) nvpair_value_byte(nvp, &val);
- (void) fprintf(fp, " 0x%2.2x", val);
- break;
- }
- case DATA_TYPE_INT8: {
- int8_t val;
- (void) nvpair_value_int8(nvp, &val);
- (void) fprintf(fp, " %d", val);
- break;
- }
- case DATA_TYPE_UINT8: {
- uint8_t val;
- (void) nvpair_value_uint8(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
- break;
- }
- case DATA_TYPE_INT16: {
- int16_t val;
- (void) nvpair_value_int16(nvp, &val);
- (void) fprintf(fp, " %d", val);
- break;
- }
- case DATA_TYPE_UINT16: {
- uint16_t val;
- (void) nvpair_value_uint16(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
- break;
- }
- case DATA_TYPE_INT32: {
- int32_t val;
- (void) nvpair_value_int32(nvp, &val);
- (void) fprintf(fp, " %d", val);
- break;
- }
- case DATA_TYPE_UINT32: {
- uint32_t val;
- (void) nvpair_value_uint32(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
- break;
- }
- case DATA_TYPE_INT64: {
- int64_t val;
- (void) nvpair_value_int64(nvp, &val);
- (void) fprintf(fp, " %lld", (longlong_t)val);
- break;
- }
- case DATA_TYPE_UINT64: {
- uint64_t val;
- (void) nvpair_value_uint64(nvp, &val);
- (void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
- break;
- }
- case DATA_TYPE_STRING: {
- char *val;
- (void) nvpair_value_string(nvp, &val);
- (void) fprintf(fp, " %s", val);
- break;
- }
- case DATA_TYPE_BOOLEAN_ARRAY: {
- boolean_t *val;
- (void) nvpair_value_boolean_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
- break;
- }
- case DATA_TYPE_BYTE_ARRAY: {
- uchar_t *val;
- (void) nvpair_value_byte_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%2.2x", val[i]);
- break;
- }
- case DATA_TYPE_INT8_ARRAY: {
- int8_t *val;
- (void) nvpair_value_int8_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
- break;
- }
- case DATA_TYPE_UINT8_ARRAY: {
- uint8_t *val;
- (void) nvpair_value_uint8_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
- break;
- }
- case DATA_TYPE_INT16_ARRAY: {
- int16_t *val;
- (void) nvpair_value_int16_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
- break;
- }
- case DATA_TYPE_UINT16_ARRAY: {
- uint16_t *val;
- (void) nvpair_value_uint16_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
- break;
- }
- case DATA_TYPE_INT32_ARRAY: {
- int32_t *val;
- (void) nvpair_value_int32_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
- break;
- }
- case DATA_TYPE_UINT32_ARRAY: {
- uint32_t *val;
- (void) nvpair_value_uint32_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
- break;
- }
- case DATA_TYPE_INT64_ARRAY: {
- int64_t *val;
- (void) nvpair_value_int64_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %lld", (longlong_t)val[i]);
- break;
- }
- case DATA_TYPE_UINT64_ARRAY: {
- uint64_t *val;
- (void) nvpair_value_uint64_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%llx",
- (u_longlong_t)val[i]);
- break;
- }
- case DATA_TYPE_STRING_ARRAY: {
- char **val;
- (void) nvpair_value_string_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %s", val[i]);
- break;
- }
- case DATA_TYPE_HRTIME: {
- hrtime_t val;
- (void) nvpair_value_hrtime(nvp, &val);
- (void) fprintf(fp, " 0x%llx", val);
- break;
- }
- case DATA_TYPE_NVLIST: {
- nvlist_t *val;
- (void) nvpair_value_nvlist(nvp, &val);
- (void) fprintf(fp, " (embedded nvlist)\n");
- nvlist_print_with_indent(fp, val, depth + 1);
- indent(fp, depth + 1);
- (void) fprintf(fp, "(end %s)\n", name);
- break;
- }
- case DATA_TYPE_NVLIST_ARRAY: {
- nvlist_t **val;
- (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
- (void) fprintf(fp, " (array of embedded nvlists)\n");
- for (i = 0; i < nelem; i++) {
- indent(fp, depth + 1);
- (void) fprintf(fp,
- "(start %s[%d])\n", name, i);
- nvlist_print_with_indent(fp, val[i], depth + 1);
- indent(fp, depth + 1);
- (void) fprintf(fp, "(end %s[%d])\n", name, i);
- }
- break;
- }
- default:
- (void) fprintf(fp, " unknown data type (%d)", type);
- break;
- }
- (void) fprintf(fp, "\n");
- nvp = nvlist_next_nvpair(nvl, nvp);
- }
-}
-
-void
-nvlist_print(FILE *fp, nvlist_t *nvl)
-{
- nvlist_print_with_indent(fp, nvl, 0);
-}
diff --git a/contrib/opensolaris/lib/libnvpair/libnvpair.h b/contrib/opensolaris/lib/libnvpair/libnvpair.h
deleted file mode 100644
index d1d25ea..0000000
--- a/contrib/opensolaris/lib/libnvpair/libnvpair.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBNVPAIR_H
-#define _LIBNVPAIR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/nvpair.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void nvlist_print(FILE *, nvlist_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBNVPAIR_H */
diff --git a/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c b/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c
deleted file mode 100644
index 1aefc10..0000000
--- a/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/nvpair.h>
-#include <stdlib.h>
-
-/*ARGSUSED*/
-static void *
-nv_alloc_sys(nv_alloc_t *nva, size_t size)
-{
- return (malloc(size));
-}
-
-/*ARGSUSED*/
-static void
-nv_free_sys(nv_alloc_t *nva, void *buf, size_t size)
-{
- free(buf);
-}
-
-const nv_alloc_ops_t system_ops_def = {
- NULL, /* nv_ao_init() */
- NULL, /* nv_ao_fini() */
- nv_alloc_sys, /* nv_ao_alloc() */
- nv_free_sys, /* nv_ao_free() */
- NULL /* nv_ao_reset() */
-};
-
-nv_alloc_t nv_alloc_nosleep_def = {
- &system_ops_def,
- NULL
-};
-
-nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
diff --git a/contrib/opensolaris/lib/libuutil/common/libuutil.h b/contrib/opensolaris/lib/libuutil/common/libuutil.h
deleted file mode 100644
index fcd5e79..0000000
--- a/contrib/opensolaris/lib/libuutil/common/libuutil.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBUUTIL_H
-#define _LIBUUTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <solaris.h>
-#include <sys/types.h>
-#include <stdarg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Standard flags codes.
- */
-#define UU_DEFAULT 0
-
-/*
- * Standard error codes.
- */
-#define UU_ERROR_NONE 0 /* no error */
-#define UU_ERROR_INVALID_ARGUMENT 1 /* invalid argument */
-#define UU_ERROR_UNKNOWN_FLAG 2 /* passed flag invalid */
-#define UU_ERROR_NO_MEMORY 3 /* out of memory */
-#define UU_ERROR_CALLBACK_FAILED 4 /* callback-initiated error */
-#define UU_ERROR_NOT_SUPPORTED 5 /* operation not supported */
-#define UU_ERROR_EMPTY 6 /* no value provided */
-#define UU_ERROR_UNDERFLOW 7 /* value is too small */
-#define UU_ERROR_OVERFLOW 8 /* value is too value */
-#define UU_ERROR_INVALID_CHAR 9 /* value contains unexpected char */
-#define UU_ERROR_INVALID_DIGIT 10 /* value contains digit not in base */
-
-#define UU_ERROR_SYSTEM 99 /* underlying system error */
-#define UU_ERROR_UNKNOWN 100 /* error status not known */
-
-/*
- * Standard program exit codes.
- */
-#define UU_EXIT_OK (*(uu_exit_ok()))
-#define UU_EXIT_FATAL (*(uu_exit_fatal()))
-#define UU_EXIT_USAGE (*(uu_exit_usage()))
-
-/*
- * Exit status profiles.
- */
-#define UU_PROFILE_DEFAULT 0
-#define UU_PROFILE_LAUNCHER 1
-
-/*
- * Error reporting functions.
- */
-uint32_t uu_error(void);
-const char *uu_strerror(uint32_t);
-
-/*
- * Program notification functions.
- */
-extern void uu_alt_exit(int);
-extern const char *uu_setpname(char *);
-extern const char *uu_getpname(void);
-/*PRINTFLIKE1*/
-extern void uu_warn(const char *, ...);
-extern void uu_vwarn(const char *, va_list);
-/*PRINTFLIKE1*/
-extern void uu_die(const char *, ...) __NORETURN;
-extern void uu_vdie(const char *, va_list) __NORETURN;
-/*PRINTFLIKE2*/
-extern void uu_xdie(int, const char *, ...) __NORETURN;
-extern void uu_vxdie(int, const char *, va_list) __NORETURN;
-
-/*
- * Exit status functions (not to be used directly)
- */
-extern int *uu_exit_ok(void);
-extern int *uu_exit_fatal(void);
-extern int *uu_exit_usage(void);
-
-/*
- * string->number conversions
- */
-extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t);
-extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t);
-
-/*
- * Debug print facility functions.
- */
-typedef struct uu_dprintf uu_dprintf_t;
-
-typedef enum {
- UU_DPRINTF_SILENT,
- UU_DPRINTF_FATAL,
- UU_DPRINTF_WARNING,
- UU_DPRINTF_NOTICE,
- UU_DPRINTF_INFO,
- UU_DPRINTF_DEBUG
-} uu_dprintf_severity_t;
-
-extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t,
- uint_t);
-/*PRINTFLIKE3*/
-extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t,
- const char *, ...);
-extern void uu_dprintf_destroy(uu_dprintf_t *);
-extern const char *uu_dprintf_getname(uu_dprintf_t *);
-
-/*
- * Identifier test flags and function.
- */
-#define UU_NAME_DOMAIN 0x1 /* allow SUNW, or com.sun, prefix */
-#define UU_NAME_PATH 0x2 /* allow '/'-delimited paths */
-
-int uu_check_name(const char *, uint_t);
-
-/*
- * File creation functions.
- */
-extern int uu_open_tmp(const char *dir, uint_t uflags);
-
-/*
- * Convenience functions.
- */
-/*PRINTFLIKE1*/
-extern char *uu_msprintf(const char *format, ...);
-extern void *uu_zalloc(size_t);
-extern void uu_free(void *);
-
-/*
- * Comparison function type definition.
- * Developers should be careful in their use of the _private argument. If you
- * break interface guarantees, you get undefined behavior.
- */
-typedef int uu_compare_fn_t(const void *__left, const void *__right,
- void *__private);
-
-/*
- * Walk variant flags.
- * A data structure need not provide support for all variants and
- * combinations. Refer to the appropriate documentation.
- */
-#define UU_WALK_ROBUST 0x00000001 /* walk can survive removes */
-#define UU_WALK_REVERSE 0x00000002 /* reverse walk order */
-
-#define UU_WALK_PREORDER 0x00000010 /* walk tree in pre-order */
-#define UU_WALK_POSTORDER 0x00000020 /* walk tree in post-order */
-
-/*
- * Walk callback function return codes.
- */
-#define UU_WALK_ERROR -1
-#define UU_WALK_NEXT 0
-#define UU_WALK_DONE 1
-
-/*
- * Walk callback function type definition.
- */
-typedef int uu_walk_fn_t(void *_elem, void *_private);
-
-/*
- * lists: opaque structures
- */
-typedef struct uu_list_pool uu_list_pool_t;
-typedef struct uu_list uu_list_t;
-
-typedef struct uu_list_node {
- uintptr_t uln_opaque[2];
-} uu_list_node_t;
-
-typedef struct uu_list_walk uu_list_walk_t;
-
-typedef uintptr_t uu_list_index_t;
-
-/*
- * lists: interface
- *
- * basic usage:
- * typedef struct foo {
- * ...
- * uu_list_node_t foo_node;
- * ...
- * } foo_t;
- *
- * static int
- * foo_compare(void *l_arg, void *r_arg, void *private)
- * {
- * foo_t *l = l_arg;
- * foo_t *r = r_arg;
- *
- * if (... l greater than r ...)
- * return (1);
- * if (... l less than r ...)
- * return (-1);
- * return (0);
- * }
- *
- * ...
- * // at initialization time
- * foo_pool = uu_list_pool_create("foo_pool",
- * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
- * debugging? 0 : UU_AVL_POOL_DEBUG);
- * ...
- */
-uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t,
- uu_compare_fn_t *, uint32_t);
-#define UU_LIST_POOL_DEBUG 0x00000001
-
-void uu_list_pool_destroy(uu_list_pool_t *);
-
-/*
- * usage:
- *
- * foo_t *a;
- * a = malloc(sizeof(*a));
- * uu_list_node_init(a, &a->foo_list, pool);
- * ...
- * uu_list_node_fini(a, &a->foo_list, pool);
- * free(a);
- */
-void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *);
-void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *);
-
-uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t);
-#define UU_LIST_DEBUG 0x00000001
-#define UU_LIST_SORTED 0x00000002 /* list is sorted */
-
-void uu_list_destroy(uu_list_t *); /* list must be empty */
-
-size_t uu_list_numnodes(uu_list_t *);
-
-void *uu_list_first(uu_list_t *);
-void *uu_list_last(uu_list_t *);
-
-void *uu_list_next(uu_list_t *, void *);
-void *uu_list_prev(uu_list_t *, void *);
-
-int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t);
-
-uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t);
-void *uu_list_walk_next(uu_list_walk_t *);
-void uu_list_walk_end(uu_list_walk_t *);
-
-void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *);
-void uu_list_insert(uu_list_t *, void *, uu_list_index_t);
-
-void *uu_list_nearest_next(uu_list_t *, uu_list_index_t);
-void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t);
-
-void *uu_list_teardown(uu_list_t *, void **);
-
-void uu_list_remove(uu_list_t *, void *);
-
-/*
- * lists: interfaces for non-sorted lists only
- */
-int uu_list_insert_before(uu_list_t *, void *_target, void *_elem);
-int uu_list_insert_after(uu_list_t *, void *_target, void *_elem);
-
-/*
- * avl trees: opaque structures
- */
-typedef struct uu_avl_pool uu_avl_pool_t;
-typedef struct uu_avl uu_avl_t;
-
-typedef struct uu_avl_node {
-#ifdef _LP64
- uintptr_t uan_opaque[3];
-#else
- uintptr_t uan_opaque[4];
-#endif
-} uu_avl_node_t;
-
-typedef struct uu_avl_walk uu_avl_walk_t;
-
-typedef uintptr_t uu_avl_index_t;
-
-/*
- * avl trees: interface
- *
- * basic usage:
- * typedef struct foo {
- * ...
- * uu_avl_node_t foo_node;
- * ...
- * } foo_t;
- *
- * static int
- * foo_compare(void *l_arg, void *r_arg, void *private)
- * {
- * foo_t *l = l_arg;
- * foo_t *r = r_arg;
- *
- * if (... l greater than r ...)
- * return (1);
- * if (... l less than r ...)
- * return (-1);
- * return (0);
- * }
- *
- * ...
- * // at initialization time
- * foo_pool = uu_avl_pool_create("foo_pool",
- * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare,
- * debugging? 0 : UU_AVL_POOL_DEBUG);
- * ...
- */
-uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t,
- uu_compare_fn_t *, uint32_t);
-#define UU_AVL_POOL_DEBUG 0x00000001
-
-void uu_avl_pool_destroy(uu_avl_pool_t *);
-
-/*
- * usage:
- *
- * foo_t *a;
- * a = malloc(sizeof(*a));
- * uu_avl_node_init(a, &a->foo_avl, pool);
- * ...
- * uu_avl_node_fini(a, &a->foo_avl, pool);
- * free(a);
- */
-void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *);
-void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *);
-
-uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t);
-#define UU_AVL_DEBUG 0x00000001
-
-void uu_avl_destroy(uu_avl_t *); /* list must be empty */
-
-size_t uu_avl_numnodes(uu_avl_t *);
-
-void *uu_avl_first(uu_avl_t *);
-void *uu_avl_last(uu_avl_t *);
-
-void *uu_avl_next(uu_avl_t *, void *);
-void *uu_avl_prev(uu_avl_t *, void *);
-
-int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t);
-
-uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t);
-void *uu_avl_walk_next(uu_avl_walk_t *);
-void uu_avl_walk_end(uu_avl_walk_t *);
-
-void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *);
-void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t);
-
-void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t);
-void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t);
-
-void *uu_avl_teardown(uu_avl_t *, void **);
-
-void uu_avl_remove(uu_avl_t *, void *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBUUTIL_H */
diff --git a/contrib/opensolaris/lib/libuutil/common/libuutil_common.h b/contrib/opensolaris/lib/libuutil/common/libuutil_common.h
deleted file mode 100644
index fc9cc7c..0000000
--- a/contrib/opensolaris/lib/libuutil/common/libuutil_common.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBUUTIL_COMMON_H
-#define _LIBUUTIL_COMMON_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <solaris.h>
-
-/*
- * We don't bind to the internal libc interfaces if this is a
- * native build.
- */
-#ifndef NATIVE_BUILD
-#include "c_synonyms.h"
-#endif
-
-#include <libuutil.h>
-#include <libuutil_impl.h>
-
-#endif /* _LIBUUTIL_COMMON_H */
diff --git a/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h b/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h
deleted file mode 100644
index 9466e59..0000000
--- a/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBUUTIL_IMPL_H
-#define _LIBUUTIL_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <libuutil.h>
-#include <pthread.h>
-
-#include <sys/avl_impl.h>
-#include <sys/byteorder.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void uu_set_error(uint_t);
-#pragma rarely_called(uu_set_error)
-
-/*PRINTFLIKE1*/
-void uu_panic(const char *format, ...);
-#pragma rarely_called(uu_panic)
-
-struct uu_dprintf {
- char *uud_name;
- uu_dprintf_severity_t uud_severity;
- uint_t uud_flags;
-};
-
-/*
- * For debugging purposes, libuutil keeps around linked lists of all uu_lists
- * and uu_avls, along with pointers to their parents. These can cause false
- * negatives when looking for memory leaks, so we encode the pointers by
- * storing them with swapped endianness; this is not perfect, but it's about
- * the best we can do without wasting a lot of space.
- */
-#ifdef _LP64
-#define UU_PTR_ENCODE(ptr) BSWAP_64((uintptr_t)(void *)(ptr))
-#else
-#define UU_PTR_ENCODE(ptr) BSWAP_32((uintptr_t)(void *)(ptr))
-#endif
-
-#define UU_PTR_DECODE(ptr) ((void *)UU_PTR_ENCODE(ptr))
-
-/*
- * uu_list structures
- */
-typedef struct uu_list_node_impl {
- struct uu_list_node_impl *uln_next;
- struct uu_list_node_impl *uln_prev;
-} uu_list_node_impl_t;
-
-struct uu_list_walk {
- uu_list_walk_t *ulw_next;
- uu_list_walk_t *ulw_prev;
-
- uu_list_t *ulw_list;
- int8_t ulw_dir;
- uint8_t ulw_robust;
- uu_list_node_impl_t *ulw_next_result;
-};
-
-struct uu_list {
- uintptr_t ul_next_enc;
- uintptr_t ul_prev_enc;
-
- uu_list_pool_t *ul_pool;
- uintptr_t ul_parent_enc; /* encoded parent pointer */
- size_t ul_offset;
- size_t ul_numnodes;
- uint8_t ul_debug;
- uint8_t ul_sorted;
- uint8_t ul_index; /* mark for uu_list_index_ts */
-
- uu_list_node_impl_t ul_null_node;
- uu_list_walk_t ul_null_walk; /* for robust walkers */
-};
-
-#define UU_LIST_PTR(ptr) ((uu_list_t *)UU_PTR_DECODE(ptr))
-
-#define UU_LIST_POOL_MAXNAME 64
-
-struct uu_list_pool {
- uu_list_pool_t *ulp_next;
- uu_list_pool_t *ulp_prev;
-
- char ulp_name[UU_LIST_POOL_MAXNAME];
- size_t ulp_nodeoffset;
- size_t ulp_objsize;
- uu_compare_fn_t *ulp_cmp;
- uint8_t ulp_debug;
- uint8_t ulp_last_index;
- pthread_mutex_t ulp_lock; /* protects null_list */
- uu_list_t ulp_null_list;
-};
-
-/*
- * uu_avl structures
- */
-typedef struct avl_node uu_avl_node_impl_t;
-
-struct uu_avl_walk {
- uu_avl_walk_t *uaw_next;
- uu_avl_walk_t *uaw_prev;
-
- uu_avl_t *uaw_avl;
- void *uaw_next_result;
- int8_t uaw_dir;
- uint8_t uaw_robust;
-};
-
-struct uu_avl {
- uintptr_t ua_next_enc;
- uintptr_t ua_prev_enc;
-
- uu_avl_pool_t *ua_pool;
- uintptr_t ua_parent_enc;
- uint8_t ua_debug;
- uint8_t ua_index; /* mark for uu_avl_index_ts */
-
- struct avl_tree ua_tree;
- uu_avl_walk_t ua_null_walk;
-};
-
-#define UU_AVL_PTR(x) ((uu_avl_t *)UU_PTR_DECODE(x))
-
-#define UU_AVL_POOL_MAXNAME 64
-
-struct uu_avl_pool {
- uu_avl_pool_t *uap_next;
- uu_avl_pool_t *uap_prev;
-
- char uap_name[UU_AVL_POOL_MAXNAME];
- size_t uap_nodeoffset;
- size_t uap_objsize;
- uu_compare_fn_t *uap_cmp;
- uint8_t uap_debug;
- uint8_t uap_last_index;
- pthread_mutex_t uap_lock; /* protects null_avl */
- uu_avl_t uap_null_avl;
-};
-
-/*
- * atfork() handlers
- */
-void uu_avl_lockup(void);
-void uu_avl_release(void);
-
-void uu_list_lockup(void);
-void uu_list_release(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBUUTIL_IMPL_H */
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_alloc.c b/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
deleted file mode 100644
index 7cdbf01..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-void *
-uu_zalloc(size_t n)
-{
- void *p = malloc(n);
-
- if (p == NULL) {
- uu_set_error(UU_ERROR_SYSTEM);
- return (NULL);
- }
-
- (void) memset(p, 0, n);
-
- return (p);
-}
-
-void
-uu_free(void *p)
-{
- free(p);
-}
-
-char *
-uu_msprintf(const char *format, ...)
-{
- va_list args;
- char attic[1];
- uint_t M, m;
- char *b;
-
- va_start(args, format);
- M = vsnprintf(attic, 1, format, args);
- va_end(args);
-
- for (;;) {
- m = M;
- if ((b = uu_zalloc(m + 1)) == NULL)
- return (NULL);
-
- va_start(args, format);
- M = vsnprintf(b, m + 1, format, args);
- va_end(args);
-
- if (M == m)
- break; /* sizes match */
-
- uu_free(b);
- }
-
- return (b);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_avl.c b/contrib/opensolaris/lib/libuutil/common/uu_avl.c
deleted file mode 100644
index 93feea9..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_avl.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/avl.h>
-
-static uu_avl_pool_t uu_null_apool = { &uu_null_apool, &uu_null_apool };
-static pthread_mutex_t uu_apool_list_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * The index mark changes on every insert and delete, to catch stale
- * references.
- *
- * We leave the low bit alone, since the avl code uses it.
- */
-#define INDEX_MAX (sizeof (uintptr_t) - 2)
-#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 2 : ((m) + 2) & INDEX_MAX)
-
-#define INDEX_DECODE(i) ((i) & ~INDEX_MAX)
-#define INDEX_ENCODE(p, n) (((n) & ~INDEX_MAX) | (p)->ua_index)
-#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ua_index)
-#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0)
-
-/*
- * When an element is inactive (not in a tree), we keep a marked pointer to
- * its containing pool in its first word, and a NULL pointer in its second.
- *
- * On insert, we use these to verify that it comes from the correct pool.
- *
- * NODE_ARRAY(p, n) yields a uintptr_t pointer to the node embedded in element
- * n, located uap_nodeoffset bytes into the element as recorded in pool p.
- * The pool is taken from the macro argument rather than from whatever
- * variable named "pp" happens to be in scope at the point of use.
- */
-#define	NODE_ARRAY(p, n)	((uintptr_t *)((uintptr_t)(n) + \
-				    (p)->uap_nodeoffset))
-
-#define POOL_TO_MARKER(pp) (((uintptr_t)(pp) | 1))
-
-#define DEAD_MARKER 0xc4
-
-/*
- * Create a pool describing a class of objects that can be stored in uu_avl
- * trees.
- *
- * name must be non-NULL and pass uu_check_name(..., UU_NAME_DOMAIN); the node
- * at nodeoffset must fit inside an object of objsize bytes; compare_func must
- * be non-NULL.  Any violation yields NULL with UU_ERROR_INVALID_ARGUMENT.
- * The only accepted flag is UU_AVL_POOL_DEBUG; anything else yields NULL with
- * UU_ERROR_UNKNOWN_FLAG.  Allocation failure yields NULL with
- * UU_ERROR_NO_MEMORY.
- *
- * On success the new pool is appended to the global pool list and returned.
- */
-uu_avl_pool_t *
-uu_avl_pool_create(const char *name, size_t objsize, size_t nodeoffset,
- uu_compare_fn_t *compare_func, uint32_t flags)
-{
- uu_avl_pool_t *pp, *next, *prev;
-
- if (name == NULL ||
- uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
- nodeoffset + sizeof (uu_avl_node_t) > objsize ||
- compare_func == NULL) {
- uu_set_error(UU_ERROR_INVALID_ARGUMENT);
- return (NULL);
- }
-
- if (flags & ~UU_AVL_POOL_DEBUG) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- pp = uu_zalloc(sizeof (uu_avl_pool_t));
- if (pp == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- (void) strlcpy(pp->uap_name, name, sizeof (pp->uap_name));
- pp->uap_nodeoffset = nodeoffset;
- pp->uap_objsize = objsize;
- pp->uap_cmp = compare_func;
- if (flags & UU_AVL_POOL_DEBUG)
- pp->uap_debug = 1;
- pp->uap_last_index = 0;
-
- (void) pthread_mutex_init(&pp->uap_lock, NULL);
-
- /* An empty pool: the sentinel avl points at itself in both directions. */
- pp->uap_null_avl.ua_next_enc = UU_PTR_ENCODE(&pp->uap_null_avl);
- pp->uap_null_avl.ua_prev_enc = UU_PTR_ENCODE(&pp->uap_null_avl);
-
- /* Link the pool in just before the global sentinel, i.e. at the tail. */
- (void) pthread_mutex_lock(&uu_apool_list_lock);
- pp->uap_next = next = &uu_null_apool;
- pp->uap_prev = prev = next->uap_prev;
- next->uap_prev = pp;
- prev->uap_next = pp;
- (void) pthread_mutex_unlock(&uu_apool_list_lock);
-
- return (pp);
-}
-
-/*
- * Unlink pp from the global pool list and free it.
- *
- * For a debug pool, panics first if the pool's sentinel avl is not linked to
- * itself, i.e. trees created from the pool still exist or the pool is
- * corrupt.
- */
-void
-uu_avl_pool_destroy(uu_avl_pool_t *pp)
-{
-	if (pp->uap_debug) {
-		if (pp->uap_null_avl.ua_next_enc !=
-		    UU_PTR_ENCODE(&pp->uap_null_avl) ||
-		    pp->uap_null_avl.ua_prev_enc !=
-		    UU_PTR_ENCODE(&pp->uap_null_avl)) {
-			/*
-			 * The '*' precision consumes an int argument, so the
-			 * size_t produced by sizeof must be converted.
-			 */
-			uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has "
-			    "outstanding avls, or is corrupt.\n",
-			    (int)sizeof (pp->uap_name), pp->uap_name,
-			    (void *)pp);
-		}
-	}
-	(void) pthread_mutex_lock(&uu_apool_list_lock);
-	pp->uap_next->uap_prev = pp->uap_prev;
-	pp->uap_prev->uap_next = pp->uap_next;
-	(void) pthread_mutex_unlock(&uu_apool_list_lock);
-	pp->uap_prev = NULL;
-	pp->uap_next = NULL;
-	uu_free(pp);
-}
-
-/*
- * Prepare the node np, embedded in the object starting at base, for use with
- * trees of pool pp.
- *
- * For a debug pool, panics if the node does not fit inside an object of the
- * pool's size or does not sit at the pool's node offset.  The node is then
- * marked inactive: first word holds the pool marker, second word is zero.
- */
-void
-uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
-{
-	uintptr_t *na = (uintptr_t *)np;
-
-	if (pp->uap_debug) {
-		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
-		if (offset + sizeof (*np) > pp->uap_objsize) {
-			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
-			    "offset %ld doesn't fit in object (size %ld)\n",
-			    base, (void *)np, (void *)pp, pp->uap_name,
-			    (long)offset, (long)pp->uap_objsize);
-		}
-		if (offset != pp->uap_nodeoffset) {
-			/* Report the pool's node offset, not its object size. */
-			uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): "
-			    "offset %ld doesn't match pool's offset (%ld)\n",
-			    base, (void *)np, (void *)pp, pp->uap_name,
-			    (long)offset, (long)pp->uap_nodeoffset);
-		}
-	}
-
-	na[0] = POOL_TO_MARKER(pp);
-	na[1] = 0;
-}
-
-/*
- * Retire the node np of pool pp by overwriting its first three words with
- * DEAD_MARKER.
- *
- * For a debug pool, panics if the node already carries the dead marker, or if
- * it is not in the inactive state (pool marker in word 0, zero in word 1).
- * base is used only in the panic messages.
- */
-void
-uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp)
-{
- uintptr_t *na = (uintptr_t *)np;
-
- if (pp->uap_debug) {
- if (na[0] == DEAD_MARKER && na[1] == DEAD_MARKER) {
- uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
- "node already finied\n",
- base, np, pp, pp->uap_name);
- }
- if (na[0] != POOL_TO_MARKER(pp) || na[1] != 0) {
- uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): "
- "node corrupt, in tree, or in different pool\n",
- base, np, pp, pp->uap_name);
- }
- }
-
- na[0] = DEAD_MARKER;
- na[1] = DEAD_MARKER;
- na[2] = DEAD_MARKER;
-}
-
-/*
- * Search context that uu_avl_find() passes to avl_find() in place of a real
- * element; uu_avl_node_compare() receives it as its left-hand argument.
- */
-struct uu_avl_node_compare_info {
- uu_compare_fn_t *ac_compare; /* the pool's comparison function */
- void *ac_private; /* caller's private argument for ac_compare */
- void *ac_right; /* the element being searched for */
- void *ac_found; /* first tree element that compared equal, or NULL */
-};
-
-/*
- * Comparison callback installed in every uu_avl tree.
- *
- * l is a struct uu_avl_node_compare_info, r is an element of the tree.  The
- * user comparison is invoked as ac_compare(r, ac_right, ac_private), so the
- * sign of the result is reversed before it is returned.
- *
- * This function never returns 0: when the elements compare equal, the first
- * such element is remembered in ac_found and -1 is returned, so the search
- * keeps descending.  uu_avl_find() relies on this and treats a successful
- * avl_find() as an internal error.
- */
-static int
-uu_avl_node_compare(const void *l, const void *r)
-{
- struct uu_avl_node_compare_info *info =
- (struct uu_avl_node_compare_info *)l;
-
- int res = info->ac_compare(r, info->ac_right, info->ac_private);
-
- if (res == 0) {
- if (info->ac_found == NULL)
- info->ac_found = (void *)r;
- return (-1);
- }
- if (res < 0)
- return (1);
- return (-1);
-}
-
-/*
- * Create an empty tree for objects of pool pp, recording parent (encoded) as
- * its owner.
- *
- * The only accepted flag is UU_AVL_DEBUG; anything else yields NULL with
- * UU_ERROR_UNKNOWN_FLAG.  Allocation failure yields NULL with
- * UU_ERROR_NO_MEMORY.  The tree is in debug mode if either the pool is or
- * UU_AVL_DEBUG was given.
- */
-uu_avl_t *
-uu_avl_create(uu_avl_pool_t *pp, void *parent, uint32_t flags)
-{
- uu_avl_t *ap, *next, *prev;
-
- if (flags & ~UU_AVL_DEBUG) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- ap = uu_zalloc(sizeof (*ap));
- if (ap == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- ap->ua_pool = pp;
- ap->ua_parent_enc = UU_PTR_ENCODE(parent);
- ap->ua_debug = pp->uap_debug || (flags & UU_AVL_DEBUG);
- /* NOTE(review): uap_last_index is advanced here, before uap_lock is taken. */
- ap->ua_index = (pp->uap_last_index = INDEX_NEXT(pp->uap_last_index));
-
- avl_create(&ap->ua_tree, &uu_avl_node_compare, pp->uap_objsize,
- pp->uap_nodeoffset);
-
- /* No walkers yet: the walker sentinel points at itself. */
- ap->ua_null_walk.uaw_next = &ap->ua_null_walk;
- ap->ua_null_walk.uaw_prev = &ap->ua_null_walk;
-
- /* Append the tree to the pool's list of trees. */
- (void) pthread_mutex_lock(&pp->uap_lock);
- next = &pp->uap_null_avl;
- prev = UU_PTR_DECODE(next->ua_prev_enc);
- ap->ua_next_enc = UU_PTR_ENCODE(next);
- ap->ua_prev_enc = UU_PTR_ENCODE(prev);
- next->ua_prev_enc = UU_PTR_ENCODE(ap);
- prev->ua_next_enc = UU_PTR_ENCODE(ap);
- (void) pthread_mutex_unlock(&pp->uap_lock);
-
- return (ap);
-}
-
-/*
- * Unlink the tree ap from its pool's list of trees, destroy the underlying
- * avl tree and free ap.
- *
- * In debug mode, panics if the tree still contains nodes or still has
- * walkers on its walker list.
- */
-void
-uu_avl_destroy(uu_avl_t *ap)
-{
- uu_avl_pool_t *pp = ap->ua_pool;
-
- if (ap->ua_debug) {
- if (avl_numnodes(&ap->ua_tree) != 0) {
- uu_panic("uu_avl_destroy(%p): tree not empty\n", ap);
- }
- if (ap->ua_null_walk.uaw_next != &ap->ua_null_walk ||
- ap->ua_null_walk.uaw_prev != &ap->ua_null_walk) {
- uu_panic("uu_avl_destroy(%p): outstanding walkers\n",
- ap);
- }
- }
- (void) pthread_mutex_lock(&pp->uap_lock);
- UU_AVL_PTR(ap->ua_next_enc)->ua_prev_enc = ap->ua_prev_enc;
- UU_AVL_PTR(ap->ua_prev_enc)->ua_next_enc = ap->ua_next_enc;
- (void) pthread_mutex_unlock(&pp->uap_lock);
- ap->ua_prev_enc = UU_PTR_ENCODE(NULL);
- ap->ua_next_enc = UU_PTR_ENCODE(NULL);
-
- ap->ua_pool = NULL;
- avl_destroy(&ap->ua_tree);
-
- uu_free(ap);
-}
-
-/*
- * Return the node count reported by avl_numnodes() for ap's tree.
- */
-size_t
-uu_avl_numnodes(uu_avl_t *ap)
-{
- return (avl_numnodes(&ap->ua_tree));
-}
-
-/*
- * Return the result of avl_first() on ap's tree.
- */
-void *
-uu_avl_first(uu_avl_t *ap)
-{
- return (avl_first(&ap->ua_tree));
-}
-
-/*
- * Return the result of avl_last() on ap's tree.
- */
-void *
-uu_avl_last(uu_avl_t *ap)
-{
- return (avl_last(&ap->ua_tree));
-}
-
-/*
- * Return the result of AVL_NEXT() for node in ap's tree.
- */
-void *
-uu_avl_next(uu_avl_t *ap, void *node)
-{
- return (AVL_NEXT(&ap->ua_tree, node));
-}
-
-/*
- * Return the result of AVL_PREV() for node in ap's tree.
- */
-void *
-uu_avl_prev(uu_avl_t *ap, void *node)
-{
- return (AVL_PREV(&ap->ua_tree, node));
-}
-
-/*
- * Initialize the walker wp over the tree ap.
- *
- * UU_WALK_REVERSE starts the walk at the last element and moves backwards;
- * otherwise it starts at the first element.  Robust walkers, and all walkers
- * of a debug tree, are appended to the tree's walker list; any other walker
- * is left with NULL list links.
- */
-static void
-_avl_walk_init(uu_avl_walk_t *wp, uu_avl_t *ap, uint32_t flags)
-{
-	int robust = (flags & UU_WALK_ROBUST);
-	int direction = 1;
-
-	if (flags & UU_WALK_REVERSE)
-		direction = -1;
-
-	(void) memset(wp, 0, sizeof (*wp));
-	wp->uaw_avl = ap;
-	wp->uaw_robust = robust;
-	wp->uaw_dir = direction;
-	wp->uaw_next_result = (direction > 0)?
-	    avl_first(&ap->ua_tree) : avl_last(&ap->ua_tree);
-
-	if (ap->ua_debug || robust) {
-		uu_avl_walk_t *head = &ap->ua_null_walk;
-		uu_avl_walk_t *tail = head->uaw_prev;
-
-		wp->uaw_next = head;
-		wp->uaw_prev = tail;
-		head->uaw_prev = wp;
-		tail->uaw_next = wp;
-	}
-}
-
-/*
- * Return the element the walker wp is positioned on and step the walker one
- * element further in its direction.  Returns NULL, leaving the walker
- * untouched, once the walk is exhausted.
- */
-static void *
-_avl_walk_advance(uu_avl_walk_t *wp, uu_avl_t *ap)
-{
-	avl_tree_t *t = &ap->ua_tree;
-	void *cur = wp->uaw_next_result;
-
-	if (cur != NULL) {
-		if (wp->uaw_dir > 0)
-			wp->uaw_next_result = AVL_NEXT(t, cur);
-		else
-			wp->uaw_next_result = AVL_PREV(t, cur);
-	}
-
-	return (cur);
-}
-
-/*
- * Tear down the walker wp: take it off its tree's walker list if it is on
- * one (non-NULL uaw_next), then clear its tree and position.
- */
-static void
-_avl_walk_fini(uu_avl_walk_t *wp)
-{
-	uu_avl_walk_t *after = wp->uaw_next;
-
-	if (after != NULL) {
-		uu_avl_walk_t *before = wp->uaw_prev;
-
-		after->uaw_prev = before;
-		before->uaw_next = after;
-		wp->uaw_next = NULL;
-		wp->uaw_prev = NULL;
-	}
-
-	wp->uaw_avl = NULL;
-	wp->uaw_next_result = NULL;
-}
-
-/*
- * Allocate and initialize a walker over ap.
- *
- * Accepted flags are UU_WALK_ROBUST and UU_WALK_REVERSE; anything else yields
- * NULL with UU_ERROR_UNKNOWN_FLAG.  Allocation failure yields NULL with
- * UU_ERROR_NO_MEMORY.
- */
-uu_avl_walk_t *
-uu_avl_walk_start(uu_avl_t *ap, uint32_t flags)
-{
- uu_avl_walk_t *wp;
-
- if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- wp = uu_zalloc(sizeof (*wp));
- if (wp == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- _avl_walk_init(wp, ap, flags);
- return (wp);
-}
-
-/*
- * Return the next element of the walk wp, or NULL when the walk is done.
- */
-void *
-uu_avl_walk_next(uu_avl_walk_t *wp)
-{
- return (_avl_walk_advance(wp, wp->uaw_avl));
-}
-
-/*
- * Finish the walk wp and free the walker.
- */
-void
-uu_avl_walk_end(uu_avl_walk_t *wp)
-{
- _avl_walk_fini(wp);
- uu_free(wp);
-}
-
-/*
- * Call func(elem, private) for the elements of ap, in the order selected by
- * flags, for as long as func returns UU_WALK_NEXT.
- *
- * Returns 0 if the last status was non-negative.  Returns -1 with
- * UU_ERROR_UNKNOWN_FLAG for flags other than UU_WALK_ROBUST and
- * UU_WALK_REVERSE, or with UU_ERROR_CALLBACK_FAILED if func returned a
- * negative status.
- */
-int
-uu_avl_walk(uu_avl_t *ap, uu_walk_fn_t *func, void *private, uint32_t flags)
-{
- void *e;
- uu_avl_walk_t my_walk;
-
- int status = UU_WALK_NEXT;
-
- if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (-1);
- }
-
- /* The walker lives on the stack; it is unlinked again before returning. */
- _avl_walk_init(&my_walk, ap, flags);
- while (status == UU_WALK_NEXT &&
- (e = _avl_walk_advance(&my_walk, ap)) != NULL)
- status = (*func)(e, private);
- _avl_walk_fini(&my_walk);
-
- if (status >= 0)
- return (0);
- uu_set_error(UU_ERROR_CALLBACK_FAILED);
- return (-1);
-}
-
-/*
- * Remove elem from the tree ap and return its node to the inactive state
- * (pool marker in word 0, zero in word 1).
- *
- * Panics if a non-robust walker on the tree's walker list still has elements
- * left to visit.
- */
-void
-uu_avl_remove(uu_avl_t *ap, void *elem)
-{
- uu_avl_walk_t *wp;
- uu_avl_pool_t *pp = ap->ua_pool;
- uintptr_t *na = NODE_ARRAY(pp, elem);
-
- if (ap->ua_debug) {
- /*
- * invalidate outstanding uu_avl_index_ts.
- */
- ap->ua_index = INDEX_NEXT(ap->ua_index);
- }
-
- /*
- * Robust walkers must be advanced, if we are removing the node
- * they are currently using. In debug mode, non-robust walkers
- * are also on the walker list.
- */
- for (wp = ap->ua_null_walk.uaw_next; wp != &ap->ua_null_walk;
- wp = wp->uaw_next) {
- if (wp->uaw_robust) {
- if (elem == wp->uaw_next_result)
- (void) _avl_walk_advance(wp, ap);
- } else if (wp->uaw_next_result != NULL) {
- uu_panic("uu_avl_remove(%p, %p): active non-robust "
- "walker\n", ap, elem);
- }
- }
-
- avl_remove(&ap->ua_tree, elem);
-
- na[0] = POOL_TO_MARKER(pp);
- na[1] = 0;
-}
-
-/*
- * Thin wrapper around avl_destroy_nodes(): return the element it hands back
- * for the given cookie, after resetting that element's node to the inactive
- * state.  Returns NULL when avl_destroy_nodes() does.
- */
-void *
-uu_avl_teardown(uu_avl_t *ap, void **cookie)
-{
- void *elem = avl_destroy_nodes(&ap->ua_tree, cookie);
-
- if (elem != NULL) {
- uu_avl_pool_t *pp = ap->ua_pool;
- uintptr_t *na = NODE_ARRAY(pp, elem);
-
- na[0] = POOL_TO_MARKER(pp);
- na[1] = 0;
- }
- return (elem);
-}
-
-/*
- * Look in ap for an element that the pool's comparison function considers
- * equal to elem; private is passed through to that function.
- *
- * Returns the first matching element seen during the search, or NULL.  If
- * out is non-NULL it receives the location reported by avl_find(), tagged
- * with the tree's current index mark.
- */
-void *
-uu_avl_find(uu_avl_t *ap, void *elem, void *private, uu_avl_index_t *out)
-{
- struct uu_avl_node_compare_info info;
- void *result;
-
- info.ac_compare = ap->ua_pool->uap_cmp;
- info.ac_private = private;
- info.ac_right = elem;
- info.ac_found = NULL;
-
- /* The search key is the info block, not an element. */
- result = avl_find(&ap->ua_tree, &info, out);
- if (out != NULL)
- *out = INDEX_ENCODE(ap, *out);
-
- /* The comparison callback never returns 0, so a hit here is a bug. */
- if (ap->ua_debug && result != NULL)
- uu_panic("uu_avl_find: internal error: avl_find succeeded\n");
-
- return (info.ac_found);
-}
-
-/*
- * Insert elem into ap at the location idx.
- *
- * In debug mode, panics if the element's node is not inactive, is
- * uninitialized, carries another pool's marker, or if idx does not carry the
- * tree's current index mark; the mark is then advanced so that other
- * outstanding indexes become invalid.
- */
-void
-uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx)
-{
- if (ap->ua_debug) {
- uu_avl_pool_t *pp = ap->ua_pool;
- uintptr_t *na = NODE_ARRAY(pp, elem);
-
- if (na[1] != 0)
- uu_panic("uu_avl_insert(%p, %p, %p): node already "
- "in tree, or corrupt\n",
- ap, elem, idx);
- if (na[0] == 0)
- uu_panic("uu_avl_insert(%p, %p, %p): node not "
- "initialized\n",
- ap, elem, idx);
- if (na[0] != POOL_TO_MARKER(pp))
- uu_panic("uu_avl_insert(%p, %p, %p): node from "
- "other pool, or corrupt\n",
- ap, elem, idx);
-
- if (!INDEX_VALID(ap, idx))
- uu_panic("uu_avl_insert(%p, %p, %p): %s\n",
- ap, elem, idx,
- INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
-
- /*
- * invalidate outstanding uu_avl_index_ts.
- */
- ap->ua_index = INDEX_NEXT(ap->ua_index);
- }
- /* Strip the index mark before handing the location to avl_insert(). */
- avl_insert(&ap->ua_tree, elem, INDEX_DECODE(idx));
-}
-
-/*
- * Return avl_nearest(..., AVL_AFTER) for the location idx in ap.  In debug
- * mode, panics if idx does not carry the tree's current index mark.
- */
-void *
-uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx)
-{
- if (ap->ua_debug && !INDEX_VALID(ap, idx))
- uu_panic("uu_avl_nearest_next(%p, %p): %s\n",
- ap, idx, INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
- return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER));
-}
-
-/*
- * Return avl_nearest(..., AVL_BEFORE) for the location idx in ap.  In debug
- * mode, panics if idx does not carry the tree's current index mark.
- */
-void *
-uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx)
-{
- if (ap->ua_debug && !INDEX_VALID(ap, idx))
- uu_panic("uu_avl_nearest_prev(%p, %p): %s\n",
- ap, idx, INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
- return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE));
-}
-
-/*
- * called from uu_lockup() and uu_release(), as part of our fork1()-safety.
- */
-/*
- * Acquire the global pool-list lock and then the lock of every pool on the
- * list.  All of them are still held on return.
- */
-void
-uu_avl_lockup(void)
-{
- uu_avl_pool_t *pp;
-
- (void) pthread_mutex_lock(&uu_apool_list_lock);
- for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
- pp = pp->uap_next)
- (void) pthread_mutex_lock(&pp->uap_lock);
-}
-
-/*
- * Release the lock of every pool on the global list and then the list lock
- * itself; the counterpart of uu_avl_lockup().
- */
-void
-uu_avl_release(void)
-{
- uu_avl_pool_t *pp;
-
- for (pp = uu_null_apool.uap_next; pp != &uu_null_apool;
- pp = pp->uap_next)
- (void) pthread_mutex_unlock(&pp->uap_lock);
- (void) pthread_mutex_unlock(&uu_apool_list_lock);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c b/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c
deleted file mode 100644
index 5b990a5..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <errno.h>
-#include <libintl.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-
-#define FACILITY_FMT "%s (%s): "
-
-#if !defined(TEXT_DOMAIN)
-#define TEXT_DOMAIN "SYS_TEST"
-#endif
-
-/*
- * Map a severity to its label, looked up through dgettext() in TEXT_DOMAIN.
- * Severities not listed here map to "unspecified".
- */
-static const char *
-strseverity(uu_dprintf_severity_t severity)
-{
- switch (severity) {
- case UU_DPRINTF_SILENT:
- return (dgettext(TEXT_DOMAIN, "silent"));
- case UU_DPRINTF_FATAL:
- return (dgettext(TEXT_DOMAIN, "FATAL"));
- case UU_DPRINTF_WARNING:
- return (dgettext(TEXT_DOMAIN, "WARNING"));
- case UU_DPRINTF_NOTICE:
- return (dgettext(TEXT_DOMAIN, "note"));
- case UU_DPRINTF_INFO:
- return (dgettext(TEXT_DOMAIN, "info"));
- case UU_DPRINTF_DEBUG:
- return (dgettext(TEXT_DOMAIN, "debug"));
- default:
- return (dgettext(TEXT_DOMAIN, "unspecified"));
- }
-}
-
-/*
- * Create a debug-print handle.
- *
- * name is optional.  When given it must pass
- * uu_check_name(..., UU_NAME_DOMAIN), otherwise NULL is returned with
- * UU_ERROR_INVALID_ARGUMENT; the handle keeps its own copy of the string.
- * severity and flags are stored in the handle as given.
- *
- * Returns the new handle, or NULL if validation or an allocation fails.
- */
-uu_dprintf_t *
-uu_dprintf_create(const char *name, uu_dprintf_severity_t severity,
-    uint_t flags)
-{
-	uu_dprintf_t *D;
-
-	/*
-	 * A NULL name is accepted below, so it must not reach
-	 * uu_check_name(), which calls strlen() on its argument.
-	 */
-	if (name != NULL && uu_check_name(name, UU_NAME_DOMAIN) == -1) {
-		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
-		return (NULL);
-	}
-
-	if ((D = uu_zalloc(sizeof (uu_dprintf_t))) == NULL)
-		return (NULL);
-
-	if (name != NULL) {
-		D->uud_name = strdup(name);
-		if (D->uud_name == NULL) {
-			uu_free(D);
-			return (NULL);
-		}
-	} else {
-		D->uud_name = NULL;
-	}
-
-	D->uud_severity = severity;
-	D->uud_flags = flags;
-
-	return (D);
-}
-
-/*
- * Print a message to stderr, prefixed with "<name> (<severity label>): ",
- * unless severity is greater than the handle's configured severity.
- *
- * NOTE(review): uu_dprintf_create() can leave uud_name NULL, in which case a
- * NULL pointer is passed for the first %s below -- confirm that callers
- * always supply a name.
- */
-/*PRINTFLIKE3*/
-void
-uu_dprintf(uu_dprintf_t *D, uu_dprintf_severity_t severity,
- const char *format, ...)
-{
- va_list alist;
-
- /* XXX Assert that severity is not UU_DPRINTF_SILENT. */
-
- if (severity > D->uud_severity)
- return;
-
- (void) fprintf(stderr, FACILITY_FMT, D->uud_name,
- strseverity(severity));
-
- va_start(alist, format);
- (void) vfprintf(stderr, format, alist);
- va_end(alist);
-}
-
-/*
- * Free the handle D together with its copy of the name.
- */
-void
-uu_dprintf_destroy(uu_dprintf_t *D)
-{
-	/* free() accepts NULL, so an unnamed handle needs no special case. */
-	free(D->uud_name);
-	uu_free(D);
-}
-
-/*
- * Return the name stored in the handle D (NULL if it was created without
- * one).  The string remains owned by the handle.
- */
-const char *
-uu_dprintf_getname(uu_dprintf_t *D)
-{
- return (D->uud_name);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_ident.c b/contrib/opensolaris/lib/libuutil/common/uu_ident.c
deleted file mode 100644
index 9a64384..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_ident.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <string.h>
-
-/*
- * We require names of the form:
- * [provider,]identifier[/[provider,]identifier]...
- *
- * Where provider is either a stock symbol (SUNW) or a java-style reversed
- * domain name (com.sun).
- *
- * Both providers and identifiers must start with a letter, and may
- * only contain alphanumerics, dashes, and underlines. Providers
- * may also contain periods.
- *
- * Note that we do _not_ use the macros in <ctype.h>, since they are affected
- * by the current locale settings.
- */
-
-#define IS_ALPHA(c) \
- (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
-
-#define IS_DIGIT(c) \
- ((c) >= '0' && (c) <= '9')
-
-/*
- * Check the identifier occupying [s, e).
- *
- * It must be non-empty and start with a letter.  Every following character,
- * up to e or an embedded NUL, must be a letter, a digit, '-' or '_', or '.'
- * when allowdot is set.  Returns 1 if the identifier is valid, 0 otherwise.
- */
-static int
-is_valid_ident(const char *s, const char *e, int allowdot)
-{
-	const char *cp;
-	char c;
-
-	if (s >= e)
-		return (0);		/* name is empty */
-
-	c = *s;
-	if (!IS_ALPHA(c))
-		return (0);		/* does not start with letter */
-
-	for (cp = s + 1; cp < e; cp++) {
-		c = *cp;
-		if (c == 0)
-			break;
-		if (!(IS_ALPHA(c) || IS_DIGIT(c) || c == '-' || c == '_' ||
-		    (allowdot && c == '.')))
-			return (0);	/* invalid character */
-	}
-	return (1);
-}
-
-/*
- * Check one name component occupying [b, e).
- *
- * With UU_NAME_DOMAIN set, a ',' inside the component splits it into a
- * provider, which may contain periods, and an identifier, which may not;
- * both parts must be valid.  Otherwise the whole component must be a valid
- * identifier.  Returns 1 if valid, 0 otherwise.
- */
-static int
-is_valid_component(const char *b, const char *e, uint_t flags)
-{
-	if (flags & UU_NAME_DOMAIN) {
-		const char *comma = strchr(b, ',');
-
-		/* A comma at or beyond e belongs to a later component. */
-		if (comma != NULL && comma < e) {
-			if (!is_valid_ident(b, comma, 1))
-				return (0);
-			b = comma + 1;
-		}
-	}
-
-	return (is_valid_ident(b, e, 0));
-}
-
-/*
- * Validate name against the rules described at the top of this file.
- *
- * flags may contain UU_NAME_DOMAIN, which permits a "provider," prefix on
- * each component, and UU_NAME_PATH, which permits several components
- * separated by '/'.  Any other flag yields -1 with UU_ERROR_UNKNOWN_FLAG.
- *
- * Returns 0 if name is valid, or -1 with UU_ERROR_INVALID_ARGUMENT if not.
- * name must not be NULL.
- */
-int
-uu_check_name(const char *name, uint_t flags)
-{
-	const char *end = name + strlen(name);
-	const char *p;
-
-	if (flags & ~(UU_NAME_DOMAIN | UU_NAME_PATH)) {
-		uu_set_error(UU_ERROR_UNKNOWN_FLAG);
-		return (-1);
-	}
-
-	if (!(flags & UU_NAME_PATH)) {
-		if (!is_valid_component(name, end, flags))
-			goto bad;
-		return (0);
-	}
-
-	while ((p = strchr(name, '/')) != NULL) {
-		/*
-		 * The end pointer is exclusive, so the component ends at the
-		 * '/' itself.  Ending at p - 1 would leave the component's
-		 * last character unchecked and would reject one-character
-		 * components as empty.
-		 */
-		if (!is_valid_component(name, p, flags))
-			goto bad;
-		name = p + 1;
-	}
-	if (!is_valid_component(name, end, flags))
-		goto bad;
-
-	return (0);
-
-bad:
-	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
-	return (-1);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_list.c b/contrib/opensolaris/lib/libuutil/common/uu_list.c
deleted file mode 100644
index d9dc86f..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_list.c
+++ /dev/null
@@ -1,711 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#define ELEM_TO_NODE(lp, e) \
- ((uu_list_node_impl_t *)((uintptr_t)(e) + (lp)->ul_offset))
-
-#define NODE_TO_ELEM(lp, n) \
- ((void *)((uintptr_t)(n) - (lp)->ul_offset))
-
-/*
- * uu_list_index_ts define a location for insertion. They are simply a
- * pointer to the object after the insertion point. We store a mark
- * in the low-bits of the index, to help prevent mistakes.
- *
- * When debugging, the index mark changes on every insert and delete, to
- * catch stale references.
- */
-#define INDEX_MAX (sizeof (uintptr_t) - 1)
-#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 1 : ((m) + 1) & INDEX_MAX)
-
-#define INDEX_TO_NODE(i) ((uu_list_node_impl_t *)((i) & ~INDEX_MAX))
-#define NODE_TO_INDEX(p, n) (((uintptr_t)(n) & ~INDEX_MAX) | (p)->ul_index)
-#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ul_index)
-#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0)
-
-#define POOL_TO_MARKER(pp) ((void *)((uintptr_t)(pp) | 1))
-
-static uu_list_pool_t uu_null_lpool = { &uu_null_lpool, &uu_null_lpool };
-static pthread_mutex_t uu_lpool_list_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * Create a pool describing a class of objects that can be stored in uu_list
- * lists.
- *
- * name must be non-NULL and pass uu_check_name(..., UU_NAME_DOMAIN), and the
- * node at nodeoffset must fit inside an object of objsize bytes; otherwise
- * NULL is returned with UU_ERROR_INVALID_ARGUMENT.  compare_func is not
- * checked here and may be NULL.  The only accepted flag is
- * UU_LIST_POOL_DEBUG; anything else yields NULL with UU_ERROR_UNKNOWN_FLAG.
- * Allocation failure yields NULL with UU_ERROR_NO_MEMORY.
- *
- * On success the new pool is appended to the global pool list and returned.
- */
-uu_list_pool_t *
-uu_list_pool_create(const char *name, size_t objsize,
- size_t nodeoffset, uu_compare_fn_t *compare_func, uint32_t flags)
-{
- uu_list_pool_t *pp, *next, *prev;
-
- if (name == NULL ||
- uu_check_name(name, UU_NAME_DOMAIN) == -1 ||
- nodeoffset + sizeof (uu_list_node_t) > objsize) {
- uu_set_error(UU_ERROR_INVALID_ARGUMENT);
- return (NULL);
- }
-
- if (flags & ~UU_LIST_POOL_DEBUG) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- pp = uu_zalloc(sizeof (uu_list_pool_t));
- if (pp == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- (void) strlcpy(pp->ulp_name, name, sizeof (pp->ulp_name));
- pp->ulp_nodeoffset = nodeoffset;
- pp->ulp_objsize = objsize;
- pp->ulp_cmp = compare_func;
- if (flags & UU_LIST_POOL_DEBUG)
- pp->ulp_debug = 1;
- pp->ulp_last_index = 0;
-
- (void) pthread_mutex_init(&pp->ulp_lock, NULL);
-
- /* An empty pool: the sentinel list points at itself in both directions. */
- pp->ulp_null_list.ul_next_enc = UU_PTR_ENCODE(&pp->ulp_null_list);
- pp->ulp_null_list.ul_prev_enc = UU_PTR_ENCODE(&pp->ulp_null_list);
-
- /* Link the pool in just before the global sentinel, i.e. at the tail. */
- (void) pthread_mutex_lock(&uu_lpool_list_lock);
- pp->ulp_next = next = &uu_null_lpool;
- pp->ulp_prev = prev = next->ulp_prev;
- next->ulp_prev = pp;
- prev->ulp_next = pp;
- (void) pthread_mutex_unlock(&uu_lpool_list_lock);
-
- return (pp);
-}
-
-/*
- * Unlink pp from the global pool list and free it.
- *
- * For a debug pool, panics first if the pool's sentinel list is not linked
- * to itself, i.e. lists created from the pool still exist or the pool is
- * corrupt.
- */
-void
-uu_list_pool_destroy(uu_list_pool_t *pp)
-{
-	if (pp->ulp_debug) {
-		if (pp->ulp_null_list.ul_next_enc !=
-		    UU_PTR_ENCODE(&pp->ulp_null_list) ||
-		    pp->ulp_null_list.ul_prev_enc !=
-		    UU_PTR_ENCODE(&pp->ulp_null_list)) {
-			/*
-			 * The '*' precision consumes an int argument, so the
-			 * size_t produced by sizeof must be converted.
-			 */
-			uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has "
-			    "outstanding lists, or is corrupt.\n",
-			    (int)sizeof (pp->ulp_name), pp->ulp_name,
-			    (void *)pp);
-		}
-	}
-	(void) pthread_mutex_lock(&uu_lpool_list_lock);
-	pp->ulp_next->ulp_prev = pp->ulp_prev;
-	pp->ulp_prev->ulp_next = pp->ulp_next;
-	(void) pthread_mutex_unlock(&uu_lpool_list_lock);
-	pp->ulp_prev = NULL;
-	pp->ulp_next = NULL;
-	uu_free(pp);
-}
-
-/*
- * Prepare the node np_arg, embedded in the object starting at base, for use
- * with lists of pool pp.
- *
- * For a debug pool, panics if the node does not fit inside an object of the
- * pool's size or does not sit at the pool's node offset.  The node is then
- * marked as off-list: uln_next holds the pool marker, uln_prev is NULL.
- */
-void
-uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
-{
-	uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg;
-
-	if (pp->ulp_debug) {
-		uintptr_t offset = (uintptr_t)np - (uintptr_t)base;
-		if (offset + sizeof (*np) > pp->ulp_objsize) {
-			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
-			    "offset %ld doesn't fit in object (size %ld)\n",
-			    base, (void *)np, (void *)pp, pp->ulp_name,
-			    (long)offset, (long)pp->ulp_objsize);
-		}
-		if (offset != pp->ulp_nodeoffset) {
-			/* Report the pool's node offset, not its object size. */
-			uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): "
-			    "offset %ld doesn't match pool's offset (%ld)\n",
-			    base, (void *)np, (void *)pp, pp->ulp_name,
-			    (long)offset, (long)pp->ulp_nodeoffset);
-		}
-	}
-	np->uln_next = POOL_TO_MARKER(pp);
-	np->uln_prev = NULL;
-}
-
-/*
- * Retire the node np_arg of pool pp by clearing both of its links.
- *
- * For a debug pool, panics if both links are already NULL (node already
- * finalized), or if the node is not in the off-list state (pool marker in
- * uln_next, NULL in uln_prev).  base is used only in the panic messages.
- */
-void
-uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp)
-{
- uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg;
-
- if (pp->ulp_debug) {
- if (np->uln_next == NULL &&
- np->uln_prev == NULL) {
- uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
- "node already finied\n",
- base, np_arg, pp, pp->ulp_name);
- }
- if (np->uln_next != POOL_TO_MARKER(pp) ||
- np->uln_prev != NULL) {
- uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): "
- "node corrupt or on list\n",
- base, np_arg, pp, pp->ulp_name);
- }
- }
- np->uln_next = NULL;
- np->uln_prev = NULL;
-}
-
-/*
- * Create an empty list for objects of pool pp, recording parent (encoded) as
- * its owner.
- *
- * Accepted flags are UU_LIST_DEBUG and UU_LIST_SORTED; anything else yields
- * NULL with UU_ERROR_UNKNOWN_FLAG.  UU_LIST_SORTED on a pool without a
- * comparison function yields NULL with UU_ERROR_NOT_SUPPORTED (and panics
- * first if the pool is a debug pool).  Allocation failure yields NULL with
- * UU_ERROR_NO_MEMORY.
- */
-uu_list_t *
-uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t flags)
-{
- uu_list_t *lp, *next, *prev;
-
- if (flags & ~(UU_LIST_DEBUG | UU_LIST_SORTED)) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- if ((flags & UU_LIST_SORTED) && pp->ulp_cmp == NULL) {
- if (pp->ulp_debug)
- uu_panic("uu_list_create(%p, ...): requested "
- "UU_LIST_SORTED, but pool has no comparison func\n",
- pp);
- uu_set_error(UU_ERROR_NOT_SUPPORTED);
- return (NULL);
- }
-
- lp = uu_zalloc(sizeof (*lp));
- if (lp == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- lp->ul_pool = pp;
- lp->ul_parent_enc = UU_PTR_ENCODE(parent);
- lp->ul_offset = pp->ulp_nodeoffset;
- lp->ul_debug = pp->ulp_debug || (flags & UU_LIST_DEBUG);
- lp->ul_sorted = (flags & UU_LIST_SORTED);
- lp->ul_numnodes = 0;
- /* NOTE(review): ulp_last_index is advanced here, before ulp_lock is taken. */
- lp->ul_index = (pp->ulp_last_index = INDEX_NEXT(pp->ulp_last_index));
-
- /* Empty list and no walkers: both sentinels point at themselves. */
- lp->ul_null_node.uln_next = &lp->ul_null_node;
- lp->ul_null_node.uln_prev = &lp->ul_null_node;
-
- lp->ul_null_walk.ulw_next = &lp->ul_null_walk;
- lp->ul_null_walk.ulw_prev = &lp->ul_null_walk;
-
- /* Append the list to the pool's list of lists. */
- (void) pthread_mutex_lock(&pp->ulp_lock);
- next = &pp->ulp_null_list;
- prev = UU_PTR_DECODE(next->ul_prev_enc);
- lp->ul_next_enc = UU_PTR_ENCODE(next);
- lp->ul_prev_enc = UU_PTR_ENCODE(prev);
- next->ul_prev_enc = UU_PTR_ENCODE(lp);
- prev->ul_next_enc = UU_PTR_ENCODE(lp);
- (void) pthread_mutex_unlock(&pp->ulp_lock);
-
- return (lp);
-}
-
-/*
- * Unlink the list lp from its pool's list of lists and free it.
- *
- * In debug mode, panics if the list still has nodes, if its node count is
- * nonzero, or if it still has walkers on its walker list.
- */
-void
-uu_list_destroy(uu_list_t *lp)
-{
- uu_list_pool_t *pp = lp->ul_pool;
-
- if (lp->ul_debug) {
- if (lp->ul_null_node.uln_next != &lp->ul_null_node ||
- lp->ul_null_node.uln_prev != &lp->ul_null_node) {
- uu_panic("uu_list_destroy(%p): list not empty\n",
- lp);
- }
- if (lp->ul_numnodes != 0) {
- uu_panic("uu_list_destroy(%p): numnodes is nonzero, "
- "but list is empty\n", lp);
- }
- if (lp->ul_null_walk.ulw_next != &lp->ul_null_walk ||
- lp->ul_null_walk.ulw_prev != &lp->ul_null_walk) {
- uu_panic("uu_list_destroy(%p): outstanding walkers\n",
- lp);
- }
- }
-
- (void) pthread_mutex_lock(&pp->ulp_lock);
- UU_LIST_PTR(lp->ul_next_enc)->ul_prev_enc = lp->ul_prev_enc;
- UU_LIST_PTR(lp->ul_prev_enc)->ul_next_enc = lp->ul_next_enc;
- (void) pthread_mutex_unlock(&pp->ulp_lock);
- lp->ul_prev_enc = UU_PTR_ENCODE(NULL);
- lp->ul_next_enc = UU_PTR_ENCODE(NULL);
- lp->ul_pool = NULL;
- uu_free(lp);
-}
-
-/*
- * Link the node np into lp between prev and next and bump the node count.
- *
- * In debug mode, panics if prev and next are not adjacent or if np is not in
- * the off-list state, then advances the list's index mark so that
- * outstanding indexes become invalid.
- */
-static void
-list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev,
- uu_list_node_impl_t *next)
-{
- if (lp->ul_debug) {
- if (next->uln_prev != prev || prev->uln_next != next)
- uu_panic("insert(%p): internal error: %p and %p not "
- "neighbors\n", lp, next, prev);
-
- if (np->uln_next != POOL_TO_MARKER(lp->ul_pool) ||
- np->uln_prev != NULL) {
- uu_panic("insert(%p): elem %p node %p corrupt, "
- "not initialized, or already in a list.\n",
- lp, NODE_TO_ELEM(lp, np), np);
- }
- /*
- * invalidate outstanding uu_list_index_ts.
- */
- lp->ul_index = INDEX_NEXT(lp->ul_index);
- }
- np->uln_next = next;
- np->uln_prev = prev;
- next->uln_prev = np;
- prev->uln_next = np;
-
- lp->ul_numnodes++;
-}
-
-/*
- * Insert elem into lp immediately before the node that idx refers to.  An
- * index with no node part refers to the list's sentinel, so the element is
- * appended at the end.
- *
- * In debug mode, panics if idx does not carry the list's current index mark
- * or if the node it refers to has a NULL uln_prev.
- */
-void
-uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx)
-{
- uu_list_node_impl_t *np;
-
- np = INDEX_TO_NODE(idx);
- if (np == NULL)
- np = &lp->ul_null_node;
-
- if (lp->ul_debug) {
- if (!INDEX_VALID(lp, idx))
- uu_panic("uu_list_insert(%p, %p, %p): %s\n",
- lp, elem, idx,
- INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
- if (np->uln_prev == NULL)
- uu_panic("uu_list_insert(%p, %p, %p): out-of-date "
- "index\n", lp, elem, idx);
- }
-
- list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np);
-}
-
-/*
- * Scan lp from the front for an element that the pool's comparison function
- * considers equal to elem; private is passed through to that function.
- *
- * Returns the matching element, or NULL.  If out is non-NULL it receives an
- * index: of the match if there is one; of the first element comparing
- * greater than elem if the list is sorted and the scan stops there; and
- * otherwise the index with no node part, which uu_list_insert() treats as
- * the end of the list.
- *
- * If the pool has no comparison function, returns NULL with
- * UU_ERROR_NOT_SUPPORTED and sets *out to 0.
- */
-void *
-uu_list_find(uu_list_t *lp, void *elem, void *private, uu_list_index_t *out)
-{
- int sorted = lp->ul_sorted;
- uu_compare_fn_t *func = lp->ul_pool->ulp_cmp;
- uu_list_node_impl_t *np;
-
- if (func == NULL) {
- if (out != NULL)
- *out = 0;
- uu_set_error(UU_ERROR_NOT_SUPPORTED);
- return (NULL);
- }
- for (np = lp->ul_null_node.uln_next; np != &lp->ul_null_node;
- np = np->uln_next) {
- void *ep = NODE_TO_ELEM(lp, np);
- int cmp = func(ep, elem, private);
- if (cmp == 0) {
- if (out != NULL)
- *out = NODE_TO_INDEX(lp, np);
- return (ep);
- }
- /* In a sorted list nothing past a greater element can match. */
- if (sorted && cmp > 0) {
- if (out != NULL)
- *out = NODE_TO_INDEX(lp, np);
- return (NULL);
- }
- }
- if (out != NULL)
- *out = NODE_TO_INDEX(lp, 0);
- return (NULL);
-}
-
-/*
- * Return the element that idx refers to, i.e. the one that would follow an
- * element inserted at idx, or NULL if idx refers to the end of the list.
- *
- * In debug mode, panics if idx does not carry the list's current index mark
- * or if the node it refers to has a NULL uln_prev.
- */
-void *
-uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx)
-{
- uu_list_node_impl_t *np = INDEX_TO_NODE(idx);
-
- if (np == NULL)
- np = &lp->ul_null_node;
-
- if (lp->ul_debug) {
- if (!INDEX_VALID(lp, idx))
- uu_panic("uu_list_nearest_next(%p, %p): %s\n",
- lp, idx, INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
- if (np->uln_prev == NULL)
- uu_panic("uu_list_nearest_next(%p, %p): out-of-date "
- "index\n", lp, idx);
- }
-
- if (np == &lp->ul_null_node)
- return (NULL);
- else
- return (NODE_TO_ELEM(lp, np));
-}
-
-/*
- * Return the element just before the node that idx refers to, i.e. the one
- * that would precede an element inserted at idx, or NULL if there is none.
- *
- * In debug mode, panics if idx does not carry the list's current index mark
- * or if the node it refers to has a NULL uln_prev.
- */
-void *
-uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx)
-{
- uu_list_node_impl_t *np = INDEX_TO_NODE(idx);
-
- if (np == NULL)
- np = &lp->ul_null_node;
-
- if (lp->ul_debug) {
- if (!INDEX_VALID(lp, idx))
- uu_panic("uu_list_nearest_prev(%p, %p): %s\n",
- lp, idx, INDEX_CHECK(idx)? "outdated index" :
- "invalid index");
- if (np->uln_prev == NULL)
- uu_panic("uu_list_nearest_prev(%p, %p): out-of-date "
- "index\n", lp, idx);
- }
-
- if ((np = np->uln_prev) == &lp->ul_null_node)
- return (NULL);
- else
- return (NODE_TO_ELEM(lp, np));
-}
-
-/*
- * Initialize the walker wp over the list lp.
- *
- * UU_WALK_REVERSE starts the walk at the last node and moves backwards;
- * otherwise it starts at the first node.  Robust walkers, and all walkers of
- * a debug list, are appended to the list's walker list; any other walker is
- * left with NULL list links.
- */
-static void
-list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags)
-{
-	int robust = (flags & UU_WALK_ROBUST);
-	int direction = 1;
-
-	if (flags & UU_WALK_REVERSE)
-		direction = -1;
-
-	(void) memset(wp, 0, sizeof (*wp));
-	wp->ulw_list = lp;
-	wp->ulw_robust = robust;
-	wp->ulw_dir = direction;
-	wp->ulw_next_result = (direction > 0)?
-	    lp->ul_null_node.uln_next : lp->ul_null_node.uln_prev;
-
-	if (lp->ul_debug || robust) {
-		uu_list_walk_t *head = &lp->ul_null_walk;
-		uu_list_walk_t *tail = head->ulw_prev;
-
-		wp->ulw_next = head;
-		wp->ulw_prev = tail;
-		head->ulw_prev = wp;
-		tail->ulw_next = wp;
-	}
-}
-
-static uu_list_node_impl_t *
-list_walk_advance(uu_list_walk_t *wp, uu_list_t *lp)
-{
- uu_list_node_impl_t *np = wp->ulw_next_result;
- uu_list_node_impl_t *next;
-
- if (np == &lp->ul_null_node)
- return (NULL);
-
- next = (wp->ulw_dir > 0)? np->uln_next : np->uln_prev;
-
- wp->ulw_next_result = next;
- return (np);
-}
-
-static void
-list_walk_fini(uu_list_walk_t *wp)
-{
- /* GLXXX debugging? */
- if (wp->ulw_next != NULL) {
- wp->ulw_next->ulw_prev = wp->ulw_prev;
- wp->ulw_prev->ulw_next = wp->ulw_next;
- wp->ulw_next = NULL;
- wp->ulw_prev = NULL;
- }
- wp->ulw_list = NULL;
- wp->ulw_next_result = NULL;
-}
-
-uu_list_walk_t *
-uu_list_walk_start(uu_list_t *lp, uint32_t flags)
-{
- uu_list_walk_t *wp;
-
- if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (NULL);
- }
-
- wp = uu_zalloc(sizeof (*wp));
- if (wp == NULL) {
- uu_set_error(UU_ERROR_NO_MEMORY);
- return (NULL);
- }
-
- list_walk_init(wp, lp, flags);
- return (wp);
-}
-
-void *
-uu_list_walk_next(uu_list_walk_t *wp)
-{
- uu_list_t *lp = wp->ulw_list;
- uu_list_node_impl_t *np = list_walk_advance(wp, lp);
-
- if (np == NULL)
- return (NULL);
-
- return (NODE_TO_ELEM(lp, np));
-}
-
-void
-uu_list_walk_end(uu_list_walk_t *wp)
-{
- list_walk_fini(wp);
- uu_free(wp);
-}
-
-int
-uu_list_walk(uu_list_t *lp, uu_walk_fn_t *func, void *private, uint32_t flags)
-{
- uu_list_node_impl_t *np;
-
- int status = UU_WALK_NEXT;
-
- int robust = (flags & UU_WALK_ROBUST);
- int reverse = (flags & UU_WALK_REVERSE);
-
- if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) {
- uu_set_error(UU_ERROR_UNKNOWN_FLAG);
- return (-1);
- }
-
- if (lp->ul_debug || robust) {
- uu_list_walk_t my_walk;
- void *e;
-
- list_walk_init(&my_walk, lp, flags);
- while (status == UU_WALK_NEXT &&
- (e = uu_list_walk_next(&my_walk)) != NULL)
- status = (*func)(e, private);
- list_walk_fini(&my_walk);
- } else {
- if (!reverse) {
- for (np = lp->ul_null_node.uln_next;
- status == UU_WALK_NEXT && np != &lp->ul_null_node;
- np = np->uln_next) {
- status = (*func)(NODE_TO_ELEM(lp, np), private);
- }
- } else {
- for (np = lp->ul_null_node.uln_prev;
- status == UU_WALK_NEXT && np != &lp->ul_null_node;
- np = np->uln_prev) {
- status = (*func)(NODE_TO_ELEM(lp, np), private);
- }
- }
- }
- if (status >= 0)
- return (0);
- uu_set_error(UU_ERROR_CALLBACK_FAILED);
- return (-1);
-}
-
-void
-uu_list_remove(uu_list_t *lp, void *elem)
-{
- uu_list_node_impl_t *np = ELEM_TO_NODE(lp, elem);
- uu_list_walk_t *wp;
-
- if (lp->ul_debug) {
- if (np->uln_prev == NULL)
- uu_panic("uu_list_remove(%p, %p): elem not on list\n",
- lp, elem);
- /*
- * invalidate outstanding uu_list_index_ts.
- */
- lp->ul_index = INDEX_NEXT(lp->ul_index);
- }
-
- /*
- * robust walkers must be advanced. In debug mode, non-robust
- * walkers are also on the list. If there are any, it's an error.
- */
- for (wp = lp->ul_null_walk.ulw_next; wp != &lp->ul_null_walk;
- wp = wp->ulw_next) {
- if (wp->ulw_robust) {
- if (np == wp->ulw_next_result)
- (void) list_walk_advance(wp, lp);
- } else if (wp->ulw_next_result != NULL) {
- uu_panic("uu_list_remove(%p, %p): active non-robust "
- "walker\n", lp, elem);
- }
- }
-
- np->uln_next->uln_prev = np->uln_prev;
- np->uln_prev->uln_next = np->uln_next;
-
- lp->ul_numnodes--;
-
- np->uln_next = POOL_TO_MARKER(lp->ul_pool);
- np->uln_prev = NULL;
-}
-
-void *
-uu_list_teardown(uu_list_t *lp, void **cookie)
-{
- void *ep;
-
- /*
- * XXX: disable list modification until list is empty
- */
- if (lp->ul_debug && *cookie != NULL)
- uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n", lp,
- cookie);
-
- ep = uu_list_first(lp);
- if (ep)
- uu_list_remove(lp, ep);
- return (ep);
-}
-
-int
-uu_list_insert_before(uu_list_t *lp, void *target, void *elem)
-{
- uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target);
-
- if (target == NULL)
- np = &lp->ul_null_node;
-
- if (lp->ul_debug) {
- if (np->uln_prev == NULL)
- uu_panic("uu_list_insert_before(%p, %p, %p): %p is "
- "not currently on a list\n",
- lp, target, elem, target);
- }
- if (lp->ul_sorted) {
- if (lp->ul_debug)
- uu_panic("uu_list_insert_before(%p, ...): list is "
- "UU_LIST_SORTED\n", lp);
- uu_set_error(UU_ERROR_NOT_SUPPORTED);
- return (-1);
- }
-
- list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np);
- return (0);
-}
-
-int
-uu_list_insert_after(uu_list_t *lp, void *target, void *elem)
-{
- uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target);
-
- if (target == NULL)
- np = &lp->ul_null_node;
-
- if (lp->ul_debug) {
- if (np->uln_prev == NULL)
- uu_panic("uu_list_insert_after(%p, %p, %p): %p is "
- "not currently on a list\n",
- lp, target, elem, target);
- }
- if (lp->ul_sorted) {
- if (lp->ul_debug)
- uu_panic("uu_list_insert_after(%p, ...): list is "
- "UU_LIST_SORTED\n", lp);
- uu_set_error(UU_ERROR_NOT_SUPPORTED);
- return (-1);
- }
-
- list_insert(lp, ELEM_TO_NODE(lp, elem), np, np->uln_next);
- return (0);
-}
-
-size_t
-uu_list_numnodes(uu_list_t *lp)
-{
- return (lp->ul_numnodes);
-}
-
-void *
-uu_list_first(uu_list_t *lp)
-{
- uu_list_node_impl_t *n = lp->ul_null_node.uln_next;
- if (n == &lp->ul_null_node)
- return (NULL);
- return (NODE_TO_ELEM(lp, n));
-}
-
-void *
-uu_list_last(uu_list_t *lp)
-{
- uu_list_node_impl_t *n = lp->ul_null_node.uln_prev;
- if (n == &lp->ul_null_node)
- return (NULL);
- return (NODE_TO_ELEM(lp, n));
-}
-
-void *
-uu_list_next(uu_list_t *lp, void *elem)
-{
- uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem);
-
- n = n->uln_next;
- if (n == &lp->ul_null_node)
- return (NULL);
- return (NODE_TO_ELEM(lp, n));
-}
-
-void *
-uu_list_prev(uu_list_t *lp, void *elem)
-{
- uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem);
-
- n = n->uln_prev;
- if (n == &lp->ul_null_node)
- return (NULL);
- return (NODE_TO_ELEM(lp, n));
-}
-
-/*
- * called from uu_lockup() and uu_release(), as part of our fork1()-safety.
- */
-void
-uu_list_lockup(void)
-{
- uu_list_pool_t *pp;
-
- (void) pthread_mutex_lock(&uu_lpool_list_lock);
- for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool;
- pp = pp->ulp_next)
- (void) pthread_mutex_lock(&pp->ulp_lock);
-}
-
-void
-uu_list_release(void)
-{
- uu_list_pool_t *pp;
-
- for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool;
- pp = pp->ulp_next)
- (void) pthread_mutex_unlock(&pp->ulp_lock);
- (void) pthread_mutex_unlock(&uu_lpool_list_lock);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_misc.c b/contrib/opensolaris/lib/libuutil/common/uu_misc.c
deleted file mode 100644
index fb0c32b..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_misc.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <assert.h>
-#include <errno.h>
-#include <libintl.h>
-#include <pthread.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/debug.h>
-#include <thread.h>
-#include <unistd.h>
-
-#if !defined(TEXT_DOMAIN)
-#define TEXT_DOMAIN "SYS_TEST"
-#endif
-
-/*
- * All of the old code under !defined(PTHREAD_ONCE_KEY_NP)
- * is here to enable the building of a native version of
- * libuutil.so when the build machine has not yet been upgraded
- * to a version of libc that provides pthread_key_create_once_np().
- * It should all be deleted when solaris_nevada ships.
- * The code is not MT-safe in a relaxed memory model.
- */
-
-#if defined(PTHREAD_ONCE_KEY_NP)
-static pthread_key_t uu_error_key = PTHREAD_ONCE_KEY_NP;
-#else /* PTHREAD_ONCE_KEY_NP */
-static pthread_key_t uu_error_key = 0;
-static pthread_mutex_t uu_key_lock = PTHREAD_MUTEX_INITIALIZER;
-#endif /* PTHREAD_ONCE_KEY_NP */
-
-static int uu_error_key_setup = 0;
-
-static pthread_mutex_t uu_panic_lock = PTHREAD_MUTEX_INITIALIZER;
-/* LINTED static unused */
-static const char *uu_panic_format;
-/* LINTED static unused */
-static va_list uu_panic_args;
-static pthread_t uu_panic_thread;
-
-static uint32_t _uu_main_error;
-
-void
-uu_set_error(uint_t code)
-{
-
-#if defined(PTHREAD_ONCE_KEY_NP)
- if (pthread_key_create_once_np(&uu_error_key, NULL) != 0)
- uu_error_key_setup = -1;
- else
- uu_error_key_setup = 1;
-#else /* PTHREAD_ONCE_KEY_NP */
- if (uu_error_key_setup == 0) {
- (void) pthread_mutex_lock(&uu_key_lock);
- if (uu_error_key_setup == 0) {
- if (pthread_key_create(&uu_error_key, NULL) != 0)
- uu_error_key_setup = -1;
- else
- uu_error_key_setup = 1;
- }
- (void) pthread_mutex_unlock(&uu_key_lock);
- }
-#endif /* PTHREAD_ONCE_KEY_NP */
- if (uu_error_key_setup > 0)
- (void) pthread_setspecific(uu_error_key,
- (void *)(uintptr_t)code);
-}
-
-uint32_t
-uu_error(void)
-{
-
- if (uu_error_key_setup < 0) /* can't happen? */
- return (UU_ERROR_UNKNOWN);
-
- /*
- * Because UU_ERROR_NONE == 0, if uu_set_error() was
- * never called, then this will return UU_ERROR_NONE:
- */
- return ((uint32_t)(uintptr_t)pthread_getspecific(uu_error_key));
-}
-
-const char *
-uu_strerror(uint32_t code)
-{
- const char *str;
-
- switch (code) {
- case UU_ERROR_NONE:
- str = dgettext(TEXT_DOMAIN, "No error");
- break;
-
- case UU_ERROR_INVALID_ARGUMENT:
- str = dgettext(TEXT_DOMAIN, "Invalid argument");
- break;
-
- case UU_ERROR_UNKNOWN_FLAG:
- str = dgettext(TEXT_DOMAIN, "Unknown flag passed");
- break;
-
- case UU_ERROR_NO_MEMORY:
- str = dgettext(TEXT_DOMAIN, "Out of memory");
- break;
-
- case UU_ERROR_CALLBACK_FAILED:
- str = dgettext(TEXT_DOMAIN, "Callback-initiated failure");
- break;
-
- case UU_ERROR_NOT_SUPPORTED:
- str = dgettext(TEXT_DOMAIN, "Operation not supported");
- break;
-
- case UU_ERROR_EMPTY:
- str = dgettext(TEXT_DOMAIN, "No value provided");
- break;
-
- case UU_ERROR_UNDERFLOW:
- str = dgettext(TEXT_DOMAIN, "Value too small");
- break;
-
- case UU_ERROR_OVERFLOW:
- str = dgettext(TEXT_DOMAIN, "Value too large");
- break;
-
- case UU_ERROR_INVALID_CHAR:
- str = dgettext(TEXT_DOMAIN,
- "Value contains unexpected character");
- break;
-
- case UU_ERROR_INVALID_DIGIT:
- str = dgettext(TEXT_DOMAIN,
- "Value contains digit not in base");
- break;
-
- case UU_ERROR_SYSTEM:
- str = dgettext(TEXT_DOMAIN, "Underlying system error");
- break;
-
- case UU_ERROR_UNKNOWN:
- str = dgettext(TEXT_DOMAIN, "Error status not known");
- break;
-
- default:
- errno = ESRCH;
- str = NULL;
- break;
- }
- return (str);
-}
-
-void
-uu_panic(const char *format, ...)
-{
- va_list args;
-
- va_start(args, format);
-
- (void) pthread_mutex_lock(&uu_panic_lock);
- if (uu_panic_thread == 0) {
- uu_panic_thread = pthread_self();
- uu_panic_format = format;
- va_copy(uu_panic_args, args);
- }
- (void) pthread_mutex_unlock(&uu_panic_lock);
-
- (void) vfprintf(stderr, format, args);
-
- if (uu_panic_thread == pthread_self())
- abort();
- else
- for (;;)
- (void) pause();
-}
-
-int
-assfail(const char *astring, const char *file, int line)
-{
- __assert(astring, file, line);
- /*NOTREACHED*/
- return (0);
-}
-
-static void
-uu_lockup(void)
-{
- (void) pthread_mutex_lock(&uu_panic_lock);
-#if !defined(PTHREAD_ONCE_KEY_NP)
- (void) pthread_mutex_lock(&uu_key_lock);
-#endif
- uu_avl_lockup();
- uu_list_lockup();
-}
-
-static void
-uu_release(void)
-{
- (void) pthread_mutex_unlock(&uu_panic_lock);
-#if !defined(PTHREAD_ONCE_KEY_NP)
- (void) pthread_mutex_unlock(&uu_key_lock);
-#endif
- uu_avl_release();
- uu_list_release();
-}
-
-static void
-uu_release_child(void)
-{
- uu_panic_format = NULL;
- uu_panic_thread = 0;
-
- uu_release();
-}
-
-#pragma init(uu_init)
-static void
-uu_init(void)
-{
- (void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_open.c b/contrib/opensolaris/lib/libuutil/common/uu_open.c
deleted file mode 100644
index 7256662..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_open.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <sys/time.h>
-
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#ifdef _LP64
-#define TMPPATHFMT "%s/uu%ld"
-#else /* _LP64 */
-#define TMPPATHFMT "%s/uu%lld"
-#endif /* _LP64 */
-
-/*ARGSUSED*/
-int
-uu_open_tmp(const char *dir, uint_t uflags)
-{
- int f;
- char *fname = uu_zalloc(PATH_MAX);
-
- if (fname == NULL)
- return (-1);
-
- for (;;) {
- (void) snprintf(fname, PATH_MAX, "%s/uu%lld", dir, gethrtime());
-
- f = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
-
- if (f >= 0 || errno != EEXIST)
- break;
- }
-
- if (f >= 0)
- (void) unlink(fname);
-
- uu_free(fname);
-
- return (f);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_pname.c b/contrib/opensolaris/lib/libuutil/common/uu_pname.c
deleted file mode 100644
index 20626ac..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_pname.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <libintl.h>
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <errno.h>
-#include <wchar.h>
-#include <unistd.h>
-
-static const char PNAME_FMT[] = "%s: ";
-static const char ERRNO_FMT[] = ": %s\n";
-
-static const char *pname;
-
-static void
-uu_die_internal(int status, const char *format, va_list alist) __NORETURN;
-
-int uu_exit_ok_value = EXIT_SUCCESS;
-int uu_exit_fatal_value = EXIT_FAILURE;
-int uu_exit_usage_value = 2;
-
-int *
-uu_exit_ok(void)
-{
- return (&uu_exit_ok_value);
-}
-
-int *
-uu_exit_fatal(void)
-{
- return (&uu_exit_fatal_value);
-}
-
-int *
-uu_exit_usage(void)
-{
- return (&uu_exit_usage_value);
-}
-
-void
-uu_alt_exit(int profile)
-{
- switch (profile) {
- case UU_PROFILE_DEFAULT:
- uu_exit_ok_value = EXIT_SUCCESS;
- uu_exit_fatal_value = EXIT_FAILURE;
- uu_exit_usage_value = 2;
- break;
- case UU_PROFILE_LAUNCHER:
- uu_exit_ok_value = EXIT_SUCCESS;
- uu_exit_fatal_value = 124;
- uu_exit_usage_value = 125;
- break;
- }
-}
-
-static void
-uu_warn_internal(int err, const char *format, va_list alist)
-{
- if (pname != NULL)
- (void) fprintf(stderr, PNAME_FMT, pname);
-
- (void) vfprintf(stderr, format, alist);
-
- if (strrchr(format, '\n') == NULL)
- (void) fprintf(stderr, ERRNO_FMT, strerror(err));
-}
-
-void
-uu_vwarn(const char *format, va_list alist)
-{
- uu_warn_internal(errno, format, alist);
-}
-
-/*PRINTFLIKE1*/
-void
-uu_warn(const char *format, ...)
-{
- va_list alist;
- va_start(alist, format);
- uu_warn_internal(errno, format, alist);
- va_end(alist);
-}
-
-static void
-uu_die_internal(int status, const char *format, va_list alist)
-{
- uu_warn_internal(errno, format, alist);
-#ifdef DEBUG
- {
- char *cp;
-
- if (!issetugid()) {
- cp = getenv("UU_DIE_ABORTS");
- if (cp != NULL && *cp != '\0')
- abort();
- }
- }
-#endif
- exit(status);
-}
-
-void
-uu_vdie(const char *format, va_list alist)
-{
- uu_die_internal(UU_EXIT_FATAL, format, alist);
-}
-
-/*PRINTFLIKE1*/
-void
-uu_die(const char *format, ...)
-{
- va_list alist;
- va_start(alist, format);
- uu_die_internal(UU_EXIT_FATAL, format, alist);
- va_end(alist);
-}
-
-void
-uu_vxdie(int status, const char *format, va_list alist)
-{
- uu_die_internal(status, format, alist);
-}
-
-/*PRINTFLIKE2*/
-void
-uu_xdie(int status, const char *format, ...)
-{
- va_list alist;
- va_start(alist, format);
- uu_die_internal(status, format, alist);
- va_end(alist);
-}
-
-const char *
-uu_setpname(char *arg0)
-{
- /*
- * Having a NULL argv[0], while uncommon, is possible. It
- * makes more sense to handle this event in uu_setpname rather
- * than in each of its consumers.
- */
- if (arg0 == NULL) {
- pname = "unknown_command";
- return (pname);
- }
-
- /*
- * Guard against '/' at end of command invocation.
- */
- for (;;) {
- char *p = strrchr(arg0, '/');
- if (p == NULL) {
- pname = arg0;
- break;
- } else {
- if (*(p + 1) == '\0') {
- *p = '\0';
- continue;
- }
-
- pname = p + 1;
- break;
- }
- }
-
- return (pname);
-}
-
-const char *
-uu_getpname(void)
-{
- return (pname);
-}
diff --git a/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c b/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c
deleted file mode 100644
index 8fd1148..0000000
--- a/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "libuutil_common.h"
-
-#include <limits.h>
-#include <ctype.h>
-
-#define MAX_BASE 36
-
-#define IS_DIGIT(x) ((x) >= '0' && (x) <= '9')
-
-#define CTOI(x) (((x) >= '0' && (x) <= '9') ? (x) - '0' : \
- ((x) >= 'a' && (x) <= 'z') ? (x) + 10 - 'a' : (x) + 10 - 'A')
-
-static int
-strtoint(const char *s_arg, uint64_t *out, uint32_t base, int sign)
-{
- const unsigned char *s = (const unsigned char *)s_arg;
-
- uint64_t val = 0;
- uint64_t multmax;
-
- unsigned c, i;
-
- int neg = 0;
-
- int bad_digit = 0;
- int bad_char = 0;
- int overflow = 0;
-
- if (s == NULL || base == 1 || base > MAX_BASE) {
- uu_set_error(UU_ERROR_INVALID_ARGUMENT);
- return (-1);
- }
-
- while ((c = *s) != 0 && isspace(c))
- s++;
-
- switch (c) {
- case '-':
- if (!sign)
- overflow = 1; /* becomes underflow below */
- neg = 1;
- /*FALLTHRU*/
- case '+':
- c = *++s;
- break;
- default:
- break;
- }
-
- if (c == '\0') {
- uu_set_error(UU_ERROR_EMPTY);
- return (-1);
- }
-
- if (base == 0) {
- if (c != '0')
- base = 10;
- else if (s[1] == 'x' || s[1] == 'X')
- base = 16;
- else
- base = 8;
- }
-
- if (base == 16 && c == '0' && (s[1] == 'x' || s[1] == 'X'))
- c = *(s += 2);
-
- if ((val = CTOI(c)) >= base) {
- if (IS_DIGIT(c))
- bad_digit = 1;
- else
- bad_char = 1;
- val = 0;
- }
-
- multmax = (uint64_t)UINT64_MAX / (uint64_t)base;
-
- for (c = *++s; c != '\0'; c = *++s) {
- if ((i = CTOI(c)) >= base) {
- if (isspace(c))
- break;
- if (IS_DIGIT(c))
- bad_digit = 1;
- else
- bad_char = 1;
- i = 0;
- }
-
- if (val > multmax)
- overflow = 1;
-
- val *= base;
- if ((uint64_t)UINT64_MAX - val < (uint64_t)i)
- overflow = 1;
-
- val += i;
- }
-
- while ((c = *s) != 0) {
- if (!isspace(c))
- bad_char = 1;
- s++;
- }
-
- if (sign) {
- if (neg) {
- if (val > -(uint64_t)INT64_MIN)
- overflow = 1;
- } else {
- if (val > INT64_MAX)
- overflow = 1;
- }
- }
-
- if (neg)
- val = -val;
-
- if (bad_char | bad_digit | overflow) {
- if (bad_char)
- uu_set_error(UU_ERROR_INVALID_CHAR);
- else if (bad_digit)
- uu_set_error(UU_ERROR_INVALID_DIGIT);
- else if (overflow) {
- if (neg)
- uu_set_error(UU_ERROR_UNDERFLOW);
- else
- uu_set_error(UU_ERROR_OVERFLOW);
- }
- return (-1);
- }
-
- *out = val;
- return (0);
-}
-
-int
-uu_strtoint(const char *s, void *v, size_t sz, int base,
- int64_t min, int64_t max)
-{
- uint64_t val_u;
- int64_t val;
-
- if (min > max)
- goto bad_argument;
-
- switch (sz) {
- case 1:
- if (max > INT8_MAX || min < INT8_MIN)
- goto bad_argument;
- break;
- case 2:
- if (max > INT16_MAX || min < INT16_MIN)
- goto bad_argument;
- break;
- case 4:
- if (max > INT32_MAX || min < INT32_MIN)
- goto bad_argument;
- break;
- case 8:
- if (max > INT64_MAX || min < INT64_MIN)
- goto bad_argument;
- break;
- default:
- goto bad_argument;
- }
-
- if (min == 0 && max == 0) {
- min = -(1ULL << (8 * sz - 1));
- max = (1ULL << (8 * sz - 1)) - 1;
- }
-
- if (strtoint(s, &val_u, base, 1) == -1)
- return (-1);
-
- val = (int64_t)val_u;
-
- if (val < min) {
- uu_set_error(UU_ERROR_UNDERFLOW);
- return (-1);
- } else if (val > max) {
- uu_set_error(UU_ERROR_OVERFLOW);
- return (-1);
- }
-
- switch (sz) {
- case 1:
- *(int8_t *)v = val;
- return (0);
- case 2:
- *(int16_t *)v = val;
- return (0);
- case 4:
- *(int32_t *)v = val;
- return (0);
- case 8:
- *(int64_t *)v = val;
- return (0);
- default:
- break; /* fall through to bad_argument */
- }
-
-bad_argument:
- uu_set_error(UU_ERROR_INVALID_ARGUMENT);
- return (-1);
-}
-
-int
-uu_strtouint(const char *s, void *v, size_t sz, int base,
- uint64_t min, uint64_t max)
-{
- uint64_t val;
-
- if (min > max)
- goto bad_argument;
-
- switch (sz) {
- case 1:
- if (max > UINT8_MAX)
- goto bad_argument;
- break;
- case 2:
- if (max > UINT16_MAX)
- goto bad_argument;
- break;
- case 4:
- if (max > UINT32_MAX)
- goto bad_argument;
- break;
- case 8:
- if (max > UINT64_MAX)
- goto bad_argument;
- break;
- default:
- goto bad_argument;
- }
-
- if (min == 0 && max == 0) {
- /* we have to be careful, since << can overflow */
- max = (1ULL << (8 * sz - 1)) * 2 - 1;
- }
-
- if (strtoint(s, &val, base, 0) == -1)
- return (-1);
-
- if (val < min) {
- uu_set_error(UU_ERROR_UNDERFLOW);
- return (-1);
- } else if (val > max) {
- uu_set_error(UU_ERROR_OVERFLOW);
- return (-1);
- }
-
- switch (sz) {
- case 1:
- *(uint8_t *)v = val;
- return (0);
- case 2:
- *(uint16_t *)v = val;
- return (0);
- case 4:
- *(uint32_t *)v = val;
- return (0);
- case 8:
- *(uint64_t *)v = val;
- return (0);
- default:
- break; /* shouldn't happen, fall through */
- }
-
-bad_argument:
- uu_set_error(UU_ERROR_INVALID_ARGUMENT);
- return (-1);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs.h b/contrib/opensolaris/lib/libzfs/common/libzfs.h
deleted file mode 100644
index 232324e..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBZFS_H
-#define _LIBZFS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <assert.h>
-#include <libnvpair.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/varargs.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_ioctl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Miscellaneous ZFS constants
- */
-#define ZFS_MAXNAMELEN MAXNAMELEN
-#define ZPOOL_MAXNAMELEN MAXNAMELEN
-#define ZFS_MAXPROPLEN MAXPATHLEN
-
-/*
- * libzfs errors
- */
-enum {
- EZFS_NOMEM = 2000, /* out of memory */
- EZFS_BADPROP, /* invalid property value */
- EZFS_PROPREADONLY, /* cannot set readonly property */
- EZFS_PROPTYPE, /* property does not apply to dataset type */
- EZFS_PROPNONINHERIT, /* property is not inheritable */
- EZFS_PROPSPACE, /* bad quota or reservation */
- EZFS_BADTYPE, /* dataset is not of appropriate type */
- EZFS_BUSY, /* pool or dataset is busy */
- EZFS_EXISTS, /* pool or dataset already exists */
- EZFS_NOENT, /* no such pool or dataset */
- EZFS_BADSTREAM, /* bad backup stream */
- EZFS_DSREADONLY, /* dataset is readonly */
- EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
- EZFS_VOLHASDATA, /* volume already contains data */
- EZFS_INVALIDNAME, /* invalid dataset name */
- EZFS_BADRESTORE, /* unable to restore to destination */
- EZFS_BADBACKUP, /* backup failed */
- EZFS_BADTARGET, /* bad attach/detach/replace target */
- EZFS_NODEVICE, /* no such device in pool */
- EZFS_BADDEV, /* invalid device to add */
- EZFS_NOREPLICAS, /* no valid replicas */
- EZFS_RESILVERING, /* currently resilvering */
- EZFS_BADVERSION, /* unsupported version */
- EZFS_POOLUNAVAIL, /* pool is currently unavailable */
- EZFS_DEVOVERFLOW, /* too many devices in one vdev */
- EZFS_BADPATH, /* must be an absolute path */
- EZFS_CROSSTARGET, /* rename or clone across pool or dataset */
- EZFS_ZONED, /* used improperly in local zone */
- EZFS_MOUNTFAILED, /* failed to mount dataset */
- EZFS_UMOUNTFAILED, /* failed to unmount dataset */
- EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */
- EZFS_SHARENFSFAILED, /* share(1M) failed */
- EZFS_DEVLINKS, /* failed to create zvol links */
- EZFS_PERM, /* permission denied */
- EZFS_NOSPC, /* out of space */
- EZFS_IO, /* I/O error */
- EZFS_INTR, /* signal received */
- EZFS_ISSPARE, /* device is a hot spare */
- EZFS_INVALCONFIG, /* invalid vdev configuration */
- EZFS_RECURSIVE, /* recursive dependency */
- EZFS_NOHISTORY, /* no history object */
- EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
- EZFS_SHAREISCSIFAILED, /* iscsitgtd failed request to share */
- EZFS_POOLPROPS, /* couldn't retrieve pool props */
- EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */
- EZFS_POOL_INVALARG, /* invalid argument for this pool operation */
- EZFS_NAMETOOLONG, /* dataset name is too long */
- EZFS_UNKNOWN
-};
-
-/*
- * Basic handle types
- */
-typedef struct zfs_handle zfs_handle_t;
-typedef struct zpool_handle zpool_handle_t;
-typedef struct libzfs_handle libzfs_handle_t;
-
-/*
- * Library initialization
- */
-extern libzfs_handle_t *libzfs_init(void);
-extern void libzfs_fini(libzfs_handle_t *);
-
-extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
-extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
-
-extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
-
-extern int libzfs_errno(libzfs_handle_t *);
-extern const char *libzfs_error_action(libzfs_handle_t *);
-extern const char *libzfs_error_description(libzfs_handle_t *);
-
-/*
- * Basic handle functions
- */
-extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
-extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
-extern void zpool_close(zpool_handle_t *);
-extern const char *zpool_get_name(zpool_handle_t *);
-extern uint64_t zpool_get_guid(zpool_handle_t *);
-extern uint64_t zpool_get_space_used(zpool_handle_t *);
-extern uint64_t zpool_get_space_total(zpool_handle_t *);
-extern int zpool_get_root(zpool_handle_t *, char *, size_t);
-extern int zpool_get_state(zpool_handle_t *);
-extern uint64_t zpool_get_version(zpool_handle_t *);
-
-/*
- * Iterate over all active pools in the system.
- */
-typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
-extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
-
-/*
- * Functions to create and destroy pools
- */
-extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
- const char *);
-extern int zpool_destroy(zpool_handle_t *);
-extern int zpool_add(zpool_handle_t *, nvlist_t *);
-
-/*
- * Functions to manipulate pool and vdev state
- */
-extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
-
-extern int zpool_vdev_online(zpool_handle_t *, const char *);
-extern int zpool_vdev_offline(zpool_handle_t *, const char *, int);
-extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *,
- nvlist_t *, int);
-extern int zpool_vdev_detach(zpool_handle_t *, const char *);
-extern int zpool_vdev_remove(zpool_handle_t *, const char *);
-extern int zpool_clear(zpool_handle_t *, const char *);
-extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
-
-/*
- * Functions to manage pool properties
- */
-extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
-extern int zpool_get_prop(zpool_handle_t *, zfs_prop_t, char *,
- size_t proplen, zfs_source_t *);
-extern const char *zpool_prop_to_name(zpool_prop_t);
-extern const char *zpool_prop_values(zpool_prop_t);
-
-/*
- * Pool health statistics.
- */
-typedef enum {
- /*
- * The following correspond to faults as defined in the (fault.fs.zfs.*)
- * event namespace. Each is associated with a corresponding message ID.
- */
- ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */
- ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */
- ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */
- ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */
- ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */
- ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */
- ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */
- ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */
- ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */
- ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */
- ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
-
- /*
- * The following are not faults per se, but still an error possibly
- * requiring administrative attention. There is no corresponding
- * message ID.
- */
- ZPOOL_STATUS_VERSION_OLDER, /* older on-disk version */
- ZPOOL_STATUS_RESILVERING, /* device being resilvered */
- ZPOOL_STATUS_OFFLINE_DEV, /* device online */
-
- /*
- * Finally, the following indicates a healthy pool.
- */
- ZPOOL_STATUS_OK
-} zpool_status_t;
-
-extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
-extern zpool_status_t zpool_import_status(nvlist_t *, char **);
-
-/*
- * Statistics and configuration functions.
- */
-extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
-extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
-extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
-
-/*
- * Import and export functions
- */
-extern int zpool_export(zpool_handle_t *);
-extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
- const char *);
-
-/*
- * Search for pools to import
- */
-extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
-
-/*
- * Miscellaneous pool functions
- */
-extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
-extern int zpool_upgrade(zpool_handle_t *);
-extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
-extern void zpool_log_history(libzfs_handle_t *, int, char **, const char *,
- boolean_t, boolean_t);
-extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
- size_t len);
-
-/*
- * Basic handle manipulations. These functions do not create or destroy the
- * underlying datasets, only the references to them.
- */
-extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
-extern void zfs_close(zfs_handle_t *);
-extern zfs_type_t zfs_get_type(const zfs_handle_t *);
-extern const char *zfs_get_name(const zfs_handle_t *);
-
-/*
- * Property management functions. Some functions are shared with the kernel,
- * and are found in sys/fs/zfs.h.
- */
-extern const char *zfs_prop_to_name(zfs_prop_t);
-extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
-extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
- zfs_source_t *, char *, size_t, boolean_t);
-extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
- zfs_source_t *, char *, size_t);
-extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-extern const char *zfs_prop_get_string(zfs_handle_t *, zfs_prop_t);
-extern int zfs_prop_inherit(zfs_handle_t *, const char *);
-extern const char *zfs_prop_values(zfs_prop_t);
-extern int zfs_prop_valid_for_type(zfs_prop_t, int);
-extern const char *zfs_prop_default_string(zfs_prop_t prop);
-extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
-extern int zfs_prop_is_string(zfs_prop_t prop);
-extern const char *zfs_prop_column_name(zfs_prop_t);
-extern boolean_t zfs_prop_align_right(zfs_prop_t);
-extern void nicebool(int value, char *buf, size_t buflen);
-
-typedef struct zfs_proplist {
- zfs_prop_t pl_prop;
- char *pl_user_prop;
- struct zfs_proplist *pl_next;
- boolean_t pl_all;
- size_t pl_width;
- boolean_t pl_fixed;
-} zfs_proplist_t;
-
-typedef zfs_proplist_t zpool_proplist_t;
-
-extern int zfs_get_proplist(libzfs_handle_t *, char *, zfs_proplist_t **);
-extern int zpool_get_proplist(libzfs_handle_t *, char *, zpool_proplist_t **);
-extern int zfs_expand_proplist(zfs_handle_t *, zfs_proplist_t **);
-extern int zpool_expand_proplist(zpool_handle_t *, zpool_proplist_t **);
-extern void zfs_free_proplist(zfs_proplist_t *);
-extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
-
-#define ZFS_MOUNTPOINT_NONE "none"
-#define ZFS_MOUNTPOINT_LEGACY "legacy"
-
-/*
- * Functions for printing properties from zfs/zpool
- */
-typedef struct libzfs_get_cbdata {
- int cb_sources;
- int cb_columns[4];
- int cb_colwidths[5];
- boolean_t cb_scripted;
- boolean_t cb_literal;
- boolean_t cb_first;
- zfs_proplist_t *cb_proplist;
-} libzfs_get_cbdata_t;
-
-void libzfs_print_one_property(const char *, libzfs_get_cbdata_t *,
- const char *, const char *, zfs_source_t, const char *);
-
-#define GET_COL_NAME 1
-#define GET_COL_PROPERTY 2
-#define GET_COL_VALUE 3
-#define GET_COL_SOURCE 4
-
-/*
- * Iterator functions.
- */
-typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
-extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
-extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
-extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
-extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
-extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
-
-/*
- * Functions to create and destroy datasets.
- */
-extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
- nvlist_t *);
-extern int zfs_destroy(zfs_handle_t *);
-extern int zfs_destroy_snaps(zfs_handle_t *, char *);
-extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
-extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
-extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
-extern int zfs_rename(zfs_handle_t *, const char *, int);
-extern int zfs_send(zfs_handle_t *, const char *, int);
-extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int,
- boolean_t, int);
-extern int zfs_promote(zfs_handle_t *);
-
-/*
- * Miscellaneous functions.
- */
-extern const char *zfs_type_to_name(zfs_type_t);
-extern void zfs_refresh_properties(zfs_handle_t *);
-extern int zfs_name_valid(const char *, zfs_type_t);
-extern int zfs_disable(zfs_handle_t *);
-extern int zfs_enable(zfs_handle_t *);
-extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
-
-/*
- * Mount support functions.
- */
-extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
-extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
-extern int zfs_mount(zfs_handle_t *, const char *, int);
-extern int zfs_unmount(zfs_handle_t *, const char *, int);
-extern int zfs_unmountall(zfs_handle_t *, int);
-
-/*
- * Share support functions.
- */
-extern boolean_t zfs_is_shared(zfs_handle_t *);
-extern int zfs_share(zfs_handle_t *);
-extern int zfs_unshare(zfs_handle_t *);
-
-/*
- * Protocol-specifc share support functions.
- */
-extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **);
-extern int zfs_share_nfs(zfs_handle_t *);
-extern int zfs_unshare_nfs(zfs_handle_t *, const char *);
-extern int zfs_unshareall_nfs(zfs_handle_t *);
-extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
-extern int zfs_share_iscsi(zfs_handle_t *);
-extern int zfs_unshare_iscsi(zfs_handle_t *);
-
-/*
- * FreeBSD-specific jail support function.
- */
-extern int zfs_jail(zfs_handle_t *, int, int);
-
-/*
- * When dealing with nvlists, verify() is extremely useful
- */
-#ifndef verify
-#ifdef NDEBUG
-#define verify(EX) ((void)(EX))
-#else
-#define verify(EX) assert(EX)
-#endif
-#endif
-
-/*
- * Utility function to convert a number to a human-readable form.
- */
-extern void zfs_nicenum(uint64_t, char *, size_t);
-extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
-
-/*
- * Pool destroy special. Remove the device information without destroying
- * the underlying dataset.
- */
-extern int zfs_remove_link(zfs_handle_t *);
-
-/*
- * Given a device or file, determine if it is part of a pool.
- */
-extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
- boolean_t *);
-
-/*
- * ftyp special. Read the label from a given device.
- */
-extern int zpool_read_label(int, nvlist_t **);
-
-/*
- * Create and remove zvol /dev links.
- */
-extern int zpool_create_zvol_links(zpool_handle_t *);
-extern int zpool_remove_zvol_links(zpool_handle_t *);
-
-/*
- * Enable and disable datasets within a pool by mounting/unmounting and
- * sharing/unsharing them.
- */
-extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
-extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
-
-#ifdef __FreeBSD__
-extern int zmount(const char *, const char *, int, char *, char *, int, char *,
- int);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBZFS_H */
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c b/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
deleted file mode 100644
index 5e6de6d..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <libintl.h>
-#include <libuutil.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <zone.h>
-
-#include <libzfs.h>
-
-#include "libzfs_impl.h"
-
-/*
- * Structure to keep track of dataset state. Before changing the 'sharenfs' or
- * 'mountpoint' property, we record whether the filesystem was previously
- * mounted/shared. This prior state dictates whether we remount/reshare the
- * dataset after the property has been changed.
- *
- * The interface consists of the following sequence of functions:
- *
- * changelist_gather()
- * changelist_prefix()
- * < change property >
- * changelist_postfix()
- * changelist_free()
- *
- * Other interfaces:
- *
- * changelist_remove() - remove a node from a gathered list
- * changelist_rename() - renames all datasets appropriately when doing a rename
- * changelist_unshare() - unshares all the nodes in a given changelist
- * changelist_haszonedchild() - check if there is any child exported to
- * a local zone
- */
-typedef struct prop_changenode {
- zfs_handle_t *cn_handle;
- int cn_shared;
- int cn_mounted;
- int cn_zoned;
- uu_list_node_t cn_listnode;
-} prop_changenode_t;
-
-struct prop_changelist {
- zfs_prop_t cl_prop;
- zfs_prop_t cl_realprop;
- uu_list_pool_t *cl_pool;
- uu_list_t *cl_list;
- boolean_t cl_waslegacy;
- boolean_t cl_allchildren;
- boolean_t cl_alldependents;
- int cl_flags;
- boolean_t cl_haszonedchild;
- boolean_t cl_sorted;
-};
-
-/*
- * If the property is 'mountpoint', go through and unmount filesystems as
- * necessary. We don't do the same for 'sharenfs', because we can just re-share
- * with different options without interrupting service.
- */
-int
-changelist_prefix(prop_changelist_t *clp)
-{
- prop_changenode_t *cn;
- int ret = 0;
-
- if (clp->cl_prop != ZFS_PROP_MOUNTPOINT)
- return (0);
-
- for (cn = uu_list_first(clp->cl_list); cn != NULL;
- cn = uu_list_next(clp->cl_list, cn)) {
- /*
- * If we are in the global zone, but this dataset is exported
- * to a local zone, do nothing.
- */
- if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
- continue;
-
- if (ZFS_IS_VOLUME(cn->cn_handle)) {
- switch (clp->cl_realprop) {
- case ZFS_PROP_NAME:
- /*
- * If this was a rename, unshare the zvol, and
- * remove the /dev/zvol links.
- */
- (void) zfs_unshare_iscsi(cn->cn_handle);
-
- if (zvol_remove_link(cn->cn_handle->zfs_hdl,
- cn->cn_handle->zfs_name) != 0)
- ret = -1;
- break;
-
- case ZFS_PROP_VOLSIZE:
- /*
- * If this was a change to the volume size, we
- * need to unshare and reshare the volume.
- */
- (void) zfs_unshare_iscsi(cn->cn_handle);
- break;
- }
- } else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0)
- ret = -1;
- }
-
- return (ret);
-}
-
-/*
- * If the property is 'mountpoint' or 'sharenfs', go through and remount and/or
- * reshare the filesystems as necessary. In changelist_gather() we recorded
- * whether the filesystem was previously shared or mounted. The action we take
- * depends on the previous state, and whether the value was previously 'legacy'.
- * For non-legacy properties, we only remount/reshare the filesystem if it was
- * previously mounted/shared. Otherwise, we always remount/reshare the
- * filesystem.
- */
-int
-changelist_postfix(prop_changelist_t *clp)
-{
- prop_changenode_t *cn;
- char shareopts[ZFS_MAXPROPLEN];
- int ret = 0;
-
- /*
- * If we're changing the mountpoint, attempt to destroy the underlying
- * mountpoint. All other datasets will have inherited from this dataset
- * (in which case their mountpoints exist in the filesystem in the new
- * location), or have explicit mountpoints set (in which case they won't
- * be in the changelist).
- */
- if ((cn = uu_list_last(clp->cl_list)) == NULL)
- return (0);
-
- if (clp->cl_prop == ZFS_PROP_MOUNTPOINT)
- remove_mountpoint(cn->cn_handle);
-
- /*
- * We walk the datasets in reverse, because we want to mount any parent
- * datasets before mounting the children.
- */
- for (cn = uu_list_last(clp->cl_list); cn != NULL;
- cn = uu_list_prev(clp->cl_list, cn)) {
- /*
- * If we are in the global zone, but this dataset is exported
- * to a local zone, do nothing.
- */
- if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
- continue;
-
- zfs_refresh_properties(cn->cn_handle);
-
- if (ZFS_IS_VOLUME(cn->cn_handle)) {
- /*
- * If we're doing a rename, recreate the /dev/zvol
- * links.
- */
- if (clp->cl_realprop == ZFS_PROP_NAME &&
- zvol_create_link(cn->cn_handle->zfs_hdl,
- cn->cn_handle->zfs_name) != 0) {
- ret = -1;
- } else if (cn->cn_shared ||
- clp->cl_prop == ZFS_PROP_SHAREISCSI) {
- if (zfs_prop_get(cn->cn_handle,
- ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0,
- B_FALSE) == 0 &&
- strcmp(shareopts, "off") == 0) {
- ret = zfs_unshare_iscsi(cn->cn_handle);
- } else {
- ret = zfs_share_iscsi(cn->cn_handle);
- }
- }
-
- continue;
- }
-
- if ((clp->cl_waslegacy || cn->cn_mounted) &&
- !zfs_is_mounted(cn->cn_handle, NULL) &&
- zfs_mount(cn->cn_handle, NULL, 0) != 0)
- ret = -1;
-
- /*
- * We always re-share even if the filesystem is currently
- * shared, so that we can adopt any new options.
- */
- if (cn->cn_shared ||
- (clp->cl_prop == ZFS_PROP_SHARENFS && clp->cl_waslegacy)) {
- if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
- shareopts, sizeof (shareopts), NULL, NULL, 0,
- B_FALSE) == 0 && strcmp(shareopts, "off") == 0) {
- ret = zfs_unshare_nfs(cn->cn_handle, NULL);
- } else {
- ret = zfs_share_nfs(cn->cn_handle);
- }
- }
- }
-
- return (ret);
-}
-
-/*
- * Is this "dataset" a child of "parent"?
- */
-static boolean_t
-isa_child_of(const char *dataset, const char *parent)
-{
- int len;
-
- len = strlen(parent);
-
- if (strncmp(dataset, parent, len) == 0 &&
- (dataset[len] == '@' || dataset[len] == '/' ||
- dataset[len] == '\0'))
- return (B_TRUE);
- else
- return (B_FALSE);
-
-}
-
-/*
- * If we rename a filesystem, child filesystem handles are no longer valid
- * since we identify each dataset by its name in the ZFS namespace. As a
- * result, we have to go through and fix up all the names appropriately. We
- * could do this automatically if libzfs kept track of all open handles, but
- * this is a lot less work.
- */
-void
-changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
-{
- prop_changenode_t *cn;
- char newname[ZFS_MAXNAMELEN];
-
- for (cn = uu_list_first(clp->cl_list); cn != NULL;
- cn = uu_list_next(clp->cl_list, cn)) {
- /*
- * Do not rename a clone that's not in the source hierarchy.
- */
- if (!isa_child_of(cn->cn_handle->zfs_name, src))
- continue;
-
- /*
- * Destroy the previous mountpoint if needed.
- */
- remove_mountpoint(cn->cn_handle);
-
- (void) strlcpy(newname, dst, sizeof (newname));
- (void) strcat(newname, cn->cn_handle->zfs_name + strlen(src));
-
- (void) strlcpy(cn->cn_handle->zfs_name, newname,
- sizeof (cn->cn_handle->zfs_name));
- }
-}
-
-/*
- * Given a gathered changelist for the 'sharenfs' property, unshare all the
- * datasets in the list.
- */
-int
-changelist_unshare(prop_changelist_t *clp)
-{
- prop_changenode_t *cn;
- int ret = 0;
-
- if (clp->cl_prop != ZFS_PROP_SHARENFS)
- return (0);
-
- for (cn = uu_list_first(clp->cl_list); cn != NULL;
- cn = uu_list_next(clp->cl_list, cn)) {
- if (zfs_unshare_nfs(cn->cn_handle, NULL) != 0)
- ret = -1;
- }
-
- return (ret);
-}
-
-/*
- * Check if there is any child exported to a local zone in a given changelist.
- * This information has already been recorded while gathering the changelist
- * via changelist_gather().
- */
-int
-changelist_haszonedchild(prop_changelist_t *clp)
-{
- return (clp->cl_haszonedchild);
-}
-
-/*
- * Remove a node from a gathered list.
- */
-void
-changelist_remove(zfs_handle_t *zhp, prop_changelist_t *clp)
-{
- prop_changenode_t *cn;
-
- for (cn = uu_list_first(clp->cl_list); cn != NULL;
- cn = uu_list_next(clp->cl_list, cn)) {
-
- if (strcmp(cn->cn_handle->zfs_name, zhp->zfs_name) == 0) {
- uu_list_remove(clp->cl_list, cn);
- zfs_close(cn->cn_handle);
- free(cn);
- return;
- }
- }
-}
-
-/*
- * Release any memory associated with a changelist.
- */
-void
-changelist_free(prop_changelist_t *clp)
-{
- prop_changenode_t *cn;
- void *cookie;
-
- if (clp->cl_list) {
- cookie = NULL;
- while ((cn = uu_list_teardown(clp->cl_list, &cookie)) != NULL) {
- zfs_close(cn->cn_handle);
- free(cn);
- }
-
- uu_list_destroy(clp->cl_list);
- }
- if (clp->cl_pool)
- uu_list_pool_destroy(clp->cl_pool);
-
- free(clp);
-}
-
-static int
-change_one(zfs_handle_t *zhp, void *data)
-{
- prop_changelist_t *clp = data;
- char property[ZFS_MAXPROPLEN];
- char where[64];
- prop_changenode_t *cn;
- zfs_source_t sourcetype;
-
- /*
- * We only want to unmount/unshare those filesystems that may inherit
- * from the target filesystem. If we find any filesystem with a
- * locally set mountpoint, we ignore any children since changing the
- * property will not affect them. If this is a rename, we iterate
- * over all children regardless, since we need them unmounted in
- * order to do the rename. Also, if this is a volume and we're doing
- * a rename, then always add it to the changelist.
- */
-
- if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) &&
- zfs_prop_get(zhp, clp->cl_prop, property,
- sizeof (property), &sourcetype, where, sizeof (where),
- B_FALSE) != 0) {
- zfs_close(zhp);
- return (0);
- }
-
- if (clp->cl_alldependents || clp->cl_allchildren ||
- sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) {
- if ((cn = zfs_alloc(zfs_get_handle(zhp),
- sizeof (prop_changenode_t))) == NULL) {
- zfs_close(zhp);
- return (-1);
- }
-
- cn->cn_handle = zhp;
- cn->cn_mounted = zfs_is_mounted(zhp, NULL);
- cn->cn_shared = zfs_is_shared(zhp);
- cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
-
- /* Indicate if any child is exported to a local zone. */
- if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
- clp->cl_haszonedchild = B_TRUE;
-
- uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
-
- if (clp->cl_sorted) {
- uu_list_index_t idx;
-
- (void) uu_list_find(clp->cl_list, cn, NULL,
- &idx);
- uu_list_insert(clp->cl_list, cn, idx);
- } else {
- ASSERT(!clp->cl_alldependents);
- verify(uu_list_insert_before(clp->cl_list,
- uu_list_first(clp->cl_list), cn) == 0);
- }
-
- if (!clp->cl_alldependents)
- return (zfs_iter_children(zhp, change_one, data));
- } else {
- zfs_close(zhp);
- }
-
- return (0);
-}
-
-/*ARGSUSED*/
-static int
-compare_mountpoints(const void *a, const void *b, void *unused)
-{
- const prop_changenode_t *ca = a;
- const prop_changenode_t *cb = b;
-
- char mounta[MAXPATHLEN];
- char mountb[MAXPATHLEN];
-
- boolean_t hasmounta, hasmountb;
-
- /*
- * When unsharing or unmounting filesystems, we need to do it in
- * mountpoint order. This allows the user to have a mountpoint
- * hierarchy that is different from the dataset hierarchy, and still
- * allow it to be changed. However, if either dataset doesn't have a
- * mountpoint (because it is a volume or a snapshot), we place it at the
- * end of the list, because it doesn't affect our change at all.
- */
- hasmounta = (zfs_prop_get(ca->cn_handle, ZFS_PROP_MOUNTPOINT, mounta,
- sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
- hasmountb = (zfs_prop_get(cb->cn_handle, ZFS_PROP_MOUNTPOINT, mountb,
- sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
-
- if (!hasmounta && hasmountb)
- return (-1);
- else if (hasmounta && !hasmountb)
- return (1);
- else if (!hasmounta && !hasmountb)
- return (0);
- else
- return (strcmp(mountb, mounta));
-}
-
-/*
- * Given a ZFS handle and a property, construct a complete list of datasets
- * that need to be modified as part of this process. For anything but the
- * 'mountpoint' and 'sharenfs' properties, this just returns an empty list.
- * Otherwise, we iterate over all children and look for any datasets that
- * inherit the property. For each such dataset, we add it to the list and
- * mark whether it was shared beforehand.
- */
-prop_changelist_t *
-changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
-{
- prop_changelist_t *clp;
- prop_changenode_t *cn;
- zfs_handle_t *temp;
- char property[ZFS_MAXPROPLEN];
- uu_compare_fn_t *compare = NULL;
-
- if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
- return (NULL);
-
- /*
- * For mountpoint-related tasks, we want to sort everything by
- * mountpoint, so that we mount and unmount them in the appropriate
- * order, regardless of their position in the hierarchy.
- */
- if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
- prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) {
- compare = compare_mountpoints;
- clp->cl_sorted = B_TRUE;
- }
-
- clp->cl_pool = uu_list_pool_create("changelist_pool",
- sizeof (prop_changenode_t),
- offsetof(prop_changenode_t, cn_listnode),
- compare, 0);
- if (clp->cl_pool == NULL) {
- assert(uu_error() == UU_ERROR_NO_MEMORY);
- (void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
- changelist_free(clp);
- return (NULL);
- }
-
- clp->cl_list = uu_list_create(clp->cl_pool, NULL,
- clp->cl_sorted ? UU_LIST_SORTED : 0);
- clp->cl_flags = flags;
-
- if (clp->cl_list == NULL) {
- assert(uu_error() == UU_ERROR_NO_MEMORY);
- (void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
- changelist_free(clp);
- return (NULL);
- }
-
- /*
- * If this is a rename or the 'zoned' property, we pretend we're
- * changing the mountpoint and flag it so we can catch all children in
- * change_one().
- *
- * Flag cl_alldependents to catch all children plus the dependents
- * (clones) that are not in the hierarchy.
- */
- if (prop == ZFS_PROP_NAME) {
- clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- clp->cl_alldependents = B_TRUE;
- } else if (prop == ZFS_PROP_ZONED) {
- clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- clp->cl_allchildren = B_TRUE;
- } else if (prop == ZFS_PROP_CANMOUNT) {
- clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- } else if (prop == ZFS_PROP_VOLSIZE) {
- clp->cl_prop = ZFS_PROP_MOUNTPOINT;
- } else {
- clp->cl_prop = prop;
- }
- clp->cl_realprop = prop;
-
- if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
- clp->cl_prop != ZFS_PROP_SHARENFS &&
- clp->cl_prop != ZFS_PROP_SHAREISCSI)
- return (clp);
-
- if (clp->cl_alldependents) {
- if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
- changelist_free(clp);
- return (NULL);
- }
- } else if (zfs_iter_children(zhp, change_one, clp) != 0) {
- changelist_free(clp);
- return (NULL);
- }
-
- /*
- * We have to re-open ourselves because we auto-close all the handles
- * and can't tell the difference.
- */
- if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
- ZFS_TYPE_ANY)) == NULL) {
- changelist_free(clp);
- return (NULL);
- }
-
- /*
- * Always add ourself to the list. We add ourselves to the end so that
- * we're the last to be unmounted.
- */
- if ((cn = zfs_alloc(zhp->zfs_hdl,
- sizeof (prop_changenode_t))) == NULL) {
- zfs_close(temp);
- changelist_free(clp);
- return (NULL);
- }
-
- cn->cn_handle = temp;
- cn->cn_mounted = zfs_is_mounted(temp, NULL);
- cn->cn_shared = zfs_is_shared(temp);
- cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
-
- uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
- if (clp->cl_sorted) {
- uu_list_index_t idx;
- (void) uu_list_find(clp->cl_list, cn, NULL, &idx);
- uu_list_insert(clp->cl_list, cn, idx);
- } else {
- verify(uu_list_insert_after(clp->cl_list,
- uu_list_last(clp->cl_list), cn) == 0);
- }
-
- /*
- * If the property was previously 'legacy' or 'none', record this fact,
- * as the behavior of changelist_postfix() will be different.
- */
- if (zfs_prop_get(zhp, prop, property, sizeof (property),
- NULL, NULL, 0, B_FALSE) == 0 &&
- (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 ||
- strcmp(property, "off") == 0))
- clp->cl_waslegacy = B_TRUE;
-
- return (clp);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_config.c b/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
deleted file mode 100644
index 94640d1..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
- * single packed nvlist. While it would be nice to just read in this
- * file from userland, this wouldn't work from a local zone. So we have to have
- * a zpool ioctl to return the complete configuration for all pools. In the
- * global zone, this will be identical to reading the file and unpacking it in
- * userland.
- */
-
-#include <errno.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <libintl.h>
-#include <libuutil.h>
-
-#include "libzfs_impl.h"
-
-typedef struct config_node {
- char *cn_name;
- nvlist_t *cn_config;
- uu_avl_node_t cn_avl;
-} config_node_t;
-
-/* ARGSUSED */
-static int
-config_node_compare(const void *a, const void *b, void *unused)
-{
- int ret;
-
- const config_node_t *ca = (config_node_t *)a;
- const config_node_t *cb = (config_node_t *)b;
-
- ret = strcmp(ca->cn_name, cb->cn_name);
-
- if (ret < 0)
- return (-1);
- else if (ret > 0)
- return (1);
- else
- return (0);
-}
-
-void
-namespace_clear(libzfs_handle_t *hdl)
-{
- if (hdl->libzfs_ns_avl) {
- config_node_t *cn;
- void *cookie = NULL;
-
- while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl,
- &cookie)) != NULL) {
- nvlist_free(cn->cn_config);
- free(cn->cn_name);
- free(cn);
- }
-
- uu_avl_destroy(hdl->libzfs_ns_avl);
- hdl->libzfs_ns_avl = NULL;
- }
-
- if (hdl->libzfs_ns_avlpool) {
- uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
- hdl->libzfs_ns_avlpool = NULL;
- }
-}
-
-/*
- * Loads the pool namespace, or re-loads it if the cache has changed.
- */
-static int
-namespace_reload(libzfs_handle_t *hdl)
-{
- nvlist_t *config;
- config_node_t *cn;
- nvpair_t *elem;
- zfs_cmd_t zc = { 0 };
- void *cookie;
-
- if (hdl->libzfs_ns_gen == 0) {
- /*
- * This is the first time we've accessed the configuration
- * cache. Initialize the AVL tree and then fall through to the
- * common code.
- */
- if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
- sizeof (config_node_t),
- offsetof(config_node_t, cn_avl),
- config_node_compare, UU_DEFAULT)) == NULL)
- return (no_memory(hdl));
-
- if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
- NULL, UU_DEFAULT)) == NULL)
- return (no_memory(hdl));
- }
-
- if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
- return (-1);
-
- for (;;) {
- zc.zc_cookie = hdl->libzfs_ns_gen;
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
- switch (errno) {
- case EEXIST:
- /*
- * The namespace hasn't changed.
- */
- zcmd_free_nvlists(&zc);
- return (0);
-
- case ENOMEM:
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- break;
-
- default:
- zcmd_free_nvlists(&zc);
- return (zfs_standard_error(hdl, errno,
- dgettext(TEXT_DOMAIN, "failed to read "
- "pool configuration")));
- }
- } else {
- hdl->libzfs_ns_gen = zc.zc_cookie;
- break;
- }
- }
-
- if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
-
- zcmd_free_nvlists(&zc);
-
- /*
- * Clear out any existing configuration information.
- */
- cookie = NULL;
- while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) {
- nvlist_free(cn->cn_config);
- free(cn->cn_name);
- free(cn);
- }
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
- nvlist_t *child;
- uu_avl_index_t where;
-
- if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
- nvlist_free(config);
- return (-1);
- }
-
- if ((cn->cn_name = zfs_strdup(hdl,
- nvpair_name(elem))) == NULL) {
- free(cn);
- nvlist_free(config);
- return (-1);
- }
-
- verify(nvpair_value_nvlist(elem, &child) == 0);
- if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
- free(cn->cn_name);
- free(cn);
- nvlist_free(config);
- return (no_memory(hdl));
- }
- verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
- == NULL);
-
- uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
- }
-
- nvlist_free(config);
- return (0);
-}
-
-/*
- * Retrieve the configuration for the given pool. The configuration is a nvlist
- * describing the vdevs, as well as the statistics associated with each one.
- */
-nvlist_t *
-zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig)
-{
- if (oldconfig)
- *oldconfig = zhp->zpool_old_config;
- return (zhp->zpool_config);
-}
-
-/*
- * Refresh the vdev statistics associated with the given pool. This is used in
- * iostat to show configuration changes and determine the delta from the last
- * time the function was called. This function can fail, in case the pool has
- * been destroyed.
- */
-int
-zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing)
-{
- zfs_cmd_t zc = { 0 };
- int error;
- nvlist_t *config;
- libzfs_handle_t *hdl = zhp->zpool_hdl;
-
- *missing = B_FALSE;
- (void) strcpy(zc.zc_name, zhp->zpool_name);
-
- if (zhp->zpool_config_size == 0)
- zhp->zpool_config_size = 1 << 16;
-
- if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0)
- return (-1);
-
- for (;;) {
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
- &zc) == 0) {
- /*
- * The real error is returned in the zc_cookie field.
- */
- error = zc.zc_cookie;
- break;
- }
-
- if (errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- } else {
- zcmd_free_nvlists(&zc);
- if (errno == ENOENT || errno == EINVAL)
- *missing = B_TRUE;
- zhp->zpool_state = POOL_STATE_UNAVAIL;
- return (0);
- }
- }
-
- if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
-
- zcmd_free_nvlists(&zc);
-
- zhp->zpool_config_size = zc.zc_nvlist_dst_size;
-
- if (zhp->zpool_config != NULL) {
- uint64_t oldtxg, newtxg;
-
- verify(nvlist_lookup_uint64(zhp->zpool_config,
- ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0);
- verify(nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0);
-
- if (zhp->zpool_old_config != NULL)
- nvlist_free(zhp->zpool_old_config);
-
- if (oldtxg != newtxg) {
- nvlist_free(zhp->zpool_config);
- zhp->zpool_old_config = NULL;
- } else {
- zhp->zpool_old_config = zhp->zpool_config;
- }
- }
-
- zhp->zpool_config = config;
- if (error)
- zhp->zpool_state = POOL_STATE_UNAVAIL;
- else
- zhp->zpool_state = POOL_STATE_ACTIVE;
-
- return (0);
-}
-
-/*
- * Iterate over all pools in the system.
- */
-int
-zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
-{
- config_node_t *cn;
- zpool_handle_t *zhp;
- int ret;
-
- if (namespace_reload(hdl) != 0)
- return (-1);
-
- for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
- cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
-
- if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
- return (-1);
-
- if (zhp == NULL)
- continue;
-
- if ((ret = func(zhp, data)) != 0)
- return (ret);
- }
-
- return (0);
-}
-
-/*
- * Iterate over root datasets, calling the given function for each. The zfs
- * handle passed each time must be explicitly closed by the callback.
- */
-int
-zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
-{
- config_node_t *cn;
- zfs_handle_t *zhp;
- int ret;
-
- if (namespace_reload(hdl) != 0)
- return (-1);
-
- for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
- cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
-
- if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL)
- continue;
-
- if ((ret = func(zhp, data)) != 0)
- return (ret);
- }
-
- return (0);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
deleted file mode 100644
index 4fc441a..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
+++ /dev/null
@@ -1,3855 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <libintl.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <zone.h>
-#include <fcntl.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/mount.h>
-
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/zap.h>
-#include <libzfs.h>
-
-#include "zfs_namecheck.h"
-#include "zfs_prop.h"
-#include "libzfs_impl.h"
-
-static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
-
-/*
- * Given a single type (not a mask of types), return the type in a human
- * readable form.
- */
-const char *
-zfs_type_to_name(zfs_type_t type)
-{
- switch (type) {
- case ZFS_TYPE_FILESYSTEM:
- return (dgettext(TEXT_DOMAIN, "filesystem"));
- case ZFS_TYPE_SNAPSHOT:
- return (dgettext(TEXT_DOMAIN, "snapshot"));
- case ZFS_TYPE_VOLUME:
- return (dgettext(TEXT_DOMAIN, "volume"));
- }
-
- return (NULL);
-}
-
-/*
- * Given a path and mask of ZFS types, return a string describing this dataset.
- * This is used when we fail to open a dataset and we cannot get an exact type.
- * We guess what the type would have been based on the path and the mask of
- * acceptable types.
- */
-static const char *
-path_to_str(const char *path, int types)
-{
- /*
- * When given a single type, always report the exact type.
- */
- if (types == ZFS_TYPE_SNAPSHOT)
- return (dgettext(TEXT_DOMAIN, "snapshot"));
- if (types == ZFS_TYPE_FILESYSTEM)
- return (dgettext(TEXT_DOMAIN, "filesystem"));
- if (types == ZFS_TYPE_VOLUME)
- return (dgettext(TEXT_DOMAIN, "volume"));
-
- /*
- * The user is requesting more than one type of dataset. If this is the
- * case, consult the path itself. If we're looking for a snapshot, and
- * a '@' is found, then report it as "snapshot". Otherwise, remove the
- * snapshot attribute and try again.
- */
- if (types & ZFS_TYPE_SNAPSHOT) {
- if (strchr(path, '@') != NULL)
- return (dgettext(TEXT_DOMAIN, "snapshot"));
- return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
- }
-
-
- /*
- * The user has requested either filesystems or volumes.
- * We have no way of knowing a priori what type this would be, so always
- * report it as "filesystem" or "volume", our two primitive types.
- */
- if (types & ZFS_TYPE_FILESYSTEM)
- return (dgettext(TEXT_DOMAIN, "filesystem"));
-
- assert(types & ZFS_TYPE_VOLUME);
- return (dgettext(TEXT_DOMAIN, "volume"));
-}
-
-/*
- * Validate a ZFS path. This is used even before trying to open the dataset, to
- * provide a more meaningful error message. We place a more useful message in
- * 'buf' detailing exactly why the name was not valid.
- */
-static int
-zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type)
-{
- namecheck_err_t why;
- char what;
-
- if (dataset_namecheck(path, &why, &what) != 0) {
- if (hdl != NULL) {
- switch (why) {
- case NAME_ERR_TOOLONG:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "name is too long"));
- break;
-
- case NAME_ERR_LEADING_SLASH:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "leading slash in name"));
- break;
-
- case NAME_ERR_EMPTY_COMPONENT:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "empty component in name"));
- break;
-
- case NAME_ERR_TRAILING_SLASH:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "trailing slash in name"));
- break;
-
- case NAME_ERR_INVALCHAR:
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "invalid character "
- "'%c' in name"), what);
- break;
-
- case NAME_ERR_MULTIPLE_AT:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "multiple '@' delimiters in name"));
- break;
-
- case NAME_ERR_NOLETTER:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool doesn't begin with a letter"));
- break;
-
- case NAME_ERR_RESERVED:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "name is reserved"));
- break;
-
- case NAME_ERR_DISKLIKE:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "reserved disk name"));
- break;
- }
- }
-
- return (0);
- }
-
- if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
- if (hdl != NULL)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "snapshot delimiter '@' in filesystem name"));
- return (0);
- }
-
- if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
- if (hdl != NULL)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "missing '@' delimiter in snapshot name"));
- return (0);
- }
-
- return (-1);
-}
-
-int
-zfs_name_valid(const char *name, zfs_type_t type)
-{
- return (zfs_validate_name(NULL, name, type));
-}
-
-/*
- * This function takes the raw DSL properties, and filters out the user-defined
- * properties into a separate nvlist.
- */
-static int
-process_user_props(zfs_handle_t *zhp)
-{
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- nvpair_t *elem;
- nvlist_t *propval;
-
- nvlist_free(zhp->zfs_user_props);
-
- if (nvlist_alloc(&zhp->zfs_user_props, NV_UNIQUE_NAME, 0) != 0)
- return (no_memory(hdl));
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
- if (!zfs_prop_user(nvpair_name(elem)))
- continue;
-
- verify(nvpair_value_nvlist(elem, &propval) == 0);
- if (nvlist_add_nvlist(zhp->zfs_user_props,
- nvpair_name(elem), propval) != 0)
- return (no_memory(hdl));
- }
-
- return (0);
-}
-
-/*
- * Utility function to gather stats (objset and zpl) for the given object.
- */
-static int
-get_stats(zfs_handle_t *zhp)
-{
- zfs_cmd_t zc = { 0 };
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
- if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
- return (-1);
-
- while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
- if (errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- } else {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- }
-
- zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
-
- (void) strlcpy(zhp->zfs_root, zc.zc_value, sizeof (zhp->zfs_root));
-
- if (zhp->zfs_props) {
- nvlist_free(zhp->zfs_props);
- zhp->zfs_props = NULL;
- }
-
- if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zfs_props) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
-
- zcmd_free_nvlists(&zc);
-
- if (process_user_props(zhp) != 0)
- return (-1);
-
- return (0);
-}
-
-/*
- * Refresh the properties currently stored in the handle.
- */
-void
-zfs_refresh_properties(zfs_handle_t *zhp)
-{
- (void) get_stats(zhp);
-}
-
-/*
- * Makes a handle from the given dataset name. Used by zfs_open() and
- * zfs_iter_* to create child handles on the fly.
- */
-zfs_handle_t *
-make_dataset_handle(libzfs_handle_t *hdl, const char *path)
-{
- zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
-
- if (zhp == NULL)
- return (NULL);
-
- zhp->zfs_hdl = hdl;
-
-top:
- (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
-
- if (get_stats(zhp) != 0) {
- free(zhp);
- return (NULL);
- }
-
- if (zhp->zfs_dmustats.dds_inconsistent) {
- zfs_cmd_t zc = { 0 };
-
- /*
- * If it is dds_inconsistent, then we've caught it in
- * the middle of a 'zfs receive' or 'zfs destroy', and
- * it is inconsistent from the ZPL's point of view, so
- * can't be mounted. However, it could also be that we
- * have crashed in the middle of one of those
- * operations, in which case we need to get rid of the
- * inconsistent state. We do that by either rolling
- * back to the previous snapshot (which will fail if
- * there is none), or destroying the filesystem. Note
- * that if we are still in the middle of an active
- * 'receive' or 'destroy', then the rollback and destroy
- * will fail with EBUSY and we will drive on as usual.
- */
-
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
- if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
- (void) zvol_remove_link(hdl, zhp->zfs_name);
- zc.zc_objset_type = DMU_OST_ZVOL;
- } else {
- zc.zc_objset_type = DMU_OST_ZFS;
- }
-
- /* If we can successfully roll it back, reget the stats */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
- goto top;
- /*
- * If we can sucessfully destroy it, pretend that it
- * never existed.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
- free(zhp);
- errno = ENOENT;
- return (NULL);
- }
- }
-
- /*
- * We've managed to open the dataset and gather statistics. Determine
- * the high-level type.
- */
- if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
- zhp->zfs_head_type = ZFS_TYPE_VOLUME;
- else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
- zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
- else
- abort();
-
- if (zhp->zfs_dmustats.dds_is_snapshot)
- zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
- else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
- zhp->zfs_type = ZFS_TYPE_VOLUME;
- else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
- zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
- else
- abort(); /* we should never see any other types */
-
- return (zhp);
-}
-
-/*
- * Opens the given snapshot, filesystem, or volume. The 'types'
- * argument is a mask of acceptable types. The function will print an
- * appropriate error message and return NULL if it can't be opened.
- */
-zfs_handle_t *
-zfs_open(libzfs_handle_t *hdl, const char *path, int types)
-{
- zfs_handle_t *zhp;
- char errbuf[1024];
-
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
-
- /*
- * Validate the name before we even try to open it.
- */
- if (!zfs_validate_name(hdl, path, ZFS_TYPE_ANY)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid dataset name"));
- (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
- return (NULL);
- }
-
- /*
- * Try to get stats for the dataset, which will tell us if it exists.
- */
- errno = 0;
- if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
- (void) zfs_standard_error(hdl, errno, errbuf);
- return (NULL);
- }
-
- if (!(types & zhp->zfs_type)) {
- (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
- zfs_close(zhp);
- return (NULL);
- }
-
- return (zhp);
-}
-
-/*
- * Release a ZFS handle. Nothing to do but free the associated memory.
- */
-void
-zfs_close(zfs_handle_t *zhp)
-{
- if (zhp->zfs_mntopts)
- free(zhp->zfs_mntopts);
- nvlist_free(zhp->zfs_props);
- nvlist_free(zhp->zfs_user_props);
- free(zhp);
-}
-
-/*
- * Given a numeric suffix, convert the value into a number of bits that the
- * resulting value must be shifted.
- */
-static int
-str2shift(libzfs_handle_t *hdl, const char *buf)
-{
- const char *ends = "BKMGTPEZ";
- int i;
-
- if (buf[0] == '\0')
- return (0);
- for (i = 0; i < strlen(ends); i++) {
- if (toupper(buf[0]) == ends[i])
- break;
- }
- if (i == strlen(ends)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid numeric suffix '%s'"), buf);
- return (-1);
- }
-
- /*
- * We want to allow trailing 'b' characters for 'GB' or 'Mb'. But don't
- * allow 'BB' - that's just weird.
- */
- if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' &&
- toupper(buf[0]) != 'B'))
- return (10*i);
-
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid numeric suffix '%s'"), buf);
- return (-1);
-}
-
-/*
- * Convert a string of the form '100G' into a real number. Used when setting
- * properties or creating a volume. 'buf' is used to place an extended error
- * message for the caller to use.
- */
-static int
-nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
-{
- char *end;
- int shift;
-
- *num = 0;
-
- /* Check to see if this looks like a number. */
- if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
- if (hdl)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "bad numeric value '%s'"), value);
- return (-1);
- }
-
- /* Rely on stroll() to process the numeric portion. */
- errno = 0;
- *num = strtoll(value, &end, 10);
-
- /*
- * Check for ERANGE, which indicates that the value is too large to fit
- * in a 64-bit value.
- */
- if (errno == ERANGE) {
- if (hdl)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "numeric value is too large"));
- return (-1);
- }
-
- /*
- * If we have a decimal value, then do the computation with floating
- * point arithmetic. Otherwise, use standard arithmetic.
- */
- if (*end == '.') {
- double fval = strtod(value, &end);
-
- if ((shift = str2shift(hdl, end)) == -1)
- return (-1);
-
- fval *= pow(2, shift);
-
- if (fval > UINT64_MAX) {
- if (hdl)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "numeric value is too large"));
- return (-1);
- }
-
- *num = (uint64_t)fval;
- } else {
- if ((shift = str2shift(hdl, end)) == -1)
- return (-1);
-
- /* Check for overflow */
- if (shift >= 64 || (*num << shift) >> shift != *num) {
- if (hdl)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "numeric value is too large"));
- return (-1);
- }
-
- *num <<= shift;
- }
-
- return (0);
-}
-
-int
-zfs_nicestrtonum(libzfs_handle_t *hdl, const char *str, uint64_t *val)
-{
- return (nicestrtonum(hdl, str, val));
-}
-
-/*
- * The prop_parse_*() functions are designed to allow flexibility in callers
- * when setting properties. At the DSL layer, all properties are either 64-bit
- * numbers or strings. We want the user to be able to ignore this fact and
- * specify properties as native values (boolean, for example) or as strings (to
- * simplify command line utilities). This also handles converting index types
- * (compression, checksum, etc) from strings to their on-disk index.
- */
-
-static int
-prop_parse_boolean(libzfs_handle_t *hdl, nvpair_t *elem, uint64_t *val)
-{
- uint64_t ret;
-
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
- verify(nvpair_value_string(elem, &value) == 0);
-
- if (strcmp(value, "on") == 0) {
- ret = 1;
- } else if (strcmp(value, "off") == 0) {
- ret = 0;
- } else {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "property '%s' must be 'on' or 'off'"),
- nvpair_name(elem));
- return (-1);
- }
- break;
- }
-
- case DATA_TYPE_UINT64:
- {
- verify(nvpair_value_uint64(elem, &ret) == 0);
- if (ret > 1) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a boolean value"),
- nvpair_name(elem));
- return (-1);
- }
- break;
- }
-
- case DATA_TYPE_BOOLEAN_VALUE:
- {
- boolean_t value;
- verify(nvpair_value_boolean_value(elem, &value) == 0);
- ret = value;
- break;
- }
-
- default:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a boolean value"),
- nvpair_name(elem));
- return (-1);
- }
-
- *val = ret;
- return (0);
-}
-
-static int
-prop_parse_number(libzfs_handle_t *hdl, nvpair_t *elem, zfs_prop_t prop,
- uint64_t *val)
-{
- uint64_t ret;
- boolean_t isnone = B_FALSE;
-
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
- (void) nvpair_value_string(elem, &value);
- if (strcmp(value, "none") == 0) {
- isnone = B_TRUE;
- ret = 0;
- } else if (nicestrtonum(hdl, value, &ret) != 0) {
- return (-1);
- }
- break;
- }
-
- case DATA_TYPE_UINT64:
- (void) nvpair_value_uint64(elem, &ret);
- break;
-
- default:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a number"),
- nvpair_name(elem));
- return (-1);
- }
-
- /*
- * Quota special: force 'none' and don't allow 0.
- */
- if (ret == 0 && !isnone && prop == ZFS_PROP_QUOTA) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "use 'none' to disable quota"));
- return (-1);
- }
-
- *val = ret;
- return (0);
-}
-
-static int
-prop_parse_index(libzfs_handle_t *hdl, nvpair_t *elem, zfs_prop_t prop,
- uint64_t *val)
-{
- char *propname = nvpair_name(elem);
- char *value;
-
- if (nvpair_type(elem) != DATA_TYPE_STRING) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a string"), propname);
- return (-1);
- }
-
- (void) nvpair_value_string(elem, &value);
-
- if (zfs_prop_string_to_index(prop, value, val) != 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be one of '%s'"), propname,
- zfs_prop_values(prop));
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Check if the bootfs name has the same pool name as it is set to.
- * Assuming bootfs is a valid dataset name.
- */
-static boolean_t
-bootfs_poolname_valid(char *pool, char *bootfs)
-{
- char ch, *pname;
-
- /* get the pool name from the bootfs name */
- pname = bootfs;
- while (*bootfs && !isspace(*bootfs) && *bootfs != '/')
- bootfs++;
-
- ch = *bootfs;
- *bootfs = 0;
-
- if (strcmp(pool, pname) == 0) {
- *bootfs = ch;
- return (B_TRUE);
- }
-
- *bootfs = ch;
- return (B_FALSE);
-}
-
-/*
- * Given an nvlist of properties to set, validates that they are correct, and
- * parses any numeric properties (index, boolean, etc) if they are specified as
- * strings.
- */
-nvlist_t *
-zfs_validate_properties(libzfs_handle_t *hdl, zfs_type_t type, char *pool_name,
- nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
-{
- nvpair_t *elem;
- const char *propname;
- zfs_prop_t prop;
- uint64_t intval;
- char *strval;
- nvlist_t *ret;
- int isuser;
-
- if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
- (void) no_memory(hdl);
- return (NULL);
- }
-
- if (type == ZFS_TYPE_SNAPSHOT) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "snapshot properties cannot be modified"));
- (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
- goto error;
- }
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
- propname = nvpair_name(elem);
-
- /*
- * Make sure this property is valid and applies to this type.
- */
- if ((prop = zfs_name_to_prop_common(propname, type))
- == ZFS_PROP_INVAL) {
- isuser = zfs_prop_user(propname);
- if (!isuser || (isuser && (type & ZFS_TYPE_POOL))) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid property '%s'"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- } else {
- /*
- * If this is a user property, make sure it's a
- * string, and that it's less than
- * ZAP_MAXNAMELEN.
- */
- if (nvpair_type(elem) != DATA_TYPE_STRING) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a string"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP,
- errbuf);
- goto error;
- }
-
- if (strlen(nvpair_name(elem)) >=
- ZAP_MAXNAMELEN) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "property name '%s' is too long"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP,
- errbuf);
- goto error;
- }
- }
-
- (void) nvpair_value_string(elem, &strval);
- if (nvlist_add_string(ret, propname, strval) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
- continue;
- }
-
- /*
- * Normalize the name, to get rid of shorthand abbrevations.
- */
- propname = zfs_prop_to_name(prop);
-
- if (!zfs_prop_valid_for_type(prop, type)) {
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "'%s' does not "
- "apply to datasets of this type"), propname);
- (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
- goto error;
- }
-
- if (zfs_prop_readonly(prop) &&
- (prop != ZFS_PROP_VOLBLOCKSIZE || zhp != NULL)) {
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "'%s' is readonly"),
- propname);
- (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
- goto error;
- }
-
- /*
- * Convert any properties to the internal DSL value types.
- */
- strval = NULL;
- switch (zfs_prop_get_type(prop)) {
- case prop_type_boolean:
- if (prop_parse_boolean(hdl, elem, &intval) != 0) {
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- break;
-
- case prop_type_string:
- if (nvpair_type(elem) != DATA_TYPE_STRING) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a string"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- (void) nvpair_value_string(elem, &strval);
- if (strlen(strval) >= ZFS_MAXPROPLEN) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' is too long"), propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- break;
-
- case prop_type_number:
- if (prop_parse_number(hdl, elem, prop, &intval) != 0) {
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- break;
-
- case prop_type_index:
- if (prop_parse_index(hdl, elem, prop, &intval) != 0) {
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- break;
-
- default:
- abort();
- }
-
- /*
- * Add the result to our return set of properties.
- */
- if (strval) {
- if (nvlist_add_string(ret, propname, strval) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
- } else if (nvlist_add_uint64(ret, propname, intval) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
-
- /*
- * Perform some additional checks for specific properties.
- */
- switch (prop) {
- case ZFS_PROP_RECORDSIZE:
- case ZFS_PROP_VOLBLOCKSIZE:
- /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
- if (intval < SPA_MINBLOCKSIZE ||
- intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be power of 2 from %u "
- "to %uk"), propname,
- (uint_t)SPA_MINBLOCKSIZE,
- (uint_t)SPA_MAXBLOCKSIZE >> 10);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- break;
-
- case ZFS_PROP_SHAREISCSI:
- if (strcmp(strval, "off") != 0 &&
- strcmp(strval, "on") != 0 &&
- strcmp(strval, "type=disk") != 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be 'on', 'off', or 'type=disk'"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
-
- break;
-
- case ZFS_PROP_MOUNTPOINT:
- if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
- strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
- break;
-
- if (strval[0] != '/') {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be an absolute path, "
- "'none', or 'legacy'"), propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
- }
- /*FALLTHRU*/
-
- case ZFS_PROP_SHARENFS:
- /*
- * For the mountpoint and sharenfs properties, check if
- * it can be set in a global/non-global zone based on
- * the zoned property value:
- *
- * global zone non-global zone
- * --------------------------------------------------
- * zoned=on mountpoint (no) mountpoint (yes)
- * sharenfs (no) sharenfs (no)
- *
- * zoned=off mountpoint (yes) N/A
- * sharenfs (yes)
- */
- if (zoned) {
- if (getzoneid() == GLOBAL_ZONEID) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' cannot be set on "
- "dataset in a non-global zone"),
- propname);
- (void) zfs_error(hdl, EZFS_ZONED,
- errbuf);
- goto error;
- } else if (prop == ZFS_PROP_SHARENFS) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' cannot be set in "
- "a non-global zone"), propname);
- (void) zfs_error(hdl, EZFS_ZONED,
- errbuf);
- goto error;
- }
- } else if (getzoneid() != GLOBAL_ZONEID) {
- /*
- * If zoned property is 'off', this must be in
- * a globle zone. If not, something is wrong.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' cannot be set while dataset "
- "'zoned' property is set"), propname);
- (void) zfs_error(hdl, EZFS_ZONED, errbuf);
- goto error;
- }
-
- break;
-
- case ZFS_PROP_BOOTFS:
- /*
- * bootfs property value has to be a dataset name and
- * the dataset has to be in the same pool as it sets to.
- */
- if (strval[0] != '\0' && (!zfs_name_valid(strval,
- ZFS_TYPE_FILESYSTEM) || !bootfs_poolname_valid(
- pool_name, strval))) {
-
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
- "is an invalid name"), strval);
- (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
- goto error;
- }
- break;
- }
-
- /*
- * For changes to existing volumes, we have some additional
- * checks to enforce.
- */
- if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
- uint64_t volsize = zfs_prop_get_int(zhp,
- ZFS_PROP_VOLSIZE);
- uint64_t blocksize = zfs_prop_get_int(zhp,
- ZFS_PROP_VOLBLOCKSIZE);
- char buf[64];
-
- switch (prop) {
- case ZFS_PROP_RESERVATION:
- if (intval > volsize) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' is greater than current "
- "volume size"), propname);
- (void) zfs_error(hdl, EZFS_BADPROP,
- errbuf);
- goto error;
- }
- break;
-
- case ZFS_PROP_VOLSIZE:
- if (intval % blocksize != 0) {
- zfs_nicenum(blocksize, buf,
- sizeof (buf));
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be a multiple of "
- "volume block size (%s)"),
- propname, buf);
- (void) zfs_error(hdl, EZFS_BADPROP,
- errbuf);
- goto error;
- }
-
- if (intval == 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' cannot be zero"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP,
- errbuf);
- goto error;
- }
- break;
- }
- }
- }
-
- /*
- * If this is an existing volume, and someone is setting the volsize,
- * make sure that it matches the reservation, or add it if necessary.
- */
- if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
- nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- &intval) == 0) {
- uint64_t old_volsize = zfs_prop_get_int(zhp,
- ZFS_PROP_VOLSIZE);
- uint64_t old_reservation = zfs_prop_get_int(zhp,
- ZFS_PROP_RESERVATION);
- uint64_t new_reservation;
-
- if (old_volsize == old_reservation &&
- nvlist_lookup_uint64(ret,
- zfs_prop_to_name(ZFS_PROP_RESERVATION),
- &new_reservation) != 0) {
- if (nvlist_add_uint64(ret,
- zfs_prop_to_name(ZFS_PROP_RESERVATION),
- intval) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
- }
- }
-
- return (ret);
-
-error:
- nvlist_free(ret);
- return (NULL);
-}
-
-/*
- * Given a property name and value, set the property for the given dataset.
- *
- * The name/value pair is placed in an nvlist and passed through
- * zfs_validate_properties(); the list that comes back is what is written
- * into the ZFS_IOC_SET_PROP command.  Datasets affected by the change are
- * collected with changelist_gather(), and changelist_prefix() /
- * changelist_postfix() are run around the ioctl.  When both the ioctl and
- * changelist_postfix() succeed, get_stats() is called so the handle reflects
- * the new value.
- *
- * Returns 0 on success and non-zero on failure.
- */
-int
-zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
-{
-	zfs_cmd_t zc = { 0 };
-	int ret = -1;
-	prop_changelist_t *cl = NULL;
-	char errbuf[1024];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	nvlist_t *nvl = NULL, *realprops;
-	zfs_prop_t prop;
-
-	(void) snprintf(errbuf, sizeof (errbuf),
-	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
-	    zhp->zfs_name);
-
-	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
-	    nvlist_add_string(nvl, propname, propval) != 0) {
-		(void) no_memory(hdl);
-		goto error;
-	}
-
-	if ((realprops = zfs_validate_properties(hdl, zhp->zfs_type, NULL, nvl,
-	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
-		goto error;
-	/* From here on only the validated list is used. */
-	nvlist_free(nvl);
-	nvl = realprops;
-
-	prop = zfs_name_to_prop(propname);
-
-	/* We don't support those properties on FreeBSD. */
-	switch (prop) {
-	case ZFS_PROP_SHAREISCSI:
-	case ZFS_PROP_DEVICES:
-	case ZFS_PROP_ACLMODE:
-	case ZFS_PROP_ACLINHERIT:
-	case ZFS_PROP_ISCSIOPTIONS:
-		(void) snprintf(errbuf, sizeof (errbuf),
-		    "property '%s' not supported on FreeBSD", propname);
-		ret = zfs_error(hdl, EZFS_PERM, errbuf);
-		goto error;
-	}
-
-	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
-		goto error;
-
-	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "child dataset with inherited mountpoint is used "
-		    "in a non-global zone"));
-		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
-		goto error;
-	}
-
-	if ((ret = changelist_prefix(cl)) != 0)
-		goto error;
-
-	/*
-	 * Execute the corresponding ioctl() to set this property.
-	 */
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
-	if (zcmd_write_src_nvlist(hdl, &zc, nvl, NULL) != 0) {
-		/*
-		 * changelist_prefix() left 'ret' at 0; reset it so this
-		 * failure is not reported to the caller as success.
-		 */
-		ret = -1;
-		goto error;
-	}
-
-	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
-
-	if (ret != 0) {
-		switch (errno) {
-
-		case ENOSPC:
-			/*
-			 * For quotas and reservations, ENOSPC indicates
-			 * something different; setting a quota or reservation
-			 * doesn't use any disk space.
-			 */
-			switch (prop) {
-			case ZFS_PROP_QUOTA:
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "size is less than current used or "
-				    "reserved space"));
-				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-				break;
-
-			case ZFS_PROP_RESERVATION:
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "size is greater than available space"));
-				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
-				break;
-
-			default:
-				(void) zfs_standard_error(hdl, errno, errbuf);
-				break;
-			}
-			break;
-
-		case EBUSY:
-			if (prop == ZFS_PROP_VOLBLOCKSIZE)
-				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
-			else
-				(void) zfs_standard_error(hdl, EBUSY, errbuf);
-			break;
-
-		case EROFS:
-			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
-			break;
-
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "pool must be upgraded to allow gzip compression"));
-			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
-			break;
-
-		case EOVERFLOW:
-			/*
-			 * This platform can't address a volume this big.
-			 */
-#ifdef _ILP32
-			if (prop == ZFS_PROP_VOLSIZE) {
-				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
-				break;
-			}
-#endif
-			/* FALLTHROUGH */
-		default:
-			(void) zfs_standard_error(hdl, errno, errbuf);
-		}
-	} else {
-		/*
-		 * Refresh the statistics so the new property value
-		 * is reflected.
-		 */
-		if ((ret = changelist_postfix(cl)) == 0)
-			(void) get_stats(zhp);
-	}
-
-error:
-	/* Common exit: release everything acquired above. */
-	nvlist_free(nvl);
-	zcmd_free_nvlists(&zc);
-	if (cl)
-		changelist_free(cl);
-	return (ret);
-}
-
-/*
- * Given a property, inherit the value from the parent dataset.
- *
- * Names that zfs_name_to_prop() does not recognize are treated as user
- * properties (if zfs_prop_user() accepts them) and handled with a single
- * ioctl.  Native properties must be writable, inheritable and valid for the
- * dataset type; the affected datasets are gathered into a changelist that is
- * prefixed before, and postfixed after, the ioctl.
- *
- * Returns 0 on success and non-zero on failure.
- */
-int
-zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
-{
-	zfs_cmd_t zc = { 0 };
-	int ret;
-	prop_changelist_t *cl;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char errbuf[1024];
-	zfs_prop_t prop;
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
-
-	if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) {
-		/*
-		 * For user properties, the amount of work we have to do is very
-		 * small, so just do it here.
-		 */
-		if (!zfs_prop_user(propname)) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "invalid property"));
-			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-		}
-
-		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
-
-		if (ioctl(zhp->zfs_hdl->libzfs_fd,
-		    ZFS_IOC_SET_PROP, &zc) != 0)
-			return (zfs_standard_error(hdl, errno, errbuf));
-
-		return (0);
-	}
-
-	/*
-	 * Verify that this property is inheritable.
-	 */
-	if (zfs_prop_readonly(prop))
-		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
-
-	if (!zfs_prop_inheritable(prop))
-		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
-
-	/*
-	 * Check to see if the value applies to this type
-	 */
-	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
-		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
-
-	/*
-	 * Normalize the name, to get rid of shorthand abbreviations.
-	 */
-	propname = zfs_prop_to_name(prop);
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
-
-	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
-	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset is used in a non-global zone"));
-		return (zfs_error(hdl, EZFS_ZONED, errbuf));
-	}
-
-	/*
-	 * Determine datasets which will be affected by this change, if any.
-	 */
-	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
-		return (-1);
-
-	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "child dataset with inherited mountpoint is used "
-		    "in a non-global zone"));
-		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
-		goto error;
-	}
-
-	if ((ret = changelist_prefix(cl)) != 0)
-		goto error;
-
-	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd,
-	    ZFS_IOC_SET_PROP, &zc)) != 0) {
-		/*
-		 * Record the error and leave through the common exit so the
-		 * changelist is released; returning directly from here
-		 * would leak it.
-		 */
-		ret = zfs_standard_error(hdl, errno, errbuf);
-		goto error;
-	}
-
-	if ((ret = changelist_postfix(cl)) != 0)
-		goto error;
-
-	/*
-	 * Refresh the statistics so the new property is reflected.
-	 */
-	(void) get_stats(zhp);
-
-error:
-	changelist_free(cl);
-	return (ret);
-}
-
-/*
- * Write the textual form of a boolean property value into 'buf': "on" for
- * any non-zero 'value', "off" for zero.  At most 'buflen' bytes are written.
- */
-void
-nicebool(int value, char *buf, size_t buflen)
-{
-	const char *text = value ? "on" : "off";
-
-	(void) strlcpy(buf, text, buflen);
-}
-
-/*
- * True DSL properties are stored in an nvlist.  getprop_uint64() extracts a
- * numeric one: when the handle's property list has an entry for 'prop', its
- * value is returned and '*source' is filled from the entry's source string
- * (left NULL if that lookup fails).  Otherwise the property's default numeric
- * value is returned and '*source' is set to the empty string.
- */
-static uint64_t
-getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
-{
-	nvlist_t *propnv;
-	uint64_t result;
-
-	*source = NULL;
-
-	/* No entry for this property: report the default. */
-	if (nvlist_lookup_nvlist(zhp->zfs_props,
-	    zfs_prop_to_name(prop), &propnv) != 0) {
-		*source = "";
-		return (zfs_prop_default_numeric(prop));
-	}
-
-	verify(nvlist_lookup_uint64(propnv, ZFS_PROP_VALUE, &result) == 0);
-	(void) nvlist_lookup_string(propnv, ZFS_PROP_SOURCE, source);
-
-	return (result);
-}
-
-/*
- * String counterpart of getprop_uint64().  Returns the stored string value of
- * 'prop' and its source, or, when the handle's property list has no entry,
- * the default string (the empty string if there is no default) with
- * '*source' set to the empty string.
- */
-static char *
-getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
-{
-	nvlist_t *propnv;
-	char *result;
-
-	*source = NULL;
-
-	if (nvlist_lookup_nvlist(zhp->zfs_props,
-	    zfs_prop_to_name(prop), &propnv) != 0) {
-		/* No entry: fall back to the default, never to NULL. */
-		result = (char *)zfs_prop_default_string(prop);
-		if (result == NULL)
-			result = "";
-		*source = "";
-		return (result);
-	}
-
-	verify(nvlist_lookup_string(propnv, ZFS_PROP_VALUE, &result) == 0);
-	(void) nvlist_lookup_string(propnv, ZFS_PROP_SOURCE, source);
-
-	return (result);
-}
-
-/*
- * Internal function for getting a numeric property. Both zfs_prop_get() and
- * zfs_prop_get_int() are built using this interface.
- *
- * Certain properties can be overridden using 'mount -o'. In this case, scan
- * the contents of the /etc/mnttab entry, searching for the appropriate options.
- * If they differ from the on-disk values, report the current values and mark
- * the source "temporary".
- *
- * On success 0 is returned and the value is stored in '*val'. '*source' is
- * reset to NULL on entry and filled in by getprop_uint64() for properties
- * read from the property list; for 'canmount', 'quota' and 'reservation' it
- * is then replaced by either the dataset name or the empty string, and it
- * stays NULL for 'mounted' and 'numclones'. '*src' is written only when a
- * mount option overrides the stored value (ZFS_SRC_TEMPORARY), and only if
- * 'src' is non-NULL.
- *
- * Returns -1 if the mount options could not be duplicated with zfs_strdup(),
- * or the result of zfs_error() for a property that is not handled here.
- */
-static int
-get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
- char **source, uint64_t *val)
-{
- struct mnttab mnt;
- char *mntopt_on = NULL;
- char *mntopt_off = NULL;
-
- *source = NULL;
-
- switch (prop) {
- case ZFS_PROP_ATIME:
- mntopt_on = MNTOPT_ATIME;
- mntopt_off = MNTOPT_NOATIME;
- break;
-
- case ZFS_PROP_DEVICES:
- mntopt_on = MNTOPT_DEVICES;
- mntopt_off = MNTOPT_NODEVICES;
- break;
-
- case ZFS_PROP_EXEC:
- mntopt_on = MNTOPT_EXEC;
- mntopt_off = MNTOPT_NOEXEC;
- break;
-
- case ZFS_PROP_READONLY:
- mntopt_on = MNTOPT_RO;
- mntopt_off = MNTOPT_RW;
- break;
-
- case ZFS_PROP_SETUID:
- mntopt_on = MNTOPT_SETUID;
- mntopt_off = MNTOPT_NOSETUID;
- break;
-
- case ZFS_PROP_XATTR:
- mntopt_on = MNTOPT_XATTR;
- mntopt_off = MNTOPT_NOXATTR;
- break;
- }
-
- /*
- * Because looking up the mount options is potentially expensive
- * (iterating over all of /etc/mnttab), we defer its calculation until
- * we're looking up a property which requires its presence.
- */
- if (!zhp->zfs_mntcheck &&
- (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
- struct mnttab entry, search = { 0 };
- FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
-
- search.mnt_special = (char *)zhp->zfs_name;
- search.mnt_fstype = MNTTYPE_ZFS;
- rewind(mnttab);
-
- if (getmntany(mnttab, &entry, &search) == 0) {
- zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
- entry.mnt_mntopts);
- if (zhp->zfs_mntopts == NULL)
- return (-1);
- }
-
- zhp->zfs_mntcheck = B_TRUE; /* skip the mnttab scan on later calls */
- }
-
- if (zhp->zfs_mntopts == NULL)
- mnt.mnt_mntopts = ""; /* no cached options: nothing overrides */
- else
- mnt.mnt_mntopts = zhp->zfs_mntopts;
-
- switch (prop) {
- case ZFS_PROP_ATIME:
- case ZFS_PROP_DEVICES:
- case ZFS_PROP_EXEC:
- case ZFS_PROP_READONLY:
- case ZFS_PROP_SETUID:
- case ZFS_PROP_XATTR:
- *val = getprop_uint64(zhp, prop, source);
-
- if (hasmntopt(&mnt, mntopt_on) && !*val) {
- *val = B_TRUE;
- if (src)
- *src = ZFS_SRC_TEMPORARY;
- } else if (hasmntopt(&mnt, mntopt_off) && *val) {
- *val = B_FALSE;
- if (src)
- *src = ZFS_SRC_TEMPORARY;
- }
- break;
-
- case ZFS_PROP_RECORDSIZE:
- case ZFS_PROP_COMPRESSION:
- case ZFS_PROP_ZONED:
- case ZFS_PROP_CREATION:
- case ZFS_PROP_COMPRESSRATIO:
- case ZFS_PROP_REFERENCED:
- case ZFS_PROP_USED:
- case ZFS_PROP_CREATETXG:
- case ZFS_PROP_AVAILABLE:
- case ZFS_PROP_VOLSIZE:
- case ZFS_PROP_VOLBLOCKSIZE:
- *val = getprop_uint64(zhp, prop, source);
- break;
-
- case ZFS_PROP_CANMOUNT:
- *val = getprop_uint64(zhp, prop, source);
- if (*val == 0)
- *source = zhp->zfs_name;
- else
- *source = ""; /* default */
- break;
-
- case ZFS_PROP_QUOTA:
- case ZFS_PROP_RESERVATION:
- *val = getprop_uint64(zhp, prop, source);
- if (*val == 0)
- *source = ""; /* default */
- else
- *source = zhp->zfs_name;
- break;
-
- case ZFS_PROP_MOUNTED:
- *val = (zhp->zfs_mntopts != NULL); /* mounted iff a mnttab entry was cached */
- break;
-
- case ZFS_PROP_NUMCLONES:
- *val = zhp->zfs_dmustats.dds_num_clones;
- break;
-
- default:
- zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
- "cannot get non-numeric property"));
- return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
- dgettext(TEXT_DOMAIN, "internal error")));
- }
-
- return (0);
-}
-
-/*
- * Translate the raw source string of a property into a source type.
- *
- * Nothing is done when the caller supplied no 'statbuf' or when '*srctype'
- * is already ZFS_SRC_TEMPORARY.  Otherwise: a NULL source means "none", an
- * empty source means "default", a source equal to this dataset's name means
- * "local", and anything else is "inherited", in which case the source name
- * is copied into 'statbuf'.
- */
-static void
-get_source(zfs_handle_t *zhp, zfs_source_t *srctype, char *source,
-    char *statbuf, size_t statlen)
-{
-	if (statbuf == NULL || *srctype == ZFS_SRC_TEMPORARY)
-		return;
-
-	if (source == NULL) {
-		*srctype = ZFS_SRC_NONE;
-		return;
-	}
-
-	if (*source == '\0') {
-		*srctype = ZFS_SRC_DEFAULT;
-		return;
-	}
-
-	if (strcmp(source, zhp->zfs_name) == 0) {
-		*srctype = ZFS_SRC_LOCAL;
-		return;
-	}
-
-	/* Set on some other dataset: hand its name back to the caller. */
-	(void) strlcpy(statbuf, source, statlen);
-	*srctype = ZFS_SRC_INHERITED;
-}
-
-/*
- * Retrieve a property from the given object.  If 'literal' is specified, then
- * numbers are left as exact values.  Otherwise, numbers are converted to a
- * human-readable form.
- *
- * The string form of the property is written to 'propbuf' (at most 'proplen'
- * bytes).  If 'src' is non-NULL it is first set to ZFS_SRC_NONE; the final
- * source type, and for inherited values the name of the source dataset in
- * 'statbuf', are filled in by get_source().
- *
- * Returns 0 on success, or -1 on error.  -1 is also returned when the
- * property does not apply to this dataset type, and for 'origin' when the
- * value is empty.
- */
-int
-zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
-    zfs_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
-{
-	char *source = NULL;
-	uint64_t val;
-	char *str;
-	const char *root;
-	const char *strval;
-
-	/*
-	 * Check to see if this property applies to our object
-	 */
-	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
-		return (-1);
-
-	if (src)
-		*src = ZFS_SRC_NONE;
-
-	switch (prop) {
-	case ZFS_PROP_ATIME:
-	case ZFS_PROP_READONLY:
-	case ZFS_PROP_SETUID:
-	case ZFS_PROP_ZONED:
-	case ZFS_PROP_DEVICES:
-	case ZFS_PROP_EXEC:
-	case ZFS_PROP_CANMOUNT:
-	case ZFS_PROP_XATTR:
-		/*
-		 * Basic boolean values are built on top of
-		 * get_numeric_property().
-		 */
-		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
-			return (-1);
-		nicebool(val, propbuf, proplen);
-
-		break;
-
-	case ZFS_PROP_AVAILABLE:
-	case ZFS_PROP_RECORDSIZE:
-	case ZFS_PROP_CREATETXG:
-	case ZFS_PROP_REFERENCED:
-	case ZFS_PROP_USED:
-	case ZFS_PROP_VOLSIZE:
-	case ZFS_PROP_VOLBLOCKSIZE:
-	case ZFS_PROP_NUMCLONES:
-		/*
-		 * Basic numeric values are built on top of
-		 * get_numeric_property().
-		 */
-		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
-			return (-1);
-		if (literal)
-			(void) snprintf(propbuf, proplen, "%llu",
-			    (u_longlong_t)val);
-		else
-			zfs_nicenum(val, propbuf, proplen);
-		break;
-
-	case ZFS_PROP_COMPRESSION:
-	case ZFS_PROP_CHECKSUM:
-	case ZFS_PROP_SNAPDIR:
-#ifdef ZFS_NO_ACL
-	case ZFS_PROP_ACLMODE:
-	case ZFS_PROP_ACLINHERIT:
-	case ZFS_PROP_COPIES:
-		val = getprop_uint64(zhp, prop, &source);
-		verify(zfs_prop_index_to_string(prop, val, &strval) == 0);
-		(void) strlcpy(propbuf, strval, proplen);
-		break;
-#else /* ZFS_NO_ACL */
-	case ZFS_PROP_ACLMODE:
-	case ZFS_PROP_ACLINHERIT:
-		(void) strlcpy(propbuf, "<unsupported>", proplen);
-		break;
-#endif /* ZFS_NO_ACL */
-
-	case ZFS_PROP_CREATION:
-		/*
-		 * 'creation' is a time_t stored in the statistics.  We convert
-		 * this into a string unless 'literal' is specified.
-		 */
-		{
-			time_t time;
-			struct tm t;
-
-			val = getprop_uint64(zhp, prop, &source);
-			time = (time_t)val;
-
-			/*
-			 * Fall back to the raw number if the time cannot be
-			 * formatted.  The cast keeps the argument type in
-			 * step with "%llu", as in the other cases above.
-			 */
-			if (literal ||
-			    localtime_r(&time, &t) == NULL ||
-			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
-			    &t) == 0)
-				(void) snprintf(propbuf, proplen, "%llu",
-				    (u_longlong_t)val);
-		}
-		break;
-
-	case ZFS_PROP_MOUNTPOINT:
-		/*
-		 * Getting the precise mountpoint can be tricky.
-		 *
-		 *  - for 'none' or 'legacy', return those values.
-		 *  - for default mountpoints, construct it as /zfs/<dataset>
-		 *  - for inherited mountpoints, we want to take everything
-		 *    after our ancestor and append it to the inherited value.
-		 *
-		 * If the pool has an alternate root, we want to prepend that
-		 * root to any values we return.
-		 */
-		root = zhp->zfs_root;
-		str = getprop_string(zhp, prop, &source);
-
-		if (str[0] == '\0') {
-			(void) snprintf(propbuf, proplen, "%s/zfs/%s",
-			    root, zhp->zfs_name);
-		} else if (str[0] == '/') {
-			/* Part of our name below the dataset that set it. */
-			const char *relpath = zhp->zfs_name + strlen(source);
-
-			if (relpath[0] == '/')
-				relpath++;
-			/* A bare "/" contributes nothing to the path. */
-			if (str[1] == '\0')
-				str++;
-
-			if (relpath[0] == '\0')
-				(void) snprintf(propbuf, proplen, "%s%s",
-				    root, str);
-			else
-				(void) snprintf(propbuf, proplen, "%s%s%s%s",
-				    root, str, relpath[0] == '@' ? "" : "/",
-				    relpath);
-		} else {
-			/* 'legacy' or 'none' */
-			(void) strlcpy(propbuf, str, proplen);
-		}
-
-		break;
-
-	case ZFS_PROP_SHARENFS:
-	case ZFS_PROP_SHAREISCSI:
-	case ZFS_PROP_ISCSIOPTIONS:
-		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
-		    proplen);
-		break;
-
-	case ZFS_PROP_ORIGIN:
-		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
-		    proplen);
-		/*
-		 * If there is no parent at all, return failure to indicate that
-		 * it doesn't apply to this dataset.
-		 */
-		if (propbuf[0] == '\0')
-			return (-1);
-		break;
-
-	case ZFS_PROP_QUOTA:
-	case ZFS_PROP_RESERVATION:
-		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
-			return (-1);
-
-		/*
-		 * If quota or reservation is 0, we translate this into 'none'
-		 * (unless literal is set), and indicate that it's the default
-		 * value.  Otherwise, we print the number nicely and indicate
-		 * that it's set locally.
-		 */
-		if (val == 0) {
-			if (literal)
-				(void) strlcpy(propbuf, "0", proplen);
-			else
-				(void) strlcpy(propbuf, "none", proplen);
-		} else {
-			if (literal)
-				(void) snprintf(propbuf, proplen, "%llu",
-				    (u_longlong_t)val);
-			else
-				zfs_nicenum(val, propbuf, proplen);
-		}
-		break;
-
-	case ZFS_PROP_COMPRESSRATIO:
-		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
-			return (-1);
-		/* The ratio is stored in hundredths; print it as "N.NNx". */
-		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
-		    val / 100, (longlong_t)val % 100);
-		break;
-
-	case ZFS_PROP_TYPE:
-		switch (zhp->zfs_type) {
-		case ZFS_TYPE_FILESYSTEM:
-			str = "filesystem";
-			break;
-		case ZFS_TYPE_VOLUME:
-			str = "volume";
-			break;
-		case ZFS_TYPE_SNAPSHOT:
-			str = "snapshot";
-			break;
-		default:
-			abort();
-		}
-		(void) snprintf(propbuf, proplen, "%s", str);
-		break;
-
-	case ZFS_PROP_MOUNTED:
-		/*
-		 * The 'mounted' property is a pseudo-property that describes
-		 * whether the filesystem is currently mounted.  Even though
-		 * it's a boolean value, the typical values of "on" and "off"
-		 * don't make sense, so we translate to "yes" and "no".
-		 */
-		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
-		    src, &source, &val) != 0)
-			return (-1);
-		if (val)
-			(void) strlcpy(propbuf, "yes", proplen);
-		else
-			(void) strlcpy(propbuf, "no", proplen);
-		break;
-
-	case ZFS_PROP_NAME:
-		/*
-		 * The 'name' property is a pseudo-property derived from the
-		 * dataset name.  It is presented as a real property to simplify
-		 * consumers.
-		 */
-		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
-		break;
-
-	default:
-		abort();
-	}
-
-	get_source(zhp, src, source, statbuf, statlen);
-
-	return (0);
-}
-
-/*
- * Utility function to get the given numeric property.  Does no validation that
- * the given property is the appropriate type; should only be used with
- * hard-coded property types.
- *
- * The status of get_numeric_property() is deliberately ignored.  If that call
- * fails without storing a value, 0 is returned instead of an indeterminate
- * number.
- */
-uint64_t
-zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
-{
-	char *source = NULL;
-	zfs_source_t sourcetype = ZFS_SRC_NONE;
-	uint64_t val = 0;	/* defined result even on lookup failure */
-
-	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
-
-	return (val);
-}
-
-/*
- * Numeric counterpart of zfs_prop_get(): the value of 'prop' is stored in
- * '*value' as an integer.  The source type and, for inherited values, the
- * source dataset name are reported through 'src' and 'statbuf' by
- * get_source().
- *
- * Returns 0 on success, the result of zfs_error_fmt() if the property does
- * not apply to this dataset type, or -1 if the value cannot be obtained.
- */
-int
-zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
-    zfs_source_t *src, char *statbuf, size_t statlen)
-{
-	char *rawsrc;
-
-	/* Reject properties that do not apply to this kind of dataset. */
-	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
-		return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
-		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
-		    zfs_prop_to_name(prop)));
-	}
-
-	if (src != NULL)
-		*src = ZFS_SRC_NONE;
-
-	if (get_numeric_property(zhp, prop, src, &rawsrc, value) != 0)
-		return (-1);
-
-	get_source(zhp, src, rawsrc, statbuf, statlen);
-	return (0);
-}
-
-/*
- * Accessor: the dataset name stored in the given zfs handle.
- */
-const char *
-zfs_get_name(const zfs_handle_t *zhp)
-{
-	const char *name = zhp->zfs_name;
-
-	return (name);
-}
-
-/*
- * Accessor: the dataset type stored in the given zfs handle.
- */
-zfs_type_t
-zfs_get_type(const zfs_handle_t *zhp)
-{
-	zfs_type_t type = zhp->zfs_type;
-
-	return (type);
-}
-
-/*
- * Call 'func' on a freshly opened handle for every child filesystem of 'zhp'.
- *
- * The walk stops at the first non-zero value returned by 'func', which is
- * passed back to the caller.  Returns 0 when the listing ends normally.
- */
-int
-zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
-{
-	zfs_cmd_t zc = { 0 };
-
-	for (;;) {
-		zfs_handle_t *child;
-		int rv;
-
-		/* The name is reset to the parent before each request. */
-		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-		if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT,
-		    &zc) != 0)
-			break;
-
-		/*
-		 * Ignore private dataset names.
-		 */
-		if (dataset_name_hidden(zc.zc_name))
-			continue;
-
-		/*
-		 * Silently ignore errors, as the only plausible explanation is
-		 * that the pool has since been removed.
-		 */
-		child = make_dataset_handle(zhp->zfs_hdl, zc.zc_name);
-		if (child == NULL)
-			continue;
-
-		rv = func(child, data);
-		if (rv != 0)
-			return (rv);
-	}
-
-	/*
-	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
-	 * returned, then the underlying dataset has been removed since we
-	 * obtained the handle.
-	 */
-	if (errno == ESRCH || errno == ENOENT)
-		return (0);
-
-	return (zfs_standard_error(zhp->zfs_hdl, errno,
-	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
-}
-
-/*
- * Call 'func' on a freshly opened handle for every snapshot of 'zhp'.
- *
- * The walk stops at the first non-zero value returned by 'func', which is
- * passed back to the caller.  Returns 0 when the listing ends normally.
- */
-int
-zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
-{
-	zfs_cmd_t zc = { 0 };
-
-	for (;;) {
-		zfs_handle_t *snap;
-		int rv;
-
-		/* The name is reset to the parent before each request. */
-		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-		if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
-		    &zc) != 0)
-			break;
-
-		/* Snapshots whose handle cannot be opened are skipped. */
-		snap = make_dataset_handle(zhp->zfs_hdl, zc.zc_name);
-		if (snap == NULL)
-			continue;
-
-		rv = func(snap, data);
-		if (rv != 0)
-			return (rv);
-	}
-
-	/*
-	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
-	 * returned, then the underlying dataset has been removed since we
-	 * obtained the handle.  Silently ignore this case, and return success.
-	 */
-	if (errno == ESRCH || errno == ENOENT)
-		return (0);
-
-	return (zfs_standard_error(zhp->zfs_hdl, errno,
-	    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
-}
-
-/*
- * Visit every child of 'zhp': first the filesystems, then the snapshots.
- * The snapshot pass is skipped if the filesystem pass returns non-zero, and
- * that value is returned instead.
- */
-int
-zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
-{
-	int ret = zfs_iter_filesystems(zhp, func, data);
-
-	if (ret == 0)
-		ret = zfs_iter_snapshots(zhp, func, data);
-
-	return (ret);
-}
-
-/*
- * Given a complete dataset name, copy the portion that names its parent
- * (everything before the last '/') into 'buf' as a NUL-terminated string.
- *
- * Returns 0 on success.  Returns -1 if 'path' contains no '/' (it names a
- * pool and therefore has no parent), or if the parent name together with its
- * terminating NUL does not fit in 'buflen' bytes; in the latter case 'buf' is
- * set to the empty string whenever 'buflen' is non-zero.
- */
-static int
-parent_name(const char *path, char *buf, size_t buflen)
-{
-	const char *loc;
-	size_t len;
-
-	if ((loc = strrchr(path, '/')) == NULL)
-		return (-1);
-
-	len = (size_t)(loc - path);
-	if (len >= buflen) {
-		/*
-		 * Writing the terminator at buf[len] would land outside the
-		 * buffer, so refuse instead of truncating silently.
-		 */
-		if (buflen != 0)
-			buf[0] = '\0';
-		return (-1);
-	}
-
-	(void) memcpy(buf, path, len);
-	buf[len] = '\0';
-
-	return (0);
-}
-
-/*
- * Verify that 'path' has a parent dataset and that the parent can hold a new
- * child: the pool must exist, the parent must exist and be a filesystem, and
- * a caller outside the global zone may only use a zoned parent.  The parent's
- * 'zoned' property is stored in '*zoned' for later property validation.
- *
- * Returns 0 if all checks pass and non-zero otherwise.
- */
-static int
-check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned)
-{
-	zfs_cmd_t zc = { 0 };
-	char parent[ZFS_MAXNAMELEN];
-	char errbuf[1024];
-	const char *sep;
-	size_t poollen;
-	zfs_handle_t *phdl;
-	int rv = 0;
-
-	(void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
-	    path);
-
-	/* get parent, and check to see if this is just a pool */
-	if (parent_name(path, parent, sizeof (parent)) != 0) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "missing dataset name"));
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-	}
-
-	/* the pool name is the parent up to its first '/', if it has one */
-	sep = strchr(parent, '/');
-	poollen = (sep != NULL) ? (size_t)(sep - parent) : strlen(parent);
-	(void) strncpy(zc.zc_name, parent, poollen);
-	zc.zc_name[poollen] = '\0';
-
-	/* check to see if the pool exists */
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
-	    errno == ENOENT) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "no such pool '%s'"), zc.zc_name);
-		return (zfs_error(hdl, EZFS_NOENT, errbuf));
-	}
-
-	/* check to see if the parent dataset exists */
-	phdl = make_dataset_handle(hdl, parent);
-	if (phdl == NULL) {
-		if (errno == ENOENT) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "parent does not exist"));
-			return (zfs_error(hdl, EZFS_NOENT, errbuf));
-		}
-		return (zfs_standard_error(hdl, errno, errbuf));
-	}
-
-	*zoned = zfs_prop_get_int(phdl, ZFS_PROP_ZONED);
-
-	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
-		/* we are in a non-global zone, but parent is in the global zone */
-		(void) zfs_standard_error(hdl, EPERM, errbuf);
-		rv = -1;
-	} else if (zfs_get_type(phdl) != ZFS_TYPE_FILESYSTEM) {
-		/* make sure parent is a filesystem */
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "parent is not a filesystem"));
-		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
-		rv = -1;
-	}
-
-	zfs_close(phdl);
-	return (rv);
-}
-
-/*
- * Create a new filesystem or volume.
- *
- * 'path' is the full name of the new dataset and 'type' selects between a
- * filesystem and a volume.  'props' may be NULL; if given, it is run through
- * zfs_validate_properties() and the validated copy is sent with the create
- * request.  For volumes a 'volsize' property is required: it must be non-zero
- * and a multiple of the volume block size, and device links are created once
- * the dataset exists.
- *
- * Returns 0 on success and non-zero on failure.
- */
-int
-zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
-    nvlist_t *props)
-{
-	zfs_cmd_t zc = { 0 };
-	int ret;
-	uint64_t size = 0;
-	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
-	char errbuf[1024];
-	uint64_t zoned;
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot create '%s'"), path);
-
-	/* validate the path, taking care to note the extended error message */
-	if (!zfs_validate_name(hdl, path, type))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-	/* validate parents exist */
-	if (check_parents(hdl, path, &zoned) != 0)
-		return (-1);
-
-	/*
-	 * The failure modes when creating a dataset of a different type over
-	 * one that already exists is a little strange.  In particular, if you
-	 * try to create a dataset on top of an existing dataset, the ioctl()
-	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
-	 * first try to see if the dataset exists.
-	 */
-	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset already exists"));
-		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-	}
-
-	if (type == ZFS_TYPE_VOLUME)
-		zc.zc_objset_type = DMU_OST_ZVOL;
-	else
-		zc.zc_objset_type = DMU_OST_ZFS;
-
-	/*
-	 * From here on 'props' is the validated list returned by
-	 * zfs_validate_properties(), which this function frees itself.
-	 */
-	if (props && (props = zfs_validate_properties(hdl, type, NULL, props,
-	    zoned, NULL, errbuf)) == 0)
-		return (-1);
-
-	if (type == ZFS_TYPE_VOLUME) {
-		/*
-		 * If we are creating a volume, the size and block size must
-		 * satisfy a few restraints.  First, the blocksize must be a
-		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
-		 * volsize must be a multiple of the block size, and cannot be
-		 * zero.
-		 */
-		if (props == NULL || nvlist_lookup_uint64(props,
-		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
-			nvlist_free(props);
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "missing volume size"));
-			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-		}
-
-		if ((ret = nvlist_lookup_uint64(props,
-		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
-		    &blocksize)) != 0) {
-			if (ret == ENOENT) {
-				blocksize = zfs_prop_default_numeric(
-				    ZFS_PROP_VOLBLOCKSIZE);
-			} else {
-				nvlist_free(props);
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "missing volume block size"));
-				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-			}
-		}
-
-		if (size == 0) {
-			nvlist_free(props);
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "volume size cannot be zero"));
-			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-		}
-
-		if (size % blocksize != 0) {
-			nvlist_free(props);
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "volume size must be a multiple of volume block "
-			    "size"));
-			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-		}
-	}
-
-	if (props != NULL) {
-		/*
-		 * The validated list is no longer needed once it has been
-		 * written into the command, so free it on both the success
-		 * and the failure path (the failure path used to leak it).
-		 */
-		ret = zcmd_write_src_nvlist(hdl, &zc, props, NULL);
-		nvlist_free(props);
-		if (ret != 0)
-			return (-1);
-	}
-
-	/* create the dataset */
-	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
-
-	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(hdl, path);
-		if (ret) {
-			(void) zfs_standard_error(hdl, errno,
-			    dgettext(TEXT_DOMAIN,
-			    "Volume successfully created, but device links "
-			    "were not created"));
-			zcmd_free_nvlists(&zc);
-			return (-1);
-		}
-	}
-
-	zcmd_free_nvlists(&zc);
-
-	/* check for failure */
-	if (ret != 0) {
-		char parent[ZFS_MAXNAMELEN];
-		(void) parent_name(path, parent, sizeof (parent));
-
-		switch (errno) {
-		case ENOENT:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "no such parent '%s'"), parent);
-			return (zfs_error(hdl, EZFS_NOENT, errbuf));
-
-		case EINVAL:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "parent '%s' is not a filesystem"), parent);
-			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
-
-		case EDOM:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "volume block size must be power of 2 from "
-			    "%u to %uk"),
-			    (uint_t)SPA_MINBLOCKSIZE,
-			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
-
-			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
-
-#ifdef _ILP32
-		case EOVERFLOW:
-			/*
-			 * This platform can't address a volume this big.
-			 */
-			if (type == ZFS_TYPE_VOLUME)
-				return (zfs_error(hdl, EZFS_VOLTOOBIG,
-				    errbuf));
-#endif
-			/* FALLTHROUGH */
-		default:
-			return (zfs_standard_error(hdl, errno, errbuf));
-		}
-	}
-
-	return (0);
-}
-
-/*
- * Destroy the dataset behind 'zhp'.  The caller is responsible for making
- * sure the filesystem is not mounted and has no active dependents.
- *
- * For volumes the iSCSI share and the device link are removed first.
- * Returns 0 on success and non-zero on failure.
- */
-int
-zfs_destroy(zfs_handle_t *zhp)
-{
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	zfs_cmd_t zc = { 0 };
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
-	if (!ZFS_IS_VOLUME(zhp)) {
-		zc.zc_objset_type = DMU_OST_ZFS;
-	} else {
-		/*
-		 * Unconditionally unshare this zvol ignoring failure as it
-		 * indicates only that the volume wasn't shared initially.
-		 */
-		(void) zfs_unshare_iscsi(zhp);
-
-		if (zvol_remove_link(hdl, zhp->zfs_name) != 0)
-			return (-1);
-
-		zc.zc_objset_type = DMU_OST_ZVOL;
-	}
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
-		remove_mountpoint(zhp);
-		return (0);
-	}
-
-	return (zfs_standard_error_fmt(hdl, errno,
-	    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
-	    zhp->zfs_name));
-}
-
-struct destroydata { /* state passed to zfs_remove_link_cb() by zfs_destroy_snaps() */
- char *snapname; /* snapshot name, appended to each dataset name after '@' */
- boolean_t gotone; /* set to B_TRUE once a matching snapshot handle was opened */
- boolean_t closezhp; /* B_TRUE when the callback must close the handle it is given */
-};
-
-/*
- * Callback for zfs_destroy_snaps(): for 'zhp' and, recursively, each of its
- * child filesystems, note whether a snapshot named dd->snapname exists and
- * remove the device link of that snapshot if the dataset is a volume.
- *
- * The handle is closed on the way out only if dd->closezhp was set when the
- * call started; the flag is then set so that every handle opened by the
- * recursion is closed.  Returns the result of zfs_iter_filesystems().
- */
-static int
-zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
-{
-	struct destroydata *dd = arg;
-	boolean_t must_close = dd->closezhp;
-	char snapfull[ZFS_MAXNAMELEN];
-	zfs_handle_t *snap;
-	int rv;
-
-	/* Build "<dataset>@<snapname>", truncated to the buffer size. */
-	(void) snprintf(snapfull, sizeof (snapfull), "%s@%s",
-	    zhp->zfs_name, dd->snapname);
-
-	snap = make_dataset_handle(zhp->zfs_hdl, snapfull);
-	if (snap != NULL) {
-		dd->gotone = B_TRUE;
-		zfs_close(snap);
-	}
-
-	/*
-	 * NB: this is simply a best-effort.  We don't want to
-	 * return an error, because then we wouldn't visit all
-	 * the volumes.
-	 */
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME)
-		(void) zvol_remove_link(zhp->zfs_hdl, snapfull);
-
-	dd->closezhp = B_TRUE;
-	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
-
-	if (must_close)
-		zfs_close(zhp);
-
-	return (rv);
-}
-
-/*
- * Destroy every snapshot called 'snapname' in 'zhp' and its descendants.
- *
- * The datasets are first walked with zfs_remove_link_cb(); if no snapshot of
- * that name was found anywhere, an ENOENT error is reported.  Returns 0 on
- * success and non-zero on failure.
- */
-int
-zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
-{
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	struct destroydata dd = { 0 };
-	zfs_cmd_t zc = { 0 };
-	char errbuf[1024];
-
-	dd.snapname = snapname;
-	(void) zfs_remove_link_cb(zhp, &dd);
-
-	if (!dd.gotone) {
-		return (zfs_standard_error_fmt(hdl, ENOENT,
-		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
-		    zhp->zfs_name, snapname));
-	}
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY_SNAPS, &zc) == 0)
-		return (0);
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
-
-	/* EEXIST gets a dedicated message; everything else is generic. */
-	if (errno == EEXIST) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "snapshot is cloned"));
-		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-	}
-
-	return (zfs_standard_error(hdl, errno, errbuf));
-}
-
-/*
- * Clones the given dataset.  The target must be of the same type as the
- * source.
- *
- * 'zhp' must be a snapshot handle.  'props', if non-NULL, is validated and
- * passed along with the create ioctl.  Returns 0 on success and non-zero
- * after reporting an error through the library handle.
- */
-int
-zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
-{
-	zfs_cmd_t zc = { 0 };
-	char parent[ZFS_MAXNAMELEN];
-	int ret;
-	int save_errno;
-	char errbuf[1024];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	zfs_type_t type;
-	uint64_t zoned;
-
-	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot create '%s'"), target);
-
-	/* validate the target name */
-	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-	/* validate parents exist */
-	if (check_parents(hdl, target, &zoned) != 0)
-		return (-1);
-
-	(void) parent_name(target, parent, sizeof (parent));
-
-	/* do the clone */
-	if (ZFS_IS_VOLUME(zhp)) {
-		zc.zc_objset_type = DMU_OST_ZVOL;
-		type = ZFS_TYPE_VOLUME;
-	} else {
-		zc.zc_objset_type = DMU_OST_ZFS;
-		type = ZFS_TYPE_FILESYSTEM;
-	}
-
-	if (props) {
-		if ((props = zfs_validate_properties(hdl, type, NULL, props,
-		    zoned, zhp, errbuf)) == NULL)
-			return (-1);
-
-		if (zcmd_write_src_nvlist(hdl, &zc, props, NULL) != 0) {
-			nvlist_free(props);
-			return (-1);
-		}
-
-		nvlist_free(props);
-	}
-
-	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
-	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
-
-	/*
-	 * Capture errno now: the cleanup below may overwrite it before the
-	 * failure is classified.
-	 */
-	save_errno = errno;
-
-	zcmd_free_nvlists(&zc);
-
-	if (ret != 0) {
-		switch (save_errno) {
-
-		case ENOENT:
-			/*
-			 * The parent doesn't exist.  We should have caught this
-			 * above, but there may be a race condition that has
-			 * since destroyed the parent.
-			 *
-			 * At this point, we don't know whether it's the source
-			 * that doesn't exist anymore, or whether the target
-			 * dataset doesn't exist.
-			 */
-			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
-			    "no such parent '%s'"), parent);
-			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
-
-		case EXDEV:
-			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
-			    "source and target pools differ"));
-			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
-			    errbuf));
-
-		default:
-			return (zfs_standard_error(zhp->zfs_hdl, save_errno,
-			    errbuf));
-		}
-	} else if (ZFS_IS_VOLUME(zhp)) {
-		ret = zvol_create_link(zhp->zfs_hdl, target);
-	}
-
-	return (ret);
-}
-
-/*
- * Callback state for the snapshot walks performed by zfs_promote().
- */
-typedef struct promote_data {
- char cb_mountpoint[MAXPATHLEN]; /* filled from the origin's mountpoint property */
- const char *cb_target; /* name of the dataset being promoted */
- const char *cb_errbuf; /* message passed to zfs_error() on a name conflict */
- uint64_t cb_pivot_txg; /* creation txg of the origin snapshot */
-} promote_data_t;
-
-/*
- * Snapshot-walk callback run before a promote.  Snapshots created after the
- * pivot txg are skipped.  For the rest, a volume snapshot loses its device
- * link, and an error is reported if a snapshot of the same name already
- * exists under the promote target.  The handle is always closed.
- */
-static int
-promote_snap_cb(zfs_handle_t *zhp, void *data)
-{
-	promote_data_t *pd = data;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char conflict[MAXPATHLEN];
-	zfs_handle_t *existing;
-	int rv = 0;
-
-	/* Only snapshots at or before the pivot point are of interest. */
-	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
-		/* Remove the device link if it's a zvol. */
-		if (ZFS_IS_VOLUME(zhp))
-			(void) zvol_remove_link(hdl, zhp->zfs_name);
-
-		/* Build "<target>@<snap>" and see whether it already exists. */
-		(void) strlcpy(conflict, pd->cb_target, sizeof (conflict));
-		(void) strlcat(conflict, strchr(zhp->zfs_name, '@'),
-		    sizeof (conflict));
-
-		existing = make_dataset_handle(hdl, conflict);
-		if (existing != NULL) {
-			zfs_close(existing);
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "snapshot name '%s' from origin \n"
-			    "conflicts with '%s' from target"),
-			    zhp->zfs_name, conflict);
-			rv = zfs_error(hdl, EZFS_EXISTS, pd->cb_errbuf);
-		}
-	}
-
-	zfs_close(zhp);
-	return (rv);
-}
-
-/*
- * Snapshot-walk callback run after a promote attempt: recreate the device
- * link of every volume snapshot at or before the pivot txg, then close the
- * handle.  Always returns 0.
- */
-static int
-promote_snap_done_cb(zfs_handle_t *zhp, void *data)
-{
-	promote_data_t *pd = data;
-
-	/* Snapshots after the pivot point are left alone. */
-	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg &&
-	    ZFS_IS_VOLUME(zhp)) {
-		/* Create the device link if it's a zvol. */
-		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
-	}
-
-	zfs_close(zhp);
-	return (0);
-}
-
-/*
- * Promotes the given clone fs to be the clone parent.
- *
- * Snapshot handles are rejected.  The origin snapshot name is taken from
- * zhp->zfs_dmustats.dds_clone_of; its snapshots up to the origin's creation
- * txg are checked for name conflicts before the promote ioctl is issued.
- * Returns 0 on success and non-zero after an error has been reported.
- */
-int
-zfs_promote(zfs_handle_t *zhp)
-{
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- zfs_cmd_t zc = { 0 };
- char parent[MAXPATHLEN];
- char *cp;
- int ret;
- zfs_handle_t *pzhp;
- promote_data_t pd;
- char errbuf[1024];
-
- (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
- "cannot promote '%s'"), zhp->zfs_name);
-
- if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "snapshots can not be promoted"));
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
- }
-
- (void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
- if (parent[0] == '\0') {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "not a cloned filesystem"));
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
- }
- /*
-  * Strip the "@snapshot" suffix so 'parent' names the origin dataset.
-  * NOTE(review): assumes dds_clone_of always contains '@'; a NULL result
-  * from strchr() would be dereferenced here -- confirm.
-  */
- cp = strchr(parent, '@');
- *cp = '\0';
-
- /* Walk the snapshots we will be moving */
- pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
- if (pzhp == NULL)
- return (-1);
- /* The origin snapshot's creation txg is the pivot for both walks. */
- pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
- zfs_close(pzhp);
- pd.cb_target = zhp->zfs_name;
- pd.cb_errbuf = errbuf;
- /* From here on pzhp refers to the origin dataset, not the snapshot. */
- pzhp = zfs_open(hdl, parent, ZFS_TYPE_ANY);
- if (pzhp == NULL)
- return (-1);
- (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
- sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
- ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
- if (ret != 0) {
- zfs_close(pzhp);
- return (-1);
- }
-
- /* issue the ioctl */
- (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of,
- sizeof (zc.zc_value));
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- ret = ioctl(hdl->libzfs_fd, ZFS_IOC_PROMOTE, &zc);
-
- if (ret != 0) {
- /* Preserve errno across the link-restoring walk below. */
- int save_errno = errno;
-
- /* The promote failed: restore links under the origin dataset. */
- (void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
- zfs_close(pzhp);
-
- switch (save_errno) {
- case EEXIST:
- /*
- * There is a conflicting snapshot name. We
- * should have caught this above, but they could
- * have renamed something in the mean time.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "conflicting snapshot name from parent '%s'"),
- parent);
- return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-
- default:
- return (zfs_standard_error(hdl, save_errno, errbuf));
- }
- } else {
- /* The promote succeeded: walk the promoted dataset's snapshots. */
- (void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
- }
-
- zfs_close(pzhp);
- return (ret);
-}
-
-/*
- * State threaded through the zfs_create_link_cb() walk.
- */
-struct createdata {
- const char *cd_snapname; /* snapshot component appended as "<volume>@<snapname>" */
- int cd_ifexists; /* forwarded as 'ifexists' to zvol_create_link_common() */
-};
-
-/*
- * zfs_iter_filesystems() callback: create the device link for
- * "<name>@<snapname>" when 'zhp' is a volume, recurse into its child
- * filesystems, then close 'zhp'.
- */
-static int
-zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
-{
-	struct createdata *state = arg;
-	int err;
-
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		char linkname[MAXPATHLEN];
-
-		(void) strlcpy(linkname, zhp->zfs_name, sizeof (linkname));
-		(void) strlcat(linkname, "@", sizeof (linkname));
-		(void) strlcat(linkname, state->cd_snapname,
-		    sizeof (linkname));
-
-		/*
-		 * Best effort only: returning an error from here would
-		 * prevent the remaining volumes from being visited.
-		 */
-		(void) zvol_create_link_common(zhp->zfs_hdl, linkname,
-		    state->cd_ifexists);
-	}
-
-	err = zfs_iter_filesystems(zhp, zfs_create_link_cb, state);
-	zfs_close(zhp);
-
-	return (err);
-}
-
-/*
- * Takes a snapshot of the given dataset.
- *
- * 'path' is the full "<dataset>@<snapshot>" name.  When 'recursive' is set
- * the ioctl is asked to snapshot descendants as well and device links are
- * created for the resulting volume snapshots.  Returns 0 on success and
- * non-zero after an error has been reported.
- */
-int
-zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive)
-{
-	const char *delim;
-	char *parent;
-	zfs_handle_t *zhp;
-	zfs_cmd_t zc = { 0 };
-	int ret;
-	int save_errno = 0;
-	char errbuf[1024];
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot snapshot '%s'"), path);
-
-	/* validate the target name */
-	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-	/* make sure the parent exists and is of the appropriate type */
-	delim = strchr(path, '@');
-	if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
-		return (-1);
-	(void) strncpy(parent, path, delim - path);
-	parent[delim - path] = '\0';
-
-	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
-	    ZFS_TYPE_VOLUME)) == NULL) {
-		free(parent);
-		return (-1);
-	}
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
-	zc.zc_cookie = recursive;
-	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT, &zc);
-
-	/*
-	 * Remember why the ioctl failed before any later call has a chance
-	 * to overwrite errno.
-	 */
-	if (ret != 0)
-		save_errno = errno;
-
-	/*
-	 * if it was recursive, the one that actually failed will be in
-	 * zc.zc_name.
-	 */
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-	if (ret == 0 && recursive) {
-		struct createdata cd;
-
-		cd.cd_snapname = delim + 1;
-		cd.cd_ifexists = B_FALSE;
-		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
-	}
-	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(zhp->zfs_hdl, path);
-		if (ret != 0) {
-			/* Keep the link failure, not the cleanup's errno. */
-			save_errno = errno;
-			(void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY,
-			    &zc);
-		}
-	}
-
-	if (ret != 0)
-		(void) zfs_standard_error(hdl, save_errno, errbuf);
-
-	free(parent);
-	zfs_close(zhp);
-
-	return (ret);
-}
-
-/*
- * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
- * NULL) to the file descriptor specified by outfd.
- *
- * 'zhp' must be a snapshot handle.  Returns 0 on success and non-zero after
- * an error has been reported.
- */
-int
-zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd)
-{
-	zfs_cmd_t zc = { 0 };
-	char errbuf[1024];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	/* Bound the copy by the destination field, zc_value. */
-	if (fromsnap)
-		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
-	zc.zc_cookie = outfd;
-
-	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) {
-		/* Save errno before formatting the message can disturb it. */
-		int save_errno = errno;
-
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot send '%s'"), zhp->zfs_name);
-
-		switch (save_errno) {
-
-		case EXDEV:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "not an earlier snapshot from the same fs"));
-			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
-
-		case EDQUOT:
-		case EFBIG:
-		case EIO:
-		case ENOLINK:
-		case ENOSPC:
-		case ENXIO:
-		case EPIPE:
-		case ERANGE:
-		case EFAULT:
-		case EROFS:
-			zfs_error_aux(hdl, strerror(save_errno));
-			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
-
-		default:
-			return (zfs_standard_error(hdl, save_errno, errbuf));
-		}
-	}
-
-	return (0);
-}
-
-/*
- * Create ancestors of 'target', but not target itself, and not
- * ancestors whose names are shorter than prefixlen.  Die if
- * prefixlen-ancestor does not exist.
- *
- * 'target' is modified in place while components are examined.  On failure
- * it is left truncated at the ancestor that could not be processed.
- * Returns 0 on success, -1 on failure.
- *
- * NOTE(review): the first strchr() result is dereferenced unchecked, so this
- * assumes a '/' follows the prefix -- confirm that every caller guarantees it.
- */
-static int
-create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
-{
-	zfs_handle_t *h;
-	char *cp;
-
-	/* make sure prefix exists */
-	cp = strchr(target + prefixlen, '/');
-	*cp = '\0';
-	h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
-	*cp = '/';
-	if (h == NULL)
-		return (-1);
-	zfs_close(h);
-
-	/*
-	 * Attempt to create, mount, and share any ancestor filesystems,
-	 * up to the prefixlen-long one.
-	 */
-	for (cp = target + prefixlen + 1;
-	    (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) {
-		const char *opname;
-
-		*cp = '\0';
-
-		h = make_dataset_handle(hdl, target);
-		if (h) {
-			/* it already exists, nothing to do here */
-			zfs_close(h);
-			continue;
-		}
-
-		opname = dgettext(TEXT_DOMAIN, "create");
-		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
-		    NULL) != 0)
-			goto ancestorerr;
-
-		opname = dgettext(TEXT_DOMAIN, "open");
-		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
-		if (h == NULL)
-			goto ancestorerr;
-
-		opname = dgettext(TEXT_DOMAIN, "mount");
-		if (zfs_mount(h, NULL, 0) != 0)
-			goto ancestorerr;
-
-		opname = dgettext(TEXT_DOMAIN, "share");
-		if (zfs_share(h) != 0)
-			goto ancestorerr;
-
-		zfs_close(h);
-
-		continue;
-ancestorerr:
-		/*
-		 * h is NULL when create or open failed, and a live handle
-		 * when mount or share failed; release it in the latter case.
-		 */
-		if (h != NULL)
-			zfs_close(h);
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "failed to %s ancestor '%s'"), opname, target);
-		return (-1);
-	}
-
-	return (0);
-}
-
-/*
- * Restores a backup of tosnap from the file descriptor specified by infd.
- *
- * The BEGIN record is read from 'infd' and validated, the destination
- * snapshot name is derived from 'tosnap' and the name stored in the stream
- * ('isprefix' selects how much of the stored name is kept), and the receive
- * ioctl is issued unless 'dryrun' is set.  'force' is passed to the ioctl in
- * zc_guid.  With 'verbose', progress is printed to stdout.
- *
- * Returns 0 on success and non-zero after an error has been reported.
- */
-int
-zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
-    int verbose, int dryrun, boolean_t force, int infd)
-{
-	zfs_cmd_t zc = { 0 };
-	time_t begin_time;
-	int ioctl_err, err, bytes, size, choplen;
-	char *cp;
-	dmu_replay_record_t drr;
-	struct drr_begin *drrb = &zc.zc_begin_record;
-	char errbuf[1024];
-	prop_changelist_t *clp = NULL;
-	char chopprefix[ZFS_MAXNAMELEN];
-
-	begin_time = time(NULL);
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot receive"));
-
-	/* read in the BEGIN record */
-	cp = (char *)&drr;
-	bytes = 0;
-	do {
-		size = read(infd, cp, sizeof (drr) - bytes);
-		cp += size;
-		bytes += size;
-	} while (size > 0);
-
-	if (size < 0 || bytes != sizeof (drr)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (failed to read first record)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	zc.zc_begin_record = drr.drr_u.drr_begin;
-
-	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
-	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (bad magic number)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	if (drrb->drr_version != DMU_BACKUP_VERSION &&
-	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
-		    "0x%llx is supported (stream is version 0x%llx)"),
-		    DMU_BACKUP_VERSION, drrb->drr_version);
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (bad snapshot name)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-	/*
-	 * Determine how much of the snapshot name stored in the stream
-	 * we are going to tack on to the name they specified on the
-	 * command line, and how much we are going to chop off.
-	 *
-	 * If they specified a snapshot, chop the entire name stored in
-	 * the stream.
-	 */
-	(void) strlcpy(chopprefix, drr.drr_u.drr_begin.drr_toname,
-	    sizeof (chopprefix));
-	if (isprefix) {
-		/*
-		 * They specified a fs with -d, we want to tack on
-		 * everything but the pool name stored in the stream
-		 */
-		if (strchr(tosnap, '@')) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-			    "argument - snapshot not allowed with -d"));
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-		}
-		cp = strchr(chopprefix, '/');
-		if (cp == NULL)
-			cp = strchr(chopprefix, '@');
-		/* The bounded copy above may have dropped the delimiter. */
-		if (cp != NULL)
-			*cp = '\0';
-	} else if (strchr(tosnap, '@') == NULL) {
-		/*
-		 * If they specified a filesystem without -d, we want to
-		 * tack on everything after the fs specified in the
-		 * first name from the stream.
-		 */
-		cp = strchr(chopprefix, '@');
-		if (cp != NULL)
-			*cp = '\0';
-	}
-	choplen = strlen(chopprefix);
-
-	/*
-	 * Determine name of destination snapshot, store in zc_value.
-	 * Both copies are bounded; a name that does not fit is rejected
-	 * instead of overrunning the buffer.
-	 */
-	if (strlcpy(zc.zc_value, tosnap, sizeof (zc.zc_value)) >=
-	    sizeof (zc.zc_value) ||
-	    strlcat(zc.zc_value, drr.drr_u.drr_begin.drr_toname + choplen,
-	    sizeof (zc.zc_value)) >= sizeof (zc.zc_value)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "destination name is too long"));
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-	}
-	if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-	(void) strlcpy(zc.zc_name, zc.zc_value, sizeof (zc.zc_name));
-	if (drrb->drr_fromguid) {
-		/* incremental backup stream */
-		zfs_handle_t *h;
-
-		/* do the recvbackup ioctl to the containing fs */
-		*strchr(zc.zc_name, '@') = '\0';
-
-		/* make sure destination fs exists */
-		h = zfs_open(hdl, zc.zc_name,
-		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		if (h == NULL)
-			return (-1);
-		if (!dryrun) {
-			/*
-			 * We need to unmount all the dependents of the dataset
-			 * and the dataset itself.  If it's a volume
-			 * then remove device link.
-			 */
-			if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
-				clp = changelist_gather(h, ZFS_PROP_NAME, 0);
-				if (clp == NULL) {
-					zfs_close(h);
-					return (-1);
-				}
-				if (changelist_prefix(clp) != 0) {
-					changelist_free(clp);
-					zfs_close(h);
-					return (-1);
-				}
-			} else {
-				(void) zvol_remove_link(hdl, h->zfs_name);
-			}
-		}
-		zfs_close(h);
-	} else {
-		/* full backup stream */
-
-		/* Make sure destination fs does not exist */
-		*strchr(zc.zc_name, '@') = '\0';
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination '%s' exists"), zc.zc_name);
-			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-		}
-
-		if (strchr(zc.zc_name, '/') == NULL) {
-			/*
-			 * they're trying to do a recv into a
-			 * nonexistent topmost filesystem.
-			 */
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination does not exist"));
-			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-		}
-
-		/* Do the recvbackup ioctl to the fs's parent. */
-		*strrchr(zc.zc_name, '/') = '\0';
-
-		if (isprefix && (err = create_parents(hdl,
-		    zc.zc_value, strlen(tosnap))) != 0) {
-			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
-		}
-
-	}
-
-	zc.zc_cookie = infd;
-	zc.zc_guid = force;
-	if (verbose) {
-		(void) printf("%s %s stream of %s into %s\n",
-		    dryrun ? "would receive" : "receiving",
-		    drrb->drr_fromguid ? "incremental" : "full",
-		    drr.drr_u.drr_begin.drr_toname,
-		    zc.zc_value);
-		(void) fflush(stdout);
-	}
-	if (dryrun)
-		return (0);
-	err = ioctl_err = ioctl(hdl->libzfs_fd, ZFS_IOC_RECVBACKUP, &zc);
-	if (ioctl_err != 0) {
-		/* Save errno before the reporting calls can disturb it. */
-		int save_errno = errno;
-
-		switch (save_errno) {
-		case ENODEV:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "most recent snapshot does not match incremental "
-			    "source"));
-			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
-			break;
-		case ETXTBSY:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination has been modified since most recent "
-			    "snapshot"));
-			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
-			break;
-		case EEXIST:
-			if (drrb->drr_fromguid == 0) {
-				/* it's the containing fs that exists */
-				cp = strchr(zc.zc_value, '@');
-				*cp = '\0';
-			}
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination already exists"));
-			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
-			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
-			    zc.zc_value);
-			break;
-		case EINVAL:
-			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
-			break;
-		case ECKSUM:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "invalid stream (checksum mismatch)"));
-			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
-			break;
-		default:
-			(void) zfs_standard_error(hdl, save_errno, errbuf);
-		}
-	}
-
-	/*
-	 * Mount or recreate the /dev links for the target filesystem
-	 * (if created, or if we tore them down to do an incremental
-	 * restore), and the /dev links for the new snapshot (if
-	 * created).  Also mount any children of the target filesystem
-	 * if we did an incremental receive.
-	 */
-	cp = strchr(zc.zc_value, '@');
-	if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) {
-		zfs_handle_t *h;
-
-		*cp = '\0';
-		h = zfs_open(hdl, zc.zc_value,
-		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		*cp = '@';
-		if (h) {
-			if (h->zfs_type == ZFS_TYPE_VOLUME) {
-				err = zvol_create_link(hdl, h->zfs_name);
-				if (err == 0 && ioctl_err == 0)
-					err = zvol_create_link(hdl,
-					    zc.zc_value);
-			} else {
-				if (drrb->drr_fromguid) {
-					if (clp != NULL)
-						err = changelist_postfix(clp);
-				} else {
-					err = zfs_mount(h, NULL, 0);
-				}
-			}
-			zfs_close(h);
-		}
-	}
-
-	/*
-	 * Release the changelist on every path, including those where the
-	 * dataset could not be reopened above.
-	 */
-	if (clp != NULL)
-		changelist_free(clp);
-
-	if (err || ioctl_err)
-		return (-1);
-
-	if (verbose) {
-		char buf1[64];
-		char buf2[64];
-		uint64_t nbytes = zc.zc_cookie;
-		time_t delta = time(NULL) - begin_time;
-		if (delta == 0)
-			delta = 1;
-		zfs_nicenum(nbytes, buf1, sizeof (buf1));
-		zfs_nicenum(nbytes/delta, buf2, sizeof (buf2));
-
-		(void) printf("received %sb stream in %lu seconds (%sb/sec)\n",
-		    buf1, (unsigned long)delta, buf2);
-	}
-
-	return (0);
-}
-
-/*
- * Destroy any more recent snapshots. We invoke this callback on any dependents
- * of the snapshot first. If the 'cb_dependent' member is non-zero, then this
- * is a dependent and we should just destroy it without checking the transaction
- * group.
- */
-typedef struct rollback_data {
- const char *cb_target; /* the snapshot */
- uint64_t cb_create; /* creation txg of the target snapshot */
- prop_changelist_t *cb_clp; /* changelist pointer */
- int cb_error; /* set to 1 when any destroy or dependents walk fails */
- boolean_t cb_dependent; /* set while walking a doomed snapshot's dependents */
-} rollback_data_t;
-
-/*
- * Iteration callback used by zfs_rollback().  A dataset is destroyed if it
- * is being visited as a dependent, or if it is a snapshot other than the
- * target whose creation txg is newer than the target's.  In the latter case
- * its dependents are destroyed first.  Failures are recorded in cb_error;
- * the walk itself always continues (returns 0).
- */
-static int
-rollback_destroy(zfs_handle_t *zhp, void *data)
-{
-	rollback_data_t *cbp = data;
-	boolean_t doomed;
-
-	if (cbp->cb_dependent) {
-		/* Dependents are destroyed without checking the txg. */
-		doomed = B_TRUE;
-	} else if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
-	    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
-	    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) {
-		doomed = B_TRUE;
-
-		/* Take out everything hanging off this snapshot first. */
-		cbp->cb_dependent = B_TRUE;
-		if (zfs_iter_dependents(zhp, B_FALSE, rollback_destroy,
-		    cbp) != 0)
-			cbp->cb_error = 1;
-		cbp->cb_dependent = B_FALSE;
-	} else {
-		doomed = B_FALSE;
-	}
-
-	if (doomed) {
-		if (zfs_destroy(zhp) == 0)
-			changelist_remove(zhp, cbp->cb_clp);
-		else
-			cbp->cb_error = 1;
-	}
-
-	zfs_close(zhp);
-	return (0);
-}
-
-/*
- * Roll the dataset back to its latest snapshot.
- *
- * For a volume the device link is removed before the ioctl and recreated
- * after it succeeds.  Returns 0 on success, non-zero otherwise.
- */
-static int
-do_rollback(zfs_handle_t *zhp)
-{
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	zfs_cmd_t zc = { 0 };
-	int ret;
-
-	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
-	    zhp->zfs_type == ZFS_TYPE_VOLUME);
-
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		if (zvol_remove_link(hdl, zhp->zfs_name) != 0)
-			return (-1);
-	}
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	zc.zc_objset_type = ZFS_IS_VOLUME(zhp) ? DMU_OST_ZVOL : DMU_OST_ZFS;
-
-	/*
-	 * The consumer is trusted to have checked that no newer snapshots
-	 * exist, so the name is handed straight to the ioctl.  A snapshot
-	 * taken by the user after that check is an unlikely but possible
-	 * race.
-	 */
-	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc);
-	if (ret != 0) {
-		(void) zfs_standard_error_fmt(hdl, errno,
-		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
-		    zhp->zfs_name);
-		return (ret);
-	}
-
-	if (zhp->zfs_type == ZFS_TYPE_VOLUME)
-		ret = zvol_create_link(hdl, zhp->zfs_name);
-
-	return (ret);
-}
-
-/*
- * Roll 'zhp' back to the snapshot 'snap', discarding any data changes made
- * since then and making it the active dataset.
- *
- * Snapshots more recent than the target are destroyed, along with their
- * dependents.  A non-zero 'flag' requests a forced unmount.  Returns 0 on
- * success, non-zero otherwise.
- */
-int
-zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, int flag)
-{
-	rollback_data_t cb = { 0 };
-	prop_changelist_t *clp;
-	int ret;
-
-	/*
-	 * Unmount the dataset and all of its dependents.  The list needed is
-	 * the same one a rename would gather.
-	 */
-	clp = changelist_gather(zhp, ZFS_PROP_NAME, flag ? MS_FORCE: 0);
-	if (clp == NULL)
-		return (-1);
-
-	ret = changelist_prefix(clp);
-	if (ret == 0) {
-		/* Destroy all more recent snapshots and their dependents. */
-		cb.cb_target = snap->zfs_name;
-		cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
-		cb.cb_clp = clp;
-		(void) zfs_iter_children(zhp, rollback_destroy, &cb);
-
-		/*
-		 * Only once the target is known to be the latest snapshot
-		 * is the rollback itself attempted.
-		 */
-		ret = cb.cb_error;
-		if (ret == 0)
-			ret = do_rollback(zhp);
-
-		if (ret != 0) {
-			/* Put things back; keep the original failure code. */
-			(void) changelist_postfix(clp);
-		} else {
-			/*
-			 * Re-mount only what was mounted in the first place.
-			 */
-			ret = changelist_postfix(clp);
-		}
-	}
-
-	changelist_free(clp);
-	return (ret);
-}
-
-/*
- * Invoke 'func' on every dependent of the given dataset: hierarchical
- * dependents (children) as well as data dependents (snapshots and clones).
- * The list itself is built by get_dependents() in libzfs_graph.c.
- *
- * Names for which no handle can be created are skipped.  The walk stops at
- * the first non-zero return from 'func', which is then returned; -1 is
- * returned if the list cannot be obtained.
- */
-int
-zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
-    zfs_iter_f func, void *data)
-{
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char **names;
-	size_t nnames;
-	size_t n;
-	int ret = 0;
-
-	if (get_dependents(hdl, allowrecursion, zhp->zfs_name,
-	    &names, &nnames) != 0)
-		return (-1);
-
-	for (n = 0; n < nnames && ret == 0; n++) {
-		zfs_handle_t *dep = make_dataset_handle(hdl, names[n]);
-
-		if (dep != NULL)
-			ret = func(dep, data);
-	}
-
-	for (n = 0; n < nnames; n++)
-		free(names[n]);
-	free(names);
-
-	return (ret);
-}
-
-/*
- * Renames the given dataset.
- *
- * For a snapshot, 'target' may be a full name or abbreviated to "snap" or
- * "@snap"; the new name must stay within the same dataset.  'recursive' is
- * only accepted for snapshots.  For other datasets the new name must be in
- * the same pool and must not be a descendant of the current name.
- *
- * Returns 0 on success and non-zero after an error has been reported.
- */
-int
-zfs_rename(zfs_handle_t *zhp, const char *target, int recursive)
-{
-	int ret;
-	zfs_cmd_t zc = { 0 };
-	char *delim;
-	prop_changelist_t *cl = NULL;
-	zfs_handle_t *zhrp = NULL;
-	char *parentname = NULL;
-	char parent[ZFS_MAXNAMELEN];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char errbuf[1024];
-
-	/* if we have the same exact name, just return success */
-	if (strcmp(zhp->zfs_name, target) == 0)
-		return (0);
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot rename to '%s'"), target);
-
-	/*
-	 * Make sure the target name is valid
-	 */
-	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
-		if ((strchr(target, '@') == NULL) ||
-		    *target == '@') {
-			/*
-			 * Snapshot target name is abbreviated,
-			 * reconstruct full dataset name
-			 */
-			(void) strlcpy(parent, zhp->zfs_name,
-			    sizeof (parent));
-			delim = strchr(parent, '@');
-			if (strchr(target, '@') == NULL)
-				*(++delim) = '\0';
-			else
-				*delim = '\0';
-			(void) strlcat(parent, target, sizeof (parent));
-			target = parent;
-		} else {
-			/*
-			 * Make sure we're renaming within the same dataset.
-			 */
-			delim = strchr(target, '@');
-			if (strncmp(zhp->zfs_name, target, delim - target)
-			    != 0 || zhp->zfs_name[delim - target] != '@') {
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "snapshots must be part of same "
-				    "dataset"));
-				return (zfs_error(hdl, EZFS_CROSSTARGET,
-				    errbuf));
-			}
-		}
-		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-	} else {
-		uint64_t unused;
-		size_t curlen;
-
-		if (recursive) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "recursive rename must be a snapshot"));
-			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
-		}
-
-		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-		/* validate parents */
-		if (check_parents(hdl, target, &unused) != 0)
-			return (-1);
-
-		(void) parent_name(target, parent, sizeof (parent));
-
-		/* make sure we're in the same pool */
-		verify((delim = strchr(target, '/')) != NULL);
-		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
-		    zhp->zfs_name[delim - target] != '/') {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "datasets must be within same pool"));
-			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
-		}
-
-		/*
-		 * new name cannot be a child of the current dataset name.
-		 * The match must end on a component boundary so that a
-		 * sibling such as "pool/foobar" is not mistaken for a
-		 * descendant of "pool/foo".
-		 */
-		curlen = strlen(zhp->zfs_name);
-		if (strncmp(parent, zhp->zfs_name, curlen) == 0 &&
-		    (parent[curlen] == '/' || parent[curlen] == '\0')) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "New dataset name cannot be a descendent of "
-			    "current dataset name"));
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-		}
-	}
-
-	(void) snprintf(errbuf, sizeof (errbuf),
-	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
-
-	if (getzoneid() == GLOBAL_ZONEID &&
-	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset is used in a non-global zone"));
-		return (zfs_error(hdl, EZFS_ZONED, errbuf));
-	}
-
-	if (recursive) {
-		struct destroydata dd;
-
-		parentname = strdup(zhp->zfs_name);
-		if (parentname == NULL) {
-			ret = zfs_standard_error(hdl, errno, errbuf);
-			goto error;
-		}
-		delim = strchr(parentname, '@');
-		*delim = '\0';
-		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_ANY);
-		if (zhrp == NULL) {
-			/* Leave through the cleanup path to free parentname. */
-			ret = -1;
-			goto error;
-		}
-
-		dd.snapname = delim + 1;
-		dd.gotone = B_FALSE;
-		/*
-		 * The callback receives handles opened by the iterator, so
-		 * it must close every one of them, including the first.
-		 */
-		dd.closezhp = B_TRUE;
-
-		/* We remove any zvol links prior to renaming them */
-		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
-		if (ret) {
-			goto error;
-		}
-	} else {
-		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
-			return (-1);
-
-		if (changelist_haszonedchild(cl)) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "child dataset with inherited mountpoint is used "
-			    "in a non-global zone"));
-			/* Propagate the failure instead of an unset 'ret'. */
-			ret = zfs_error(hdl, EZFS_ZONED, errbuf);
-			goto error;
-		}
-
-		if ((ret = changelist_prefix(cl)) != 0)
-			goto error;
-	}
-
-	if (ZFS_IS_VOLUME(zhp))
-		zc.zc_objset_type = DMU_OST_ZVOL;
-	else
-		zc.zc_objset_type = DMU_OST_ZFS;
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
-
-	zc.zc_cookie = recursive;
-
-	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc)) != 0) {
-		/* Save errno before formatting the message can disturb it. */
-		int save_errno = errno;
-
-		/*
-		 * if it was recursive, the one that actually failed will
-		 * be in zc.zc_name
-		 */
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot rename to '%s'"), zc.zc_name);
-
-		if (recursive && save_errno == EEXIST) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "a child dataset already has a snapshot "
-			    "with the new name"));
-			(void) zfs_error(hdl, EZFS_CROSSTARGET, errbuf);
-		} else {
-			(void) zfs_standard_error(zhp->zfs_hdl, save_errno,
-			    errbuf);
-		}
-
-		/*
-		 * On failure, we still want to remount any filesystems that
-		 * were previously mounted, so we don't alter the system state.
-		 */
-		if (recursive) {
-			struct createdata cd;
-
-			/* only create links for datasets that had existed */
-			cd.cd_snapname = delim + 1;
-			cd.cd_ifexists = B_TRUE;
-			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
-			    &cd);
-		} else {
-			(void) changelist_postfix(cl);
-		}
-	} else {
-		if (recursive) {
-			struct createdata cd;
-
-			/* only create links for datasets that had existed */
-			cd.cd_snapname = strchr(target, '@') + 1;
-			cd.cd_ifexists = B_TRUE;
-			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
-			    &cd);
-		} else {
-			changelist_rename(cl, zfs_get_name(zhp), target);
-			ret = changelist_postfix(cl);
-		}
-	}
-
-error:
-	if (parentname) {
-		free(parentname);
-	}
-	if (zhrp) {
-		zfs_close(zhrp);
-	}
-	if (cl) {
-		changelist_free(cl);
-	}
-	return (ret);
-}
-
-/*
- * Given a zvol dataset, issue the ioctl to create the appropriate minor node.
- * This is zvol_create_link_common() with 'ifexists' set to B_FALSE, so a
- * dataset that does not exist is reported as an error.
- */
-int
-zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
-{
- return (zvol_create_link_common(hdl, dataset, B_FALSE));
-}
-
-/*
- * Issue ZFS_IOC_CREATE_MINOR for 'dataset'.
- *
- * EEXIST from the ioctl is treated as success.  ENOENT is treated as success
- * only when 'ifexists' is non-zero.  Any other failure is reported through
- * zfs_standard_error_fmt().  The devfsadm handling is compiled out (#if 0).
- */
-static int
-zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
-{
- zfs_cmd_t zc = { 0 };
-#if 0
- di_devlink_handle_t dhdl;
-#endif
-
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
- /*
- * Issue the appropriate ioctl.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
- switch (errno) {
- case EEXIST:
- /*
- * Silently ignore the case where the link already
- * exists. This allows 'zfs volinit' to be run multiple
- * times without errors.
- */
- return (0);
-
- case ENOENT:
- /*
- * Dataset does not exist in the kernel. If we
- * don't care (see zfs_rename), then ignore the
- * error quietly.
- */
- if (ifexists) {
- return (0);
- }
-
- /* FALLTHROUGH */
-
- default:
- return (zfs_standard_error_fmt(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot create device links "
- "for '%s'"), dataset));
- }
- }
-
-#if 0
- /*
- * Call devfsadm and wait for the links to magically appear.
- */
- if ((dhdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
- zfs_error_aux(hdl, strerror(errno));
- (void) zfs_error_fmt(hdl, EZFS_DEVLINKS,
- dgettext(TEXT_DOMAIN, "cannot create device links "
- "for '%s'"), dataset);
- (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
- return (-1);
- } else {
- (void) di_devlink_fini(&dhdl);
- }
-#endif
-
- return (0);
-}
-
-/*
- * Remove the minor node for the given zvol, and with it the associated /dev
- * links.  A missing link (ENXIO) counts as success; any other failure is
- * reported through zfs_standard_error_fmt().
- */
-int
-zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
-{
-	zfs_cmd_t zc = { 0 };
-
-	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) == 0)
-		return (0);
-
-	/*
-	 * The link is already gone.  Staying quiet lets 'zfs volfini' be
-	 * run repeatedly without errors.
-	 */
-	if (errno == ENXIO)
-		return (0);
-
-	return (zfs_standard_error_fmt(hdl, errno,
-	    dgettext(TEXT_DOMAIN, "cannot remove device "
-	    "links for '%s'"), dataset));
-}
-
-nvlist_t *
-zfs_get_user_props(zfs_handle_t *zhp)
-{
- return (zhp->zfs_user_props);
-}
-
-/*
- * Given a comma-separated list of properties, contruct a property list
- * containing both user-defined and native properties. This function will
- * return a NULL list if 'all' is specified, which can later be expanded on a
- * per-dataset basis by zfs_expand_proplist().
- */
-int
-zfs_get_proplist_common(libzfs_handle_t *hdl, char *fields,
- zfs_proplist_t **listp, zfs_type_t type)
-{
- size_t len;
- char *s, *p;
- char c;
- zfs_prop_t prop;
- zfs_proplist_t *entry;
- zfs_proplist_t **last;
-
- *listp = NULL;
- last = listp;
-
- /*
- * If 'all' is specified, return a NULL list.
- */
- if (strcmp(fields, "all") == 0)
- return (0);
-
- /*
- * If no fields were specified, return an error.
- */
- if (fields[0] == '\0') {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "no properties specified"));
- return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
- "bad property list")));
- }
-
- /*
- * It would be nice to use getsubopt() here, but the inclusion of column
- * aliases makes this more effort than it's worth.
- */
- s = fields;
- while (*s != '\0') {
- if ((p = strchr(s, ',')) == NULL) {
- len = strlen(s);
- p = s + len;
- } else {
- len = p - s;
- }
-
- /*
- * Check for empty options.
- */
- if (len == 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "empty property name"));
- return (zfs_error(hdl, EZFS_BADPROP,
- dgettext(TEXT_DOMAIN, "bad property list")));
- }
-
- /*
- * Check all regular property names.
- */
- c = s[len];
- s[len] = '\0';
- prop = zfs_name_to_prop_common(s, type);
-
- if (prop != ZFS_PROP_INVAL &&
- !zfs_prop_valid_for_type(prop, type))
- prop = ZFS_PROP_INVAL;
-
- /*
- * When no property table entry can be found, return failure if
- * this is a pool property or if this isn't a user-defined
- * dataset property,
- */
- if (prop == ZFS_PROP_INVAL &&
- (type & ZFS_TYPE_POOL || !zfs_prop_user(s))) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "invalid property '%s'"), s);
- return (zfs_error(hdl, EZFS_BADPROP,
- dgettext(TEXT_DOMAIN, "bad property list")));
- }
-
- if ((entry = zfs_alloc(hdl, sizeof (zfs_proplist_t))) == NULL)
- return (-1);
-
- entry->pl_prop = prop;
- if (prop == ZFS_PROP_INVAL) {
- if ((entry->pl_user_prop =
- zfs_strdup(hdl, s)) == NULL) {
- free(entry);
- return (-1);
- }
- entry->pl_width = strlen(s);
- } else {
- entry->pl_width = zfs_prop_width(prop,
- &entry->pl_fixed);
- }
-
- *last = entry;
- last = &entry->pl_next;
-
- s = p;
- if (c == ',')
- s++;
- }
-
- return (0);
-}
-
-int
-zfs_get_proplist(libzfs_handle_t *hdl, char *fields, zfs_proplist_t **listp)
-{
- return (zfs_get_proplist_common(hdl, fields, listp, ZFS_TYPE_ANY));
-}
-
-void
-zfs_free_proplist(zfs_proplist_t *pl)
-{
- zfs_proplist_t *next;
-
- while (pl != NULL) {
- next = pl->pl_next;
- free(pl->pl_user_prop);
- free(pl);
- pl = next;
- }
-}
-
-typedef struct expand_data {
- zfs_proplist_t **last;
- libzfs_handle_t *hdl;
-} expand_data_t;
-
-static zfs_prop_t
-zfs_expand_proplist_cb(zfs_prop_t prop, void *cb)
-{
- zfs_proplist_t *entry;
- expand_data_t *edp = cb;
-
- if ((entry = zfs_alloc(edp->hdl, sizeof (zfs_proplist_t))) == NULL)
- return (ZFS_PROP_INVAL);
-
- entry->pl_prop = prop;
- entry->pl_width = zfs_prop_width(prop, &entry->pl_fixed);
- entry->pl_all = B_TRUE;
-
- *(edp->last) = entry;
- edp->last = &entry->pl_next;
-
- return (ZFS_PROP_CONT);
-}
-
-int
-zfs_expand_proplist_common(libzfs_handle_t *hdl, zfs_proplist_t **plp,
- zfs_type_t type)
-{
- zfs_proplist_t *entry;
- zfs_proplist_t **last;
- expand_data_t exp;
-
- if (*plp == NULL) {
- /*
- * If this is the very first time we've been called for an 'all'
- * specification, expand the list to include all native
- * properties.
- */
- last = plp;
-
- exp.last = last;
- exp.hdl = hdl;
-
- if (zfs_prop_iter_common(zfs_expand_proplist_cb, &exp, type,
- B_FALSE) == ZFS_PROP_INVAL)
- return (-1);
-
- /*
- * Add 'name' to the beginning of the list, which is handled
- * specially.
- */
- if ((entry = zfs_alloc(hdl,
- sizeof (zfs_proplist_t))) == NULL)
- return (-1);
-
- entry->pl_prop = ZFS_PROP_NAME;
- entry->pl_width = zfs_prop_width(ZFS_PROP_NAME,
- &entry->pl_fixed);
- entry->pl_all = B_TRUE;
- entry->pl_next = *plp;
- *plp = entry;
- }
- return (0);
-}
-
-/*
- * This function is used by 'zfs list' to determine the exact set of columns to
- * display, and their maximum widths. This does two main things:
- *
- * - If this is a list of all properties, then expand the list to include
- * all native properties, and set a flag so that for each dataset we look
- * for new unique user properties and add them to the list.
- *
- * - For non fixed-width properties, keep track of the maximum width seen
- * so that we can size the column appropriately.
- */
-int
-zfs_expand_proplist(zfs_handle_t *zhp, zfs_proplist_t **plp)
-{
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- zfs_proplist_t *entry;
- zfs_proplist_t **last, **start;
- nvlist_t *userprops, *propval;
- nvpair_t *elem;
- char *strval;
- char buf[ZFS_MAXPROPLEN];
-
- if (zfs_expand_proplist_common(hdl, plp, ZFS_TYPE_ANY) != 0)
- return (-1);
-
- userprops = zfs_get_user_props(zhp);
-
- entry = *plp;
- if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
- /*
- * Go through and add any user properties as necessary. We
- * start by incrementing our list pointer to the first
- * non-native property.
- */
- start = plp;
- while (*start != NULL) {
- if ((*start)->pl_prop == ZFS_PROP_INVAL)
- break;
- start = &(*start)->pl_next;
- }
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) {
- /*
- * See if we've already found this property in our list.
- */
- for (last = start; *last != NULL;
- last = &(*last)->pl_next) {
- if (strcmp((*last)->pl_user_prop,
- nvpair_name(elem)) == 0)
- break;
- }
-
- if (*last == NULL) {
- if ((entry = zfs_alloc(hdl,
- sizeof (zfs_proplist_t))) == NULL ||
- ((entry->pl_user_prop = zfs_strdup(hdl,
- nvpair_name(elem)))) == NULL) {
- free(entry);
- return (-1);
- }
-
- entry->pl_prop = ZFS_PROP_INVAL;
- entry->pl_width = strlen(nvpair_name(elem));
- entry->pl_all = B_TRUE;
- *last = entry;
- }
- }
- }
-
- /*
- * Now go through and check the width of any non-fixed columns
- */
- for (entry = *plp; entry != NULL; entry = entry->pl_next) {
- if (entry->pl_fixed)
- continue;
-
- if (entry->pl_prop != ZFS_PROP_INVAL) {
- if (zfs_prop_get(zhp, entry->pl_prop,
- buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) {
- if (strlen(buf) > entry->pl_width)
- entry->pl_width = strlen(buf);
- }
- } else if (nvlist_lookup_nvlist(userprops,
- entry->pl_user_prop, &propval) == 0) {
- verify(nvlist_lookup_string(propval,
- ZFS_PROP_VALUE, &strval) == 0);
- if (strlen(strval) > entry->pl_width)
- entry->pl_width = strlen(strval);
- }
- }
-
- return (0);
-}
-
-/*
- * Attach/detach the given filesystem to/from the given jail.
- */
-int
-zfs_jail(zfs_handle_t *zhp, int jailid, int attach)
-{
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- zfs_cmd_t zc = { 0 };
- char errbuf[1024];
- int cmd, ret;
-
- if (attach) {
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name);
- } else {
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name);
- }
-
- switch (zhp->zfs_type) {
- case ZFS_TYPE_VOLUME:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "volumes can not be jailed"));
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
- case ZFS_TYPE_SNAPSHOT:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "snapshots can not be jailed"));
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
- }
- assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
-
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- zc.zc_objset_type = DMU_OST_ZFS;
- zc.zc_jailid = jailid;
-
- cmd = attach ? ZFS_IOC_JAIL : ZFS_IOC_UNJAIL;
- if ((ret = ioctl(hdl->libzfs_fd, cmd, &zc)) != 0)
- zfs_standard_error(hdl, errno, errbuf);
-
- return (ret);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c b/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
deleted file mode 100644
index c283016..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_graph.c
+++ /dev/null
@@ -1,646 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Iterate over all children of the current object. This includes the normal
- * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
- * walk all datasets in the pool, and construct a directed graph of the form:
- *
- * home
- * |
- * +----+----+
- * | |
- * v v ws
- * bar baz |
- * | |
- * v v
- * @yesterday ----> foo
- *
- * In order to construct this graph, we have to walk every dataset in the pool,
- * because the clone parent is stored as a property of the child, not the
- * parent. The parent only keeps track of the number of clones.
- *
- * In the normal case (without clones) this would be rather expensive. To avoid
- * unnecessary computation, we first try a walk of the subtree hierarchy
- * starting from the initial node. At each dataset, we construct a node in the
- * graph and an edge leading from its parent. If we don't see any snapshots
- * with a non-zero clone count, then we are finished.
- *
- * If we do find a cloned snapshot, then we finish the walk of the current
- * subtree, but indicate that we need to do a complete walk. We then perform a
- * global walk of all datasets, avoiding the subtree we already processed.
- *
- * At the end of this, we'll end up with a directed graph of all relevant (and
- * possible some irrelevant) datasets in the system. We need to both find our
- * limiting subgraph and determine a safe ordering in which to destroy the
- * datasets. We do a topological ordering of our graph starting at our target
- * dataset, and then walk the results in reverse.
- *
- * It's possible for the graph to have cycles if, for example, the user renames
- * a clone to be the parent of its origin snapshot. The user can request to
- * generate an error in this case, or ignore the cycle and continue.
- *
- * When removing datasets, we want to destroy the snapshots in chronological
- * order (because this is the most efficient method). In order to accomplish
- * this, we store the creation transaction group with each vertex and keep each
- * vertex's edges sorted according to this value. The topological sort will
- * automatically walk the snapshots in the correct order.
- */
-
-#include <assert.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <unistd.h>
-
-#include <libzfs.h>
-
-#include "libzfs_impl.h"
-#include "zfs_namecheck.h"
-
-#define MIN_EDGECOUNT 4
-
-/*
- * Vertex structure. Indexed by dataset name, this structure maintains a list
- * of edges to other vertices.
- */
-struct zfs_edge;
-typedef struct zfs_vertex {
- char zv_dataset[ZFS_MAXNAMELEN];
- struct zfs_vertex *zv_next;
- int zv_visited;
- uint64_t zv_txg;
- struct zfs_edge **zv_edges;
- int zv_edgecount;
- int zv_edgealloc;
-} zfs_vertex_t;
-
-enum {
- VISIT_SEEN = 1,
- VISIT_SORT_PRE,
- VISIT_SORT_POST
-};
-
-/*
- * Edge structure. Simply maintains a pointer to the destination vertex. There
- * is no need to store the source vertex, since we only use edges in the context
- * of the source vertex.
- */
-typedef struct zfs_edge {
- zfs_vertex_t *ze_dest;
- struct zfs_edge *ze_next;
-} zfs_edge_t;
-
-#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
-
-/*
- * Graph structure. Vertices are maintained in a hash indexed by dataset name.
- */
-typedef struct zfs_graph {
- zfs_vertex_t **zg_hash;
- size_t zg_size;
- size_t zg_nvertex;
-} zfs_graph_t;
-
-/*
- * Allocate a new edge pointing to the target vertex.
- */
-static zfs_edge_t *
-zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
-{
- zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
-
- if (zep == NULL)
- return (NULL);
-
- zep->ze_dest = dest;
-
- return (zep);
-}
-
-/*
- * Destroy an edge.
- */
-static void
-zfs_edge_destroy(zfs_edge_t *zep)
-{
- free(zep);
-}
-
-/*
- * Allocate a new vertex with the given name.
- */
-static zfs_vertex_t *
-zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
-{
- zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
-
- if (zvp == NULL)
- return (NULL);
-
- assert(strlen(dataset) < ZFS_MAXNAMELEN);
-
- (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
-
- if ((zvp->zv_edges = zfs_alloc(hdl,
- MIN_EDGECOUNT * sizeof (void *))) == NULL) {
- free(zvp);
- return (NULL);
- }
-
- zvp->zv_edgealloc = MIN_EDGECOUNT;
-
- return (zvp);
-}
-
-/*
- * Destroy a vertex. Frees up any associated edges.
- */
-static void
-zfs_vertex_destroy(zfs_vertex_t *zvp)
-{
- int i;
-
- for (i = 0; i < zvp->zv_edgecount; i++)
- zfs_edge_destroy(zvp->zv_edges[i]);
-
- free(zvp->zv_edges);
- free(zvp);
-}
-
-/*
- * Given a vertex, add an edge to the destination vertex.
- */
-static int
-zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
- zfs_vertex_t *dest)
-{
- zfs_edge_t *zep = zfs_edge_create(hdl, dest);
-
- if (zep == NULL)
- return (-1);
-
- if (zvp->zv_edgecount == zvp->zv_edgealloc) {
- void *ptr;
-
- if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
- zvp->zv_edgealloc * sizeof (void *),
- zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
- return (-1);
-
- zvp->zv_edges = ptr;
- zvp->zv_edgealloc *= 2;
- }
-
- zvp->zv_edges[zvp->zv_edgecount++] = zep;
-
- return (0);
-}
-
-static int
-zfs_edge_compare(const void *a, const void *b)
-{
- const zfs_edge_t *ea = *((zfs_edge_t **)a);
- const zfs_edge_t *eb = *((zfs_edge_t **)b);
-
- if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
- return (-1);
- if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
- return (1);
- return (0);
-}
-
-/*
- * Sort the given vertex edges according to the creation txg of each vertex.
- */
-static void
-zfs_vertex_sort_edges(zfs_vertex_t *zvp)
-{
- if (zvp->zv_edgecount == 0)
- return;
-
- qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
- zfs_edge_compare);
-}
-
-/*
- * Construct a new graph object. We allow the size to be specified as a
- * parameter so in the future we can size the hash according to the number of
- * datasets in the pool.
- */
-static zfs_graph_t *
-zfs_graph_create(libzfs_handle_t *hdl, size_t size)
-{
- zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
-
- if (zgp == NULL)
- return (NULL);
-
- zgp->zg_size = size;
- if ((zgp->zg_hash = zfs_alloc(hdl,
- size * sizeof (zfs_vertex_t *))) == NULL) {
- free(zgp);
- return (NULL);
- }
-
- return (zgp);
-}
-
-/*
- * Destroy a graph object. We have to iterate over all the hash chains,
- * destroying each vertex in the process.
- */
-static void
-zfs_graph_destroy(zfs_graph_t *zgp)
-{
- int i;
- zfs_vertex_t *current, *next;
-
- for (i = 0; i < zgp->zg_size; i++) {
- current = zgp->zg_hash[i];
- while (current != NULL) {
- next = current->zv_next;
- zfs_vertex_destroy(current);
- current = next;
- }
- }
-
- free(zgp->zg_hash);
- free(zgp);
-}
-
-/*
- * Graph hash function. Classic bernstein k=33 hash function, taken from
- * usr/src/cmd/sgs/tools/common/strhash.c
- */
-static size_t
-zfs_graph_hash(zfs_graph_t *zgp, const char *str)
-{
- size_t hash = 5381;
- int c;
-
- while ((c = *str++) != 0)
- hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
-
- return (hash % zgp->zg_size);
-}
-
-/*
- * Given a dataset name, finds the associated vertex, creating it if necessary.
- */
-static zfs_vertex_t *
-zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
- uint64_t txg)
-{
- size_t idx = zfs_graph_hash(zgp, dataset);
- zfs_vertex_t *zvp;
-
- for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
- if (strcmp(zvp->zv_dataset, dataset) == 0) {
- if (zvp->zv_txg == 0)
- zvp->zv_txg = txg;
- return (zvp);
- }
- }
-
- if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
- return (NULL);
-
- zvp->zv_next = zgp->zg_hash[idx];
- zvp->zv_txg = txg;
- zgp->zg_hash[idx] = zvp;
- zgp->zg_nvertex++;
-
- return (zvp);
-}
-
-/*
- * Given two dataset names, create an edge between them. For the source vertex,
- * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
- * created it as a destination of another edge. If 'dest' is NULL, then this
- * is an individual vertex (i.e. the starting vertex), so don't add an edge.
- */
-static int
-zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
- const char *dest, uint64_t txg)
-{
- zfs_vertex_t *svp, *dvp;
-
- if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
- return (-1);
- svp->zv_visited = VISIT_SEEN;
- if (dest != NULL) {
- dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
- if (dvp == NULL)
- return (-1);
- if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Iterate over all children of the given dataset, adding any vertices as
- * necessary. Returns 0 if no cloned snapshots were seen, -1 if there was an
- * error, or 1 otherwise. This is a simple recursive algorithm - the ZFS
- * namespace typically is very flat. We manually invoke the necessary ioctl()
- * calls to avoid the overhead and additional semantics of zfs_open().
- */
-static int
-iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
-{
- zfs_cmd_t zc = { 0 };
- int ret = 0, err;
- zfs_vertex_t *zvp;
-
- /*
- * Look up the source vertex, and avoid it if we've seen it before.
- */
- zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
- if (zvp == NULL)
- return (-1);
- if (zvp->zv_visited == VISIT_SEEN)
- return (0);
-
- /*
- * We check the clone parent here instead of within the loop, so that if
- * the root dataset has been promoted from a clone, we find its parent
- * appropriately.
- */
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
- zc.zc_objset_stats.dds_clone_of[0] != '\0') {
- if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
- zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
- }
-
- for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
-
- /*
- * Ignore private dataset names.
- */
- if (dataset_name_hidden(zc.zc_name))
- continue;
-
- /*
- * Get statistics for this dataset, to determine the type of the
- * dataset and clone statistics. If this fails, the dataset has
- * since been removed, and we're pretty much screwed anyway.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
- continue;
-
- /*
- * Add an edge between the parent and the child.
- */
- if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
-
- /*
- * Iterate over all children
- */
- err = iterate_children(hdl, zgp, zc.zc_name);
- if (err == -1)
- return (-1);
- else if (err == 1)
- ret = 1;
-
- /*
- * Indicate if we found a dataset with a non-zero clone count.
- */
- if (zc.zc_objset_stats.dds_num_clones != 0)
- ret = 1;
- }
-
- /*
- * Now iterate over all snapshots.
- */
- bzero(&zc, sizeof (zc));
-
- for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
-
- /*
- * Get statistics for this dataset, to determine the type of the
- * dataset and clone statistics. If this fails, the dataset has
- * since been removed, and we're pretty much screwed anyway.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
- continue;
-
- /*
- * Add an edge between the parent and the child.
- */
- if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
- zc.zc_objset_stats.dds_creation_txg) != 0)
- return (-1);
-
- /*
- * Indicate if we found a dataset with a non-zero clone count.
- */
- if (zc.zc_objset_stats.dds_num_clones != 0)
- ret = 1;
- }
-
- zvp->zv_visited = VISIT_SEEN;
-
- return (ret);
-}
-
-/*
- * Construct a complete graph of all necessary vertices. First, we iterate over
- * only our object's children. If we don't find any cloned snapshots, then we
- * simple return that. Otherwise, we have to start at the pool root and iterate
- * over all datasets.
- */
-static zfs_graph_t *
-construct_graph(libzfs_handle_t *hdl, const char *dataset)
-{
- zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
- zfs_cmd_t zc = { 0 };
- int ret = 0;
-
- if (zgp == NULL)
- return (zgp);
-
- /*
- * We need to explicitly check whether this dataset has clones or not,
- * since iterate_children() only checks the children.
- */
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
- (void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
-
- if (zc.zc_objset_stats.dds_num_clones != 0 ||
- (ret = iterate_children(hdl, zgp, dataset)) != 0) {
- /*
- * Determine pool name and try again.
- */
- char *pool, *slash;
-
- if ((slash = strchr(dataset, '/')) != NULL ||
- (slash = strchr(dataset, '@')) != NULL) {
- pool = zfs_alloc(hdl, slash - dataset + 1);
- if (pool == NULL) {
- zfs_graph_destroy(zgp);
- return (NULL);
- }
- (void) strncpy(pool, dataset, slash - dataset);
- pool[slash - dataset] = '\0';
-
- if (iterate_children(hdl, zgp, pool) == -1 ||
- zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
- free(pool);
- zfs_graph_destroy(zgp);
- return (NULL);
- }
-
- free(pool);
- }
- }
-
- if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
- zfs_graph_destroy(zgp);
- return (NULL);
- }
-
- return (zgp);
-}
-
-/*
- * Given a graph, do a recursive topological sort into the given array. This is
- * really just a depth first search, so that the deepest nodes appear first.
- * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
- */
-static int
-topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
- size_t *idx, zfs_vertex_t *zgv)
-{
- int i;
-
- if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
- /*
- * If we've already seen this vertex as part of our depth-first
- * search, then we have a cyclic dependency, and we must return
- * an error.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "recursive dependency at '%s'"),
- zgv->zv_dataset);
- return (zfs_error(hdl, EZFS_RECURSIVE,
- dgettext(TEXT_DOMAIN,
- "cannot determine dependent datasets")));
- } else if (zgv->zv_visited >= VISIT_SORT_PRE) {
- /*
- * If we've already processed this as part of the topological
- * sort, then don't bother doing so again.
- */
- return (0);
- }
-
- zgv->zv_visited = VISIT_SORT_PRE;
-
- /* avoid doing a search if we don't have to */
- zfs_vertex_sort_edges(zgv);
- for (i = 0; i < zgv->zv_edgecount; i++) {
- if (topo_sort(hdl, allowrecursion, result, idx,
- zgv->zv_edges[i]->ze_dest) != 0)
- return (-1);
- }
-
- /* we may have visited this in the course of the above */
- if (zgv->zv_visited == VISIT_SORT_POST)
- return (0);
-
- if ((result[*idx] = zfs_alloc(hdl,
- strlen(zgv->zv_dataset) + 1)) == NULL)
- return (-1);
-
- (void) strcpy(result[*idx], zgv->zv_dataset);
- *idx += 1;
- zgv->zv_visited = VISIT_SORT_POST;
- return (0);
-}
-
-/*
- * The only public interface for this file. Do the dirty work of constructing a
- * child list for the given object. Construct the graph, do the toplogical
- * sort, and then return the array of strings to the caller.
- *
- * The 'allowrecursion' parameter controls behavior when cycles are found. If
- * it is set, the the cycle is ignored and the results returned as if the cycle
- * did not exist. If it is not set, then the routine will generate an error if
- * a cycle is found.
- */
-int
-get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
- const char *dataset, char ***result, size_t *count)
-{
- zfs_graph_t *zgp;
- zfs_vertex_t *zvp;
-
- if ((zgp = construct_graph(hdl, dataset)) == NULL)
- return (-1);
-
- if ((*result = zfs_alloc(hdl,
- zgp->zg_nvertex * sizeof (char *))) == NULL) {
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
- free(*result);
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- *count = 0;
- if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
- free(*result);
- zfs_graph_destroy(zgp);
- return (-1);
- }
-
- /*
- * Get rid of the last entry, which is our starting vertex and not
- * strictly a dependent.
- */
- assert(*count > 0);
- free((*result)[*count - 1]);
- (*count)--;
-
- zfs_graph_destroy(zgp);
-
- return (0);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
deleted file mode 100644
index 9581331..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _LIBFS_IMPL_H
-#define _LIBFS_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_acl.h>
-#include <sys/nvpair.h>
-
-#include <libuutil.h>
-#include <libzfs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct libzfs_handle {
- int libzfs_error;
- int libzfs_fd;
- FILE *libzfs_mnttab;
- FILE *libzfs_sharetab;
- uu_avl_pool_t *libzfs_ns_avlpool;
- uu_avl_t *libzfs_ns_avl;
- uint64_t libzfs_ns_gen;
- int libzfs_desc_active;
- char libzfs_action[1024];
- char libzfs_desc[1024];
- int libzfs_printerr;
-};
-
-struct zfs_handle {
- libzfs_handle_t *zfs_hdl;
- char zfs_name[ZFS_MAXNAMELEN];
- zfs_type_t zfs_type; /* type including snapshot */
- zfs_type_t zfs_head_type; /* type excluding snapshot */
- dmu_objset_stats_t zfs_dmustats;
- nvlist_t *zfs_props;
- nvlist_t *zfs_user_props;
- boolean_t zfs_mntcheck;
- char *zfs_mntopts;
- char zfs_root[MAXPATHLEN];
-};
-
-/*
- * This is different from checking zfs_type, because it will also catch
- * snapshots of volumes.
- */
-#define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
-
-struct zpool_handle {
- libzfs_handle_t *zpool_hdl;
- char zpool_name[ZPOOL_MAXNAMELEN];
- int zpool_state;
- size_t zpool_config_size;
- nvlist_t *zpool_config;
- nvlist_t *zpool_old_config;
- nvlist_t *zpool_props;
-};
-
-int zfs_error(libzfs_handle_t *, int, const char *);
-int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
-void zfs_error_aux(libzfs_handle_t *, const char *, ...);
-void *zfs_alloc(libzfs_handle_t *, size_t);
-void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t);
-char *zfs_strdup(libzfs_handle_t *, const char *);
-int no_memory(libzfs_handle_t *);
-
-int zfs_standard_error(libzfs_handle_t *, int, const char *);
-int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
-int zpool_standard_error(libzfs_handle_t *, int, const char *);
-int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
-
-int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
- size_t *);
-
-int zfs_expand_proplist_common(libzfs_handle_t *, zfs_proplist_t **,
- zfs_type_t);
-int zfs_get_proplist_common(libzfs_handle_t *, char *, zfs_proplist_t **,
- zfs_type_t);
-zfs_prop_t zfs_prop_iter_common(zfs_prop_f, void *, zfs_type_t, boolean_t);
-zfs_prop_t zfs_name_to_prop_common(const char *, zfs_type_t);
-
-nvlist_t *zfs_validate_properties(libzfs_handle_t *, zfs_type_t, char *,
- nvlist_t *, uint64_t, zfs_handle_t *zhp, const char *errbuf);
-
-typedef struct prop_changelist prop_changelist_t;
-
-int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t);
-int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *, size_t *);
-int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *);
-int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **);
-void zcmd_free_nvlists(zfs_cmd_t *);
-
-int changelist_prefix(prop_changelist_t *);
-int changelist_postfix(prop_changelist_t *);
-void changelist_rename(prop_changelist_t *, const char *, const char *);
-void changelist_remove(zfs_handle_t *, prop_changelist_t *);
-void changelist_free(prop_changelist_t *);
-prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int);
-int changelist_unshare(prop_changelist_t *);
-int changelist_haszonedchild(prop_changelist_t *);
-
-void remove_mountpoint(zfs_handle_t *);
-
-zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
-
-int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
-
-int zvol_create_link(libzfs_handle_t *, const char *);
-int zvol_remove_link(libzfs_handle_t *, const char *);
-int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
-
-void namespace_clear(libzfs_handle_t *);
-
-#ifdef __FreeBSD__
-/*
- * This is FreeBSD version of ioctl, because Solaris' ioctl() updates
- * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
- * error is returned zc_nvlist_dst_size won't be updated.
- */
-static __inline int
-zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
-{
- size_t oldsize;
- int ret;
-
- oldsize = zc->zc_nvlist_dst_size;
- ret = ioctl(fd, cmd, zc);
- if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
- ret = -1;
- errno = ENOMEM;
- }
-
- return (ret);
-}
-#define ioctl(fd, cmd, zc) zcmd_ioctl((fd), (cmd), (zc))
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LIBFS_IMPL_H */
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_import.c b/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
deleted file mode 100644
index 1c77045..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Pool import support functions.
- *
- * To import a pool, we rely on reading the configuration information from the
- * ZFS label of each device. If we successfully read the label, then we
- * organize the configuration information in the following hierarchy:
- *
- * pool guid -> toplevel vdev guid -> label txg
- *
- * Duplicate entries matching this same tuple will be discarded. Once we have
- * examined every device, we pick the best label txg config for each toplevel
- * vdev. We then arrange these toplevel vdevs into a complete pool config, and
- * update any paths that have changed. Finally, we attempt to import the pool
- * using our derived config, and record the results.
- */
-
-#include <devid.h>
-#include <dirent.h>
-#include <errno.h>
-#include <libintl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <libgeom.h>
-
-#include <sys/vdev_impl.h>
-
-#include "libzfs.h"
-#include "libzfs_impl.h"
-
-/*
- * Intermediate structures used to gather configuration information.
- */
-typedef struct config_entry {
- uint64_t ce_txg;
- nvlist_t *ce_config;
- struct config_entry *ce_next;
-} config_entry_t;
-
-typedef struct vdev_entry {
- uint64_t ve_guid;
- config_entry_t *ve_configs;
- struct vdev_entry *ve_next;
-} vdev_entry_t;
-
-typedef struct pool_entry {
- uint64_t pe_guid;
- vdev_entry_t *pe_vdevs;
- struct pool_entry *pe_next;
-} pool_entry_t;
-
-typedef struct name_entry {
- char *ne_name;
- uint64_t ne_guid;
- struct name_entry *ne_next;
-} name_entry_t;
-
-typedef struct pool_list {
- pool_entry_t *pools;
- name_entry_t *names;
-} pool_list_t;
-
-static char *
-get_devid(const char *path)
-{
- int fd;
- ddi_devid_t devid;
- char *minor, *ret;
-
- if ((fd = open(path, O_RDONLY)) < 0)
- return (NULL);
-
- minor = NULL;
- ret = NULL;
- if (devid_get(fd, &devid) == 0) {
- if (devid_get_minor_name(fd, &minor) == 0)
- ret = devid_str_encode(devid, minor);
- if (minor != NULL)
- devid_str_free(minor);
- devid_free(devid);
- }
- (void) close(fd);
-
- return (ret);
-}
-
-/*
- * Go through and fix up any path and/or devid information for the given vdev
- * configuration.
- */
-static int
-fix_paths(nvlist_t *nv, name_entry_t *names)
-{
- nvlist_t **child;
- uint_t c, children;
- uint64_t guid;
- name_entry_t *ne, *best;
- char *path, *devid;
- int matched;
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0) {
- for (c = 0; c < children; c++)
- if (fix_paths(child[c], names) != 0)
- return (-1);
- return (0);
- }
-
- /*
- * This is a leaf (file or disk) vdev. In either case, go through
- * the name list and see if we find a matching guid. If so, replace
- * the path and see if we can calculate a new devid.
- *
- * There may be multiple names associated with a particular guid, in
- * which case we have overlapping slices or multiple paths to the same
- * disk. If this is the case, then we want to pick the path that is
- * the most similar to the original, where "most similar" is the number
- * of matching characters starting from the end of the path. This will
- * preserve slice numbers even if the disks have been reorganized, and
- * will also catch preferred disk names if multiple paths exist.
- */
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
- path = NULL;
-
- matched = 0;
- best = NULL;
- for (ne = names; ne != NULL; ne = ne->ne_next) {
- if (ne->ne_guid == guid) {
- const char *src, *dst;
- int count;
-
- if (path == NULL) {
- best = ne;
- break;
- }
-
- src = ne->ne_name + strlen(ne->ne_name) - 1;
- dst = path + strlen(path) - 1;
- for (count = 0; src >= ne->ne_name && dst >= path;
- src--, dst--, count++)
- if (*src != *dst)
- break;
-
- /*
- * At this point, 'count' is the number of characters
- * matched from the end.
- */
- if (count > matched || best == NULL) {
- best = ne;
- matched = count;
- }
- }
- }
-
- if (best == NULL)
- return (0);
-
- if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
- return (-1);
-
- if ((devid = get_devid(best->ne_name)) == NULL) {
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
- } else {
- if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
- return (-1);
- devid_str_free(devid);
- }
-
- return (0);
-}
-
-/*
- * Add the given configuration to the list of known devices.
- */
-static int
-add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
- nvlist_t *config)
-{
- uint64_t pool_guid, vdev_guid, top_guid, txg, state;
- pool_entry_t *pe;
- vdev_entry_t *ve;
- config_entry_t *ce;
- name_entry_t *ne;
-
- /*
- * If this is a hot spare not currently in use, add it to the list of
- * names to translate, but don't do anything else.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- &state) == 0 && state == POOL_STATE_SPARE &&
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
- return (-1);
-
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
- free(ne);
- return (-1);
- }
- ne->ne_guid = vdev_guid;
- ne->ne_next = pl->names;
- pl->names = ne;
- return (0);
- }
-
- /*
- * If we have a valid config but cannot read any of these fields, then
- * it means we have a half-initialized label. In vdev_label_init()
- * we write a label with txg == 0 so that we can identify the device
- * in case the user refers to the same disk later on. If we fail to
- * create the pool, we'll be left with a label in this state
- * which should not be considered part of a valid pool.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &pool_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
- &vdev_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
- &top_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0 || txg == 0) {
- nvlist_free(config);
- return (0);
- }
-
- /*
- * First, see if we know about this pool. If not, then add it to the
- * list of known pools.
- */
- for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
- if (pe->pe_guid == pool_guid)
- break;
- }
-
- if (pe == NULL) {
- if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
- nvlist_free(config);
- return (-1);
- }
- pe->pe_guid = pool_guid;
- pe->pe_next = pl->pools;
- pl->pools = pe;
- }
-
- /*
- * Second, see if we know about this toplevel vdev. Add it if its
- * missing.
- */
- for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
- if (ve->ve_guid == top_guid)
- break;
- }
-
- if (ve == NULL) {
- if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
- nvlist_free(config);
- return (-1);
- }
- ve->ve_guid = top_guid;
- ve->ve_next = pe->pe_vdevs;
- pe->pe_vdevs = ve;
- }
-
- /*
- * Third, see if we have a config with a matching transaction group. If
- * so, then we do nothing. Otherwise, add it to the list of known
- * configs.
- */
- for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
- if (ce->ce_txg == txg)
- break;
- }
-
- if (ce == NULL) {
- if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
- nvlist_free(config);
- return (-1);
- }
- ce->ce_txg = txg;
- ce->ce_config = config;
- ce->ce_next = ve->ve_configs;
- ve->ve_configs = ce;
- } else {
- nvlist_free(config);
- }
-
- /*
- * At this point we've successfully added our config to the list of
- * known configs. The last thing to do is add the vdev guid -> path
- * mappings so that we can fix up the configuration as necessary before
- * doing the import.
- */
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
- return (-1);
-
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
- free(ne);
- return (-1);
- }
-
- ne->ne_guid = vdev_guid;
- ne->ne_next = pl->names;
- pl->names = ne;
-
- return (0);
-}
-
-/*
- * Returns true if the named pool matches the given GUID.
- */
-static int
-pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
- boolean_t *isactive)
-{
- zpool_handle_t *zhp;
- uint64_t theguid;
-
- if (zpool_open_silent(hdl, name, &zhp) != 0)
- return (-1);
-
- if (zhp == NULL) {
- *isactive = B_FALSE;
- return (0);
- }
-
- verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
- &theguid) == 0);
-
- zpool_close(zhp);
-
- *isactive = (theguid == guid);
- return (0);
-}
-
-/*
- * Convert our list of pools into the definitive set of configurations. We
- * start by picking the best config for each toplevel vdev. Once that's done,
- * we assemble the toplevel vdevs into a full config for the pool. We make a
- * pass to fix up any incorrect paths, and then add it to the main list to
- * return to the user.
- */
-static nvlist_t *
-get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
-{
- pool_entry_t *pe;
- vdev_entry_t *ve;
- config_entry_t *ce;
- nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
- nvlist_t **spares;
- uint_t i, nspares;
- boolean_t config_seen;
- uint64_t best_txg;
- char *name, *hostname;
- zfs_cmd_t zc = { 0 };
- uint64_t version, guid;
- size_t len;
- int err;
- uint_t children = 0;
- nvlist_t **child = NULL;
- uint_t c;
- boolean_t isactive;
- uint64_t hostid;
-
- if (nvlist_alloc(&ret, 0, 0) != 0)
- goto nomem;
-
- for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
- uint64_t id;
-
- if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
- goto nomem;
- config_seen = B_FALSE;
-
- /*
- * Iterate over all toplevel vdevs. Grab the pool configuration
- * from the first one we find, and then go through the rest and
- * add them as necessary to the 'vdevs' member of the config.
- */
- for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
-
- /*
- * Determine the best configuration for this vdev by
- * selecting the config with the latest transaction
- * group.
- */
- best_txg = 0;
- for (ce = ve->ve_configs; ce != NULL;
- ce = ce->ce_next) {
-
- if (ce->ce_txg > best_txg) {
- tmp = ce->ce_config;
- best_txg = ce->ce_txg;
- }
- }
-
- if (!config_seen) {
- /*
- * Copy the relevant pieces of data to the pool
- * configuration:
- *
- * version
- * pool guid
- * name
- * pool state
- * hostid (if available)
- * hostname (if available)
- */
- uint64_t state;
-
- verify(nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_VERSION, &version) == 0);
- if (nvlist_add_uint64(config,
- ZPOOL_CONFIG_VERSION, version) != 0)
- goto nomem;
- verify(nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
- if (nvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_GUID, guid) != 0)
- goto nomem;
- verify(nvlist_lookup_string(tmp,
- ZPOOL_CONFIG_POOL_NAME, &name) == 0);
- if (nvlist_add_string(config,
- ZPOOL_CONFIG_POOL_NAME, name) != 0)
- goto nomem;
- verify(nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_POOL_STATE, &state) == 0);
- if (nvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_STATE, state) != 0)
- goto nomem;
- hostid = 0;
- if (nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
- if (nvlist_add_uint64(config,
- ZPOOL_CONFIG_HOSTID, hostid) != 0)
- goto nomem;
- verify(nvlist_lookup_string(tmp,
- ZPOOL_CONFIG_HOSTNAME,
- &hostname) == 0);
- if (nvlist_add_string(config,
- ZPOOL_CONFIG_HOSTNAME,
- hostname) != 0)
- goto nomem;
- }
-
- config_seen = B_TRUE;
- }
-
- /*
- * Add this top-level vdev to the child array.
- */
- verify(nvlist_lookup_nvlist(tmp,
- ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
- verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
- &id) == 0);
- if (id >= children) {
- nvlist_t **newchild;
-
- newchild = zfs_alloc(hdl, (id + 1) *
- sizeof (nvlist_t *));
- if (newchild == NULL)
- goto nomem;
-
- for (c = 0; c < children; c++)
- newchild[c] = child[c];
-
- free(child);
- child = newchild;
- children = id + 1;
- }
- if (nvlist_dup(nvtop, &child[id], 0) != 0)
- goto nomem;
-
- }
-
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
-
- /*
- * Look for any missing top-level vdevs. If this is the case,
- * create a faked up 'missing' vdev as a placeholder. We cannot
- * simply compress the child array, because the kernel performs
- * certain checks to make sure the vdev IDs match their location
- * in the configuration.
- */
- for (c = 0; c < children; c++)
- if (child[c] == NULL) {
- nvlist_t *missing;
- if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
- 0) != 0)
- goto nomem;
- if (nvlist_add_string(missing,
- ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_MISSING) != 0 ||
- nvlist_add_uint64(missing,
- ZPOOL_CONFIG_ID, c) != 0 ||
- nvlist_add_uint64(missing,
- ZPOOL_CONFIG_GUID, 0ULL) != 0) {
- nvlist_free(missing);
- goto nomem;
- }
- child[c] = missing;
- }
-
- /*
- * Put all of this pool's top-level vdevs into a root vdev.
- */
- if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
- goto nomem;
- if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_ROOT) != 0 ||
- nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
- nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
- nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- child, children) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
-
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
- children = 0;
- child = NULL;
-
- /*
- * Go through and fix up any paths and/or devids based on our
- * known list of vdev GUID -> path mappings.
- */
- if (fix_paths(nvroot, pl->names) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
-
- /*
- * Add the root vdev to this pool's configuration.
- */
- if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- nvroot) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
- nvlist_free(nvroot);
-
- /*
- * Determine if this pool is currently active, in which case we
- * can't actually import it.
- */
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
-
- if (pool_active(hdl, name, guid, &isactive) != 0)
- goto error;
-
- if (isactive) {
- nvlist_free(config);
- config = NULL;
- continue;
- }
-
- /*
- * Try to do the import in order to get vdev state.
- */
- if (zcmd_write_src_nvlist(hdl, &zc, config, &len) != 0)
- goto error;
-
- nvlist_free(config);
- config = NULL;
-
- if (zcmd_alloc_dst_nvlist(hdl, &zc, len * 2) != 0) {
- zcmd_free_nvlists(&zc);
- goto error;
- }
-
- while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
- &zc)) != 0 && errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- goto error;
- }
- }
-
- if (err) {
- (void) zpool_standard_error(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot discover pools"));
- zcmd_free_nvlists(&zc);
- goto error;
- }
-
- if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
- zcmd_free_nvlists(&zc);
- goto error;
- }
-
- zcmd_free_nvlists(&zc);
-
- /*
- * Go through and update the paths for spares, now that we have
- * them.
- */
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- for (i = 0; i < nspares; i++) {
- if (fix_paths(spares[i], pl->names) != 0)
- goto nomem;
- }
- }
-
- /*
- * Restore the original information read from the actual label.
- */
- (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
- DATA_TYPE_UINT64);
- (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
- DATA_TYPE_STRING);
- if (hostid != 0) {
- verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
- hostid) == 0);
- verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
- hostname) == 0);
- }
-
- /*
- * Add this pool to the list of configs.
- */
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
- if (nvlist_add_nvlist(ret, name, config) != 0)
- goto nomem;
-
- nvlist_free(config);
- config = NULL;
- }
-
- return (ret);
-
-nomem:
- (void) no_memory(hdl);
-error:
- nvlist_free(config);
- nvlist_free(ret);
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
-
- return (NULL);
-}
-
-/*
- * Return the offset of the given label.
- */
-static uint64_t
-label_offset(size_t size, int l)
-{
- return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
- 0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
-}
-
-/*
- * Given a file descriptor, read the label information and return an nvlist
- * describing the configuration, if there is one.
- */
-int
-zpool_read_label(int fd, nvlist_t **config)
-{
- struct stat64 statbuf;
- int l;
- vdev_label_t *label;
- uint64_t state, txg;
-
- *config = NULL;
-
- if (fstat64(fd, &statbuf) == -1)
- return (0);
-
- if ((label = malloc(sizeof (vdev_label_t))) == NULL)
- return (-1);
-
- for (l = 0; l < VDEV_LABELS; l++) {
- if (pread(fd, label, sizeof (vdev_label_t),
- label_offset(statbuf.st_size, l)) != sizeof (vdev_label_t))
- continue;
-
- if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
- sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
- continue;
-
- if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
- &state) != 0 || state > POOL_STATE_SPARE) {
- nvlist_free(*config);
- continue;
- }
-
- if (state != POOL_STATE_SPARE &&
- (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0 || txg == 0)) {
- nvlist_free(*config);
- continue;
- }
-
- free(label);
- return (0);
- }
-
- free(label);
- *config = NULL;
- return (0);
-}
-
-/*
- * Given a list of directories to search, find all pools stored on disk. This
- * includes partial pools which are not available to import. If no args are
- * given (argc is 0), then the default directory (/dev) is searched.
- */
-nvlist_t *
-zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
-{
- int i;
- char path[MAXPATHLEN];
- nvlist_t *ret = NULL, *config;
- int fd;
- pool_list_t pools = { 0 };
- pool_entry_t *pe, *penext;
- vdev_entry_t *ve, *venext;
- config_entry_t *ce, *cenext;
- name_entry_t *ne, *nenext;
- struct gmesh mesh;
- struct gclass *mp;
- struct ggeom *gp;
- struct gprovider *pp;
-
- /*
- * Go through and read the label configuration information from every
- * possible device, organizing the information according to pool GUID
- * and toplevel GUID.
- */
-
- fd = geom_gettree(&mesh);
- assert(fd == 0);
-
- LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
- LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
- LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
-
- (void) snprintf(path, sizeof (path), "%s%s",
- _PATH_DEV, pp->lg_name);
-
- if ((fd = open64(path, O_RDONLY)) < 0)
- continue;
-
- if ((zpool_read_label(fd, &config)) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
-
- (void) close(fd);
-
- if (config == NULL)
- continue;
-
- if (add_config(hdl, &pools, path, config) != 0)
- goto error;
- }
- }
- }
-
- geom_deletetree(&mesh);
-
- ret = get_configs(hdl, &pools);
-
-error:
- for (pe = pools.pools; pe != NULL; pe = penext) {
- penext = pe->pe_next;
- for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
- venext = ve->ve_next;
- for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
- cenext = ce->ce_next;
- if (ce->ce_config)
- nvlist_free(ce->ce_config);
- free(ce);
- }
- free(ve);
- }
- free(pe);
- }
-
- for (ne = pools.names; ne != NULL; ne = nenext) {
- nenext = ne->ne_next;
- if (ne->ne_name)
- free(ne->ne_name);
- free(ne);
- }
-
- return (ret);
-}
-
-boolean_t
-find_guid(nvlist_t *nv, uint64_t guid)
-{
- uint64_t tmp;
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
- if (tmp == guid)
- return (B_TRUE);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0) {
- for (c = 0; c < children; c++)
- if (find_guid(child[c], guid))
- return (B_TRUE);
- }
-
- return (B_FALSE);
-}
-
-typedef struct spare_cbdata {
- uint64_t cb_guid;
- zpool_handle_t *cb_zhp;
-} spare_cbdata_t;
-
-static int
-find_spare(zpool_handle_t *zhp, void *data)
-{
- spare_cbdata_t *cbp = data;
- nvlist_t **spares;
- uint_t i, nspares;
- uint64_t guid;
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- for (i = 0; i < nspares; i++) {
- verify(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &guid) == 0);
- if (guid == cbp->cb_guid) {
- cbp->cb_zhp = zhp;
- return (1);
- }
- }
- }
-
- zpool_close(zhp);
- return (0);
-}
-
-/*
- * Determines if the pool is in use. If so, it returns true and the state of
- * the pool as well as the name of the pool. Both strings are allocated and
- * must be freed by the caller.
- */
-int
-zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
- boolean_t *inuse)
-{
- nvlist_t *config;
- char *name;
- boolean_t ret;
- uint64_t guid, vdev_guid;
- zpool_handle_t *zhp;
- nvlist_t *pool_config;
- uint64_t stateval, isspare;
- spare_cbdata_t cb = { 0 };
- boolean_t isactive;
-
- *inuse = B_FALSE;
-
- if (zpool_read_label(fd, &config) != 0) {
- (void) no_memory(hdl);
- return (-1);
- }
-
- if (config == NULL)
- return (0);
-
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- &stateval) == 0);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
- &vdev_guid) == 0);
-
- if (stateval != POOL_STATE_SPARE) {
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
- }
-
- switch (stateval) {
- case POOL_STATE_EXPORTED:
- ret = B_TRUE;
- break;
-
- case POOL_STATE_ACTIVE:
- /*
- * For an active pool, we have to determine if it's really part
- * of a currently active pool (in which case the pool will exist
- * and the guid will be the same), or whether it's part of an
- * active pool that was disconnected without being explicitly
- * exported.
- */
- if (pool_active(hdl, name, guid, &isactive) != 0) {
- nvlist_free(config);
- return (-1);
- }
-
- if (isactive) {
- /*
- * Because the device may have been removed while
- * offlined, we only report it as active if the vdev is
- * still present in the config. Otherwise, pretend like
- * it's not in use.
- */
- if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
- (pool_config = zpool_get_config(zhp, NULL))
- != NULL) {
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(pool_config,
- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
- ret = find_guid(nvroot, vdev_guid);
- } else {
- ret = B_FALSE;
- }
-
- /*
- * If this is an active spare within another pool, we
- * treat it like an unused hot spare. This allows the
- * user to create a pool with a hot spare that currently
- * in use within another pool. Since we return B_TRUE,
- * libdiskmgt will continue to prevent generic consumers
- * from using the device.
- */
- if (ret && nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
- stateval = POOL_STATE_SPARE;
-
- if (zhp != NULL)
- zpool_close(zhp);
- } else {
- stateval = POOL_STATE_POTENTIALLY_ACTIVE;
- ret = B_TRUE;
- }
- break;
-
- case POOL_STATE_SPARE:
- /*
- * For a hot spare, it can be either definitively in use, or
- * potentially active. To determine if it's in use, we iterate
- * over all pools in the system and search for one with a spare
- * with a matching guid.
- *
- * Due to the shared nature of spares, we don't actually report
- * the potentially active case as in use. This means the user
- * can freely create pools on the hot spares of exported pools,
- * but to do otherwise makes the resulting code complicated, and
- * we end up having to deal with this case anyway.
- */
- cb.cb_zhp = NULL;
- cb.cb_guid = vdev_guid;
- if (zpool_iter(hdl, find_spare, &cb) == 1) {
- name = (char *)zpool_get_name(cb.cb_zhp);
- ret = TRUE;
- } else {
- ret = FALSE;
- }
- break;
-
- default:
- ret = B_FALSE;
- }
-
-
- if (ret) {
- if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
- nvlist_free(config);
- return (-1);
- }
- *state = (pool_state_t)stateval;
- }
-
- if (cb.cb_zhp)
- zpool_close(cb.cb_zhp);
-
- nvlist_free(config);
- *inuse = ret;
- return (0);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c b/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
deleted file mode 100644
index b4bc945..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
+++ /dev/null
@@ -1,986 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Routines to manage ZFS mounts. We separate all the nasty routines that have
- * to deal with the OS. The following functions are the main entry points --
- * they are used by mount and unmount and when changing a filesystem's
- * mountpoint.
- *
- * zfs_is_mounted()
- * zfs_mount()
- * zfs_unmount()
- * zfs_unmountall()
- *
- * This file also contains the functions used to manage sharing filesystems via
- * NFS and iSCSI:
- *
- * zfs_is_shared()
- * zfs_share()
- * zfs_unshare()
- *
- * zfs_is_shared_nfs()
- * zfs_share_nfs()
- * zfs_unshare_nfs()
- * zfs_unshareall_nfs()
- * zfs_is_shared_iscsi()
- * zfs_share_iscsi()
- * zfs_unshare_iscsi()
- *
- * The following functions are available for pool consumers, and will
- * mount/unmount and share/unshare all datasets within pool:
- *
- * zpool_enable_datasets()
- * zpool_disable_datasets()
- */
-
-#include <dirent.h>
-#include <dlfcn.h>
-#include <errno.h>
-#include <libgen.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <zone.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-
-#include <libzfs.h>
-
-#include "libzfs_impl.h"
-
-static int (*iscsitgt_zfs_share)(const char *);
-static int (*iscsitgt_zfs_unshare)(const char *);
-static int (*iscsitgt_zfs_is_shared)(const char *);
-
-#pragma init(zfs_iscsi_init)
-static void
-zfs_iscsi_init(void)
-{
- void *libiscsitgt;
-
- if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
- RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
- (iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_share")) == NULL ||
- (iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_unshare")) == NULL ||
- (iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_is_shared")) == NULL) {
- iscsitgt_zfs_share = NULL;
- iscsitgt_zfs_unshare = NULL;
- iscsitgt_zfs_is_shared = NULL;
- }
-}
-
-/*
- * Search the sharetab for the given mountpoint, returning true if it is found.
- */
-static boolean_t
-is_shared(libzfs_handle_t *hdl, const char *mountpoint)
-{
- char buf[MAXPATHLEN], *tab;
-
- if (hdl->libzfs_sharetab == NULL)
- return (0);
-
- (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
-
- while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
-
- /* the mountpoint is the first entry on each line */
- if ((tab = strchr(buf, '\t')) != NULL) {
- *tab = '\0';
- if (strcmp(buf, mountpoint) == 0)
- return (B_TRUE);
- }
- }
-
- return (B_FALSE);
-}
-
-#if 0
-/*
- * Returns true if the specified directory is empty. If we can't open the
- * directory at all, return true so that the mount can fail with a more
- * informative error message.
- */
-static boolean_t
-dir_is_empty(const char *dirname)
-{
- DIR *dirp;
- struct dirent64 *dp;
-
- if ((dirp = opendir(dirname)) == NULL)
- return (B_TRUE);
-
- while ((dp = readdir64(dirp)) != NULL) {
-
- if (strcmp(dp->d_name, ".") == 0 ||
- strcmp(dp->d_name, "..") == 0)
- continue;
-
- (void) closedir(dirp);
- return (B_FALSE);
- }
-
- (void) closedir(dirp);
- return (B_TRUE);
-}
-#endif
-
-/*
- * Checks to see if the mount is active. If the filesystem is mounted, we fill
- * in 'where' with the current mountpoint, and return 1. Otherwise, we return
- * 0.
- */
-boolean_t
-is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
-{
- struct mnttab search = { 0 }, entry;
-
- /*
- * Search for the entry in /etc/mnttab. We don't bother getting the
- * mountpoint, as we can just search for the special device. This will
- * also let us find mounts when the mountpoint is 'legacy'.
- */
- search.mnt_special = (char *)special;
- search.mnt_fstype = MNTTYPE_ZFS;
-
- rewind(zfs_hdl->libzfs_mnttab);
- if (getmntany(zfs_hdl->libzfs_mnttab, &entry, &search) != 0)
- return (B_FALSE);
-
- if (where != NULL)
- *where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
-
- return (B_TRUE);
-}
-
-boolean_t
-zfs_is_mounted(zfs_handle_t *zhp, char **where)
-{
- return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
-}
-
-/*
- * Returns true if the given dataset is mountable, false otherwise. Returns the
- * mountpoint in 'buf'.
- */
-static boolean_t
-zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
- zfs_source_t *source)
-{
- char sourceloc[ZFS_MAXNAMELEN];
- zfs_source_t sourcetype;
-
- if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
- return (B_FALSE);
-
- verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
- &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
-
- if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
- strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
- return (B_FALSE);
-
- if (!zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT))
- return (B_FALSE);
-
- if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
- getzoneid() == GLOBAL_ZONEID)
- return (B_FALSE);
-
- if (source)
- *source = sourcetype;
-
- return (B_TRUE);
-}
-
-/*
- * Mount the given filesystem.
- */
-int
-zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
-{
- struct stat buf;
- char mountpoint[ZFS_MAXPROPLEN];
- char mntopts[MNT_LINE_MAX];
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- if (options == NULL)
- mntopts[0] = '\0';
- else
- (void) strlcpy(mntopts, options, sizeof (mntopts));
-
- if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
- return (0);
-
- /* Create the directory if it doesn't already exist */
- if (lstat(mountpoint, &buf) != 0) {
- if (mkdirp(mountpoint, 0755) != 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "failed to create mountpoint"));
- return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
- dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
- mountpoint));
- }
- }
-
-#if 0 /* FreeBSD: overlay mounts are not checked. */
- /*
- * Determine if the mountpoint is empty. If so, refuse to perform the
- * mount. We don't perform this check if MS_OVERLAY is specified, which
- * would defeat the point. We also avoid this check if 'remount' is
- * specified.
- */
- if ((flags & MS_OVERLAY) == 0 &&
- strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
- !dir_is_empty(mountpoint)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "directory is not empty"));
- return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
- dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
- }
-#endif
-
- /* perform the mount */
- if (zmount(zfs_get_name(zhp), mountpoint, flags,
- MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
- /*
- * Generic errors are nasty, but there are just way too many
- * from mount(), and they're well-understood. We pick a few
- * common ones to improve upon.
- */
- if (errno == EBUSY)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "mountpoint or dataset is busy"));
- else
- zfs_error_aux(hdl, strerror(errno));
-
- return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
- dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
- zhp->zfs_name));
- }
-
- return (0);
-}
-
-/*
- * Unmount a single filesystem.
- */
-static int
-unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
-{
- if (unmount(mountpoint, flags) != 0) {
- zfs_error_aux(hdl, strerror(errno));
- return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
- dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
- mountpoint));
- }
-
- return (0);
-}
-
-/*
- * Unmount the given filesystem.
- */
-int
-zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
-{
- struct mnttab search = { 0 }, entry;
-
- /* check to see if need to unmount the filesystem */
- search.mnt_special = zhp->zfs_name;
- search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zhp->zfs_hdl->libzfs_mnttab);
- if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
-
- if (mountpoint == NULL)
- mountpoint = entry.mnt_mountp;
-
- /*
- * Unshare and unmount the filesystem
- */
- if (zfs_unshare_nfs(zhp, mountpoint) != 0 ||
- unmount_one(zhp->zfs_hdl, mountpoint, flags) != 0)
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Unmount this filesystem and any children inheriting the mountpoint property.
- * To do this, just act like we're changing the mountpoint property, but don't
- * remount the filesystems afterwards.
- */
-int
-zfs_unmountall(zfs_handle_t *zhp, int flags)
-{
- prop_changelist_t *clp;
- int ret;
-
- clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, flags);
- if (clp == NULL)
- return (-1);
-
- ret = changelist_prefix(clp);
- changelist_free(clp);
-
- return (ret);
-}
-
-boolean_t
-zfs_is_shared(zfs_handle_t *zhp)
-{
- if (ZFS_IS_VOLUME(zhp))
- return (zfs_is_shared_iscsi(zhp));
-
- return (zfs_is_shared_nfs(zhp, NULL));
-}
-
-int
-zfs_share(zfs_handle_t *zhp)
-{
- if (ZFS_IS_VOLUME(zhp))
- return (zfs_share_iscsi(zhp));
-
- return (zfs_share_nfs(zhp));
-}
-
-int
-zfs_unshare(zfs_handle_t *zhp)
-{
- if (ZFS_IS_VOLUME(zhp))
- return (zfs_unshare_iscsi(zhp));
-
- return (zfs_unshare_nfs(zhp, NULL));
-}
-
-/*
- * Check to see if the filesystem is currently shared.
- */
-boolean_t
-zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
-{
- char *mountpoint;
-
- if (!zfs_is_mounted(zhp, &mountpoint))
- return (B_FALSE);
-
- if (is_shared(zhp->zfs_hdl, mountpoint)) {
- if (where != NULL)
- *where = mountpoint;
- else
- free(mountpoint);
- return (B_TRUE);
- } else {
- free(mountpoint);
- return (B_FALSE);
- }
-}
-
-/*
- * Share the given filesystem according to the options in 'sharenfs'. We rely
- * on share(1M) to the dirty work for us.
- */
-int
-zfs_share_nfs(zfs_handle_t *zhp)
-{
- char mountpoint[ZFS_MAXPROPLEN];
- char shareopts[ZFS_MAXPROPLEN];
- char buf[MAXPATHLEN];
- FILE *fp;
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
- return (0);
-
- /*
- * Return success if there are no share options.
- */
- if (zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts),
- NULL, NULL, 0, B_FALSE) != 0 ||
- strcmp(shareopts, "off") == 0)
- return (0);
-
- /*
- * If the 'zoned' property is set, then zfs_is_mountable() will have
- * already bailed out if we are in the global zone. But local
- * zones cannot be NFS servers, so we ignore it for local zones as well.
- */
- if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
- return (0);
-
-#ifdef __FreeBSD__
- {
- int error;
-
- if (strcmp(shareopts, "on") == 0)
- error = fsshare(ZFS_EXPORTS_PATH, mountpoint, "");
- else
- error = fsshare(ZFS_EXPORTS_PATH, mountpoint, shareopts);
- if (error != 0) {
- zfs_error_aux(hdl, "%s", strerror(error));
- (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
- dgettext(TEXT_DOMAIN, "cannot share '%s'"),
- zfs_get_name(zhp));
- return (-1);
- }
- }
-#else
- /*
- * Invoke the share(1M) command. We always do this, even if it's
- * currently shared, as the options may have changed.
- */
- if (strcmp(shareopts, "on") == 0)
- (void) snprintf(buf, sizeof (buf), "/usr/sbin/share "
- "-F nfs \"%s\" 2>&1", mountpoint);
- else
- (void) snprintf(buf, sizeof (buf), "/usr/sbin/share "
- "-F nfs -o \"%s\" \"%s\" 2>&1", shareopts,
- mountpoint);
-
- if ((fp = popen(buf, "r")) == NULL)
- return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
- dgettext(TEXT_DOMAIN, "cannot share '%s'"),
- zfs_get_name(zhp)));
-
- /*
- * share(1M) should only produce output if there is some kind
- * of error. All output begins with "share_nfs: ", so we trim
- * this off to get to the real error.
- */
- if (fgets(buf, sizeof (buf), fp) != NULL) {
- char *colon = strchr(buf, ':');
-
- while (buf[strlen(buf) - 1] == '\n')
- buf[strlen(buf) - 1] = '\0';
-
- if (colon != NULL)
- zfs_error_aux(hdl, colon + 2);
-
- (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
- dgettext(TEXT_DOMAIN, "cannot share '%s'"),
- zfs_get_name(zhp));
-
- verify(pclose(fp) != 0);
- return (-1);
- }
-
- verify(pclose(fp) == 0);
-#endif
-
- return (0);
-}
-
-/*
- * Unshare a filesystem by mountpoint.
- */
-static int
-unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint)
-{
- char buf[MAXPATHLEN];
- FILE *fp;
-
-#ifdef __FreeBSD__
- {
- int error;
-
- error = fsunshare(ZFS_EXPORTS_PATH, mountpoint);
- if (error != 0) {
- zfs_error_aux(hdl, "%s", strerror(error));
- return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
- dgettext(TEXT_DOMAIN,
- "cannot unshare '%s'"), name));
- }
- }
-#else
- (void) snprintf(buf, sizeof (buf),
- "/usr/sbin/unshare \"%s\" 2>&1",
- mountpoint);
-
- if ((fp = popen(buf, "r")) == NULL)
- return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
- dgettext(TEXT_DOMAIN,
- "cannot unshare '%s'"), name));
-
- /*
- * unshare(1M) should only produce output if there is
- * some kind of error. All output begins with "unshare
- * nfs: ", so we trim this off to get to the real error.
- */
- if (fgets(buf, sizeof (buf), fp) != NULL) {
- char *colon = strchr(buf, ':');
-
- while (buf[strlen(buf) - 1] == '\n')
- buf[strlen(buf) - 1] = '\0';
-
- if (colon != NULL)
- zfs_error_aux(hdl, colon + 2);
-
- verify(pclose(fp) != 0);
-
- return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
- dgettext(TEXT_DOMAIN,
- "cannot unshare '%s'"), name));
- }
-
- verify(pclose(fp) == 0);
-#endif
-
- return (0);
-}
-
-/*
- * Unshare the given filesystem.
- */
-int
-zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
-{
- struct mnttab search = { 0 }, entry;
-
- /* check to see if need to unmount the filesystem */
- search.mnt_special = (char *)zfs_get_name(zhp);
- search.mnt_fstype = MNTTYPE_ZFS;
- rewind(zhp->zfs_hdl->libzfs_mnttab);
- if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
- getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
-
- if (mountpoint == NULL)
- mountpoint = entry.mnt_mountp;
-
- if (is_shared(zhp->zfs_hdl, mountpoint) &&
- unshare_one(zhp->zfs_hdl, zhp->zfs_name, mountpoint) != 0)
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Same as zfs_unmountall(), but for NFS unshares.
- */
-int
-zfs_unshareall_nfs(zfs_handle_t *zhp)
-{
- prop_changelist_t *clp;
- int ret;
-
- clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0);
- if (clp == NULL)
- return (-1);
-
- ret = changelist_unshare(clp);
- changelist_free(clp);
-
- return (ret);
-}
-
-/*
- * Remove the mountpoint associated with the current dataset, if necessary.
- * We only remove the underlying directory if:
- *
- * - The mountpoint is not 'none' or 'legacy'
- * - The mountpoint is non-empty
- * - The mountpoint is the default or inherited
- * - The 'zoned' property is set, or we're in a local zone
- *
- * Any other directories we leave alone.
- */
-void
-remove_mountpoint(zfs_handle_t *zhp)
-{
- char mountpoint[ZFS_MAXPROPLEN];
- zfs_source_t source;
-
- if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
- &source))
- return;
-
- if (source == ZFS_SRC_DEFAULT ||
- source == ZFS_SRC_INHERITED) {
- /*
- * Try to remove the directory, silently ignoring any errors.
- * The filesystem may have since been removed or moved around,
- * and this error isn't really useful to the administrator in
- * any way.
- */
- (void) rmdir(mountpoint);
- }
-}
-
-boolean_t
-zfs_is_shared_iscsi(zfs_handle_t *zhp)
-{
- return (iscsitgt_zfs_is_shared != NULL &&
- iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
-}
-
-int
-zfs_share_iscsi(zfs_handle_t *zhp)
-{
- char shareopts[ZFS_MAXPROPLEN];
- const char *dataset = zhp->zfs_name;
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- /*
- * Return success if there are no share options.
- */
- if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
- strcmp(shareopts, "off") == 0)
- return (0);
-
-/* We don't support iSCSI on FreeBSD yet. */
-#ifdef TODO
- if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0)
- return (zfs_error_fmt(hdl, EZFS_SHAREISCSIFAILED,
- dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
-#endif
-
- return (0);
-}
-
-int
-zfs_unshare_iscsi(zfs_handle_t *zhp)
-{
- const char *dataset = zfs_get_name(zhp);
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-/* We don't support iSCSI on FreeBSD yet. */
-#ifdef TODO
- /*
- * Return if the volume is not shared
- */
- if (!zfs_is_shared_iscsi(zhp))
- return (0);
-
- /*
- * If this fails with ENODEV it indicates that zvol wasn't shared so
- * we should return success in that case.
- */
- if (iscsitgt_zfs_unshare == NULL ||
- (iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV))
- return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
- dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
-#endif
-
- return (0);
-}
-
-typedef struct mount_cbdata {
- zfs_handle_t **cb_datasets;
- int cb_used;
- int cb_alloc;
-} mount_cbdata_t;
-
-static int
-mount_cb(zfs_handle_t *zhp, void *data)
-{
- mount_cbdata_t *cbp = data;
-
- if (!(zfs_get_type(zhp) & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) {
- zfs_close(zhp);
- return (0);
- }
-
- if (cbp->cb_alloc == cbp->cb_used) {
- void *ptr;
-
- if ((ptr = zfs_realloc(zhp->zfs_hdl,
- cbp->cb_datasets, cbp->cb_alloc * sizeof (void *),
- cbp->cb_alloc * 2 * sizeof (void *))) == NULL)
- return (-1);
- cbp->cb_datasets = ptr;
-
- cbp->cb_alloc *= 2;
- }
-
- cbp->cb_datasets[cbp->cb_used++] = zhp;
-
- return (zfs_iter_children(zhp, mount_cb, cbp));
-}
-
-static int
-dataset_cmp(const void *a, const void *b)
-{
- zfs_handle_t **za = (zfs_handle_t **)a;
- zfs_handle_t **zb = (zfs_handle_t **)b;
- char mounta[MAXPATHLEN];
- char mountb[MAXPATHLEN];
- boolean_t gota, gotb;
-
- if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
- sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
- if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
- sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
-
- if (gota && gotb)
- return (strcmp(mounta, mountb));
-
- if (gota)
- return (-1);
- if (gotb)
- return (1);
-
- return (strcmp(zfs_get_name(a), zfs_get_name(b)));
-}
-
-/*
- * Mount and share all datasets within the given pool. This assumes that no
- * datasets within the pool are currently mounted. Because users can create
- * complicated nested hierarchies of mountpoints, we first gather all the
- * datasets and mountpoints within the pool, and sort them by mountpoint. Once
- * we have the list of all filesystems, we iterate over them in order and mount
- * and/or share each one.
- */
-#pragma weak zpool_mount_datasets = zpool_enable_datasets
-int
-zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
-{
- mount_cbdata_t cb = { 0 };
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- zfs_handle_t *zfsp;
- int i, ret = -1;
-
- /*
- * Gather all datasets within the pool.
- */
- if ((cb.cb_datasets = zfs_alloc(hdl, 4 * sizeof (void *))) == NULL)
- return (-1);
- cb.cb_alloc = 4;
-
- if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_ANY)) == NULL)
- goto out;
-
- cb.cb_datasets[0] = zfsp;
- cb.cb_used = 1;
-
- if (zfs_iter_children(zfsp, mount_cb, &cb) != 0)
- goto out;
-
- /*
- * Sort the datasets by mountpoint.
- */
- qsort(cb.cb_datasets, cb.cb_used, sizeof (void *), dataset_cmp);
-
- /*
- * And mount all the datasets.
- */
- ret = 0;
- for (i = 0; i < cb.cb_used; i++) {
- if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0 ||
- zfs_share(cb.cb_datasets[i]) != 0)
- ret = -1;
- }
-
-out:
- for (i = 0; i < cb.cb_used; i++)
- zfs_close(cb.cb_datasets[i]);
- free(cb.cb_datasets);
-
- return (ret);
-}
-
-
-static int
-zvol_cb(const char *dataset, void *data)
-{
- libzfs_handle_t *hdl = data;
- zfs_handle_t *zhp;
-
- /*
- * Ignore snapshots and ignore failures from non-existant datasets.
- */
- if (strchr(dataset, '@') != NULL ||
- (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
- return (0);
-
- (void) zfs_unshare_iscsi(zhp);
-
- zfs_close(zhp);
-
- return (0);
-}
-
-static int
-mountpoint_compare(const void *a, const void *b)
-{
- const char *mounta = *((char **)a);
- const char *mountb = *((char **)b);
-
- return (strcmp(mountb, mounta));
-}
-
-/*
- * Unshare and unmount all datasets within the given pool. We don't want to
- * rely on traversing the DSL to discover the filesystems within the pool,
- * because this may be expensive (if not all of them are mounted), and can fail
- * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
- * gather all the filesystems that are currently mounted.
- */
-#pragma weak zpool_unmount_datasets = zpool_disable_datasets
-int
-zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
-{
- int used, alloc;
- struct statfs *sfs;
- size_t namelen;
- char **mountpoints = NULL;
- zfs_handle_t **datasets = NULL;
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- int i, j, n;
- int ret = -1;
- int flags = (force ? MS_FORCE : 0);
-
- /*
- * First unshare all zvols.
- */
- if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
- return (-1);
-
- namelen = strlen(zhp->zpool_name);
-
- used = alloc = 0;
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
- return (-1);
- }
- for (j = 0; j < n; j++) {
- /*
- * Ignore non-ZFS entries.
- */
- if (strcmp(sfs[j].f_fstypename, MNTTYPE_ZFS) != 0)
- continue;
-
- /*
- * Ignore filesystems not within this pool.
- */
- if (strncmp(sfs[j].f_mntfromname, zhp->zpool_name, namelen) != 0 ||
- (sfs[j].f_mntfromname[namelen] != '/' &&
- sfs[j].f_mntfromname[namelen] != '\0'))
- continue;
-
- /*
- * At this point we've found a filesystem within our pool. Add
- * it to our growing list.
- */
- if (used == alloc) {
- if (alloc == 0) {
- if ((mountpoints = zfs_alloc(hdl,
- 8 * sizeof (void *))) == NULL)
- goto out;
-
- if ((datasets = zfs_alloc(hdl,
- 8 * sizeof (void *))) == NULL)
- goto out;
-
- alloc = 8;
- } else {
- void *ptr;
-
- if ((ptr = zfs_realloc(hdl, mountpoints,
- alloc * sizeof (void *),
- alloc * 2 * sizeof (void *))) == NULL)
- goto out;
- mountpoints = ptr;
-
- if ((ptr = zfs_realloc(hdl, datasets,
- alloc * sizeof (void *),
- alloc * 2 * sizeof (void *))) == NULL)
- goto out;
- datasets = ptr;
-
- alloc *= 2;
- }
- }
-
- if ((mountpoints[used] = zfs_strdup(hdl,
- sfs[j].f_mntonname)) == NULL)
- goto out;
-
- /*
- * This is allowed to fail, in case there is some I/O error. It
- * is only used to determine if we need to remove the underlying
- * mountpoint, so failure is not fatal.
- */
- datasets[used] = make_dataset_handle(hdl, sfs[j].f_mntfromname);
-
- used++;
- }
-
- /*
- * At this point, we have the entire list of filesystems, so sort it by
- * mountpoint.
- */
- qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
-
- /*
- * Walk through and first unshare everything.
- */
- for (i = 0; i < used; i++) {
- if (is_shared(hdl, mountpoints[i]) &&
- unshare_one(hdl, mountpoints[i], mountpoints[i]) != 0)
- goto out;
- }
-
- /*
- * Now unmount everything, removing the underlying directories as
- * appropriate.
- */
- for (i = 0; i < used; i++) {
- if (unmount_one(hdl, mountpoints[i], flags) != 0)
- goto out;
- }
-
- for (i = 0; i < used; i++) {
- if (datasets[i])
- remove_mountpoint(datasets[i]);
- }
-
- ret = 0;
-out:
- for (i = 0; i < used; i++) {
- if (datasets[i])
- zfs_close(datasets[i]);
- free(mountpoints[i]);
- }
- free(datasets);
- free(mountpoints);
-
- return (ret);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
deleted file mode 100644
index 8580837..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ /dev/null
@@ -1,2055 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <devid.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zio.h>
-#include <strings.h>
-#include <umem.h>
-
-#include "zfs_namecheck.h"
-#include "zfs_prop.h"
-#include "libzfs_impl.h"
-
-/*
- * Validate the given pool name, optionally putting an extended error message in
- * 'buf'.
- */
-static boolean_t
-zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
-{
- namecheck_err_t why;
- char what;
- int ret;
-
- ret = pool_namecheck(pool, &why, &what);
-
- /*
- * The rules for reserved pool names were extended at a later point.
- * But we need to support users with existing pools that may now be
- * invalid. So we only check for this expanded set of names during a
- * create (or import), and only in userland.
- */
- if (ret == 0 && !isopen &&
- (strncmp(pool, "mirror", 6) == 0 ||
- strncmp(pool, "raidz", 5) == 0 ||
- strncmp(pool, "spare", 5) == 0)) {
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "name is reserved"));
- return (B_FALSE);
- }
-
-
- if (ret != 0) {
- if (hdl != NULL) {
- switch (why) {
- case NAME_ERR_TOOLONG:
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "name is too long"));
- break;
-
- case NAME_ERR_INVALCHAR:
- zfs_error_aux(hdl,
- dgettext(TEXT_DOMAIN, "invalid character "
- "'%c' in pool name"), what);
- break;
-
- case NAME_ERR_NOLETTER:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "name must begin with a letter"));
- break;
-
- case NAME_ERR_RESERVED:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "name is reserved"));
- break;
-
- case NAME_ERR_DISKLIKE:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool name is reserved"));
- break;
-
- case NAME_ERR_LEADING_SLASH:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "leading slash in name"));
- break;
-
- case NAME_ERR_EMPTY_COMPONENT:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "empty component in name"));
- break;
-
- case NAME_ERR_TRAILING_SLASH:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "trailing slash in name"));
- break;
-
- case NAME_ERR_MULTIPLE_AT:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "multiple '@' delimiters in name"));
- break;
-
- }
- }
- return (B_FALSE);
- }
-
- return (B_TRUE);
-}
-
-static int
-zpool_get_all_props(zpool_handle_t *zhp)
-{
- zfs_cmd_t zc = { 0 };
- libzfs_handle_t *hdl = zhp->zpool_hdl;
-
- (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-
- if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
- return (-1);
-
- while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
- if (errno == ENOMEM) {
- if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- } else {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- }
-
- if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
- zcmd_free_nvlists(&zc);
- return (-1);
- }
-
- zcmd_free_nvlists(&zc);
-
- return (0);
-}
-
-/*
- * Open a handle to the given pool, even if the pool is currently in the FAULTED
- * state.
- */
-zpool_handle_t *
-zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
-{
- zpool_handle_t *zhp;
- boolean_t missing;
-
- /*
- * Make sure the pool name is valid.
- */
- if (!zpool_name_valid(hdl, B_TRUE, pool)) {
- (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"),
- pool);
- return (NULL);
- }
-
- if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
- return (NULL);
-
- zhp->zpool_hdl = hdl;
- (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
-
- if (zpool_refresh_stats(zhp, &missing) != 0) {
- zpool_close(zhp);
- return (NULL);
- }
-
- if (missing) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "no such pool"));
- (void) zfs_error_fmt(hdl, EZFS_NOENT,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"),
- pool);
- zpool_close(zhp);
- return (NULL);
- }
-
- return (zhp);
-}
-
-/*
- * Like the above, but silent on error. Used when iterating over pools (because
- * the configuration cache may be out of date).
- */
-int
-zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
-{
- zpool_handle_t *zhp;
- boolean_t missing;
-
- if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
- return (-1);
-
- zhp->zpool_hdl = hdl;
- (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
-
- if (zpool_refresh_stats(zhp, &missing) != 0) {
- zpool_close(zhp);
- return (-1);
- }
-
- if (missing) {
- zpool_close(zhp);
- *ret = NULL;
- return (0);
- }
-
- *ret = zhp;
- return (0);
-}
-
-/*
- * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
- * state.
- */
-zpool_handle_t *
-zpool_open(libzfs_handle_t *hdl, const char *pool)
-{
- zpool_handle_t *zhp;
-
- if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
- return (NULL);
-
- if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
- (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
- zpool_close(zhp);
- return (NULL);
- }
-
- return (zhp);
-}
-
-/*
- * Close the handle. Simply frees the memory associated with the handle.
- */
-void
-zpool_close(zpool_handle_t *zhp)
-{
- if (zhp->zpool_config)
- nvlist_free(zhp->zpool_config);
- if (zhp->zpool_old_config)
- nvlist_free(zhp->zpool_old_config);
- if (zhp->zpool_props)
- nvlist_free(zhp->zpool_props);
- free(zhp);
-}
-
-/*
- * Return the name of the pool.
- */
-const char *
-zpool_get_name(zpool_handle_t *zhp)
-{
- return (zhp->zpool_name);
-}
-
-/*
- * Return the GUID of the pool.
- */
-uint64_t
-zpool_get_guid(zpool_handle_t *zhp)
-{
- uint64_t guid;
-
- verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
- return (guid);
-}
-
-/*
- * Return the version of the pool.
- */
-uint64_t
-zpool_get_version(zpool_handle_t *zhp)
-{
- uint64_t version;
-
- verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
- &version) == 0);
-
- return (version);
-}
-
-/*
- * Return the amount of space currently consumed by the pool.
- */
-uint64_t
-zpool_get_space_used(zpool_handle_t *zhp)
-{
- nvlist_t *nvroot;
- vdev_stat_t *vs;
- uint_t vsc;
-
- verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &vsc) == 0);
-
- return (vs->vs_alloc);
-}
-
-/*
- * Return the total space in the pool.
- */
-uint64_t
-zpool_get_space_total(zpool_handle_t *zhp)
-{
- nvlist_t *nvroot;
- vdev_stat_t *vs;
- uint_t vsc;
-
- verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &vsc) == 0);
-
- return (vs->vs_space);
-}
-
-/*
- * Return the alternate root for this pool, if any.
- */
-int
-zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
-{
- zfs_cmd_t zc = { 0 };
-
- (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
- zc.zc_value[0] == '\0')
- return (-1);
-
- (void) strlcpy(buf, zc.zc_value, buflen);
-
- return (0);
-}
-
-/*
- * Return the state of the pool (ACTIVE or UNAVAILABLE)
- */
-int
-zpool_get_state(zpool_handle_t *zhp)
-{
- return (zhp->zpool_state);
-}
-
-/*
- * Create the named pool, using the provided vdev list. It is assumed
- * that the consumer has already validated the contents of the nvlist, so we
- * don't have to worry about error semantics.
- */
-int
-zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
- const char *altroot)
-{
- zfs_cmd_t zc = { 0 };
- char msg[1024];
-
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot create '%s'"), pool);
-
- if (!zpool_name_valid(hdl, B_FALSE, pool))
- return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
-
- if (altroot != NULL && altroot[0] != '/')
- return (zfs_error_fmt(hdl, EZFS_BADPATH,
- dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
-
- if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
- return (-1);
-
- (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
-
- if (altroot != NULL)
- (void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
-
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
- zcmd_free_nvlists(&zc);
-
- switch (errno) {
- case EBUSY:
- /*
- * This can happen if the user has specified the same
- * device multiple times. We can't reliably detect this
- * until we try to add it and see we already have a
- * label.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more vdevs refer to the same device"));
- return (zfs_error(hdl, EZFS_BADDEV, msg));
-
- case EOVERFLOW:
- /*
- * This occurs when one of the devices is below
- * SPA_MINDEVSIZE. Unfortunately, we can't detect which
- * device was the problem device since there's no
- * reliable way to determine device size from userland.
- */
- {
- char buf[64];
-
- zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
-
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more devices is less than the "
- "minimum size (%s)"), buf);
- }
- return (zfs_error(hdl, EZFS_BADDEV, msg));
-
- case ENOSPC:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more devices is out of space"));
- return (zfs_error(hdl, EZFS_BADDEV, msg));
-
- default:
- return (zpool_standard_error(hdl, errno, msg));
- }
- }
-
- zcmd_free_nvlists(&zc);
-
- /*
- * If this is an alternate root pool, then we automatically set the
- * mountpoint of the root dataset to be '/'.
- */
- if (altroot != NULL) {
- zfs_handle_t *zhp;
-
- verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
- verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
- "/") == 0);
-
- zfs_close(zhp);
- }
-
- return (0);
-}
-
-/*
- * Destroy the given pool. It is up to the caller to ensure that there are no
- * datasets left in the pool.
- */
-int
-zpool_destroy(zpool_handle_t *zhp)
-{
- zfs_cmd_t zc = { 0 };
- zfs_handle_t *zfp = NULL;
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
-
- if (zhp->zpool_state == POOL_STATE_ACTIVE &&
- (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
- ZFS_TYPE_FILESYSTEM)) == NULL)
- return (-1);
-
- if (zpool_remove_zvol_links(zhp) != 0)
- return (-1);
-
- (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot destroy '%s'"), zhp->zpool_name);
-
- if (errno == EROFS) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more devices is read only"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- } else {
- (void) zpool_standard_error(hdl, errno, msg);
- }
-
- if (zfp)
- zfs_close(zfp);
- return (-1);
- }
-
- if (zfp) {
- remove_mountpoint(zfp);
- zfs_close(zfp);
- }
-
- return (0);
-}
-
-/*
- * Add the given vdevs to the pool. The caller must have already performed the
- * necessary verification to ensure that the vdev specification is well-formed.
- */
-int
-zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
-{
- zfs_cmd_t zc = { 0 };
- int ret;
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- char msg[1024];
- nvlist_t **spares;
- uint_t nspares;
-
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot add to '%s'"), zhp->zpool_name);
-
- if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
- nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
- "upgraded to add hot spares"));
- return (zfs_error(hdl, EZFS_BADVERSION, msg));
- }
-
- if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
- return (-1);
- (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
- switch (errno) {
- case EBUSY:
- /*
- * This can happen if the user has specified the same
- * device multiple times. We can't reliably detect this
- * until we try to add it and see we already have a
- * label.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "one or more vdevs refer to the same device"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- break;
-
- case EOVERFLOW:
- /*
- * This occurrs when one of the devices is below
- * SPA_MINDEVSIZE. Unfortunately, we can't detect which
- * device was the problem device since there's no
- * reliable way to determine device size from userland.
- */
- {
- char buf[64];
-
- zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
-
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "device is less than the minimum "
- "size (%s)"), buf);
- }
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- break;
-
- case ENOTSUP:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool must be upgraded to add raidz2 vdevs"));
- (void) zfs_error(hdl, EZFS_BADVERSION, msg);
- break;
-
- case EDOM:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "root pool can not have concatenated devices"));
- (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
- break;
-
- default:
- (void) zpool_standard_error(hdl, errno, msg);
- }
-
- ret = -1;
- } else {
- ret = 0;
- }
-
- zcmd_free_nvlists(&zc);
-
- return (ret);
-}
-
-/*
- * Export the pool from the system.  It is the caller's job to make sure no
- * dataset of the pool is still mounted.
- *
- * The pool's zvol links are removed first; if that fails, -1 is returned and
- * no export is attempted.  Returns 0 when the export ioctl succeeds, and the
- * result of zpool_standard_error_fmt() otherwise.
- */
-int
-zpool_export(zpool_handle_t *zhp)
-{
-	libzfs_handle_t *lib = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-
-	if (zpool_remove_zvol_links(zhp) != 0)
-		return (-1);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	if (ioctl(lib->libzfs_fd, ZFS_IOC_POOL_EXPORT, &cmd) == 0)
-		return (0);
-
-	return (zpool_standard_error_fmt(lib, errno,
-	    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
-	    zhp->zpool_name));
-}
-
-/*
- * Import the given pool using the known configuration. The configuration
- * should have come from zpool_find_import(). The 'newname' and 'altroot'
- * parameters control whether the pool is imported with a different name or with
- * an alternate root, respectively.
- *
- * A non-NULL 'newname' must pass zpool_name_valid() and a non-NULL 'altroot'
- * must begin with '/'; otherwise the result of zfs_error_fmt() is returned
- * before any ioctl is attempted.
- *
- * Returns -1 if the configuration cannot be written into the command, if the
- * import ioctl fails, or if the imported pool cannot be reopened. When the
- * pool is reopened, the result of zpool_create_zvol_links() is returned;
- * otherwise 0.
- */
-int
-zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
- const char *altroot)
-{
- zfs_cmd_t zc = { 0 };
- char *thename;
- char *origname;
- int ret;
-
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &origname) == 0);
-
- if (newname != NULL) {
- if (!zpool_name_valid(hdl, B_FALSE, newname))
- return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
- dgettext(TEXT_DOMAIN, "cannot import '%s'"),
- newname));
- thename = (char *)newname; /* cast drops the const qualifier */
- } else {
- thename = origname; /* import under the name stored in the config */
- }
-
- if (altroot != NULL && altroot[0] != '/')
- return (zfs_error_fmt(hdl, EZFS_BADPATH,
- dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
- altroot));
-
- (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
-
- if (altroot != NULL)
- (void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
- else
- zc.zc_value[0] = '\0';
-
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &zc.zc_guid) == 0);
-
- if (zcmd_write_src_nvlist(hdl, &zc, config, NULL) != 0)
- return (-1);
-
- ret = 0;
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
- char desc[1024];
- if (newname == NULL)
- (void) snprintf(desc, sizeof (desc),
- dgettext(TEXT_DOMAIN, "cannot import '%s'"),
- thename);
- else
- (void) snprintf(desc, sizeof (desc),
- dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
- origname, thename);
-
- /*
- * NOTE(review): errno is examined only after the dgettext() and
- * snprintf() calls above; confirm they cannot disturb it, or
- * latch errno immediately after the ioctl.
- */
- switch (errno) {
- case ENOTSUP:
- /*
- * Unsupported version.
- */
- (void) zfs_error(hdl, EZFS_BADVERSION, desc);
- break;
-
- case EINVAL:
- (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
- break;
-
- default:
- (void) zpool_standard_error(hdl, errno, desc);
- }
-
- ret = -1;
- } else {
- zpool_handle_t *zhp;
- /*
- * This should never fail, but play it safe anyway.
- */
- if (zpool_open_silent(hdl, thename, &zhp) != 0) {
- ret = -1;
- } else if (zhp != NULL) {
- ret = zpool_create_zvol_links(zhp);
- zpool_close(zhp);
- }
- }
-
- zcmd_free_nvlists(&zc); /* release what zcmd_write_src_nvlist() attached to zc */
- return (ret);
-}
-
-/*
- * Scrub the pool.
- *
- * 'type' is handed to the kernel in zc_cookie.  Returns 0 when the ioctl
- * succeeds.  On failure EBUSY is reported as EZFS_RESILVERING and every other
- * errno goes through zpool_standard_error(); the value returned is whatever
- * the chosen error routine returns.
- */
-int
-zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
-{
-	zfs_cmd_t zc = { 0 };
-	char msg[1024];
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	int err;
-
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	zc.zc_cookie = type;
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
-		return (0);
-
-	/* Latch errno before dgettext()/snprintf() get a chance to change it. */
-	err = errno;
-
-	(void) snprintf(msg, sizeof (msg),
-	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
-
-	if (err == EBUSY)
-		return (zfs_error(hdl, EZFS_RESILVERING, msg));
-
-	return (zpool_standard_error(hdl, err, msg));
-}
-
-/*
- * Recursively search the vdev tree rooted at 'nv' and return the nvlist of
- * the first matching vdev, or NULL when nothing matches.
- *
- * When 'search' is NULL the match is by 'guid', and only vdevs that carry
- * ZPOOL_CONFIG_NOT_PRESENT are compared. When 'search' is non-NULL the match
- * is by ZPOOL_CONFIG_PATH.
- *
- * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
- * spare; but FALSE if it's an INUSE spare. This routine itself only ever
- * stores B_TRUE (when the match is found under ZPOOL_CONFIG_SPARES); the
- * caller is expected to preset B_FALSE.
- */
-static nvlist_t *
-vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
- boolean_t *avail_spare)
-{
- uint_t c, children;
- nvlist_t **child;
- uint64_t theguid, present;
- char *path;
- uint64_t wholedisk = 0;
- nvlist_t *ret;
-
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
-
- if (search == NULL &&
- nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
- /*
- * If the device has never been present since import, the only
- * reliable way to match the vdev is by GUID.
- */
- if (theguid == guid)
- return (nv);
- } else if (search != NULL &&
- nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk); /* wholedisk stays 0 when the key is absent */
- if (wholedisk) {
- /*
- * For whole disks, the internal path has 's0', but the
- * path passed in by the user doesn't.
- */
- if (strlen(search) == strlen(path) - 2 &&
- strncmp(search, path, strlen(search)) == 0)
- return (nv);
- } else if (strcmp(search, path) == 0) {
- return (nv);
- }
- }
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
- return (NULL); /* no children array: the spares array below is not consulted */
-
- for (c = 0; c < children; c++)
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
- avail_spare)) != NULL)
- return (ret);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
- &child, &children) == 0) {
- for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
- avail_spare)) != NULL) {
- *avail_spare = B_TRUE;
- return (ret);
- }
- }
- }
-
- return (NULL);
-}
-
-/*
- * Locate the vdev named by 'path' in the pool's vdev tree.
- *
- * 'path' is interpreted in one of three ways: a string that parses completely
- * as a non-zero decimal number is treated as a GUID; a string that does not
- * start with '/' is prefixed with _PATH_DEV; anything else is used verbatim.
- *
- * '*avail_spare' is reset to B_FALSE before the search.  Returns whatever
- * vdev_to_nvlist_iter() returns for the root of the tree.
- */
-nvlist_t *
-zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
-{
-	char devpath[MAXPATHLEN];
-	const char *needle;
-	char *tail;
-	nvlist_t *root;
-	uint64_t guid;
-
-	guid = strtoull(path, &tail, 10);
-
-	if (guid != 0 && *tail == '\0') {
-		/* purely numeric: search by GUID only */
-		needle = NULL;
-	} else if (path[0] == '/') {
-		needle = path;
-	} else {
-		(void) snprintf(devpath, sizeof (devpath), "%s%s", _PATH_DEV,
-		    path);
-		needle = devpath;
-	}
-
-	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
-	    &root) == 0);
-
-	*avail_spare = B_FALSE;
-
-	return (vdev_to_nvlist_iter(root, needle, guid, avail_spare));
-}
-
-/*
- * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
- *
- * Only the ZPOOL_CONFIG_SPARES array of the pool's root vdev is consulted;
- * when that array is absent the answer is B_FALSE.
- */
-static boolean_t
-is_spare(zpool_handle_t *zhp, uint64_t guid)
-{
-	uint64_t spare_guid;
-	nvlist_t *nvroot;
-	nvlist_t **spares;
-	uint_t nspares;
-	uint_t i;	/* unsigned, to match nspares */
-
-	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
-
-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) != 0)
-		return (B_FALSE);
-
-	for (i = 0; i < nspares; i++) {
-		verify(nvlist_lookup_uint64(spares[i],
-		    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
-		if (guid == spare_guid)
-			return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Bring the specified vdev online.
- *
- * 'path' is resolved with zpool_find_vdev().  An unknown device is reported as
- * EZFS_NODEVICE and a spare (available or in use) as EZFS_ISSPARE.  Returns 0
- * when the ioctl succeeds, otherwise the result of the error routine used.
- */
-int
-zpool_vdev_online(zpool_handle_t *zhp, const char *path)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-	boolean_t unused_spare;
-	nvlist_t *vdev;
-	char errmsg[1024];
-
-	(void) snprintf(errmsg, sizeof (errmsg),
-	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	vdev = zpool_find_vdev(zhp, path, &unused_spare);
-	if (vdev == NULL)
-		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-
-	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
-	    &cmd.zc_guid) == 0);
-
-	if (unused_spare || is_spare(zhp, cmd.zc_guid) == B_TRUE)
-		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &cmd) != 0)
-		return (zpool_standard_error(hdl, errno, errmsg));
-
-	return (0);
-}
-
-/*
- * Take the specified vdev offline.
- *
- * 'istmp' is passed to the kernel in zc_cookie.  An unknown device is reported
- * as EZFS_NODEVICE and a spare as EZFS_ISSPARE.  If the ioctl fails with
- * EBUSY the device has no other replicas (EZFS_NOREPLICAS); any other errno
- * goes through zpool_standard_error().  Returns 0 on success.
- */
-int
-zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-	boolean_t unused_spare;
-	nvlist_t *vdev;
-	char errmsg[1024];
-
-	(void) snprintf(errmsg, sizeof (errmsg),
-	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	vdev = zpool_find_vdev(zhp, path, &unused_spare);
-	if (vdev == NULL)
-		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-
-	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
-	    &cmd.zc_guid) == 0);
-
-	if (unused_spare || is_spare(zhp, cmd.zc_guid) == B_TRUE)
-		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
-
-	cmd.zc_cookie = istmp;
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &cmd) == 0)
-		return (0);
-
-	/* EBUSY: there are no other replicas of this device. */
-	if (errno == EBUSY)
-		return (zfs_error(hdl, EZFS_NOREPLICAS, errmsg));
-
-	return (zpool_standard_error(hdl, errno, errmsg));
-}
-
-/*
- * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
- * a hot spare.
- *
- * The tree under 'search' is walked looking for a vdev of type
- * VDEV_TYPE_SPARE with exactly two children whose child number 'which' is
- * 'tgt' (compared by pointer).
- */
-static boolean_t
-is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
-{
-	nvlist_t **kids;
-	uint_t nkids, k;
-	char *vtype;
-
-	/* A vdev without children can neither match nor be descended into. */
-	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &kids,
-	    &nkids) != 0)
-		return (B_FALSE);
-
-	verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &vtype) == 0);
-
-	if (strcmp(vtype, VDEV_TYPE_SPARE) == 0 &&
-	    nkids == 2 && kids[which] == tgt)
-		return (B_TRUE);
-
-	for (k = 0; k < nkids; k++) {
-		if (is_replacing_spare(kids[k], tgt, which))
-			return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Attach new_disk (fully described by nvroot) to old_disk.
- * If 'replacing' is specified, the new disk will replace the old one.
- *
- * 'nvroot' must contain exactly one child vdev. Returns 0 when the attach
- * ioctl succeeds. Failures detected before the ioctl return the result of
- * zfs_error() (or -1 if nvroot cannot be written into the command); a failed
- * ioctl is reported through the error routines and returns -1.
- */
-int
-zpool_vdev_attach(zpool_handle_t *zhp,
- const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
-{
- zfs_cmd_t zc = { 0 };
- char msg[1024];
- int ret;
- nvlist_t *tgt;
- boolean_t avail_spare;
- uint64_t val;
- char *path;
- nvlist_t **child;
- uint_t children;
- nvlist_t *config_root;
- libzfs_handle_t *hdl = zhp->zpool_hdl;
-
- if (replacing)
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot replace %s with %s"), old_disk, new_disk);
- else
- (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
- "cannot attach %s to %s"), new_disk, old_disk);
-
- (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
-
- if (avail_spare)
- return (zfs_error(hdl, EZFS_ISSPARE, msg));
-
- verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
- zc.zc_cookie = replacing; /* tells the kernel whether this is a replace */
-
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0 || children != 1) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "new device must be a single disk"));
- return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
- }
-
- verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
- ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
-
- /*
- * If the target is a hot spare that has been swapped in, we can only
- * replace it with another hot spare.
- */
- if (replacing &&
- nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
- nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
- (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
- !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "can only be replaced by another hot spare"));
- return (zfs_error(hdl, EZFS_BADTARGET, msg));
- }
-
- /*
- * If we are attempting to replace a spare, it cannot be applied to an
- * already spared device.
- */
- if (replacing &&
- nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
- zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
- is_replacing_spare(config_root, tgt, 0)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "device has already been replaced with a spare"));
- return (zfs_error(hdl, EZFS_BADTARGET, msg));
- }
-
- if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
- return (-1);
-
- ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
-
- zcmd_free_nvlists(&zc); /* NOTE(review): runs before errno is examined below -- confirm it cannot change errno */
-
- if (ret == 0)
- return (0);
-
- switch (errno) {
- case ENOTSUP:
- /*
- * Can't attach to or replace this type of vdev.
- */
- if (replacing)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "cannot replace a replacing device"));
- else
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "can only attach to mirrors and top-level "
- "disks"));
- (void) zfs_error(hdl, EZFS_BADTARGET, msg);
- break;
-
- case EINVAL:
- /*
- * The new device must be a single disk.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "new device must be a single disk"));
- (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
- break;
-
- case EBUSY:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
- new_disk);
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- break;
-
- case EOVERFLOW:
- /*
- * The new device is too small.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "device is too small"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- break;
-
- case EDOM:
- /*
- * The new device has a different alignment requirement.
- */
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "devices have different sector alignment"));
- (void) zfs_error(hdl, EZFS_BADDEV, msg);
- break;
-
- case ENAMETOOLONG:
- /*
- * The resulting top-level vdev spec won't fit in the label.
- */
- (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
- break;
-
- default:
- (void) zpool_standard_error(hdl, errno, msg);
- }
-
- return (-1);
-}
-
-/*
- * Detach the specified device.
- *
- * An unknown device is reported as EZFS_NODEVICE and an available spare as
- * EZFS_ISSPARE.  Returns 0 when the ioctl succeeds and -1 after reporting the
- * error when it fails.
- */
-int
-zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-	boolean_t unused_spare;
-	nvlist_t *vdev;
-	char errmsg[1024];
-	int err;
-
-	(void) snprintf(errmsg, sizeof (errmsg),
-	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	vdev = zpool_find_vdev(zhp, path, &unused_spare);
-	if (vdev == NULL)
-		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-
-	if (unused_spare)
-		return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
-
-	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
-	    &cmd.zc_guid) == 0);
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &cmd) == 0)
-		return (0);
-
-	err = errno;
-	if (err == ENOTSUP) {
-		/* Can't detach from this type of vdev. */
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
-		    "applicable to mirror and replacing vdevs"));
-		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, errmsg);
-	} else if (err == EBUSY) {
-		/* There are no other replicas of this device. */
-		(void) zfs_error(hdl, EZFS_NOREPLICAS, errmsg);
-	} else {
-		(void) zpool_standard_error(hdl, err, errmsg);
-	}
-
-	return (-1);
-}
-
-/*
- * Remove the given device. Currently, this is supported only for hot spares.
- *
- * Both an unknown device and a device that is not an available spare are
- * reported as EZFS_NODEVICE.  Returns 0 when the ioctl succeeds, otherwise
- * the result of the error routine used.
- */
-int
-zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-	boolean_t unused_spare;
-	nvlist_t *vdev;
-	char errmsg[1024];
-
-	(void) snprintf(errmsg, sizeof (errmsg),
-	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	vdev = zpool_find_vdev(zhp, path, &unused_spare);
-	if (vdev == NULL)
-		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-
-	if (!unused_spare) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "only inactive hot spares can be removed"));
-		return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-	}
-
-	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
-	    &cmd.zc_guid) == 0);
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &cmd) != 0)
-		return (zpool_standard_error(hdl, errno, errmsg));
-
-	return (0);
-}
-
-/*
- * Clear the errors for the pool, or the particular device if specified.
- *
- * With a NULL 'path' the command carries no vdev GUID.  With a non-NULL
- * 'path' the device is looked up first: an unknown device is reported as
- * EZFS_NODEVICE and an available spare as EZFS_ISSPARE.  Returns 0 when the
- * ioctl succeeds, otherwise the result of the error routine used.
- */
-int
-zpool_clear(zpool_handle_t *zhp, const char *path)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	const char *subject = (path != NULL) ? path : zhp->zpool_name;
-	zfs_cmd_t cmd = { 0 };
-	boolean_t unused_spare;
-	nvlist_t *vdev;
-	char errmsg[1024];
-
-	(void) snprintf(errmsg, sizeof (errmsg),
-	    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
-	    subject);
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	if (path != NULL) {
-		vdev = zpool_find_vdev(zhp, path, &unused_spare);
-		if (vdev == NULL)
-			return (zfs_error(hdl, EZFS_NODEVICE, errmsg));
-
-		if (unused_spare)
-			return (zfs_error(hdl, EZFS_ISSPARE, errmsg));
-
-		verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
-		    &cmd.zc_guid) == 0);
-	}
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &cmd) != 0)
-		return (zpool_standard_error(hdl, errno, errmsg));
-
-	return (0);
-}
-
-/*
- * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
- * hierarchy.
- *
- * 'cb' is invoked with the path of every non-directory entry, relative to
- * ZVOL_FULL_DEV_DIR, together with 'data'. A non-zero return from 'cb' stops
- * the walk and becomes the return value. Returns 0 when the walk completes
- * or when there is nothing to walk (ZVOL_FULL_DEV_DIR or the pool entry does
- * not exist, or the pool entry is not a directory), and -1 on any other
- * failure.
- *
- * 'paths' is used as a stack of pending entries with 'curr' as the index of
- * its top element; it doubles in size whenever it fills up.
- */
-int
-zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
- void *data)
-{
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- char (*paths)[MAXPATHLEN];
- char path[MAXPATHLEN];
- size_t size = 4; /* current capacity of 'paths', in entries */
- int curr, fd, base, ret = 0; /* NOTE(review): 'fd' is never used in this function */
- DIR *dirp;
- struct dirent *dp;
- struct stat st;
-
- if ((base = open(ZVOL_FULL_DEV_DIR, O_RDONLY)) < 0)
- return (errno == ENOENT ? 0 : -1);
-
- snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
- zhp->zpool_name);
- if (stat(path, &st) != 0) {
- int err = errno; /* saved because close() below may change errno */
- (void) close(base);
- return (err == ENOENT ? 0 : -1);
- }
-
- /*
- * Oddly this wasn't a directory -- ignore that failure since we
- * know there are no links lower in the (non-existent) hierarchy.
- */
- if (!S_ISDIR(st.st_mode)) {
- (void) close(base);
- return (0);
- }
-
- if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
- (void) close(base);
- return (-1);
- }
-
- (void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
- curr = 0;
-
- while (curr >= 0) {
- snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
- paths[curr]);
- if (lstat(path, &st) != 0)
- goto err;
-
- if (S_ISDIR(st.st_mode)) {
- if ((dirp = opendir(path)) == NULL) {
- goto err;
- }
-
- while ((dp = readdir(dirp)) != NULL) {
- if (dp->d_name[0] == '.')
- continue;
-
- if (curr + 1 == size) {
- paths = zfs_realloc(hdl, paths,
- size * sizeof (paths[0]),
- size * 2 * sizeof (paths[0]));
- if (paths == NULL) {
- (void) closedir(dirp);
- goto err;
- }
-
- size *= 2;
- }
-
- /*
- * Copy the directory's path into the next slot, turn
- * the current slot into "<dir>/<entry>", then advance
- * so the directory's own path is on top again.
- */
- (void) strlcpy(paths[curr + 1], paths[curr],
- sizeof (paths[curr + 1]));
- (void) strlcat(paths[curr], "/",
- sizeof (paths[curr]));
- (void) strlcat(paths[curr], dp->d_name,
- sizeof (paths[curr]));
- curr++;
- }
-
- (void) closedir(dirp);
-
- } else {
- if ((ret = cb(paths[curr], data)) != 0)
- break;
- }
-
- curr--; /* pop: either the directory just expanded or the entry just visited */
- }
-
- free(paths);
- (void) close(base);
-
- return (ret);
-
-err:
- free(paths);
- (void) close(base);
- return (-1);
-}
-
-typedef struct zvol_cb { /* NOTE(review): no use of this type is visible in this part of the file */
- zpool_handle_t *zcb_pool; /* presumably the pool being processed -- confirm against users */
- boolean_t zcb_create; /* presumably selects link creation vs. removal -- confirm against users */
-} zvol_cb_t;
-
-/*
- * zfs_iter_children() callback: create the link for 'zhp' if it is a volume,
- * recurse into its children, then close 'zhp'.  The result of
- * zvol_create_link() is ignored; the return value is that of the recursive
- * zfs_iter_children() call.  'data' is unused.
- */
-/*ARGSUSED*/
-static int
-do_zvol_create(zfs_handle_t *zhp, void *data)
-{
-	int rc;
-
-	if (ZFS_IS_VOLUME(zhp)) {
-		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
-	}
-
-	rc = zfs_iter_children(zhp, do_zvol_create, NULL);
-	zfs_close(zhp);
-
-	return (rc);
-}
-
-/*
- * Iterate over all zvols in the pool and make any necessary minor nodes.
- *
- * Returns 0 without doing anything when no dataset handle can be made for the
- * pool (the pool is unavailable); otherwise returns the result of
- * zfs_iter_children().
- */
-int
-zpool_create_zvol_links(zpool_handle_t *zhp)
-{
-	zfs_handle_t *root;
-	int rc;
-
-	root = make_dataset_handle(zhp->zpool_hdl, zhp->zpool_name);
-	if (root == NULL)
-		return (0);
-
-	rc = zfs_iter_children(root, do_zvol_create, NULL);
-	zfs_close(root);
-
-	return (rc);
-}
-
-/*
- * zpool_iter_zvol() callback: remove the link for 'dataset'.  'data' is the
- * zpool_handle_t whose library handle is used for the removal.
- */
-static int
-do_zvol_remove(const char *dataset, void *data)
-{
-	zpool_handle_t *pool = data;
-	libzfs_handle_t *lib = pool->zpool_hdl;
-
-	return (zvol_remove_link(lib, dataset));
-}
-
-/*
- * Remove the minor nodes of every zvol in the pool.  The zvols are found by
- * examining the /dev links rather than the pool itself, so that a corrupted
- * pool doesn't impede this operation.  Returns the result of
- * zpool_iter_zvol().
- */
-int
-zpool_remove_zvol_links(zpool_handle_t *zhp)
-{
-	int rc;
-
-	rc = zpool_iter_zvol(zhp, do_zvol_remove, zhp);
-
-	return (rc);
-}
-
-/*
- * Convert from a devid string to a path.
- *
- * Returns a strdup()ed copy of the first device name that
- * devid_deviceid_to_nmlist() reports under "/dev" -- the caller must free()
- * it -- or NULL if the devid cannot be decoded, cannot be resolved, or memory
- * is exhausted.
- */
-static char *
-devid_to_path(char *devid_str)
-{
-	ddi_devid_t devid;
-	char *minor;
-	char *path;
-	devid_nmlist_t *list = NULL;
-	int ret;
-
-	if (devid_str_decode(devid_str, &devid, &minor) != 0)
-		return (NULL);
-
-	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
-
-	devid_str_free(minor);
-	devid_free(devid);
-
-	if (ret != 0)
-		return (NULL);
-
-	/*
-	 * Release the name list on both outcomes of strdup(); returning
-	 * early on failure would leak it.
-	 */
-	path = strdup(list[0].devname);
-	devid_free_nmlist(list);
-
-	return (path);
-}
-
-/*
- * Convert from a path to a devid string.
- *
- * Opens 'path' read-only and encodes its devid together with its minor name.
- * Returns the string produced by devid_str_encode(), or NULL if the device
- * cannot be opened or either lookup fails.
- */
-static char *
-path_to_devid(const char *path)
-{
-	ddi_devid_t id;
-	char *minor_name = NULL;
-	char *encoded = NULL;
-	int fd;
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return (NULL);
-
-	if (devid_get(fd, &id) == 0) {
-		if (devid_get_minor_name(fd, &minor_name) == 0)
-			encoded = devid_str_encode(id, minor_name);
-		if (minor_name != NULL)
-			devid_str_free(minor_name);
-		devid_free(id);
-	}
-
-	(void) close(fd);
-
-	return (encoded);
-}
-
-/*
- * Issue the necessary ioctl() to update the stored path value for the vdev. We
- * ignore any failure here, since a common case is for an unprivileged user to
- * type 'zpool status', and we'll display the correct information anyway.
- *
- * 'nv' supplies the vdev GUID and 'path' the new path.
- */
-static void
-set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
-{
-	zfs_cmd_t zc = { 0 };
-
-	/*
-	 * strlcpy() always NUL-terminates; strncpy() with the full buffer
-	 * size would leave an over-long name or path unterminated.
-	 */
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
-	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
-	    &zc.zc_guid) == 0);
-
-	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
-}
-
-/*
- * Given a vdev, return the name to display in iostat. If the vdev is marked
- * ZPOOL_CONFIG_NOT_PRESENT we use its GUID; if it has a path, we use that,
- * stripping off any leading _PATH_DEV; if not, we use the type (with the
- * parity level appended for raidz). We also check if this is a whole disk,
- * in which case we strip off the trailing 's0' slice name.
- *
- * This routine is also responsible for identifying when disks have been
- * reconfigured in a new location. The kernel will have opened the device by
- * devid, but the path will still refer to the old location. To catch this, we
- * first do a path -> devid translation (which is fast for the common case). If
- * the devid matches, we're done. If not, we do a reverse devid -> path
- * translation and issue the appropriate ioctl() to update the path of the vdev.
- * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
- * of these checks.
- *
- * Returns a string obtained from zfs_strdup(), or NULL if that fails.
- */
-char *
-zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
-{
-	char *path, *devid;
-	uint64_t value;
-	char buf[64];
-
-	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
-	    &value) == 0) {
-		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
-		    &value) == 0);
-		(void) snprintf(buf, sizeof (buf), "%llu",
-		    (u_longlong_t)value);
-		path = buf;
-	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-
-		if (zhp != NULL &&
-		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
-			/*
-			 * Determine if the current path is correct.
-			 */
-			char *newdevid = path_to_devid(path);
-
-			if (newdevid == NULL ||
-			    strcmp(devid, newdevid) != 0) {
-				char *newpath;
-
-				if ((newpath = devid_to_path(devid)) != NULL) {
-					/*
-					 * Update the path appropriately.
-					 */
-					set_path(zhp, nv, newpath);
-					if (nvlist_add_string(nv,
-					    ZPOOL_CONFIG_PATH, newpath) == 0)
-						verify(nvlist_lookup_string(nv,
-						    ZPOOL_CONFIG_PATH,
-						    &path) == 0);
-					free(newpath);
-				}
-			}
-
-			if (newdevid)
-				devid_str_free(newdevid);
-		}
-
-		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
-			path += sizeof(_PATH_DEV) - 1;
-
-		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-		    &value) == 0 && value) {
-			char *tmp = zfs_strdup(hdl, path);
-			size_t len;
-
-			if (tmp == NULL)
-				return (NULL);
-
-			/*
-			 * Drop the two-character slice suffix.  A name shorter
-			 * than that is returned unchanged rather than writing
-			 * in front of the buffer.
-			 */
-			len = strlen(tmp);
-			if (len >= 2)
-				tmp[len - 2] = '\0';
-			return (tmp);
-		}
-	} else {
-		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
-
-		/*
-		 * If it's a raidz device, we need to stick in the parity level.
-		 */
-		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
-			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
-			    &value) == 0);
-			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
-			    (u_longlong_t)value);
-			path = buf;
-		}
-	}
-
-	return (zfs_strdup(hdl, path));
-}
-
-/*
- * qsort() comparison routine: orders two zbookmark_t objects by a bytewise
- * comparison of their full contents.
- */
-static int
-zbookmark_compare(const void *a, const void *b)
-{
-	const size_t width = sizeof (zbookmark_t);
-
-	return (memcmp(a, b, width));
-}
-
-/*
- * Retrieve the persistent error log, uniquify the members, and return to the
- * caller.
- *
- * On success 0 is returned and '*nverrlistp' holds a newly allocated nvlist
- * with one entry per distinct (dataset, object) pair.  Returns -1 if the
- * bookmark buffer cannot be allocated or the ioctl fails for a reason other
- * than ENOMEM, and the result of no_memory() if building the list fails.
- */
-int
-zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
-{
-	zfs_cmd_t zc = { 0 };
-	uint64_t count;
-	zbookmark_t *zb = NULL;
-	uint64_t i;	/* same width as 'count', which it is compared to */
-
-	/*
-	 * Retrieve the raw error list from the kernel.  If the number of errors
-	 * has increased, allocate more space and continue until we get the
-	 * entire list.
-	 */
-	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
-	    &count) == 0);
-	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
-	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
-		return (-1);
-	zc.zc_nvlist_dst_size = count;
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	for (;;) {
-		int err;
-
-		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
-		    &zc) == 0)
-			break;
-
-		/* Latch errno first: free() is allowed to change it. */
-		err = errno;
-		free((void *)(uintptr_t)zc.zc_nvlist_dst);
-		if (err != ENOMEM)
-			return (-1);
-
-		/* The kernel reported the size it needs; retry with it. */
-		count = zc.zc_nvlist_dst_size;
-		if ((zc.zc_nvlist_dst = (uintptr_t)
-		    zfs_alloc(zhp->zpool_hdl, count *
-		    sizeof (zbookmark_t))) == (uintptr_t)NULL)
-			return (-1);
-	}
-
-	/*
-	 * Sort the resulting bookmarks.  This is a little confusing due to the
-	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
-	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
-	 * _not_ copied as part of the process.  So we point the start of our
-	 * array appropriately and decrement the total number of elements.
-	 */
-	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
-	    zc.zc_nvlist_dst_size;
-	count -= zc.zc_nvlist_dst_size;
-
-	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
-
-	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
-
-	/*
-	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
-	 */
-	for (i = 0; i < count; i++) {
-		nvlist_t *nv;
-
-		/* ignoring zb_blkid and zb_level for now */
-		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
-		    zb[i-1].zb_object == zb[i].zb_object)
-			continue;
-
-		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
-			goto nomem;
-		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
-		    zb[i].zb_objset) != 0) {
-			nvlist_free(nv);
-			goto nomem;
-		}
-		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
-		    zb[i].zb_object) != 0) {
-			nvlist_free(nv);
-			goto nomem;
-		}
-		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
-			nvlist_free(nv);
-			goto nomem;
-		}
-		nvlist_free(nv);
-	}
-
-	free((void *)(uintptr_t)zc.zc_nvlist_dst);
-	return (0);
-
-nomem:
-	free((void *)(uintptr_t)zc.zc_nvlist_dst);
-	return (no_memory(zhp->zpool_hdl));
-}
-
-/*
- * Upgrade a ZFS pool to the latest on-disk version.
- *
- * Returns 0 when the ioctl succeeds, otherwise the result of
- * zpool_standard_error_fmt().
- */
-int
-zpool_upgrade(zpool_handle_t *zhp)
-{
-	zfs_cmd_t zc = { 0 };
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-
-	/* Bounded copy, as used for zc_name throughout this file. */
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
-		return (zpool_standard_error_fmt(hdl, errno,
-		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
-		    zhp->zpool_name));
-
-	return (0);
-}
-
-/*
- * Log command history.
- *
- * 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
- * otherwise ('zfs').  'pool_create' is B_TRUE if we are logging the creation
- * of the pool; B_FALSE otherwise.  'path' is the pathname containing the
- * poolname.  'argc' and 'argv' are used to construct the command string.
- *
- * Arguments that would not fit in a HIS_MAX_RECORD_LEN buffer are dropped,
- * and a pool name longer than zc_name is truncated.  The result of the ioctl
- * is ignored.
- */
-void
-zpool_log_history(libzfs_handle_t *hdl, int argc, char **argv, const char *path,
-    boolean_t pool, boolean_t pool_create)
-{
-	char cmd_buf[HIS_MAX_RECORD_LEN];
-	char *dspath;
-	zfs_cmd_t zc = { 0 };
-	size_t namelen;
-	int i;
-
-	/* construct the command string */
-	(void) strlcpy(cmd_buf, pool ? "zpool" : "zfs", sizeof (cmd_buf));
-	for (i = 0; i < argc; i++) {
-		/*
-		 * Stop once the separator, the argument and the terminating
-		 * NUL would no longer fit ('>=' leaves room for the NUL).
-		 */
-		if (strlen(cmd_buf) + 1 + strlen(argv[i]) >= sizeof (cmd_buf))
-			break;
-		(void) strlcat(cmd_buf, " ", sizeof (cmd_buf));
-		(void) strlcat(cmd_buf, argv[i], sizeof (cmd_buf));
-	}
-
-	/* figure out the poolname: everything before the first '/' or '@' */
-	dspath = strpbrk(path, "/@");
-	if (dspath == NULL) {
-		(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
-	} else {
-		namelen = dspath - path;
-		if (namelen >= sizeof (zc.zc_name))
-			namelen = sizeof (zc.zc_name) - 1;
-		(void) memcpy(zc.zc_name, path, namelen);
-		zc.zc_name[namelen] = '\0';
-	}
-
-	zc.zc_history = (uint64_t)(uintptr_t)cmd_buf;
-	zc.zc_history_len = strlen(cmd_buf);
-
-	/* overloading zc_history_offset */
-	zc.zc_history_offset = pool_create;
-
-	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_LOG_HISTORY, &zc);
-}
-
-/*
- * Fetch a piece of a pool's command history with a single ioctl.
- *
- * 'buf' receives at most '*len' bytes, read starting at logical history
- * offset '*off'.
- *
- * On success 0 is returned, '*off' is the next logical offset to read from
- * and '*len' is the number of bytes actually placed in 'buf'.  On failure
- * the result of the error routine is returned and '*off' and '*len' are left
- * untouched.
- */
-static int
-get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-	zfs_cmd_t cmd = { 0 };
-
-	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
-
-	cmd.zc_history = (uint64_t)(uintptr_t)buf;
-	cmd.zc_history_len = *len;
-	cmd.zc_history_offset = *off;
-
-	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &cmd) == 0) {
-		*len = cmd.zc_history_len;
-		*off = cmd.zc_history_offset;
-		return (0);
-	}
-
-	switch (errno) {
-	case EPERM:
-		return (zfs_error_fmt(hdl, EZFS_PERM,
-		    dgettext(TEXT_DOMAIN,
-		    "cannot show history for pool '%s'"),
-		    zhp->zpool_name));
-	case ENOENT:
-		return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
-		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
-		    "'%s'"), zhp->zpool_name));
-	case ENOTSUP:
-		return (zfs_error_fmt(hdl, EZFS_BADVERSION,
-		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
-		    "'%s', pool must be upgraded"), zhp->zpool_name));
-	default:
-		return (zpool_standard_error_fmt(hdl, errno,
-		    dgettext(TEXT_DOMAIN,
-		    "cannot get history for '%s'"), zhp->zpool_name));
-	}
-}
-
-/*
- * Process the buffer of nvlists, unpacking and storing each nvlist record
- * into 'records'.  'leftover' is set to the number of bytes that weren't
- * processed as there wasn't a complete record.
- *
- * '*numrecords' counts the entries stored in '*records', which is grown with
- * realloc() as needed.  Returns 0 on success, or ENOMEM if a record cannot be
- * unpacked or the array cannot be grown; in the error case '*records' and
- * '*numrecords' still describe the records stored so far and '*leftover' is
- * not written.
- */
-static int
-zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
-    nvlist_t ***records, uint_t *numrecords)
-{
-	uint64_t reclen;
-	nvlist_t *nv;
-	uint_t i;
-
-	while (bytes_read > sizeof (reclen)) {
-
-		/* get length of packed record (stored as little endian) */
-		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
-			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
-
-		/*
-		 * Incomplete record.  Written as a subtraction so a huge
-		 * 'reclen' cannot wrap the comparison; bytes_read is known
-		 * to exceed sizeof (reclen) here.
-		 */
-		if (reclen > bytes_read - sizeof (reclen))
-			break;
-
-		/* unpack record */
-		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
-			return (ENOMEM);
-		bytes_read -= sizeof (reclen) + reclen;
-		buf += sizeof (reclen) + reclen;
-
-		/* add record to nvlist array */
-		(*numrecords)++;
-		if (ISP2(*numrecords + 1)) {
-			nvlist_t **grown;
-
-			grown = realloc(*records,
-			    *numrecords * 2 * sizeof (nvlist_t *));
-			if (grown == NULL) {
-				/*
-				 * The old array is still valid; undo the
-				 * count so it matches what is stored, and
-				 * drop the record that has no slot.
-				 */
-				(*numrecords)--;
-				nvlist_free(nv);
-				return (ENOMEM);
-			}
-			*records = grown;
-		}
-		(*records)[*numrecords - 1] = nv;
-	}
-
-	*leftover = bytes_read;
-	return (0);
-}
-
-#define HIS_BUF_LEN (128*1024)
-
-/*
- * Retrieve the command history of a pool.
- *
- * The history is read in HIS_BUF_LEN-sized pieces until a read yields no
- * bytes.  On success 0 is returned and '*nvhisp' is a newly allocated nvlist
- * holding every record as an nvlist array under ZPOOL_HIST_RECORD.  On
- * failure the error from get_history() or zpool_history_unpack() is returned
- * and '*nvhisp' is not written.
- */
-int
-zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
-{
-	char buf[HIS_BUF_LEN];
-	uint64_t off = 0;
-	nvlist_t **records = NULL;
-	uint_t numrecords = 0;
-	int err, i;
-
-	for (;;) {
-		uint64_t nread = sizeof (buf);
-		uint64_t unparsed;
-
-		err = get_history(zhp, buf, &off, &nread);
-		if (err != 0)
-			break;
-
-		/* if nothing else was read in, we're at EOF, just return */
-		if (nread == 0)
-			break;
-
-		err = zpool_history_unpack(buf, nread, &unparsed, &records,
-		    &numrecords);
-		if (err != 0)
-			break;
-
-		/* re-read the trailing partial record on the next pass */
-		off -= unparsed;
-	}
-
-	if (err == 0) {
-		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
-		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
-		    records, numrecords) == 0);
-	}
-
-	/* The individual records and the array holding them are released here in every case. */
-	for (i = 0; i < numrecords; i++)
-		nvlist_free(records[i]);
-	free(records);
-
-	return (err);
-}
-
-/*
- * Write a printable name for object 'obj' of dataset object 'dsobj' into
- * 'pathname' (at most 'len' bytes).
- *
- * The form depends on how much can be resolved:
- *   "<metadata>:<0xOBJ>"   when 'dsobj' is 0 (the MOS);
- *   "<0xDSOBJ>:<0xOBJ>"    when the dataset name cannot be looked up;
- *   "MNTPNT" + path        when the object's path resolves and the dataset
- *                          is mounted;
- *   "DSNAME:" + path       when the path resolves but it is not mounted;
- *   "DSNAME:<0xOBJ>"       when the object's path cannot be resolved.
- */
-void
-zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
-    char *pathname, size_t len)
-{
-	zfs_cmd_t zc = { 0 };
-	boolean_t mounted = B_FALSE;
-	char *mntpnt = NULL;
-	char dsname[MAXNAMELEN];
-
-	/*
-	 * The uint64_t values are cast to u_longlong_t wherever they are
-	 * printed with %llx, as the other formatted output in this file does,
-	 * so the argument type matches the conversion on every platform.
-	 */
-
-	if (dsobj == 0) {
-		/* special case for the MOS */
-		(void) snprintf(pathname, len, "<metadata>:<0x%llx>",
-		    (u_longlong_t)obj);
-		return;
-	}
-
-	/* get the dataset's name */
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	zc.zc_obj = dsobj;
-	if (ioctl(zhp->zpool_hdl->libzfs_fd,
-	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
-		/* just write out a path of two object numbers */
-		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
-		    (u_longlong_t)dsobj, (u_longlong_t)obj);
-		return;
-	}
-	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
-
-	/* find out if the dataset is mounted */
-	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
-
-	/* get the corrupted object's path */
-	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
-	zc.zc_obj = obj;
-	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
-	    &zc) == 0) {
-		if (mounted) {
-			(void) snprintf(pathname, len, "%s%s", mntpnt,
-			    zc.zc_value);
-		} else {
-			(void) snprintf(pathname, len, "%s:%s",
-			    dsname, zc.zc_value);
-		}
-	} else {
-		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
-		    (u_longlong_t)obj);
-	}
-	free(mntpnt);
-}
-
-/*
- * Set pool property 'propname' to 'propval' on the pool behind 'zhp'.
- *
- * Returns 0 on success.  On failure returns non-zero after recording an
- * error on the library handle (bad pool version, property retrieval
- * failure, out of memory, validation failure, or a failed ioctl).
- */
-int
-zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
-{
-	zfs_cmd_t zc = { 0 };
-	int ret = -1;
-	int saved_errno;
-	char errbuf[1024];
-	nvlist_t *nvl = NULL;
-	nvlist_t *realprops;
-
-	(void) snprintf(errbuf, sizeof (errbuf),
-	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
-	    zhp->zpool_name);
-
-	if (zpool_get_version(zhp) < ZFS_VERSION_BOOTFS) {
-		zfs_error_aux(zhp->zpool_hdl,
-		    dgettext(TEXT_DOMAIN, "pool must be "
-		    "upgraded to support pool properties"));
-		return (zfs_error(zhp->zpool_hdl, EZFS_BADVERSION, errbuf));
-	}
-
-	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
-		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
-
-	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
-		return (no_memory(zhp->zpool_hdl));
-
-	if (nvlist_add_string(nvl, propname, propval) != 0) {
-		/* the list was allocated above; do not leak it */
-		nvlist_free(nvl);
-		return (no_memory(zhp->zpool_hdl));
-	}
-
-	if ((realprops = zfs_validate_properties(zhp->zpool_hdl, ZFS_TYPE_POOL,
-	    zhp->zpool_name, nvl, 0, NULL, errbuf)) == NULL) {
-		nvlist_free(nvl);
-		return (-1);
-	}
-
-	/*
-	 * From here on 'nvl' refers to the validated list.
-	 * NOTE(review): this assumes zfs_validate_properties() returns a
-	 * separately allocated list owned by the caller -- confirm.
-	 */
-	nvlist_free(nvl);
-	nvl = realprops;
-
-	/*
-	 * Execute the corresponding ioctl() to set this property.
-	 */
-	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-
-	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl, NULL) != 0) {
-		nvlist_free(nvl);
-		return (-1);
-	}
-
-	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SET_PROPS, &zc);
-	/* capture errno before the cleanup calls get a chance to change it */
-	saved_errno = errno;
-	zcmd_free_nvlists(&zc);
-	nvlist_free(nvl);
-
-	if (ret)
-		(void) zpool_standard_error(zhp->zpool_hdl, saved_errno,
-		    errbuf);
-
-	return (ret);
-}
-
-/*
- * Fetch pool property 'prop' as a string into 'propbuf' ('proplen' bytes).
- * If 'srctype' is non-NULL it receives the source of the value.
- *
- * Only ZFS_PROP_NAME and ZFS_PROP_BOOTFS are handled.  Returns 0 on
- * success and non-zero on failure, including the case where the value
- * (plus its terminating NUL) does not fit in 'propbuf'.
- */
-int
-zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *propbuf,
-    size_t proplen, zfs_source_t *srctype)
-{
-	uint64_t value;
-	char msg[1024], *strvalue;
-	nvlist_t *nvp;
-	zfs_source_t src = ZFS_SRC_NONE;
-
-	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
-	    "cannot get property '%s'"), zpool_prop_to_name(prop));
-
-	if (zpool_get_version(zhp) < ZFS_VERSION_BOOTFS) {
-		zfs_error_aux(zhp->zpool_hdl,
-		    dgettext(TEXT_DOMAIN, "pool must be "
-		    "upgraded to support pool properties"));
-		return (zfs_error(zhp->zpool_hdl, EZFS_BADVERSION, msg));
-	}
-
-	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
-		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, msg));
-
-	/*
-	 * the "name" property is special cased
-	 */
-	if (!zfs_prop_valid_for_type(prop, ZFS_TYPE_POOL) &&
-	    prop != ZFS_PROP_NAME)
-		return (-1);
-
-	switch (prop) {
-	case ZFS_PROP_NAME:
-		(void) strlcpy(propbuf, zhp->zpool_name, proplen);
-		break;
-
-	case ZFS_PROP_BOOTFS:
-		if (nvlist_lookup_nvlist(zhp->zpool_props,
-		    zpool_prop_to_name(prop), &nvp) != 0) {
-			/* not set on the pool: fall back to the default */
-			strvalue = (char *)zfs_prop_default_string(prop);
-			if (strvalue == NULL)
-				strvalue = "-";
-			src = ZFS_SRC_DEFAULT;
-		} else {
-			VERIFY(nvlist_lookup_uint64(nvp,
-			    ZFS_PROP_SOURCE, &value) == 0);
-			src = value;
-			VERIFY(nvlist_lookup_string(nvp, ZFS_PROP_VALUE,
-			    &strvalue) == 0);
-		}
-		/*
-		 * The length check applies to both branches: a default
-		 * string must not overflow the caller's buffer either.
-		 */
-		if (strlen(strvalue) >= proplen)
-			return (-1);
-		(void) strlcpy(propbuf, strvalue, proplen);
-		break;
-
-	default:
-		return (-1);
-	}
-	if (srctype)
-		*srctype = src;
-	return (0);
-}
-
-/*
- * Pool flavour of zfs_get_proplist_common(): forwards its arguments with
- * the type fixed to ZFS_TYPE_POOL and returns that call's result.
- */
-int
-zpool_get_proplist(libzfs_handle_t *hdl, char *fields, zpool_proplist_t **listp)
-{
-	int rc;
-
-	rc = zfs_get_proplist_common(hdl, fields, listp, ZFS_TYPE_POOL);
-	return (rc);
-}
-
-
-/*
- * Expand the property list '*plp' for a pool and widen each non-fixed
- * entry's pl_width to fit the current value of its property on 'zhp'.
- * Returns -1 if the common expansion fails, 0 otherwise.
- */
-int
-zpool_expand_proplist(zpool_handle_t *zhp, zpool_proplist_t **plp)
-{
-	char value[ZFS_MAXPROPLEN];
-	zpool_proplist_t *pl;
-
-	if (zfs_expand_proplist_common(zhp->zpool_hdl, plp,
-	    ZFS_TYPE_POOL) != 0)
-		return (-1);
-
-	for (pl = *plp; pl != NULL; pl = pl->pl_next) {
-		/* fixed-width and unknown-property entries are left alone */
-		if (pl->pl_fixed || pl->pl_prop == ZFS_PROP_INVAL)
-			continue;
-
-		if (zpool_get_prop(zhp, pl->pl_prop, value, sizeof (value),
-		    NULL) != 0)
-			continue;
-
-		if (strlen(value) > pl->pl_width)
-			pl->pl_width = strlen(value);
-	}
-
-	return (0);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_status.c b/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
deleted file mode 100644
index 3eba97a..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This file contains the functions which analyze the status of a pool. This
- * includes both the status of an active pool and the status of exported
- * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of
- * the pool. This status is independent (to a certain degree) from the state of
- * the pool. A pool's state describes only whether or not it is capable of
- * providing the necessary fault tolerance for data. The status describes the
- * overall status of devices. A pool that is online can still have a device
- * that is experiencing errors.
- *
- * Only a subset of the possible faults can be detected using 'zpool status',
- * and not all possible errors correspond to a FMA message ID. The explanation
- * is left up to the caller, depending on whether it is a live pool or an
- * import.
- */
-
-#include <libzfs.h>
-#include <string.h>
-#include <unistd.h>
-#include "libzfs_impl.h"
-
-/*
- * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines
- * in libzfs.h. Note that there are some status results which go past the end
- * of this table, and hence have no associated message ID.
- *
- * The table is indexed directly by the zpool_status_t value (see
- * zpool_import_status()) and currently holds 11 entries.
- */
-static char *zfs_msgid_table[] = {
- "ZFS-8000-14",
- "ZFS-8000-2Q",
- "ZFS-8000-3C",
- "ZFS-8000-4J",
- "ZFS-8000-5E",
- "ZFS-8000-6X",
- "ZFS-8000-72",
- "ZFS-8000-8A",
- "ZFS-8000-9P",
- "ZFS-8000-A5",
- "ZFS-8000-EY"
-};
-
-/*
- * If the pool is active, a certain class of static errors is overridden by the
- * faults as analyzed by FMA. These faults have separate knowledge articles,
- * and the article referred to by 'zpool status' must match that indicated by
- * the syslog error message. We override missing data as well as corrupt pool.
- *
- * NOTE: this table has 10 entries, one fewer than zfs_msgid_table, so an
- * index that is valid for zfs_msgid_table is not necessarily valid here.
- */
-static char *zfs_msgid_table_active[] = {
- "ZFS-8000-14",
- "ZFS-8000-D3", /* overridden */
- "ZFS-8000-D3", /* overridden */
- "ZFS-8000-4J",
- "ZFS-8000-5E",
- "ZFS-8000-6X",
- "ZFS-8000-CS", /* overridden */
- "ZFS-8000-8A",
- "ZFS-8000-9P",
- "ZFS-8000-CS", /* overridden */
-};
-
-/* Number of entries in zfs_msgid_table (not in zfs_msgid_table_active). */
-#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
-
-/*
- * find_vdev_problem() predicate: non-zero when the vdev cannot be opened
- * and the auxiliary state says the open itself failed.  'errs' is unused.
- */
-/* ARGSUSED */
-static int
-vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
-{
-	if (state != VDEV_STATE_CANT_OPEN)
-		return (0);
-
-	return (aux == VDEV_AUX_OPEN_FAILED);
-}
-
-/*
- * find_vdev_problem() predicate: non-zero when the summed error count is
- * non-zero.  'state' and 'aux' are unused.
- */
-/* ARGSUSED */
-static int
-vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
-{
-	return (errs == 0 ? 0 : 1);
-}
-
-/*
- * find_vdev_problem() predicate: non-zero when the vdev cannot be opened,
- * whatever the reason.  'aux' and 'errs' are unused.
- */
-/* ARGSUSED */
-static int
-vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
-{
-	int cant_open = (state == VDEV_STATE_CANT_OPEN);
-
-	return (cant_open);
-}
-
-/*
- * find_vdev_problem() predicate: non-zero when the vdev is in the offline
- * state.  'aux' and 'errs' are unused.
- */
-/* ARGSUSED */
-static int
-vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
-{
-	int is_offline = (state == VDEV_STATE_OFFLINE);
-
-	return (is_offline);
-}
-
-/*
- * Walk the vdev tree rooted at 'vdev' and report whether 'func' flags any
- * leaf.  'func' is called with the leaf's state, its auxiliary state and
- * the sum of its read, write and checksum error counters.
- *
- * Returns B_TRUE as soon as one leaf is flagged, B_FALSE otherwise.
- */
-static boolean_t
-find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
-{
-	nvlist_t **child;
-	vdev_stat_t *vs;
-	uint_t c, children;
-	char *type;
-
-	/*
-	 * A 'replacing' vdev is skipped entirely: its errors are presumably
-	 * already being repaired, and the resilver is reported separately.
-	 */
-	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
-	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
-		return (B_FALSE);
-
-	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
-	    &children) != 0) {
-		/* no children: this is a leaf, so test its own statistics */
-		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
-		    (uint64_t **)&vs, &c) == 0);
-
-		if (func(vs->vs_state, vs->vs_aux,
-		    vs->vs_read_errors +
-		    vs->vs_write_errors +
-		    vs->vs_checksum_errors))
-			return (B_TRUE);
-
-		return (B_FALSE);
-	}
-
-	/* interior vdev: any flagged descendant is enough */
-	for (c = 0; c < children; c++) {
-		if (find_vdev_problem(child[c], func))
-			return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Pool health status.
- *
- * Several passes are made over the config and the first (most serious)
- * problem found is the one reported.  The checks run in this order:
- *
- *	- Pool last accessed by another host
- *	- Root vdev cannot be opened: newer on-disk version, bad GUID sum,
- *	  missing or broken devices, corrupt pool metadata
- *	- Persistent data errors (skipped when 'isimport' is set)
- *	- Missing or broken devices in a config that can still be opened
- *	- Devices showing errors (skipped when 'isimport' is set)
- *	- Offlined devices
- *	- A resilver in progress
- *	- An on-disk version older than ZFS_VERSION
- *
- * A pool may have several problems at once; only one status is returned.
- */
-static zpool_status_t
-check_status(nvlist_t *config, boolean_t isimport)
-{
-	nvlist_t *nvroot;
-	vdev_stat_t *vs;
-	uint_t vsc;
-	uint64_t nerr;
-	uint64_t version;
-	uint64_t stateval;
-	uint64_t hostid = 0;
-
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
-	    &version) == 0);
-	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvroot) == 0);
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &vsc) == 0);
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-	    &stateval) == 0);
-	/* the host id is optional; 'hostid' stays 0 when it is absent */
-	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
-
-	/*
-	 * Pool last accessed by another system.
-	 */
-	if (hostid != 0) {
-		if ((unsigned long)hostid != gethostid() &&
-		    stateval == POOL_STATE_ACTIVE)
-			return (ZPOOL_STATUS_HOSTID_MISMATCH);
-	}
-
-	/*
-	 * Everything in this group applies only when the root vdev could
-	 * not be opened.
-	 */
-	if (vs->vs_state == VDEV_STATE_CANT_OPEN) {
-		/* Newer on-disk version. */
-		if (vs->vs_aux == VDEV_AUX_VERSION_NEWER)
-			return (ZPOOL_STATUS_VERSION_NEWER);
-
-		/* Check that the config is complete. */
-		if (vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
-			return (ZPOOL_STATUS_BAD_GUID_SUM);
-
-		/* Missing devices in non-replicated config. */
-		if (find_vdev_problem(nvroot, vdev_missing))
-			return (ZPOOL_STATUS_MISSING_DEV_NR);
-
-		if (find_vdev_problem(nvroot, vdev_broken))
-			return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
-
-		/* Corrupted pool metadata */
-		if (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
-			return (ZPOOL_STATUS_CORRUPT_POOL);
-	}
-
-	/*
-	 * Persistent data errors.
-	 */
-	if (!isimport &&
-	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0 &&
-	    nerr != 0)
-		return (ZPOOL_STATUS_CORRUPT_DATA);
-
-	/*
-	 * Missing devices in a replicated config.
-	 */
-	if (find_vdev_problem(nvroot, vdev_missing))
-		return (ZPOOL_STATUS_MISSING_DEV_R);
-	if (find_vdev_problem(nvroot, vdev_broken))
-		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
-
-	/*
-	 * Devices with errors
-	 */
-	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
-		return (ZPOOL_STATUS_FAILING_DEV);
-
-	/*
-	 * Offlined devices
-	 */
-	if (find_vdev_problem(nvroot, vdev_offlined))
-		return (ZPOOL_STATUS_OFFLINE_DEV);
-
-	/*
-	 * Currently resilvering
-	 */
-	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
-		return (ZPOOL_STATUS_RESILVERING);
-
-	/*
-	 * Outdated, but usable, version
-	 */
-	if (version < ZFS_VERSION)
-		return (ZPOOL_STATUS_VERSION_OLDER);
-
-	return (ZPOOL_STATUS_OK);
-}
-
-/*
- * Return the health status of the active pool 'zhp'.  '*msgid' is set to
- * the matching entry of zfs_msgid_table_active, or to NULL when the status
- * has no entry in that table.
- */
-zpool_status_t
-zpool_get_status(zpool_handle_t *zhp, char **msgid)
-{
-	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
-	/*
-	 * Bound the lookup by the table actually being indexed.  NMSGID
-	 * counts zfs_msgid_table, which is longer than the active table,
-	 * so using it here would allow a read past the end of the array.
-	 */
-	size_t nactive = sizeof (zfs_msgid_table_active) /
-	    sizeof (zfs_msgid_table_active[0]);
-
-	if (ret >= nactive)
-		*msgid = NULL;
-	else
-		*msgid = zfs_msgid_table_active[ret];
-
-	return (ret);
-}
-
-/*
- * Return the health status described by an importable pool's 'config'.
- * '*msgid' is set to the matching zfs_msgid_table entry, or to NULL when
- * the status lies beyond the end of that table.
- */
-zpool_status_t
-zpool_import_status(nvlist_t *config, char **msgid)
-{
-	zpool_status_t status;
-
-	status = check_status(config, B_TRUE);
-	*msgid = (status >= NMSGID) ? NULL : zfs_msgid_table[status];
-
-	return (status);
-}
diff --git a/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
deleted file mode 100644
index c706126..0000000
--- a/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Internal utility routines for the ZFS library.
- */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <libintl.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h>
-#include <unistd.h>
-#include <sys/mnttab.h>
-#include <sys/mntent.h>
-#include <sys/types.h>
-
-#include <libzfs.h>
-
-#include "libzfs_impl.h"
-
-/*
- * Return the error code currently stored in the library handle.
- */
-int
-libzfs_errno(libzfs_handle_t *hdl)
-{
-	int code = hdl->libzfs_error;
-
-	return (code);
-}
-
-/*
- * Return the action string currently stored in the library handle.  The
- * pointer refers to storage inside 'hdl'; it is not a copy.
- */
-const char *
-libzfs_error_action(libzfs_handle_t *hdl)
-{
-	const char *action = hdl->libzfs_action;
-
-	return (action);
-}
-
-/*
- * Return a description of the error stored in the library handle.
- *
- * If an auxiliary description is present in hdl->libzfs_desc it is returned
- * as is.  Otherwise the EZFS_* code in hdl->libzfs_error is mapped to a
- * message looked up through dgettext().  A code that matches none of the
- * cases is asserted to be 0 and yields "no error".
- */
-const char *
-libzfs_error_description(libzfs_handle_t *hdl)
-{
- if (hdl->libzfs_desc[0] != '\0')
- return (hdl->libzfs_desc);
-
- switch (hdl->libzfs_error) {
- case EZFS_NOMEM:
- return (dgettext(TEXT_DOMAIN, "out of memory"));
- case EZFS_BADPROP:
- return (dgettext(TEXT_DOMAIN, "invalid property value"));
- case EZFS_PROPREADONLY:
- return (dgettext(TEXT_DOMAIN, "read only property"));
- case EZFS_PROPTYPE:
- return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
- "datasets of this type"));
- case EZFS_PROPNONINHERIT:
- return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
- case EZFS_PROPSPACE:
- return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
- case EZFS_BADTYPE:
- return (dgettext(TEXT_DOMAIN, "operation not applicable to "
- "datasets of this type"));
- case EZFS_BUSY:
- return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
- case EZFS_EXISTS:
- return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
- case EZFS_NOENT:
- return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
- case EZFS_BADSTREAM:
- return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
- case EZFS_DSREADONLY:
- return (dgettext(TEXT_DOMAIN, "dataset is read only"));
- case EZFS_VOLTOOBIG:
- return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
- "this system"));
- case EZFS_VOLHASDATA:
- return (dgettext(TEXT_DOMAIN, "volume has data"));
- case EZFS_INVALIDNAME:
- return (dgettext(TEXT_DOMAIN, "invalid name"));
- case EZFS_BADRESTORE:
- return (dgettext(TEXT_DOMAIN, "unable to restore to "
- "destination"));
- case EZFS_BADBACKUP:
- return (dgettext(TEXT_DOMAIN, "backup failed"));
- case EZFS_BADTARGET:
- return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
- case EZFS_NODEVICE:
- return (dgettext(TEXT_DOMAIN, "no such device in pool"));
- case EZFS_BADDEV:
- return (dgettext(TEXT_DOMAIN, "invalid device"));
- case EZFS_NOREPLICAS:
- return (dgettext(TEXT_DOMAIN, "no valid replicas"));
- case EZFS_RESILVERING:
- return (dgettext(TEXT_DOMAIN, "currently resilvering"));
- case EZFS_BADVERSION:
- return (dgettext(TEXT_DOMAIN, "unsupported version"));
- case EZFS_POOLUNAVAIL:
- return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
- case EZFS_DEVOVERFLOW:
- return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
- case EZFS_BADPATH:
- return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
- case EZFS_CROSSTARGET:
- return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
- "pools"));
- case EZFS_ZONED:
- return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
- case EZFS_MOUNTFAILED:
- return (dgettext(TEXT_DOMAIN, "mount failed"));
- case EZFS_UMOUNTFAILED:
- return (dgettext(TEXT_DOMAIN, "umount failed"));
- case EZFS_UNSHARENFSFAILED:
- return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
- case EZFS_SHARENFSFAILED:
- return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
- case EZFS_DEVLINKS:
- return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
- case EZFS_PERM:
- return (dgettext(TEXT_DOMAIN, "permission denied"));
- case EZFS_NOSPC:
- return (dgettext(TEXT_DOMAIN, "out of space"));
- case EZFS_IO:
- return (dgettext(TEXT_DOMAIN, "I/O error"));
- case EZFS_INTR:
- return (dgettext(TEXT_DOMAIN, "signal received"));
- case EZFS_ISSPARE:
- return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
- "spare"));
- case EZFS_INVALCONFIG:
- return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
- case EZFS_RECURSIVE:
- return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
- case EZFS_NOHISTORY:
- return (dgettext(TEXT_DOMAIN, "no history available"));
- case EZFS_UNSHAREISCSIFAILED:
- return (dgettext(TEXT_DOMAIN,
- "iscsitgtd failed request to unshare"));
- case EZFS_SHAREISCSIFAILED:
- return (dgettext(TEXT_DOMAIN,
- "iscsitgtd failed request to share"));
- case EZFS_POOLPROPS:
- return (dgettext(TEXT_DOMAIN, "failed to retrieve "
- "pool properties"));
- case EZFS_POOL_NOTSUP:
- return (dgettext(TEXT_DOMAIN, "operation not supported "
- "on this type of pool"));
- case EZFS_POOL_INVALARG:
- return (dgettext(TEXT_DOMAIN, "invalid argument for "
- "this pool operation"));
- case EZFS_NAMETOOLONG:
- return (dgettext(TEXT_DOMAIN, "dataset name is too long"));
- case EZFS_UNKNOWN:
- return (dgettext(TEXT_DOMAIN, "unknown error"));
- default:
- /* anything not listed above is expected to be "no error" (0) */
- assert(hdl->libzfs_error == 0);
- return (dgettext(TEXT_DOMAIN, "no error"));
- }
-}
-
-/*
- * Format an auxiliary error description into hdl->libzfs_desc and mark it
- * active, so that the next zfs_verror() call keeps it instead of clearing
- * it.
- */
-/*PRINTFLIKE2*/
-void
-zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
-	    fmt, args);
-	va_end(args);
-
-	hdl->libzfs_desc_active = 1;
-}
-
-/*
- * Record 'error' and the formatted action string on the handle.
- *
- * A description staged by zfs_error_aux() is kept (and its "active" flag
- * consumed); otherwise any stale description is cleared.  When printing is
- * enabled on the handle the error is written to stderr; EZFS_UNKNOWN then
- * aborts the program and EZFS_NOMEM exits with status 1.
- */
-static void
-zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
-{
-	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
-	    fmt, ap);
-	hdl->libzfs_error = error;
-
-	/* drop a leftover description unless one was just staged */
-	if (!hdl->libzfs_desc_active)
-		hdl->libzfs_desc[0] = '\0';
-	hdl->libzfs_desc_active = 0;
-
-	if (!hdl->libzfs_printerr)
-		return;
-
-	if (error == EZFS_UNKNOWN) {
-		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
-		    "error: %s\n"), libzfs_error_description(hdl));
-		abort();
-	}
-
-	(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
-	    libzfs_error_description(hdl));
-
-	if (error == EZFS_NOMEM)
-		exit(1);
-}
-
-/*
- * Record 'error' on the handle with the fixed action string 'msg'.  The
- * string goes through a "%s" format, so '%' characters in it are printed
- * literally.  Returns the result of zfs_error_fmt().
- */
-int
-zfs_error(libzfs_handle_t *hdl, int error, const char *msg)
-{
-	int rc;
-
-	rc = zfs_error_fmt(hdl, error, "%s", msg);
-	return (rc);
-}
-
-/*
- * printf-style variant of zfs_error(): record 'error' on the handle with
- * an action string built from 'fmt' and its arguments.  Always returns -1.
- */
-/*PRINTFLIKE3*/
-int
-zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	zfs_verror(hdl, error, fmt, args);
-	va_end(args);
-
-	return (-1);
-}
-
-/*
- * Handle the errno values that pools and datasets treat identically.
- *
- * EPERM/EACCES, EIO and EINTR are recorded as EZFS_PERM, EZFS_IO and
- * EZFS_INTR respectively and -1 is returned.  For any other value nothing
- * is recorded, 'ap' is left untouched and 0 is returned.
- */
-static int
-zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
-    va_list ap)
-{
-	int zerr;
-
-	if (error == EPERM || error == EACCES)
-		zerr = EZFS_PERM;
-	else if (error == EIO)
-		zerr = EZFS_IO;
-	else if (error == EINTR)
-		zerr = EZFS_INTR;
-	else
-		return (0);
-
-	zfs_verror(hdl, zerr, fmt, ap);
-	return (-1);
-}
-
-/*
- * Translate the errno value 'error' into a dataset-level libzfs error with
- * the fixed action string 'msg'.  Returns the result of
- * zfs_standard_error_fmt().
- */
-int
-zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
-{
-	int rc;
-
-	rc = zfs_standard_error_fmt(hdl, error, "%s", msg);
-	return (rc);
-}
-
-/*
- * Translate the errno value 'error' into a dataset-level libzfs error and
- * record it on the handle together with the formatted action string.
- *
- * Values shared with pools are handled by zfs_common_error(); the rest are
- * mapped below, and anything unrecognized becomes EZFS_UNKNOWN with the
- * strerror() text of 'error' as its description.  Always returns -1.
- */
-/*PRINTFLIKE3*/
-int
-zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-
-	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
-		va_end(ap);
-		return (-1);
-	}
-
-	switch (error) {
-	case ENXIO:
-		zfs_verror(hdl, EZFS_IO, fmt, ap);
-		break;
-
-	case ENOENT:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset does not exist"));
-		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
-		break;
-
-	case ENOSPC:
-	case EDQUOT:
-		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
-		/* break, not return: every path must reach va_end() */
-		break;
-
-	case EEXIST:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset already exists"));
-		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
-		break;
-
-	case EBUSY:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "dataset is busy"));
-		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
-		break;
-
-	case ENAMETOOLONG:
-		zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
-		break;
-
-	default:
-		/*
-		 * Describe the error that was passed in, not whatever errno
-		 * holds by now, and never use the text as a format string.
-		 */
-		zfs_error_aux(hdl, "%s", strerror(error));
-		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
-		break;
-	}
-
-	va_end(ap);
-	return (-1);
-}
-
-/*
- * Translate the errno value 'error' into a pool-level libzfs error with
- * the fixed action string 'msg'.  Returns the result of
- * zpool_standard_error_fmt().
- */
-int
-zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg)
-{
-	int rc;
-
-	rc = zpool_standard_error_fmt(hdl, error, "%s", msg);
-	return (rc);
-}
-
-/*
- * Translate the errno value 'error' into a pool-level libzfs error and
- * record it on the handle together with the formatted action string.
- *
- * Values shared with datasets are handled by zfs_common_error(); the rest
- * are mapped below, and anything unrecognized becomes EZFS_UNKNOWN with the
- * strerror() text of 'error' as its description.  Always returns -1.
- */
-/*PRINTFLIKE3*/
-int
-zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-
-	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
-		va_end(ap);
-		return (-1);
-	}
-
-	switch (error) {
-	case ENODEV:
-		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
-		break;
-
-	case ENOENT:
-		zfs_error_aux(hdl,
-		    dgettext(TEXT_DOMAIN, "no such pool or dataset"));
-		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
-		break;
-
-	case EEXIST:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "pool already exists"));
-		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
-		break;
-
-	case EBUSY:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
-		/* a busy pool is EZFS_BUSY, not EZFS_EXISTS */
-		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
-		break;
-
-	case ENXIO:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "one or more devices is currently unavailable"));
-		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
-		break;
-
-	case ENAMETOOLONG:
-		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
-		break;
-
-	case ENOTSUP:
-		zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap);
-		break;
-
-	case EINVAL:
-		zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap);
-		break;
-
-	default:
-		/* never use the strerror() text as a format string */
-		zfs_error_aux(hdl, "%s", strerror(error));
-		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
-		break;
-	}
-
-	va_end(ap);
-	return (-1);
-}
-
-/*
- * Record an out-of-memory error (EZFS_NOMEM, action "internal error") on
- * the handle and return the result of zfs_error().  Whether the program
- * also terminates is decided by zfs_verror(), which exits on EZFS_NOMEM
- * only when error printing is enabled on the handle.
- */
-int
-no_memory(libzfs_handle_t *hdl)
-{
-	int rc;
-
-	rc = zfs_error(hdl, EZFS_NOMEM, "internal error");
-	return (rc);
-}
-
-/*
- * Allocate 'size' bytes of zero-filled memory.  On failure the error is
- * reported through no_memory() and NULL is returned if that call returns.
- */
-void *
-zfs_alloc(libzfs_handle_t *hdl, size_t size)
-{
-	void *buf = calloc(1, size);
-
-	if (buf == NULL)
-		(void) no_memory(hdl);
-
-	return (buf);
-}
-
-/*
- * Resize 'ptr' from 'oldsize' to 'newsize' bytes, zero-filling any newly
- * added tail.
- *
- * On failure the error is reported through no_memory(), the ORIGINAL
- * buffer is freed, and NULL is returned -- callers must not touch 'ptr'
- * afterwards.
- */
-void *
-zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
-{
-	void *ret;
-
-	if ((ret = realloc(ptr, newsize)) == NULL) {
-		(void) no_memory(hdl);
-		free(ptr);
-		return (NULL);
-	}
-
-	/*
-	 * Only a growing buffer has a tail to clear.  Without this guard a
-	 * shrink would make (newsize - oldsize) wrap to a huge size_t.
-	 */
-	if (newsize > oldsize)
-		bzero((char *)ret + oldsize, newsize - oldsize);
-	return (ret);
-}
-
-/*
- * Duplicate 'str' with strdup().  On failure the error is reported through
- * no_memory() and NULL is returned if that call returns.
- */
-char *
-zfs_strdup(libzfs_handle_t *hdl, const char *str)
-{
-	char *copy = strdup(str);
-
-	if (copy == NULL)
-		(void) no_memory(hdl);
-
-	return (copy);
-}
-
-/*
- * Convert a number to an appropriately human-readable output.
- *
- * 'num' is scaled by powers of 1024 and written to 'buf' (at most 'buflen'
- * bytes) with a K/M/G/T/P/E suffix; values below 1024 are printed as plain
- * integers with no suffix.
- */
-void
-zfs_nicenum(uint64_t num, char *buf, size_t buflen)
-{
-	uint64_t n = num;
-	int index = 0;
-	char u;
-
-	while (n >= 1024) {
-		n /= 1024;
-		index++;
-	}
-
-	/* a 64-bit value divides by 1024 at most six times: index <= 6 */
-	u = " KMGTPE"[index];
-
-	/*
-	 * uint64_t is not 'unsigned long long' on every platform, so the
-	 * value is cast to match the %llu conversions below.
-	 */
-	if (index == 0) {
-		(void) snprintf(buf, buflen, "%llu", (unsigned long long)n);
-	} else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
-		/*
-		 * If this is an even multiple of the base, always display
-		 * without any decimal precision.
-		 */
-		(void) snprintf(buf, buflen, "%llu%c",
-		    (unsigned long long)n, u);
-	} else {
-		/*
-		 * We want to choose a precision that reflects the best choice
-		 * for fitting in 5 characters.  This can get rather tricky when
-		 * we have numbers that are very close to an order of magnitude.
-		 * For example, when displaying 10239 (which is really 9.999K),
-		 * we want only a single place of precision for 10.0K.  We could
-		 * develop some complex heuristics for this, but it's much
-		 * easier just to try each combination in turn.
-		 */
-		int i;
-		for (i = 2; i >= 0; i--) {
-			(void) snprintf(buf, buflen, "%.*f%c", i,
-			    (double)num / (1ULL << 10 * index), u);
-			if (strlen(buf) <= 5)
-				break;
-		}
-	}
-}
-
-/*
- * Set the handle's libzfs_printerr flag, which zfs_verror() consults to
- * decide whether errors are also written to stderr.
- */
-void
-libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
-{
-	boolean_t enable = printerr;
-
-	hdl->libzfs_printerr = enable;
-}
-
-/*
- * Make sure the "zfs" kernel module is present, loading it if necessary.
- *
- * Returns 0 when the module is found (or when loading failed only with
- * EEXIST) and -1 on any other failure; errno then holds the cause.
- */
-static int
-libzfs_load(void)
-{
-	if (modfind("zfs") >= 0)
-		return (0);
-
-	/* Not present in kernel, try loading it. */
-	if (kldload("zfs") < 0 || modfind("zfs") < 0) {
-		/*
-		 * The original code returned an uninitialized local here;
-		 * return a definite failure value instead.
-		 */
-		if (errno != EEXIST)
-			return (-1);
-	}
-
-	return (0);
-}
-
-/*
- * Allocate and initialize a library handle.
- *
- * Opens ZFS_DEV (retrying once after libzfs_load() if the first open
- * fails) and MNTTAB; failure of either releases everything acquired so far
- * and returns NULL.  The share table (ZFS_EXPORTS_PATH) is opened on a
- * best-effort basis: libzfs_sharetab is left NULL if it cannot be opened.
- */
-libzfs_handle_t *
-libzfs_init(void)
-{
-	libzfs_handle_t *hdl;
-
-	hdl = calloc(sizeof (libzfs_handle_t), 1);
-	if (hdl == NULL)
-		return (NULL);
-
-	hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
-	if (hdl->libzfs_fd < 0) {
-		/* the device may be missing only because zfs is not loaded */
-		if (libzfs_load() == 0)
-			hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
-	}
-	if (hdl->libzfs_fd < 0) {
-		free(hdl);
-		return (NULL);
-	}
-
-	hdl->libzfs_mnttab = fopen(MNTTAB, "r");
-	if (hdl->libzfs_mnttab == NULL) {
-		(void) close(hdl->libzfs_fd);
-		free(hdl);
-		return (NULL);
-	}
-
-	hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");
-
-	return (hdl);
-}
-
-/*
- * Tear down a handle created by libzfs_init(): close the device
- * descriptor and whichever of the mnttab/sharetab streams are open, clear
- * the namespace state, and free the handle itself.
- */
-void
-libzfs_fini(libzfs_handle_t *hdl)
-{
-	(void) close(hdl->libzfs_fd);
-
-	if (hdl->libzfs_mnttab != NULL)
-		(void) fclose(hdl->libzfs_mnttab);
-
-	if (hdl->libzfs_sharetab != NULL)
-		(void) fclose(hdl->libzfs_sharetab);
-
-	namespace_clear(hdl);
-	free(hdl);
-}
-
-/*
- * Return the library handle stored in the pool handle 'zhp'.
- */
-libzfs_handle_t *
-zpool_get_handle(zpool_handle_t *zhp)
-{
-	libzfs_handle_t *hdl = zhp->zpool_hdl;
-
-	return (hdl);
-}
-
-/*
- * Return the library handle stored in the dataset handle 'zhp'.
- */
-libzfs_handle_t *
-zfs_get_handle(zfs_handle_t *zhp)
-{
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	return (hdl);
-}
-
-/*
- * Open a dataset given either a path or a dataset name.
- *
- * An argument that starts with '/' or "./" is treated as a path: statfs()
- * is used to find the filesystem it lives on, which must be of type
- * MNTTYPE_ZFS, and the dataset it is mounted from is opened as a
- * filesystem.  Anything else is passed to zfs_open() as a name of type
- * 'argtype'.  Returns NULL (after printing a message to stderr) if the
- * statfs() call fails or the path is not on ZFS.
- */
-zfs_handle_t *
-zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
-{
-	struct statfs sfs;
-	boolean_t looks_like_path;
-
-	looks_like_path = (path[0] == '/' ||
-	    strncmp(path, "./", strlen("./")) == 0);
-
-	/*
-	 * It's not a valid path, assume it's a name of type 'argtype'.
-	 */
-	if (!looks_like_path)
-		return (zfs_open(hdl, path, argtype));
-
-	if (statfs(path, &sfs) != 0) {
-		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
-		return (NULL);
-	}
-
-	if (strcmp(sfs.f_fstypename, MNTTYPE_ZFS) != 0) {
-		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
-		    path);
-		return (NULL);
-	}
-
-	return (zfs_open(hdl, sfs.f_mntfromname, ZFS_TYPE_FILESYSTEM));
-}
-
-/*
- * Prepare 'zc' to receive an nvlist from an ioctl(): allocate a
- * destination buffer of 'len' bytes (2048 when 'len' is 0) and store its
- * address and size in the command structure.  Returns 0 on success and -1
- * if the allocation fails.
- */
-int
-zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
-{
-	zc->zc_nvlist_dst_size = (len == 0) ? 2048 : len;
-
-	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
-	    zfs_alloc(hdl, zc->zc_nvlist_dst_size);
-
-	return (zc->zc_nvlist_dst == 0 ? -1 : 0);
-}
-
-/*
- * Replace the destination buffer of 'zc' with a new one of
- * zc_nvlist_dst_size bytes.  Intended for use after an ioctl() failed with
- * ENOMEM and the kernel stored the required size in that field.  The old
- * buffer is freed first and its contents are not preserved.  Returns 0 on
- * success and -1 if the allocation fails.
- */
-int
-zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
-{
-	free((void *)(uintptr_t)zc->zc_nvlist_dst);
-
-	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
-	    zfs_alloc(hdl, zc->zc_nvlist_dst_size);
-
-	return (zc->zc_nvlist_dst == 0 ? -1 : 0);
-}
-
-/*
- * Free the source and destination nvlist buffers referenced by 'zc'.  The
- * fields themselves are not reset.
- */
-void
-zcmd_free_nvlists(zfs_cmd_t *zc)
-{
-	void *src = (void *)(uintptr_t)zc->zc_nvlist_src;
-	void *dst = (void *)(uintptr_t)zc->zc_nvlist_dst;
-
-	free(src);
-	free(dst);
-}
-
-/*
- * Pack 'nvl' in native encoding into a freshly allocated buffer and attach
- * it to 'zc' as the source nvlist.  If 'size' is non-NULL it receives the
- * packed length.  Returns 0 on success and -1 if the buffer cannot be
- * allocated.
- */
-int
-zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl,
-    size_t *size)
-{
-	char *packed;
-	size_t len;
-
-	verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0);
-
-	packed = zfs_alloc(hdl, len);
-	if (packed == NULL)
-		return (-1);
-
-	verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
-
-	zc->zc_nvlist_src_size = len;
-	zc->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
-
-	if (size != NULL)
-		*size = len;
-
-	return (0);
-}
-
-/*
- * Unpack the destination buffer of 'zc' into a new nvlist stored in
- * '*nvlp'.  Any unpack failure is reported through no_memory(), whose
- * result is then returned; 0 is returned on success.
- */
-int
-zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
-{
-	void *packed = (void *)(uintptr_t)zc->zc_nvlist_dst;
-
-	if (nvlist_unpack(packed, zc->zc_nvlist_dst_size, nvlp, 0) == 0)
-		return (0);
-
-	return (no_memory(hdl));
-}
-
-/*
- * Compute the column widths for property output and print the header line.
- *
- * Clears cbp->cb_first so that the header is produced only once.  In
- * scripted mode nothing else is done: no widths are computed and no header
- * is printed.
- */
-static void
-zfs_print_prop_headers(libzfs_get_cbdata_t *cbp)
-{
- zfs_proplist_t *pl = cbp->cb_proplist;
- int i;
- char *title;
- size_t len;
-
- cbp->cb_first = B_FALSE;
- if (cbp->cb_scripted)
- return;
-
- /*
- * Start with the length of the column headers.
- */
- cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME"));
- cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN,
- "PROPERTY"));
- cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
- "VALUE"));
- cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
- "SOURCE"));
-
- /*
- * Go through and calculate the widths for each column. For the
- * 'source' column, we kludge it up by taking the worst-case scenario of
- * inheriting from the longest name. This is acceptable because in the
- * majority of cases 'SOURCE' is the last column displayed, and we don't
- * use the width anyway. Note that the 'VALUE' column can be oversized,
- * if the name of the property is much longer than any values we find.
- */
- for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
- /*
- * 'PROPERTY' column
- */
- if (pl->pl_prop != ZFS_PROP_INVAL) {
- len = strlen(zfs_prop_to_name(pl->pl_prop));
- if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
- cbp->cb_colwidths[GET_COL_PROPERTY] = len;
- } else {
- /* not a known property: measure the user property name */
- len = strlen(pl->pl_user_prop);
- if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
- cbp->cb_colwidths[GET_COL_PROPERTY] = len;
- }
-
- /*
- * 'VALUE' column
- */
- if ((pl->pl_prop != ZFS_PROP_NAME || !pl->pl_all) &&
- pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
- cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
-
- /*
- * 'NAME' and 'SOURCE' columns
- */
- if (pl->pl_prop == ZFS_PROP_NAME &&
- pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
- cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
- cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
- strlen(dgettext(TEXT_DOMAIN, "inherited from"));
- }
- }
-
- /*
- * Now go through and print the headers.
- */
- for (i = 0; i < 4; i++) {
- switch (cbp->cb_columns[i]) {
- case GET_COL_NAME:
- title = dgettext(TEXT_DOMAIN, "NAME");
- break;
- case GET_COL_PROPERTY:
- title = dgettext(TEXT_DOMAIN, "PROPERTY");
- break;
- case GET_COL_VALUE:
- title = dgettext(TEXT_DOMAIN, "VALUE");
- break;
- case GET_COL_SOURCE:
- title = dgettext(TEXT_DOMAIN, "SOURCE");
- break;
- default:
- title = NULL;
- }
-
- if (title != NULL) {
- /*
- * The last column in use is printed without padding; the
- * i == 3 test keeps cb_columns[i + 1] from being read for
- * the final slot.
- */
- if (i == 3 || cbp->cb_columns[i + 1] == 0)
- (void) printf("%s", title);
- else
- (void) printf("%-*s ",
- cbp->cb_colwidths[cbp->cb_columns[i]],
- title);
- }
- }
- (void) printf("\n");
-}
-
-/*
- * Display a single line of output, according to the settings in the callback
- * structure.
- *
- * Nothing is printed when 'sourcetype' is not among the sources selected in
- * cbp->cb_sources.  The header line is emitted first if this is the first
- * property printed.  Columns are separated by tabs in scripted mode and
- * padded to cb_colwidths otherwise; the last column in use is printed
- * without a trailing separator.  'source' is only used for inherited
- * properties.
- */
-void
-libzfs_print_one_property(const char *name, libzfs_get_cbdata_t *cbp,
-    const char *propname, const char *value, zfs_source_t sourcetype,
-    const char *source)
-{
-	int i;
-	const char *str;
-	char buf[128];
-
-	/*
-	 * Ignore those source types that the user has chosen to ignore.
-	 */
-	if ((sourcetype & cbp->cb_sources) == 0)
-		return;
-
-	if (cbp->cb_first)
-		zfs_print_prop_headers(cbp);
-
-	for (i = 0; i < 4; i++) {
-		switch (cbp->cb_columns[i]) {
-		case GET_COL_NAME:
-			str = name;
-			break;
-
-		case GET_COL_PROPERTY:
-			str = propname;
-			break;
-
-		case GET_COL_VALUE:
-			str = value;
-			break;
-
-		case GET_COL_SOURCE:
-			switch (sourcetype) {
-			case ZFS_SRC_NONE:
-				str = "-";
-				break;
-
-			case ZFS_SRC_DEFAULT:
-				str = "default";
-				break;
-
-			case ZFS_SRC_LOCAL:
-				str = "local";
-				break;
-
-			case ZFS_SRC_TEMPORARY:
-				str = "temporary";
-				break;
-
-			case ZFS_SRC_INHERITED:
-				(void) snprintf(buf, sizeof (buf),
-				    "inherited from %s", source);
-				str = buf;
-				break;
-
-			default:
-				/*
-				 * Unrecognized source type: print a
-				 * placeholder rather than leave 'str'
-				 * uninitialized.
-				 */
-				str = "-";
-				break;
-			}
-			break;
-
-		default:
-			continue;
-		}
-
-		/*
-		 * Same guard as zfs_print_prop_headers(): the column array is
-		 * walked as four slots, so slot 3 has no successor to inspect.
-		 */
-		if (i == 3 || cbp->cb_columns[i + 1] == 0)
-			(void) printf("%s", str);
-		else if (cbp->cb_scripted)
-			(void) printf("%s\t", str);
-		else
-			(void) printf("%-*s  ",
-			    cbp->cb_colwidths[cbp->cb_columns[i]],
-			    str);
-
-	}
-
-	(void) printf("\n");
-}
diff --git a/contrib/opensolaris/lib/libzpool/common/kernel.c b/contrib/opensolaris/lib/libzpool/common/kernel.c
deleted file mode 100644
index 30c5a0c..0000000
--- a/contrib/opensolaris/lib/libzpool/common/kernel.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <assert.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <zlib.h>
-#include <sys/spa.h>
-#include <sys/stat.h>
-#include <sys/processor.h>
-#include <sys/zfs_context.h>
-#include <sys/zmod.h>
-#include <sys/utsname.h>
-
-/*
- * Emulation of kernel services in userland.
- */
-
-int hz = 119; /* frequency when using gethrtime() >> 23 for lbolt */
-uint64_t physmem;
-vnode_t *rootdir = (vnode_t *)0xabcd1234;
-char hw_serial[11];
-
-struct utsname utsname = {
- "userland", "libzpool", "1", "1", "na"
-};
-
-/*
- * =========================================================================
- * threads
- * =========================================================================
- */
-/*ARGSUSED*/
-kthread_t *
-zk_thread_create(void (*func)(), void *arg)
-{
- thread_t tid;
-
- VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
- &tid) == 0);
-
- return ((void *)(uintptr_t)tid);
-}
-
-/*
- * =========================================================================
- * kstats
- * =========================================================================
- */
-/*ARGSUSED*/
-kstat_t *
-kstat_create(char *module, int instance, char *name, char *class,
- uchar_t type, ulong_t ndata, uchar_t ks_flag)
-{
- return (NULL);
-}
-
-/*ARGSUSED*/
-void
-kstat_install(kstat_t *ksp)
-{}
-
-/*ARGSUSED*/
-void
-kstat_delete(kstat_t *ksp)
-{}
-
-/*
- * =========================================================================
- * mutexes
- * =========================================================================
- */
-void
-zmutex_init(kmutex_t *mp)
-{
- mp->m_owner = NULL;
- (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
-}
-
-void
-zmutex_destroy(kmutex_t *mp)
-{
- ASSERT(mp->m_owner == NULL);
- (void) _mutex_destroy(&(mp)->m_lock);
- mp->m_owner = (void *)-1UL;
-}
-
-void
-mutex_enter(kmutex_t *mp)
-{
- ASSERT(mp->m_owner != (void *)-1UL);
- ASSERT(mp->m_owner != curthread);
- VERIFY(mutex_lock(&mp->m_lock) == 0);
- ASSERT(mp->m_owner == NULL);
- mp->m_owner = curthread;
-}
-
-int
-mutex_tryenter(kmutex_t *mp)
-{
- ASSERT(mp->m_owner != (void *)-1UL);
- if (mutex_trylock(&mp->m_lock) == 0) {
- ASSERT(mp->m_owner == NULL);
- mp->m_owner = curthread;
- return (1);
- } else {
- return (0);
- }
-}
-
-void
-mutex_exit(kmutex_t *mp)
-{
- ASSERT(mp->m_owner == curthread);
- mp->m_owner = NULL;
- VERIFY(mutex_unlock(&mp->m_lock) == 0);
-}
-
-void *
-mutex_owner(kmutex_t *mp)
-{
- return (mp->m_owner);
-}
-
-/*
- * =========================================================================
- * rwlocks
- * =========================================================================
- */
-/*ARGSUSED*/
-void
-rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
-{
- rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
- rwlp->rw_owner = NULL;
- rwlp->rw_count = 0;
-}
-
-void
-rw_destroy(krwlock_t *rwlp)
-{
- rwlock_destroy(&rwlp->rw_lock);
- rwlp->rw_owner = (void *)-1UL;
- rwlp->rw_count = -2;
-}
-
-void
-rw_enter(krwlock_t *rwlp, krw_t rw)
-{
- //ASSERT(!RW_LOCK_HELD(rwlp));
- ASSERT(rwlp->rw_owner != (void *)-1UL);
- ASSERT(rwlp->rw_owner != curthread);
-
- if (rw == RW_READER) {
- (void) rw_rdlock(&rwlp->rw_lock);
- ASSERT(rwlp->rw_count >= 0);
- atomic_add_int(&rwlp->rw_count, 1);
- } else {
- (void) rw_wrlock(&rwlp->rw_lock);
- ASSERT(rwlp->rw_count == 0);
- rwlp->rw_count = -1;
- rwlp->rw_owner = curthread;
- }
-}
-
-void
-rw_exit(krwlock_t *rwlp)
-{
- ASSERT(rwlp->rw_owner != (void *)-1UL);
-
- if (rwlp->rw_owner == curthread) {
- /* Write locked. */
- ASSERT(rwlp->rw_count == -1);
- rwlp->rw_count = 0;
- rwlp->rw_owner = NULL;
- } else {
- /* Read locked. */
- ASSERT(rwlp->rw_count > 0);
- atomic_add_int(&rwlp->rw_count, -1);
- }
- (void) rw_unlock(&rwlp->rw_lock);
-}
-
-int
-rw_tryenter(krwlock_t *rwlp, krw_t rw)
-{
- int rv;
-
- ASSERT(rwlp->rw_owner != (void *)-1UL);
- ASSERT(rwlp->rw_owner != curthread);
-
- if (rw == RW_READER)
- rv = rw_tryrdlock(&rwlp->rw_lock);
- else
- rv = rw_trywrlock(&rwlp->rw_lock);
-
- if (rv == 0) {
- ASSERT(rwlp->rw_owner == NULL);
- if (rw == RW_READER) {
- ASSERT(rwlp->rw_count >= 0);
- atomic_add_int(&rwlp->rw_count, 1);
- } else {
- ASSERT(rwlp->rw_count == 0);
- rwlp->rw_count = -1;
- rwlp->rw_owner = curthread;
- }
- return (1);
- }
-
- return (0);
-}
-
-/*ARGSUSED*/
-int
-rw_tryupgrade(krwlock_t *rwlp)
-{
- ASSERT(rwlp->rw_owner != (void *)-1UL);
-
- return (0);
-}
-
-int
-rw_lock_held(krwlock_t *rwlp)
-{
-
- return (rwlp->rw_count != 0);
-}
-
-/*
- * =========================================================================
- * condition variables
- * =========================================================================
- */
-/*ARGSUSED*/
-void
-cv_init(kcondvar_t *cv, char *name, int type, void *arg)
-{
- VERIFY(cond_init(cv, name, NULL) == 0);
-}
-
-void
-cv_destroy(kcondvar_t *cv)
-{
- VERIFY(cond_destroy(cv) == 0);
-}
-
-void
-cv_wait(kcondvar_t *cv, kmutex_t *mp)
-{
- ASSERT(mutex_owner(mp) == curthread);
- mp->m_owner = NULL;
- int ret = cond_wait(cv, &mp->m_lock);
- VERIFY(ret == 0 || ret == EINTR);
- mp->m_owner = curthread;
-}
-
-clock_t
-cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
-{
- int error;
- struct timespec ts;
- struct timeval tv;
- clock_t delta;
-
- ASSERT(abstime > 0);
-top:
- delta = abstime;
- if (delta <= 0)
- return (-1);
-
- if (gettimeofday(&tv, NULL) != 0)
- assert(!"gettimeofday() failed");
-
- ts.tv_sec = tv.tv_sec + delta / hz;
- ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
- ASSERT(ts.tv_nsec >= 0);
-
- if(ts.tv_nsec >= NANOSEC) {
- ts.tv_sec++;
- ts.tv_nsec -= NANOSEC;
- }
-
- ASSERT(mutex_owner(mp) == curthread);
- mp->m_owner = NULL;
- error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
- mp->m_owner = curthread;
-
- if (error == EINTR)
- goto top;
-
- if (error == ETIMEDOUT)
- return (-1);
-
- ASSERT(error == 0);
-
- return (1);
-}
-
-void
-cv_signal(kcondvar_t *cv)
-{
- VERIFY(cond_signal(cv) == 0);
-}
-
-void
-cv_broadcast(kcondvar_t *cv)
-{
- VERIFY(cond_broadcast(cv) == 0);
-}
-
-/*
- * =========================================================================
- * vnode operations
- * =========================================================================
- */
-/*
- * Note: for the xxxat() versions of these functions, we assume that the
- * starting vp is always rootdir (which is true for spa_directory.c, the only
- * ZFS consumer of these interfaces). We assert this is true, and then emulate
- * them by adding '/' in front of the path.
- */
-
-/*ARGSUSED*/
-int
-vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
-{
- int fd;
- vnode_t *vp;
- int old_umask;
- char realpath[MAXPATHLEN];
- struct stat64 st;
-
- /*
- * If we're accessing a real disk from userland, we need to use
- * the character interface to avoid caching. This is particularly
- * important if we're trying to look at a real in-kernel storage
- * pool from userland, e.g. via zdb, because otherwise we won't
- * see the changes occurring under the segmap cache.
- * On the other hand, the stupid character device returns zero
- * for its size. So -- gag -- we open the block device to get
- * its size, and remember it for subsequent VOP_GETATTR().
- */
- if (strncmp(path, "/dev/", 5) == 0) {
- char *dsk;
- fd = open64(path, O_RDONLY);
- if (fd == -1)
- return (errno);
- if (fstat64(fd, &st) == -1) {
- close(fd);
- return (errno);
- }
- close(fd);
- (void) sprintf(realpath, "%s", path);
- dsk = strstr(path, "/dsk/");
- if (dsk != NULL)
- (void) sprintf(realpath + (dsk - path) + 1, "r%s",
- dsk + 1);
- } else {
- (void) sprintf(realpath, "%s", path);
- if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
- return (errno);
- }
-
- if (flags & FCREAT)
- old_umask = umask(0);
-
- /*
- * The construct 'flags - FREAD' conveniently maps combinations of
- * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
- */
- fd = open64(realpath, flags - FREAD, mode);
-
- if (flags & FCREAT)
- (void) umask(old_umask);
-
- if (fd == -1)
- return (errno);
-
- if (fstat64(fd, &st) == -1) {
- close(fd);
- return (errno);
- }
-
- (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
-
- *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
-
- vp->v_fd = fd;
- if (S_ISCHR(st.st_mode))
- ioctl(fd, DIOCGMEDIASIZE, &vp->v_size);
- else
- vp->v_size = st.st_size;
- vp->v_path = spa_strdup(path);
-
- return (0);
-}
-
-int
-vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
- int x3, vnode_t *startvp)
-{
- char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
- int ret;
-
- ASSERT(startvp == rootdir);
- (void) sprintf(realpath, "/%s", path);
-
- ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
-
- umem_free(realpath, strlen(path) + 2);
-
- return (ret);
-}
-
-/*ARGSUSED*/
-int
-vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
- int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
-{
- ssize_t iolen, split;
-
- if (uio == UIO_READ) {
- iolen = pread64(vp->v_fd, addr, len, offset);
- } else {
- /*
- * To simulate partial disk writes, we split writes into two
- * system calls so that the process can be killed in between.
- */
- split = (len > 0 ? rand() % len : 0);
- iolen = pwrite64(vp->v_fd, addr, split, offset);
- iolen += pwrite64(vp->v_fd, (char *)addr + split,
- len - split, offset + split);
- }
-
- if (iolen == -1)
- return (errno);
- if (residp)
- *residp = len - iolen;
- else if (iolen != len)
- return (EIO);
- return (0);
-}
-
-void
-vn_close(vnode_t *vp)
-{
- close(vp->v_fd);
- spa_strfree(vp->v_path);
- umem_free(vp, sizeof (vnode_t));
-}
-
-#ifdef ZFS_DEBUG
-
-/*
- * =========================================================================
- * Figure out which debugging statements to print
- * =========================================================================
- */
-
-static char *dprintf_string;
-static int dprintf_print_all;
-
-int
-dprintf_find_string(const char *string)
-{
- char *tmp_str = dprintf_string;
- int len = strlen(string);
-
- /*
- * Find out if this is a string we want to print.
- * String format: file1.c,function_name1,file2.c,file3.c
- */
-
- while (tmp_str != NULL) {
- if (strncmp(tmp_str, string, len) == 0 &&
- (tmp_str[len] == ',' || tmp_str[len] == '\0'))
- return (1);
- tmp_str = strchr(tmp_str, ',');
- if (tmp_str != NULL)
- tmp_str++; /* Get rid of , */
- }
- return (0);
-}
-
-void
-dprintf_setup(int *argc, char **argv)
-{
- int i, j;
-
- /*
- * Debugging can be specified two ways: by setting the
- * environment variable ZFS_DEBUG, or by including a
- * "debug=..." argument on the command line. The command
- * line setting overrides the environment variable.
- */
-
- for (i = 1; i < *argc; i++) {
- int len = strlen("debug=");
- /* First look for a command line argument */
- if (strncmp("debug=", argv[i], len) == 0) {
- dprintf_string = argv[i] + len;
- /* Remove from args */
- for (j = i; j < *argc; j++)
- argv[j] = argv[j+1];
- argv[j] = NULL;
- (*argc)--;
- }
- }
-
- if (dprintf_string == NULL) {
- /* Look for ZFS_DEBUG environment variable */
- dprintf_string = getenv("ZFS_DEBUG");
- }
-
- /*
- * Are we just turning on all debugging?
- */
- if (dprintf_find_string("on"))
- dprintf_print_all = 1;
-}
-
-/*
- * =========================================================================
- * debug printfs
- * =========================================================================
- */
-void
-__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
-{
- const char *newfile;
- va_list adx;
-
- /*
- * Get rid of annoying "../common/" prefix to filename.
- */
- newfile = strrchr(file, '/');
- if (newfile != NULL) {
- newfile = newfile + 1; /* Get rid of leading / */
- } else {
- newfile = file;
- }
-
- if (dprintf_print_all ||
- dprintf_find_string(newfile) ||
- dprintf_find_string(func)) {
- /* Print out just the function name if requested */
- flockfile(stdout);
- if (dprintf_find_string("pid"))
- (void) printf("%d ", getpid());
- if (dprintf_find_string("tid"))
- (void) printf("%u ", thr_self());
-#if 0
- if (dprintf_find_string("cpu"))
- (void) printf("%u ", getcpuid());
-#endif
- if (dprintf_find_string("time"))
- (void) printf("%llu ", gethrtime());
- if (dprintf_find_string("long"))
- (void) printf("%s, line %d: ", newfile, line);
- (void) printf("%s: ", func);
- va_start(adx, fmt);
- (void) vprintf(fmt, adx);
- va_end(adx);
- funlockfile(stdout);
- }
-}
-
-#endif /* ZFS_DEBUG */
-
-/*
- * =========================================================================
- * cmn_err() and panic()
- * =========================================================================
- */
-static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
-static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
-
-void
-vpanic(const char *fmt, va_list adx)
-{
- (void) fprintf(stderr, "error: ");
- (void) vfprintf(stderr, fmt, adx);
- (void) fprintf(stderr, "\n");
-
- abort(); /* think of it as a "user-level crash dump" */
-}
-
-void
-panic(const char *fmt, ...)
-{
- va_list adx;
-
- va_start(adx, fmt);
- vpanic(fmt, adx);
- va_end(adx);
-}
-
-void
-vcmn_err(int ce, const char *fmt, va_list adx)
-{
- if (ce == CE_PANIC)
- vpanic(fmt, adx);
- if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
- (void) fprintf(stderr, "%s", ce_prefix[ce]);
- (void) vfprintf(stderr, fmt, adx);
- (void) fprintf(stderr, "%s", ce_suffix[ce]);
- }
-}
-
-/*PRINTFLIKE2*/
-void
-cmn_err(int ce, const char *fmt, ...)
-{
- va_list adx;
-
- va_start(adx, fmt);
- vcmn_err(ce, fmt, adx);
- va_end(adx);
-}
-
-/*
- * =========================================================================
- * kobj interfaces
- * =========================================================================
- */
-struct _buf *
-kobj_open_file(char *name)
-{
- struct _buf *file;
- vnode_t *vp;
-
- /* set vp as the _fd field of the file */
- if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0)
- return ((void *)-1UL);
-
- file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
- file->_fd = (intptr_t)vp;
- return (file);
-}
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
- ssize_t resid;
-
- vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
- UIO_SYSSPACE, 0, 0, 0, &resid);
-
- return (size - resid);
-}
-
-void
-kobj_close_file(struct _buf *file)
-{
- vn_close((vnode_t *)file->_fd);
- umem_free(file, sizeof (struct _buf));
-}
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
- struct stat64 st;
- vnode_t *vp = (vnode_t *)file->_fd;
-
- if (fstat64(vp->v_fd, &st) == -1) {
- vn_close(vp);
- return (errno);
- }
- *size = st.st_size;
- return (0);
-}
-
-/*
- * =========================================================================
- * misc routines
- * =========================================================================
- */
-
-void
-delay(clock_t ticks)
-{
- poll(0, 0, ticks * (1000 / hz));
-}
-
-#if 0
-/*
- * Find highest one bit set.
- * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
- * High order bit is 31 (or 63 in _LP64 kernel).
- */
-int
-highbit(ulong_t i)
-{
- register int h = 1;
-
- if (i == 0)
- return (0);
-#ifdef _LP64
- if (i & 0xffffffff00000000ul) {
- h += 32; i >>= 32;
- }
-#endif
- if (i & 0xffff0000) {
- h += 16; i >>= 16;
- }
- if (i & 0xff00) {
- h += 8; i >>= 8;
- }
- if (i & 0xf0) {
- h += 4; i >>= 4;
- }
- if (i & 0xc) {
- h += 2; i >>= 2;
- }
- if (i & 0x2) {
- h += 1;
- }
- return (h);
-}
-#endif
-
-static int
-random_get_bytes_common(uint8_t *ptr, size_t len, char *devname)
-{
- int fd = open(devname, O_RDONLY);
- size_t resid = len;
- ssize_t bytes;
-
- ASSERT(fd != -1);
-
- while (resid != 0) {
- bytes = read(fd, ptr, resid);
- ASSERT(bytes >= 0);
- ptr += bytes;
- resid -= bytes;
- }
-
- close(fd);
-
- return (0);
-}
-
-int
-random_get_bytes(uint8_t *ptr, size_t len)
-{
- return (random_get_bytes_common(ptr, len, "/dev/random"));
-}
-
-int
-random_get_pseudo_bytes(uint8_t *ptr, size_t len)
-{
- return (random_get_bytes_common(ptr, len, "/dev/urandom"));
-}
-
-int
-ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
-{
- char *end;
-
- *result = strtoul(hw_serial, &end, base);
- if (*result == 0)
- return (errno);
- return (0);
-}
-
-/*
- * =========================================================================
- * kernel emulation setup & teardown
- * =========================================================================
- */
-static int
-umem_out_of_memory(void)
-{
- char errmsg[] = "out of memory -- generating core dump\n";
-
- write(fileno(stderr), errmsg, sizeof (errmsg));
- abort();
- return (0);
-}
-
-void
-kernel_init(int mode)
-{
- umem_nofail_callback(umem_out_of_memory);
-
- physmem = sysconf(_SC_PHYS_PAGES);
-
- dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
- (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
-
- snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
-
- spa_init(mode);
-}
-
-void
-kernel_fini(void)
-{
- spa_fini();
-}
-
-int
-z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
-{
- int ret;
- uLongf len = *dstlen;
-
- if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
- *dstlen = (size_t)len;
-
- return (ret);
-}
-
-int
-z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
- int level)
-{
- int ret;
- uLongf len = *dstlen;
-
- if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
- *dstlen = (size_t)len;
-
- return (ret);
-}
diff --git a/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
deleted file mode 100644
index 74c9019..0000000
--- a/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZFS_CONTEXT_H
-#define _SYS_ZFS_CONTEXT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define _SYS_MUTEX_H
-#define _SYS_RWLOCK_H
-#define _SYS_CONDVAR_H
-#define _SYS_SYSTM_H
-#define _SYS_DEBUG_H
-#define _SYS_T_LOCK_H
-#define _SYS_VNODE_H
-#define _SYS_VFS_H
-#define _SYS_SUNDDI_H
-#define _SYS_CALLB_H
-#define _SYS_SCHED_H_
-
-#include <solaris.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdarg.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <strings.h>
-#include <thread.h>
-#include <assert.h>
-#include <limits.h>
-#include <dirent.h>
-#include <time.h>
-#include <math.h>
-#include <umem.h>
-#include <vmem.h>
-#include <fsshare.h>
-#include <sys/note.h>
-#include <sys/types.h>
-#include <sys/atomic.h>
-#include <sys/sysmacros.h>
-#include <sys/bitmap.h>
-#include <sys/resource.h>
-#include <sys/byteorder.h>
-#include <sys/list.h>
-#include <sys/time.h>
-#include <sys/uio.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/zfs_debug.h>
-#include <sys/debug.h>
-#include <sys/sdt.h>
-#include <sys/kstat.h>
-#include <sys/kernel.h>
-#include <sys/disk.h>
-#include <machine/atomic.h>
-
-#define ZFS_EXPORTS_PATH "/etc/zfs/exports"
-
-/*
- * Debugging
- */
-
-/*
- * Note that we are not using the debugging levels.
- */
-
-#define CE_CONT 0 /* continuation */
-#define CE_NOTE 1 /* notice */
-#define CE_WARN 2 /* warning */
-#define CE_PANIC 3 /* panic */
-#define CE_IGNORE 4 /* print nothing */
-
-/*
- * ZFS debugging
- */
-
-#define ZFS_LOG(...) do { } while (0)
-
-typedef u_longlong_t rlim64_t;
-#define RLIM64_INFINITY ((rlim64_t)-3)
-
-#ifdef ZFS_DEBUG
-extern void dprintf_setup(int *argc, char **argv);
-#endif /* ZFS_DEBUG */
-
-extern void cmn_err(int, const char *, ...);
-extern void vcmn_err(int, const char *, __va_list);
-extern void panic(const char *, ...);
-extern void vpanic(const char *, __va_list);
-
-/* This definition is copied from assert.h. */
-#if defined(__STDC__)
-#if __STDC_VERSION__ - 0 >= 199901L
-#define verify(EX) (void)((EX) || \
- (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
-#else
-#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
-#endif /* __STDC_VERSION__ - 0 >= 199901L */
-#else
-#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
-#endif /* __STDC__ */
-
-
-#define VERIFY verify
-#define ASSERT assert
-
-extern void __assert(const char *, const char *, int);
-
-#ifdef lint
-#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0)
-#else
-/* BEGIN CSTYLED */
-#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
- const TYPE __left = (TYPE)(LEFT); \
- const TYPE __right = (TYPE)(RIGHT); \
- if (!(__left OP __right)) { \
- char *__buf = alloca(256); \
- (void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
- #LEFT, #OP, #RIGHT, \
- (u_longlong_t)__left, #OP, (u_longlong_t)__right); \
- __assert(__buf, __FILE__, __LINE__); \
- } \
-_NOTE(CONSTCOND) } while (0)
-/* END CSTYLED */
-#endif /* lint */
-
-#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
-#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
-#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
-
-#ifdef NDEBUG
-#define ASSERT3S(x, y, z) ((void)0)
-#define ASSERT3U(x, y, z) ((void)0)
-#define ASSERT3P(x, y, z) ((void)0)
-#else
-#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
-#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
-#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
-#endif
-
-/*
- * Dtrace SDT probes have different signatures in userland than they do in
- * kernel. If they're being used in kernel code, re-define them out of
- * existence for their counterparts in libzpool.
- */
-
-#ifdef DTRACE_PROBE1
-#undef DTRACE_PROBE1
-#define DTRACE_PROBE1(a, b, c) ((void)0)
-#endif /* DTRACE_PROBE1 */
-
-#ifdef DTRACE_PROBE2
-#undef DTRACE_PROBE2
-#define DTRACE_PROBE2(a, b, c, d, e) ((void)0)
-#endif /* DTRACE_PROBE2 */
-
-#ifdef DTRACE_PROBE3
-#undef DTRACE_PROBE3
-#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0)
-#endif /* DTRACE_PROBE3 */
-
-#ifdef DTRACE_PROBE4
-#undef DTRACE_PROBE4
-#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0)
-#endif /* DTRACE_PROBE4 */
-
-/*
- * Threads
- */
-#define curthread ((void *)(uintptr_t)thr_self())
-
-typedef struct kthread kthread_t;
-
-#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
- zk_thread_create(func, arg)
-#define thread_exit() thr_exit(NULL)
-
-extern kthread_t *zk_thread_create(void (*func)(), void *arg);
-
-#define issig(why) (FALSE)
-#define ISSIG(thr, why) (FALSE)
-
-/*
- * Mutexes
- */
-typedef struct kmutex {
- void *m_owner;
- mutex_t m_lock;
-} kmutex_t;
-
-#define MUTEX_DEFAULT USYNC_THREAD
-#undef MUTEX_HELD
-#define MUTEX_HELD(m) ((m)->m_owner == curthread)
-
-/*
- * Argh -- we have to get cheesy here because the kernel and userland
- * have different signatures for the same routine.
- */
-//extern int _mutex_init(mutex_t *mp, int type, void *arg);
-//extern int _mutex_destroy(mutex_t *mp);
-
-#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp))
-#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp))
-
-extern void zmutex_init(kmutex_t *mp);
-extern void zmutex_destroy(kmutex_t *mp);
-extern void mutex_enter(kmutex_t *mp);
-extern void mutex_exit(kmutex_t *mp);
-extern int mutex_tryenter(kmutex_t *mp);
-extern void *mutex_owner(kmutex_t *mp);
-
-/*
- * RW locks
- */
-typedef struct krwlock {
- int rw_count;
- void *rw_owner;
- rwlock_t rw_lock;
-} krwlock_t;
-
-typedef int krw_t;
-
-#define RW_READER 0
-#define RW_WRITER 1
-#define RW_DEFAULT USYNC_THREAD
-
-#undef RW_READ_HELD
-
-#undef RW_WRITE_HELD
-#define RW_WRITE_HELD(x) ((x)->rw_owner == curthread)
-#define RW_LOCK_HELD(x) rw_lock_held(x)
-
-extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
-extern void rw_destroy(krwlock_t *rwlp);
-extern void rw_enter(krwlock_t *rwlp, krw_t rw);
-extern int rw_tryenter(krwlock_t *rwlp, krw_t rw);
-extern int rw_tryupgrade(krwlock_t *rwlp);
-extern void rw_exit(krwlock_t *rwlp);
-extern int rw_lock_held(krwlock_t *rwlp);
-#define rw_downgrade(rwlp) do { } while (0)
-
-/*
- * Condition variables
- */
-typedef cond_t kcondvar_t;
-
-#define CV_DEFAULT USYNC_THREAD
-
-extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
-extern void cv_destroy(kcondvar_t *cv);
-extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
-extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
-extern void cv_signal(kcondvar_t *cv);
-extern void cv_broadcast(kcondvar_t *cv);
-
-/*
- * Kernel memory
- */
-#define KM_SLEEP UMEM_NOFAIL
-#define KM_NOSLEEP UMEM_DEFAULT
-#define KMC_NODEBUG UMC_NODEBUG
-#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
-#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
-#define kmem_free(_b, _s) umem_free(_b, _s)
-#define kmem_size() (physmem * PAGESIZE)
-#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
- umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
-#define kmem_cache_destroy(_c) umem_cache_destroy(_c)
-#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
-#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
-#define kmem_debugging() 0
-#define kmem_cache_reap_now(c)
-
-typedef umem_cache_t kmem_cache_t;
-
-/*
- * Task queues
- */
-typedef struct taskq taskq_t;
-typedef uintptr_t taskqid_t;
-typedef void (task_func_t)(void *);
-
-#define TASKQ_PREPOPULATE 0x0001
-#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
-#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
-
-#define TQ_SLEEP KM_SLEEP /* Can block for memory */
-#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
-#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
-
-extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
-extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
-extern void taskq_destroy(taskq_t *);
-extern void taskq_wait(taskq_t *);
-extern int taskq_member(taskq_t *, void *);
-
-/*
- * vnodes
- */
-typedef struct vnode {
- uint64_t v_size;
- int v_fd;
- char *v_path;
-} vnode_t;
-
-typedef struct vattr {
- uint_t va_mask; /* bit-mask of attributes */
- u_offset_t va_size; /* file size in bytes */
-} vattr_t;
-
-#define AT_TYPE 0x0001
-#define AT_MODE 0x0002
-#define AT_UID 0x0004
-#define AT_GID 0x0008
-#define AT_FSID 0x0010
-#define AT_NODEID 0x0020
-#define AT_NLINK 0x0040
-#define AT_SIZE 0x0080
-#define AT_ATIME 0x0100
-#define AT_MTIME 0x0200
-#define AT_CTIME 0x0400
-#define AT_RDEV 0x0800
-#define AT_BLKSIZE 0x1000
-#define AT_NBLOCKS 0x2000
-#define AT_SEQ 0x8000
-
-#define CRCREAT 0
-
-#define VOP_CLOSE(vp, f, c, o, cr) 0
-#define VOP_PUTPAGE(vp, of, sz, fl, cr) 0
-#define VOP_GETATTR(vp, vap, fl, cr) ((vap)->va_size = (vp)->v_size, 0)
-
-#define VOP_FSYNC(vp, f, cr) fsync((vp)->v_fd)
-
-#define VN_RELE(vp) vn_close(vp)
-
-extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
- int x2, int x3);
-extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
- int x2, int x3, vnode_t *vp);
-extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
- offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
-extern void vn_close(vnode_t *vp);
-
-#define vn_remove(path, x1, x2) remove(path)
-#define vn_rename(from, to, seg) rename((from), (to))
-#define vn_is_readonly(vp) B_FALSE
-
-extern vnode_t *rootdir;
-
-#include <sys/file.h> /* for FREAD, FWRITE, etc */
-#define FTRUNC O_TRUNC
-
-/*
- * Random stuff
- */
-#define lbolt (gethrtime() >> 23)
-#define lbolt64 (gethrtime() >> 23)
-//#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
-
-extern void delay(clock_t ticks);
-
-#define gethrestime_sec() time(NULL)
-
-#define max_ncpus 64
-
-#define minclsyspri 60
-#define maxclsyspri 99
-
-#define CPU_SEQID (thr_self() & (max_ncpus - 1))
-
-#define kcred NULL
-#define CRED() NULL
-
-extern uint64_t physmem;
-
-extern int highbit(ulong_t i);
-extern int random_get_bytes(uint8_t *ptr, size_t len);
-extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
-
-extern void kernel_init(int);
-extern void kernel_fini(void);
-
-struct spa;
-extern void nicenum(uint64_t num, char *buf);
-extern void show_pool_stats(struct spa *);
-
-typedef struct callb_cpr {
- kmutex_t *cc_lockp;
-} callb_cpr_t;
-
-#define CALLB_CPR_INIT(cp, lockp, func, name) { \
- (cp)->cc_lockp = lockp; \
-}
-
-#define CALLB_CPR_SAFE_BEGIN(cp) { \
- ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
-}
-
-#define CALLB_CPR_SAFE_END(cp, lockp) { \
- ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
-}
-
-#define CALLB_CPR_EXIT(cp) { \
- ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
- mutex_exit((cp)->cc_lockp); \
-}
-
-#define zone_dataset_visible(x, y) (1)
-#define INGLOBALZONE(z) (1)
-
-/*
- * Hostname information
- */
-extern struct utsname utsname;
-extern char hw_serial[];
-extern int ddi_strtoul(const char *str, char **nptr, int base,
- unsigned long *result);
-
-/* ZFS Boot Related stuff. */
-
-struct _buf {
- intptr_t _fd;
-};
-
-struct bootstat {
- uint64_t st_size;
-};
-
-extern struct _buf *kobj_open_file(char *name);
-extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
- unsigned off);
-extern void kobj_close_file(struct _buf *file);
-extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
-/* Random compatibility stuff. */
-#define lbolt (gethrtime() >> 23)
-#define lbolt64 (gethrtime() >> 23)
-
-extern int hz;
-extern uint64_t physmem;
-
-#define gethrestime_sec() time(NULL)
-
-#define pwrite64(d, p, n, o) pwrite(d, p, n, o)
-#define readdir64(d) readdir(d)
-#define SIGPENDING(td) (0)
-#define root_mount_wait() do { } while (0)
-#define root_mounted() (1)
-
-struct file {
- void *dummy;
-};
-
-#define FCREAT O_CREAT
-#define FOFFMAX 0x0
-
-#define SX_SYSINIT(name, lock, desc)
-
-#define SYSCTL_DECL(...)
-#define SYSCTL_NODE(...)
-#define SYSCTL_INT(...)
-#define SYSCTL_ULONG(...)
-#ifdef TUNABLE_INT
-#undef TUNABLE_INT
-#undef TUNABLE_ULONG
-#endif
-#define TUNABLE_INT(...)
-#define TUNABLE_ULONG(...)
-
-/* Errors */
-
-#ifndef ERESTART
-#define ERESTART (-1)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZFS_CONTEXT_H */
diff --git a/contrib/opensolaris/lib/libzpool/common/taskq.c b/contrib/opensolaris/lib/libzpool/common/taskq.c
deleted file mode 100644
index f7b6571..0000000
--- a/contrib/opensolaris/lib/libzpool/common/taskq.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-
-int taskq_now;
-
-typedef struct task {
- struct task *task_next;
- struct task *task_prev;
- task_func_t *task_func;
- void *task_arg;
-} task_t;
-
-#define TASKQ_ACTIVE 0x00010000
-
-struct taskq {
- kmutex_t tq_lock;
- krwlock_t tq_threadlock;
- kcondvar_t tq_dispatch_cv;
- kcondvar_t tq_wait_cv;
- thread_t *tq_threadlist;
- int tq_flags;
- int tq_active;
- int tq_nthreads;
- int tq_nalloc;
- int tq_minalloc;
- int tq_maxalloc;
- task_t *tq_freelist;
- task_t tq_task;
-};
-
-static task_t *
-task_alloc(taskq_t *tq, int tqflags)
-{
- task_t *t;
-
- if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
- tq->tq_freelist = t->task_next;
- } else {
- mutex_exit(&tq->tq_lock);
- if (tq->tq_nalloc >= tq->tq_maxalloc) {
- if (!(tqflags & KM_SLEEP)) {
- mutex_enter(&tq->tq_lock);
- return (NULL);
- }
- /*
- * We don't want to exceed tq_maxalloc, but we can't
- * wait for other tasks to complete (and thus free up
- * task structures) without risking deadlock with
- * the caller. So, we just delay for one second
- * to throttle the allocation rate.
- */
- delay(hz);
- }
- t = kmem_alloc(sizeof (task_t), tqflags);
- mutex_enter(&tq->tq_lock);
- if (t != NULL)
- tq->tq_nalloc++;
- }
- return (t);
-}
-
-static void
-task_free(taskq_t *tq, task_t *t)
-{
- if (tq->tq_nalloc <= tq->tq_minalloc) {
- t->task_next = tq->tq_freelist;
- tq->tq_freelist = t;
- } else {
- tq->tq_nalloc--;
- mutex_exit(&tq->tq_lock);
- kmem_free(t, sizeof (task_t));
- mutex_enter(&tq->tq_lock);
- }
-}
-
-taskqid_t
-taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
-{
- task_t *t;
-
- if (taskq_now) {
- func(arg);
- return (1);
- }
-
- mutex_enter(&tq->tq_lock);
- ASSERT(tq->tq_flags & TASKQ_ACTIVE);
- if ((t = task_alloc(tq, tqflags)) == NULL) {
- mutex_exit(&tq->tq_lock);
- return (0);
- }
- t->task_next = &tq->tq_task;
- t->task_prev = tq->tq_task.task_prev;
- t->task_next->task_prev = t;
- t->task_prev->task_next = t;
- t->task_func = func;
- t->task_arg = arg;
- cv_signal(&tq->tq_dispatch_cv);
- mutex_exit(&tq->tq_lock);
- return (1);
-}
-
-void
-taskq_wait(taskq_t *tq)
-{
- mutex_enter(&tq->tq_lock);
- while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
- cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
- mutex_exit(&tq->tq_lock);
-}
-
-static void *
-taskq_thread(void *arg)
-{
- taskq_t *tq = arg;
- task_t *t;
-
- mutex_enter(&tq->tq_lock);
- while (tq->tq_flags & TASKQ_ACTIVE) {
- if ((t = tq->tq_task.task_next) == &tq->tq_task) {
- if (--tq->tq_active == 0)
- cv_broadcast(&tq->tq_wait_cv);
- cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
- tq->tq_active++;
- continue;
- }
- t->task_prev->task_next = t->task_next;
- t->task_next->task_prev = t->task_prev;
- mutex_exit(&tq->tq_lock);
-
- rw_enter(&tq->tq_threadlock, RW_READER);
- t->task_func(t->task_arg);
- rw_exit(&tq->tq_threadlock);
-
- mutex_enter(&tq->tq_lock);
- task_free(tq, t);
- }
- tq->tq_nthreads--;
- cv_broadcast(&tq->tq_wait_cv);
- mutex_exit(&tq->tq_lock);
- return (NULL);
-}
-
-/*ARGSUSED*/
-taskq_t *
-taskq_create(const char *name, int nthreads, pri_t pri,
- int minalloc, int maxalloc, uint_t flags)
-{
- taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
- int t;
-
- rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
- tq->tq_flags = flags | TASKQ_ACTIVE;
- tq->tq_active = nthreads;
- tq->tq_nthreads = nthreads;
- tq->tq_minalloc = minalloc;
- tq->tq_maxalloc = maxalloc;
- tq->tq_task.task_next = &tq->tq_task;
- tq->tq_task.task_prev = &tq->tq_task;
- tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP);
-
- if (flags & TASKQ_PREPOPULATE) {
- mutex_enter(&tq->tq_lock);
- while (minalloc-- > 0)
- task_free(tq, task_alloc(tq, KM_SLEEP));
- mutex_exit(&tq->tq_lock);
- }
-
- for (t = 0; t < nthreads; t++)
- (void) thr_create(0, 0, taskq_thread,
- tq, THR_BOUND, &tq->tq_threadlist[t]);
-
- return (tq);
-}
-
-void
-taskq_destroy(taskq_t *tq)
-{
- int t;
- int nthreads = tq->tq_nthreads;
-
- taskq_wait(tq);
-
- mutex_enter(&tq->tq_lock);
-
- tq->tq_flags &= ~TASKQ_ACTIVE;
- cv_broadcast(&tq->tq_dispatch_cv);
-
- while (tq->tq_nthreads != 0)
- cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
-
- tq->tq_minalloc = 0;
- while (tq->tq_nalloc != 0) {
- ASSERT(tq->tq_freelist != NULL);
- task_free(tq, task_alloc(tq, KM_SLEEP));
- }
-
- mutex_exit(&tq->tq_lock);
-
- for (t = 0; t < nthreads; t++)
- (void) thr_join(tq->tq_threadlist[t], NULL, NULL);
-
- kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));
-
- rw_destroy(&tq->tq_threadlock);
-
- kmem_free(tq, sizeof (taskq_t));
-}
-
-int
-taskq_member(taskq_t *tq, void *t)
-{
- int i;
-
- if (taskq_now)
- return (1);
-
- for (i = 0; i < tq->tq_nthreads; i++)
- if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t)
- return (1);
-
- return (0);
-}
diff --git a/contrib/opensolaris/lib/libzpool/common/util.c b/contrib/opensolaris/lib/libzpool/common/util.c
deleted file mode 100644
index df49adb..0000000
--- a/contrib/opensolaris/lib/libzpool/common/util.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <assert.h>
-#include <sys/zfs_context.h>
-#include <sys/avl.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/spa.h>
-#include <sys/fs/zfs.h>
-#include <sys/refcount.h>
-
-/*
- * Routines needed by more than one client of libzpool.
- */
-
-void
-nicenum(uint64_t num, char *buf)
-{
- uint64_t n = num;
- int index = 0;
- char u;
-
- while (n >= 1024) {
- n = (n + (1024 / 2)) / 1024; /* Round up or down */
- index++;
- }
-
- u = " KMGTPE"[index];
-
- if (index == 0) {
- (void) sprintf(buf, "%llu", (u_longlong_t)n);
- } else if (n < 10 && (num & (num - 1)) != 0) {
- (void) sprintf(buf, "%.2f%c",
- (double)num / (1ULL << 10 * index), u);
- } else if (n < 100 && (num & (num - 1)) != 0) {
- (void) sprintf(buf, "%.1f%c",
- (double)num / (1ULL << 10 * index), u);
- } else {
- (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u);
- }
-}
-
-static void
-show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
-{
- nvlist_t **child;
- uint_t c, children;
- vdev_stat_t *vs;
- uint64_t sec;
- char used[6], avail[6];
- char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
-
- if (indent == 0) {
- (void) printf(" "
- " capacity operations bandwidth ---- errors ----\n");
- (void) printf("description "
- "used avail read write read write read write cksum\n");
- }
-
- VERIFY(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0);
-
- sec = MAX(1, vs->vs_timestamp / NANOSEC);
-
- nicenum(vs->vs_alloc, used);
- nicenum(vs->vs_space - vs->vs_alloc, avail);
- nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops);
- nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops);
- nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes);
- nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes);
- nicenum(vs->vs_read_errors, rerr);
- nicenum(vs->vs_write_errors, werr);
- nicenum(vs->vs_checksum_errors, cerr);
-
- (void) printf("%*s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
- indent, "",
- indent - 19 - (vs->vs_space ? 0 : 12), desc,
- vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
- vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
- rops, wops, rbytes, wbytes, rerr, werr, cerr);
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) != 0)
- return;
-
- for (c = 0; c < children; c++) {
- nvlist_t *cnv = child[c];
- char *cname, *tname;
- uint64_t np;
- if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
- nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
- cname = "<unknown>";
- tname = calloc(1, strlen(cname) + 2);
- (void) strcpy(tname, cname);
- if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
- tname[strlen(tname)] = '0' + np;
- show_vdev_stats(tname, cnv, indent + 2);
- free(tname);
- }
-}
-
-void
-show_pool_stats(spa_t *spa)
-{
- nvlist_t *config, *nvroot;
- char *name;
-
- spa_config_enter(spa, RW_READER, FTAG);
- config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
- spa_config_exit(spa, FTAG);
-
- VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
-
- show_vdev_stats(name, nvroot, 0);
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_atomic.c b/sys/compat/opensolaris/kern/opensolaris_atomic.c
deleted file mode 100644
index fb6ef2e..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_atomic.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/atomic.h>
-
-#ifdef _KERNEL
-#include <sys/kernel.h>
-
-struct mtx atomic_mtx;
-MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF);
-#else
-#include <pthread.h>
-
-#define mtx_lock(lock) pthread_mutex_lock(lock)
-#define mtx_unlock(lock) pthread_mutex_unlock(lock)
-
-static pthread_mutex_t atomic_mtx;
-
-static __attribute__((constructor)) void
-atomic_init(void)
-{
- pthread_mutex_init(&atomic_mtx, NULL);
-}
-#endif
-
-#ifndef __LP64__
-void
-atomic_add_64(volatile uint64_t *target, int64_t delta)
-{
-
- mtx_lock(&atomic_mtx);
- *target += delta;
- mtx_unlock(&atomic_mtx);
-}
-#endif
-
-uint64_t
-atomic_add_64_nv(volatile uint64_t *target, int64_t delta)
-{
- uint64_t newval;
-
- mtx_lock(&atomic_mtx);
- newval = (*target += delta);
- mtx_unlock(&atomic_mtx);
- return (newval);
-}
-
-#if defined(__sparc64__) || defined(__powerpc__) || defined(__arm__)
-void
-atomic_or_8(volatile uint8_t *target, uint8_t value)
-{
- mtx_lock(&atomic_mtx);
- *target |= value;
- mtx_unlock(&atomic_mtx);
-}
-#endif
-
-uint8_t
-atomic_or_8_nv(volatile uint8_t *target, uint8_t value)
-{
- uint8_t newval;
-
- mtx_lock(&atomic_mtx);
- newval = (*target |= value);
- mtx_unlock(&atomic_mtx);
- return (newval);
-}
-
-#ifndef __LP64__
-void *
-atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
-{
- void *oldval, **trg;
-
- mtx_lock(&atomic_mtx);
- trg = __DEVOLATILE(void **, target);
- oldval = *trg;
- if (oldval == cmp)
- *trg = newval;
- mtx_unlock(&atomic_mtx);
- return (oldval);
-}
-#endif
-
-#ifndef __sparc64__
-uint64_t
-atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t newval)
-{
- uint64_t oldval;
-
- mtx_lock(&atomic_mtx);
- oldval = *target;
- if (oldval == cmp)
- *target = newval;
- mtx_unlock(&atomic_mtx);
- return (oldval);
-}
-#endif
-
-void
-membar_producer(void)
-{
- /* nothing */
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_kmem.c b/sys/compat/opensolaris/kern/opensolaris_kmem.c
deleted file mode 100644
index e511620..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_kmem.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*-
- * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/kmem.h>
-#include <sys/debug.h>
-#include <sys/mutex.h>
-
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-
-#ifdef KMEM_DEBUG
-#include <sys/queue.h>
-#include <sys/stack.h>
-#endif
-
-#ifdef _KERNEL
-static MALLOC_DEFINE(M_SOLARIS, "solaris", "Solaris");
-#else
-#define malloc(size, type, flags) malloc(size)
-#define free(addr, type) free(addr)
-#endif
-
-#ifdef KMEM_DEBUG
-struct kmem_item {
- struct stack stack;
- LIST_ENTRY(kmem_item) next;
-};
-static LIST_HEAD(, kmem_item) kmem_items;
-static struct mtx kmem_items_mtx;
-MTX_SYSINIT(kmem_items_mtx, &kmem_items_mtx, "kmem_items", MTX_DEF);
-#endif /* KMEM_DEBUG */
-
-void *
-zfs_kmem_alloc(size_t size, int kmflags)
-{
- void *p;
-#ifdef KMEM_DEBUG
- struct kmem_item *i;
-
- size += sizeof(struct kmem_item);
-#endif
- p = malloc(size, M_SOLARIS, kmflags);
-#ifndef _KERNEL
- if (kmflags & KM_SLEEP)
- assert(p != NULL);
-#endif
-#ifdef KMEM_DEBUG
- if (p != NULL) {
- i = p;
- p = (u_char *)p + sizeof(struct kmem_item);
- stack_save(&i->stack);
- mtx_lock(&kmem_items_mtx);
- LIST_INSERT_HEAD(&kmem_items, i, next);
- mtx_unlock(&kmem_items_mtx);
- }
-#endif
- return (p);
-}
-
-void
-zfs_kmem_free(void *buf, size_t size __unused)
-{
-#ifdef KMEM_DEBUG
- struct kmem_item *i;
-
- buf = (u_char *)buf - sizeof(struct kmem_item);
- mtx_lock(&kmem_items_mtx);
- LIST_FOREACH(i, &kmem_items, next) {
- if (i == buf)
- break;
- }
- ASSERT(i != NULL);
- LIST_REMOVE(i, next);
- mtx_unlock(&kmem_items_mtx);
-#endif
- free(buf, M_SOLARIS);
-}
-
-uint64_t
-kmem_size(void)
-{
-
- return ((uint64_t)vm_kmem_size);
-}
-
-uint64_t
-kmem_used(void)
-{
-
- return ((uint64_t)kmem_map->size);
-}
-
-static int
-kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
-{
- struct kmem_cache *cache = private;
-
- return (cache->kc_constructor(mem, cache->kc_private, flags));
-}
-
-static void
-kmem_std_destructor(void *mem, int size __unused, void *private)
-{
- struct kmem_cache *cache = private;
-
- cache->kc_destructor(mem, cache->kc_private);
-}
-
-kmem_cache_t *
-kmem_cache_create(char *name, size_t bufsize, size_t align,
- int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
- void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags)
-{
- kmem_cache_t *cache;
-
- ASSERT(vmp == NULL);
-
- cache = kmem_alloc(sizeof(*cache), KM_SLEEP);
- strlcpy(cache->kc_name, name, sizeof(cache->kc_name));
- cache->kc_constructor = constructor;
- cache->kc_destructor = destructor;
- cache->kc_private = private;
-#ifdef _KERNEL
- cache->kc_zone = uma_zcreate(cache->kc_name, bufsize,
- constructor != NULL ? kmem_std_constructor : NULL,
- destructor != NULL ? kmem_std_destructor : NULL,
- NULL, NULL, align > 0 ? align - 1 : 0, cflags);
-#else
- cache->kc_size = bufsize;
-#endif
-
- return (cache);
-}
-
-void
-kmem_cache_destroy(kmem_cache_t *cache)
-{
- uma_zdestroy(cache->kc_zone);
- kmem_free(cache, sizeof(*cache));
-}
-
-void *
-kmem_cache_alloc(kmem_cache_t *cache, int flags)
-{
-#ifdef _KERNEL
- return (uma_zalloc_arg(cache->kc_zone, cache, flags));
-#else
- void *p;
-
- p = kmem_alloc(cache->kc_size, flags);
- if (p != NULL) {
- kmem_std_constructor(p, cache->kc_size, cache->kc_private,
- flags);
- }
- return (p);
-#endif
-}
-
-void
-kmem_cache_free(kmem_cache_t *cache, void *buf)
-{
-#ifdef _KERNEL
- uma_zfree_arg(cache->kc_zone, buf, cache);
-#else
- kmem_std_destructor(buf, cache->kc_size, cache->kc_private);
- kmem_free(buf, cache->kc_size);
-#endif
-}
-
-#ifdef _KERNEL
-extern void zone_drain(uma_zone_t zone);
-void
-kmem_cache_reap_now(kmem_cache_t *cache)
-{
- zone_drain(cache->kc_zone);
-}
-
-void
-kmem_reap(void)
-{
- uma_reclaim();
-}
-#else
-void
-kmem_cache_reap_now(kmem_cache_t *cache __unused)
-{
-}
-
-void
-kmem_reap(void)
-{
-}
-#endif
-
-int
-kmem_debugging(void)
-{
- return (0);
-}
-
-void *
-calloc(size_t n, size_t s)
-{
- return (kmem_zalloc(n * s, KM_NOSLEEP));
-}
-
-#ifdef KMEM_DEBUG
-static void
-kmem_show(void *dummy __unused)
-{
- struct kmem_item *i;
-
- mtx_lock(&kmem_items_mtx);
- if (LIST_EMPTY(&kmem_items))
- printf("KMEM_DEBUG: No leaked elements.\n");
- else {
- printf("KMEM_DEBUG: Leaked elements:\n\n");
- LIST_FOREACH(i, &kmem_items, next) {
- printf("address=%p\n", i);
- stack_print(&i->stack);
- printf("\n");
- }
- }
- mtx_unlock(&kmem_items_mtx);
-}
-
-SYSUNINIT(sol_kmem, SI_SUB_DRIVERS, SI_ORDER_FIRST, kmem_show, NULL);
-#endif /* KMEM_DEBUG */
diff --git a/sys/compat/opensolaris/kern/opensolaris_kobj.c b/sys/compat/opensolaris/kern/opensolaris_kobj.c
deleted file mode 100644
index efb2885..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_kobj.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/types.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/kthread.h>
-#include <sys/namei.h>
-#include <sys/proc.h>
-#include <sys/filedesc.h>
-#include <sys/fcntl.h>
-#include <sys/linker.h>
-#include <sys/kobj.h>
-
-void
-kobj_free(void *address, size_t size)
-{
-
- kmem_free(address, size);
-}
-
-void *
-kobj_alloc(size_t size, int flag)
-{
-
- return (kmem_alloc(size, (flag & KM_NOWAIT) ? KM_NOSLEEP : KM_SLEEP));
-}
-
-void *
-kobj_zalloc(size_t size, int flag)
-{
- void *p;
-
- if ((p = kobj_alloc(size, flag)) != NULL)
- bzero(p, size);
- return (p);
-}
-
-static void *
-kobj_open_file_vnode(const char *file)
-{
- struct thread *td = curthread;
- struct nameidata nd;
- int error, flags;
-
- if (td->td_proc->p_fd->fd_rdir == NULL)
- td->td_proc->p_fd->fd_rdir = rootvnode;
- if (td->td_proc->p_fd->fd_cdir == NULL)
- td->td_proc->p_fd->fd_cdir = rootvnode;
-
- flags = FREAD;
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file, td);
- error = vn_open_cred(&nd, &flags, 0, curthread->td_ucred, NULL);
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (error != 0)
- return (NULL);
- /* We just unlock so we hold a reference. */
- VOP_UNLOCK(nd.ni_vp, 0);
- return (nd.ni_vp);
-}
-
-static void *
-kobj_open_file_loader(const char *file)
-{
-
- return (preload_search_by_name(file));
-}
-
-struct _buf *
-kobj_open_file(const char *file)
-{
- struct _buf *out;
-
- out = kmem_alloc(sizeof(*out), KM_SLEEP);
- out->mounted = root_mounted();
- /*
- * If root is already mounted we read file using file system,
- * if not, we use loader.
- */
- if (out->mounted)
- out->ptr = kobj_open_file_vnode(file);
- else
- out->ptr = kobj_open_file_loader(file);
- if (out->ptr == NULL) {
- kmem_free(out, sizeof(*out));
- return ((struct _buf *)-1);
- }
- return (out);
-}
-
-static int
-kobj_get_filesize_vnode(struct _buf *file, uint64_t *size)
-{
- struct vnode *vp = file->ptr;
- struct thread *td = curthread;
- struct vattr va;
- int error;
-
- vn_lock(vp, LK_SHARED | LK_RETRY);
- error = VOP_GETATTR(vp, &va, td->td_ucred, td);
- VOP_UNLOCK(vp, 0);
- if (error == 0)
- *size = (uint64_t)va.va_size;
- return (error);
-}
-
-static int
-kobj_get_filesize_loader(struct _buf *file, uint64_t *size)
-{
- void *ptr;
-
- ptr = preload_search_info(file->ptr, MODINFO_SIZE);
- if (ptr == NULL)
- return (ENOENT);
- *size = (uint64_t)*(size_t *)ptr;
- return (0);
-}
-
-int
-kobj_get_filesize(struct _buf *file, uint64_t *size)
-{
-
- if (file->mounted)
- return (kobj_get_filesize_vnode(file, size));
- else
- return (kobj_get_filesize_loader(file, size));
-}
-
-int
-kobj_read_file_vnode(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
- struct vnode *vp = file->ptr;
- struct thread *td = curthread;
- struct uio auio;
- struct iovec aiov;
- int error;
-
- bzero(&aiov, sizeof(aiov));
- bzero(&auio, sizeof(auio));
-
- aiov.iov_base = buf;
- aiov.iov_len = size;
-
- auio.uio_iov = &aiov;
- auio.uio_offset = (off_t)off;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_iovcnt = 1;
- auio.uio_resid = size;
- auio.uio_td = td;
-
- vn_lock(vp, LK_SHARED | LK_RETRY);
- error = VOP_READ(vp, &auio, IO_UNIT | IO_SYNC, td->td_ucred);
- VOP_UNLOCK(vp, 0);
- return (error != 0 ? -1 : size - auio.uio_resid);
-}
-
-int
-kobj_read_file_loader(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
- char *ptr;
-
- ptr = preload_search_info(file->ptr, MODINFO_ADDR);
- if (ptr == NULL)
- return (ENOENT);
- ptr = *(void **)ptr;
- bcopy(ptr + off, buf, size);
- return (0);
-}
-
-int
-kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
-{
-
- if (file->mounted)
- return (kobj_read_file_vnode(file, buf, size, off));
- else
- return (kobj_read_file_loader(file, buf, size, off));
-}
-
-void
-kobj_close_file(struct _buf *file)
-{
-
- if (file->mounted) {
- struct vnode *vp = file->ptr;
- struct thread *td = curthread;
- int flags = FREAD;
-
- vn_close(vp, flags, td->td_ucred, td);
- }
- kmem_free(file, sizeof(*file));
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_kstat.c b/sys/compat/opensolaris/kern/opensolaris_kstat.c
deleted file mode 100644
index 6d0b7cf..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_kstat.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/sysctl.h>
-#include <sys/kstat.h>
-
-static MALLOC_DEFINE(M_KSTAT, "kstat_data", "Kernel statistics");
-
-SYSCTL_NODE(, OID_AUTO, kstat, CTLFLAG_RW, 0, "Kernel statistics");
-
-kstat_t *
-kstat_create(char *module, int instance, char *name, char *class, uchar_t type,
- ulong_t ndata, uchar_t flags)
-{
- struct sysctl_oid *root;
- kstat_t *ksp;
-
- KASSERT(instance == 0, ("instance=%d", instance));
- KASSERT(type == KSTAT_TYPE_NAMED, ("type=%hhu", type));
- KASSERT(flags == KSTAT_FLAG_VIRTUAL, ("flags=%02hhx", flags));
-
- /*
- * Allocate the main structure. We don't need to copy module/class/name
- * stuff in here, because it is only used for sysctl node creation
- * done in this function.
- */
- ksp = malloc(sizeof(*ksp), M_KSTAT, M_WAITOK);
- ksp->ks_ndata = ndata;
-
- /*
- * Create sysctl tree for those statistics:
- *
- * kstat.<module>.<class>.<name>.
- */
- sysctl_ctx_init(&ksp->ks_sysctl_ctx);
- root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx,
- SYSCTL_STATIC_CHILDREN(_kstat), OID_AUTO, module, CTLFLAG_RW, 0,
- "");
- if (root == NULL) {
- printf("%s: Cannot create kstat.%s tree!\n", __func__, module);
- sysctl_ctx_free(&ksp->ks_sysctl_ctx);
- free(ksp, M_KSTAT);
- return (NULL);
- }
- root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
- OID_AUTO, class, CTLFLAG_RW, 0, "");
- if (root == NULL) {
- printf("%s: Cannot create kstat.%s.%s tree!\n", __func__,
- module, class);
- sysctl_ctx_free(&ksp->ks_sysctl_ctx);
- free(ksp, M_KSTAT);
- return (NULL);
- }
- root = SYSCTL_ADD_NODE(&ksp->ks_sysctl_ctx, SYSCTL_CHILDREN(root),
- OID_AUTO, name, CTLFLAG_RW, 0, "");
- if (root == NULL) {
- printf("%s: Cannot create kstat.%s.%s.%s tree!\n", __func__,
- module, class, name);
- sysctl_ctx_free(&ksp->ks_sysctl_ctx);
- free(ksp, M_KSTAT);
- return (NULL);
- }
- ksp->ks_sysctl_root = root;
-
- return (ksp);
-}
-
-static int
-kstat_sysctl(SYSCTL_HANDLER_ARGS)
-{
- kstat_named_t *ksent = arg1;
- uint64_t val;
-
- val = ksent->value.ui64;
- return sysctl_handle_quad(oidp, &val, 0, req);
-}
-
-void
-kstat_install(kstat_t *ksp)
-{
- kstat_named_t *ksent;
- u_int i;
-
- ksent = ksp->ks_data;
- for (i = 0; i < ksp->ks_ndata; i++, ksent++) {
- KASSERT(ksent->data_type == KSTAT_DATA_UINT64,
- ("data_type=%d", ksent->data_type));
- SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
- SYSCTL_CHILDREN(ksp->ks_sysctl_root), OID_AUTO, ksent->name,
- CTLTYPE_QUAD | CTLFLAG_RD, ksent, sizeof(*ksent),
- kstat_sysctl, "QU", "");
- }
-}
-
-void
-kstat_delete(kstat_t *ksp)
-{
-
- sysctl_ctx_free(&ksp->ks_sysctl_ctx);
- free(ksp, M_KSTAT);
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_misc.c b/sys/compat/opensolaris/kern/opensolaris_misc.c
deleted file mode 100644
index a89d478..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_misc.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/libkern.h>
-#include <sys/misc.h>
-#include <sys/sunddi.h>
-
-char hw_serial[11] = "0";
-
-struct opensolaris_utsname utsname = {
- .nodename = hostname
-};
-
-int
-ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result)
-{
- char *end;
-
- if (str == hw_serial) {
- *result = hostid;
- return (0);
- }
-
- *result = strtoul(str, &end, base);
- if (*result == 0)
- return (EINVAL);
- return (0);
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_policy.c b/sys/compat/opensolaris/kern/opensolaris_policy.c
deleted file mode 100644
index a09c9ec..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_policy.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/priv.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/policy.h>
-
-/*
- * Policy check for ZFS operations: returns whatever priv_check_cred()
- * reports for PRIV_VFS_MOUNT on the given credential.
- */
-int
-secpolicy_zfs(struct ucred *cred)
-{
-
- return (priv_check_cred(cred, PRIV_VFS_MOUNT, 0));
-}
-
-/*
- * Policy check for system configuration changes: returns the result of
- * priv_check_cred() for PRIV_ZFS_POOL_CONFIG. The 'checkonly' argument is
- * accepted for interface compatibility and is not used.
- */
-int
-secpolicy_sys_config(struct ucred *cred, int checkonly __unused)
-{
-
- return (priv_check_cred(cred, PRIV_ZFS_POOL_CONFIG, 0));
-}
-
-/*
- * Policy check for fault injection: returns the result of
- * priv_check_cred() for PRIV_ZFS_INJECT.
- */
-int
-secpolicy_zinject(struct ucred *cred)
-{
-
- return (priv_check_cred(cred, PRIV_ZFS_INJECT, 0));
-}
-
-/*
- * Policy check for unmounting: returns the result of priv_check_cred()
- * for PRIV_VFS_UNMOUNT. The mount argument is not consulted.
- */
-int
-secpolicy_fs_unmount(struct ucred *cred, struct mount *vfsp __unused)
-{
-
- return (priv_check_cred(cred, PRIV_VFS_UNMOUNT, 0));
-}
-
-/*
- * Policy check for creating a hard link.  kern_link() already performs
- * this check, so returning 0 unconditionally would also be acceptable.
- * The privilege is only consulted while hardlink_check_uid is non-zero.
- */
-extern int hardlink_check_uid;
-int
-secpolicy_basic_link(struct ucred *cred)
-{
-
-	if (hardlink_check_uid)
-		return (priv_check_cred(cred, PRIV_VFS_LINK, 0));
-	return (0);
-}
-
-/*
- * Policy check for modifying the sticky bit: always refuses with EPERM;
- * the credential is not examined.
- */
-int
-secpolicy_vnode_stky_modify(struct ucred *cred)
-{
-
- return (EPERM);
-}
-
-/*
- * Policy check for removing a vnode: returns the result of
- * priv_check_cred() for PRIV_VFS_ADMIN.
- */
-int
-secpolicy_vnode_remove(struct ucred *cred)
-{
-
- return (priv_check_cred(cred, PRIV_VFS_ADMIN, 0));
-}
-
-/*
- * Decide whether 'cred' holds the privileges needed for every access bit
- * requested in 'mode' (VREAD, VWRITE, VEXEC).  Execute access maps to the
- * lookup privilege for directories and to the exec privilege otherwise.
- * Returns 0 when all requested bits are covered, EACCES as soon as one is
- * not.  The 'owner' argument is not consulted.
- */
-int
-secpolicy_vnode_access(struct ucred *cred, struct vnode *vp, uint64_t owner,
-    int mode)
-{
-	int execpriv;
-
-	if ((mode & VREAD) && priv_check_cred(cred, PRIV_VFS_READ, 0) != 0)
-		return (EACCES);
-	if ((mode & VWRITE) && priv_check_cred(cred, PRIV_VFS_WRITE, 0) != 0)
-		return (EACCES);
-	if (!(mode & VEXEC))
-		return (0);
-
-	/* Only look at the vnode once execute access is actually wanted. */
-	execpriv = (vp->v_type == VDIR) ? PRIV_VFS_LOOKUP : PRIV_VFS_EXEC;
-	if (priv_check_cred(cred, execpriv, 0) != 0)
-		return (EACCES);
-	return (0);
-}
-
-/*
- * Policy check for changing discretionary access attributes of a file
- * owned by 'owner'.  The owner passes outright; anyone else is subject to
- * the PRIV_VFS_ADMIN privilege check.
- */
-int
-secpolicy_vnode_setdac(struct ucred *cred, uid_t owner)
-{
-
-	if (owner != cred->cr_uid)
-		return (priv_check_cred(cred, PRIV_VFS_ADMIN, 0));
-	return (0);
-}
-
-/*
- * Decide whether 'cred' may apply the attribute changes described by 'vap'
- * (selected through vap->va_mask) to vnode 'vp', whose current attributes
- * are given in 'ovap'.
- *
- * 'unlocked_access' is a caller-supplied callback invoked with 'node' to
- * test for write access where the rules below require it. The 'flags'
- * argument is not used.
- *
- * Returns 0 when every requested change is permitted, otherwise the first
- * error encountered. Note that 'vap' can be modified: va_mode is overwritten
- * with the old mode when AT_MODE is not requested, and
- * secpolicy_setid_clear() may strip set-id bits on an ownership change.
- */
-int
-secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp, struct vattr *vap,
- const struct vattr *ovap, int flags,
- int unlocked_access(void *, int, struct ucred *), void *node)
-{
- int mask = vap->va_mask;
- int error;
-
- /* Size changes: never on directories, and write access is required. */
- if (mask & AT_SIZE) {
- if (vp->v_type == VDIR)
- return (EISDIR);
- error = unlocked_access(node, VWRITE, cred);
- if (error)
- return (error);
- }
- if (mask & AT_MODE) {
- /*
- * If not the owner of the file then check privilege
- * for two things: the privilege to set the mode at all
- * and, if we're setting setuid, we also need permissions
- * to add the set-uid bit, if we're not the owner.
- * In the specific case of creating a set-uid root
- * file, we need even more permissions.
- */
- error = secpolicy_vnode_setdac(cred, ovap->va_uid);
- if (error)
- return (error);
- error = secpolicy_setid_setsticky_clear(vp, vap, ovap, cred);
- if (error)
- return (error);
- } else {
- /* Mode not being changed: carry the existing mode forward. */
- vap->va_mode = ovap->va_mode;
- }
- if (mask & (AT_UID | AT_GID)) {
- error = secpolicy_vnode_setdac(cred, ovap->va_uid);
- if (error)
- return (error);
-
- /*
- * To change the owner of a file, or change the group of a file to a
- * group of which we are not a member, the caller must have
- * privilege.
- */
- if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
- ((mask & AT_GID) && vap->va_gid != ovap->va_gid &&
- !groupmember(vap->va_gid, cred))) {
- error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
- if (error)
- return (error);
- }
-
- /* Any real change of owner or group may drop the set-id bits. */
- if (((mask & AT_UID) && vap->va_uid != ovap->va_uid) ||
- ((mask & AT_GID) && vap->va_gid != ovap->va_gid)) {
- secpolicy_setid_clear(vap, cred);
- }
- }
- if (mask & (AT_ATIME | AT_MTIME)) {
- /*
- * From utimes(2):
- * If times is NULL, ... The caller must be the owner of
- * the file, have permission to write the file, or be the
- * super-user.
- * If times is non-NULL, ... The caller must be the owner of
- * the file or be the super-user.
- */
- error = secpolicy_vnode_setdac(cred, ovap->va_uid);
- /* Fall back to a write-access test only for the times == NULL case. */
- if (error && (vap->va_vaflags & VA_UTIMES_NULL))
- error = unlocked_access(node, VWRITE, cred);
- if (error)
- return (error);
- }
- return (0);
-}
-
-/*
- * Policy check for creating a file with an arbitrary group id: always
- * refuses with EPERM; the credential is not examined.
- */
-int
-secpolicy_vnode_create_gid(struct ucred *cred)
-{
-
- return (EPERM);
-}
-
-/*
- * Policy check for setting the set-group-id bit on a file belonging to
- * group 'gid'.  Members of that group pass outright; everyone else is
- * subject to the PRIV_VFS_SETGID privilege check.
- */
-int
-secpolicy_vnode_setids_setgids(struct ucred *cred, gid_t gid)
-{
-
-	if (groupmember(gid, cred))
-		return (0);
-	return (priv_check_cred(cred, PRIV_VFS_SETGID, 0));
-}
-
-/*
- * Policy check for keeping set-id bits: returns the result of
- * priv_check_cred() for PRIV_VFS_RETAINSUGID. The 'issuidroot' argument is
- * not used.
- */
-int
-secpolicy_vnode_setid_retain(struct ucred *cred, boolean_t issuidroot __unused)
-{
-
- return (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0));
-}
-
-/*
- * Strip the set-user-id and set-group-id bits from 'vap' unless 'cred'
- * passes the PRIV_VFS_RETAINSUGID check.  When bits are stripped, AT_MODE
- * is added to va_mask so the mode change is applied.
- */
-void
-secpolicy_setid_clear(struct vattr *vap, struct ucred *cred)
-{
-
-	/* Nothing to do when neither set-id bit is present. */
-	if ((vap->va_mode & (S_ISUID | S_ISGID)) == 0)
-		return;
-	/* A zero result means the caller is allowed to keep the bits. */
-	if (!priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
-		return;
-	vap->va_mask |= AT_MODE;
-	vap->va_mode &= ~(S_ISUID|S_ISGID);
-}
-
-/*
- * Validate the sticky and set-group-id bits of a requested mode change.
- *
- * Privileged processes may set the sticky bit on non-directories, as well
- * as set the setgid bit on a file with a group that the process is not a
- * member of.  Both of these are allowed in jail(8).
- *
- * Returns EFTYPE when the sticky bit is refused, the error from the
- * set-group-id check when that is refused, and 0 otherwise.
- */
-int
-secpolicy_setid_setsticky_clear(struct vnode *vp, struct vattr *vap,
-    const struct vattr *ovap, struct ucred *cred)
-{
-
-	if (vp->v_type != VDIR && (vap->va_mode & S_ISTXT) &&
-	    priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
-		return (EFTYPE);
-
-	/* The group-id bit needs a privilege check only when requested. */
-	if ((vap->va_mode & S_ISGID) == 0)
-		return (0);
-	return (secpolicy_vnode_setids_setgids(cred, ovap->va_gid));
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_string.c b/sys/compat/opensolaris/kern/opensolaris_string.c
deleted file mode 100644
index 4448f34..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_string.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/param.h>
-#include <sys/string.h>
-
-/*
- * ASCII character classification helpers. Both macros evaluate their
- * argument more than once, so do not pass expressions with side effects.
- */
-#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
-
-#define IS_ALPHA(c) \
- (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
-
-/*
- * Return a pointer to the first character of 's' that also occurs in the
- * set 'b', or NULL when 's' contains none of them.
- */
-char *
-strpbrk(const char *s, const char *b)
-{
-	const char *cur, *cand;
-
-	for (cur = s; ; cur++) {
-		/* Scan the set for the current character. */
-		cand = b;
-		while (*cand != '\0' && *cand != *cur)
-			cand++;
-		if (*cand != '\0')
-			return ((char *)cur);
-		/* The terminator never matches; reaching it ends the search. */
-		if (*cur == '\0')
-			return (NULL);
-	}
-}
-
-/*
- * Convert a string into a valid C identifier by replacing invalid
- * characters with '_'.  Also makes sure the string is nul-terminated
- * and takes up at most n bytes (terminator included).
- *
- * The first character may be a letter or '_'; later characters may also
- * be digits.  An empty string is left untouched, and so is the buffer
- * when n is 0, since there is then no room to store anything.
- */
-void
-strident_canon(char *s, size_t n)
-{
-	char c;
-	char *end;
-
-	/* With n == 0 even the terminator would land outside the buffer. */
-	if (n == 0)
-		return;
-	end = s + n - 1;
-
-	if ((c = *s) == 0)
-		return;
-
-	if (!IS_ALPHA(c) && c != '_')
-		*s = '_';
-
-	while (s < end && ((c = *(++s)) != 0)) {
-		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
-			*s = '_';
-	}
-	/* Terminate at the original end of string or at byte n - 1. */
-	*s = 0;
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_vfs.c b/sys/compat/opensolaris/kern/opensolaris_vfs.c
deleted file mode 100644
index 166eeed..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_vfs.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*-
- * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/mount.h>
-#include <sys/cred.h>
-#include <sys/vfs.h>
-#include <sys/priv.h>
-#include <sys/libkern.h>
-
-/* Malloc type used for every allocation made by the option helpers below. */
-MALLOC_DECLARE(M_MOUNT);
-
-/*
- * A mount option list is a tail queue of vfsopt entries. Each entry holds
- * a separately allocated name and an optional value of 'len' bytes.
- */
-TAILQ_HEAD(vfsoptlist, vfsopt);
-struct vfsopt {
- TAILQ_ENTRY(vfsopt) link;
- char *name;
- void *value;
- int len;
-};
-
-/*
- * Append the option 'name' (with optional string value 'arg') to the
- * option list of 'vfsp', creating the list on first use.  Both the name
- * and the value are copied; a NULL 'arg' records an option with no value.
- * The 'flags' argument is ignored.
- */
-void
-vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
-    int flags __unused)
-{
-	struct vfsopt *entry;
-	size_t namelen;
-
-	if (vfsp->mnt_opt == NULL) {
-		vfsp->mnt_opt = malloc(sizeof(*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
-		TAILQ_INIT(vfsp->mnt_opt);
-	}
-
-	entry = malloc(sizeof(*entry), M_MOUNT, M_WAITOK);
-
-	/* Private copy of the option name, terminator included. */
-	namelen = strlen(name) + 1;
-	entry->name = malloc(namelen, M_MOUNT, M_WAITOK);
-	strlcpy(entry->name, name, namelen);
-
-	if (arg != NULL) {
-		entry->len = strlen(arg) + 1;
-		entry->value = malloc(entry->len, M_MOUNT, M_WAITOK);
-		bcopy(arg, entry->value, entry->len);
-	} else {
-		entry->value = NULL;
-		entry->len = 0;
-	}
-	/* TODO: Locking. */
-	TAILQ_INSERT_TAIL(vfsp->mnt_opt, entry, link);
-}
-
-/*
- * Remove the first option called 'name' from the option list of 'vfsp'
- * and release its storage.  Does nothing when the list does not exist or
- * holds no such option.
- */
-void
-vfs_clearmntopt(vfs_t *vfsp, const char *name)
-{
-	struct vfsopt *entry;
-
-	if (vfsp->mnt_opt == NULL)
-		return;
-	/* TODO: Locking. */
-	TAILQ_FOREACH(entry, vfsp->mnt_opt, link) {
-		if (strcmp(entry->name, name) != 0)
-			continue;
-		/* Unlink, free, and leave at once: the iterator is now stale. */
-		TAILQ_REMOVE(vfsp->mnt_opt, entry, link);
-		free(entry->name, M_MOUNT);
-		if (entry->value != NULL)
-			free(entry->value, M_MOUNT);
-		free(entry, M_MOUNT);
-		return;
-	}
-}
-
-/*
- * Report whether option 'opt' is present on 'vfsp'.  Returns 1 when
- * vfs_getopt() finds it (passing 'argp' through to receive the value) and
- * 0 when it does not or when no option list exists.
- */
-int
-vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
-{
-	struct vfsoptlist *list;
-
-	list = vfsp->mnt_opt;
-	if (list == NULL)
-		return (0);
-	if (vfs_getopt(list, opt, (void **)argp, NULL) != 0)
-		return (0);
-	return (1);
-}
-
-/*
- * Follow the chain of file systems mounted on *cvpp and replace *cvpp with
- * the root vnode of the last one. 'lktype' is handed to VFS_ROOT() for each
- * root that is looked up.
- *
- * Returns 0 on success (with *cvpp updated) or the error from VFS_ROOT().
- * On error *cvpp is left unchanged, although the vnode walked past has
- * already been released by then.
- */
-int
-traverse(vnode_t **cvpp, int lktype)
-{
- kthread_t *td = curthread;
- vnode_t *cvp;
- vnode_t *tvp;
- vfs_t *vfsp;
- int error;
-
- cvp = *cvpp;
- tvp = NULL;
-
- /*
- * If this vnode is mounted on, then we transparently indirect
- * to the vnode which is the root of the mounted file system.
- * Before we do this we must check that an unmount is not in
- * progress on this vnode.
- */
-
- for (;;) {
- /*
- * Reached the end of the mount chain?
- */
- vfsp = vn_mountedvfs(cvp);
- if (vfsp == NULL)
- break;
- /*
- * tvp is NULL for *cvpp vnode, which we can't unlock.
- */
- if (tvp != NULL)
- vput(cvp);
- else
- vrele(cvp);
-
- /*
- * The read lock must be held across the call to VFS_ROOT() to
- * prevent a concurrent unmount from destroying the vfs.
- *
- * NOTE(review): no lock is taken in this function, so the
- * comment above presumably describes the original Solaris
- * code - confirm what protects vfsp here.
- */
- error = VFS_ROOT(vfsp, lktype, &tvp, td);
- if (error != 0)
- return (error);
- cvp = tvp;
- }
-
- *cvpp = cvp;
- return (0);
-}
-
-/*
- * Mount a file system of type 'fstype' on the directory vnode 'vp'.
- * 'fspath' is the mount point path, 'fspec' is recorded as the "from"
- * option and as the mounted-from name, and 'fsflags' supplies MNT_* flags.
- *
- * Returns 0 on success. Returns ENAMETOOLONG, ENODEV, ENOTDIR or EBUSY from
- * the up-front checks, or the error from VFS_MOUNT() or
- * vfs_allocate_syncvnode().
- */
-int
-domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,
- char *fspec, int fsflags)
-{
- struct mount *mp;
- struct vfsconf *vfsp;
- struct ucred *newcr, *oldcr;
- int error;
-
- /*
- * Be ultra-paranoid about making sure the type and fspath
- * variables will fit in our mp buffers, including the
- * terminating NUL.
- */
- if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
- return (ENAMETOOLONG);
-
- /* The error reported through &error is discarded in favor of ENODEV. */
- vfsp = vfs_byname_kld(fstype, td, &error);
- if (vfsp == NULL)
- return (ENODEV);
-
- if (vp->v_type != VDIR)
- return (ENOTDIR);
- /* Claim the vnode as a mount point, failing if it is already one. */
- VI_LOCK(vp);
- if ((vp->v_iflag & VI_MOUNT) != 0 ||
- vp->v_mountedhere != NULL) {
- VI_UNLOCK(vp);
- return (EBUSY);
- }
- vp->v_iflag |= VI_MOUNT;
- VI_UNLOCK(vp);
-
- /*
- * Allocate and initialize the filesystem.
- */
- vn_lock(vp, LK_SHARED | LK_RETRY);
- mp = vfs_mount_alloc(vp, vfsp, fspath, td);
- VOP_UNLOCK(vp, 0);
-
- /*
- * vfs_setmntopt() builds the list in mnt_opt; move it to mnt_optnew so
- * the "from" option is seen as a new option by VFS_MOUNT().
- */
- mp->mnt_optnew = NULL;
- vfs_setmntopt(mp, "from", fspec, 0);
- mp->mnt_optnew = mp->mnt_opt;
- mp->mnt_opt = NULL;
-
- /*
- * Set the mount level flags.
- * crdup() can sleep, so do it before acquiring a mutex.
- */
- newcr = crdup(kcred);
- MNT_ILOCK(mp);
- if (fsflags & MNT_RDONLY)
- mp->mnt_flag |= MNT_RDONLY;
- mp->mnt_flag &=~ MNT_UPDATEMASK;
- mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
- /*
- * Unprivileged user can trigger mounting a snapshot, but we don't want
- * him to unmount it, so we switch to privileged credentials.
- */
- oldcr = mp->mnt_cred;
- mp->mnt_cred = newcr;
- mp->mnt_stat.f_owner = mp->mnt_cred->cr_uid;
- MNT_IUNLOCK(mp);
- crfree(oldcr);
- /*
- * Mount the filesystem.
- * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
- * get. No freeing of cn_pnbuf.
- */
- error = VFS_MOUNT(mp, td);
-
- if (!error) {
- /* Promote the new option list to be the active one. */
- if (mp->mnt_opt != NULL)
- vfs_freeopts(mp->mnt_opt);
- mp->mnt_opt = mp->mnt_optnew;
- (void)VFS_STATFS(mp, &mp->mnt_stat, td);
- }
- /*
- * Prevent external consumers of mount options from reading
- * mnt_optnew.
- */
- mp->mnt_optnew = NULL;
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- /*
- * Put the new filesystem on the mount list after root.
- */
-#ifdef FREEBSD_NAMECACHE
- cache_purge(vp);
-#endif
- if (!error) {
- vnode_t *mvp;
-
- VI_LOCK(vp);
- vp->v_iflag &= ~VI_MOUNT;
- VI_UNLOCK(vp);
- vp->v_mountedhere = mp;
- mtx_lock(&mountlist_mtx);
- TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
- mtx_unlock(&mountlist_mtx);
- vfs_event_signal(NULL, VQ_MOUNT, 0);
- if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp, td))
- panic("mount: lost mount");
- mountcheckdirs(vp, mvp);
- vput(mvp);
- VOP_UNLOCK(vp, 0);
- if ((mp->mnt_flag & MNT_RDONLY) == 0)
- error = vfs_allocate_syncvnode(mp);
- vfs_unbusy(mp, td);
- /*
- * NOTE(review): on syncvnode failure the mount stays on
- * mountlist and only the vnode reference is dropped -
- * confirm this is the intended recovery.
- */
- if (error)
- vrele(vp);
- else
- vfs_mountedfrom(mp, fspec);
- } else {
- /* Mount failed: release the mount-point claim and tear mp down. */
- VI_LOCK(vp);
- vp->v_iflag &= ~VI_MOUNT;
- VI_UNLOCK(vp);
- VOP_UNLOCK(vp, 0);
- vfs_unbusy(mp, td);
- vfs_mount_destroy(mp);
- }
- return (error);
-}
diff --git a/sys/compat/opensolaris/kern/opensolaris_zone.c b/sys/compat/opensolaris/kern/opensolaris_zone.c
deleted file mode 100644
index 3059a78..0000000
--- a/sys/compat/opensolaris/kern/opensolaris_zone.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/sx.h>
-#include <sys/malloc.h>
-#include <sys/queue.h>
-#include <sys/jail.h>
-#include <sys/priv.h>
-#include <sys/zone.h>
-
-/* Malloc type for the per-prison dataset lists and their entries. */
-static MALLOC_DEFINE(M_ZONES, "zones_data", "Zones data");
-
-/*
- * Structure to record list of ZFS datasets exported to a zone.
- * The dataset name is stored inline after the list linkage, so entries are
- * allocated with room for the name plus its terminator.
- */
-typedef struct zone_dataset {
- LIST_ENTRY(zone_dataset) zd_next;
- char zd_dataset[0];
-} zone_dataset_t;
-
-LIST_HEAD(zone_dataset_head, zone_dataset);
-
-/* Handle obtained from prison_service_register() in zone_sysinit(). */
-static struct prison_service *zone_prison_service = NULL;
-
-/*
- * Export 'dataset' to the jail identified by 'jailid'.
- *
- * Returns the priv_check_cred() error when 'cred' lacks PRIV_ZFS_JAIL,
- * ENOENT when no such jail is found, EEXIST when the dataset is already
- * on the jail's list, and 0 once the dataset has been added.
- */
-int
-zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
-{
-	struct zone_dataset_head *list;
-	zone_dataset_t *newzd, *cur;
-	struct prison *pr;
-	int error;
-
-	error = priv_check_cred(cred, PRIV_ZFS_JAIL, 0);
-	if (error != 0)
-		return (error);
-
-	/* Allocate memory before we grab prison's mutex. */
-	newzd = malloc(sizeof(*newzd) + strlen(dataset) + 1, M_ZONES, M_WAITOK);
-
-	sx_slock(&allprison_lock);
-	pr = prison_find(jailid);	/* Locks &pr->pr_mtx. */
-	sx_sunlock(&allprison_lock);
-	if (pr == NULL) {
-		free(newzd, M_ZONES);
-		return (ENOENT);
-	}
-
-	list = prison_service_data_get(zone_prison_service, pr);
-	/* Look for an existing entry; cur ends up NULL when there is none. */
-	LIST_FOREACH(cur, list, zd_next) {
-		if (strcmp(dataset, cur->zd_dataset) == 0)
-			break;
-	}
-	if (cur != NULL) {
-		free(newzd, M_ZONES);
-		error = EEXIST;
-	} else {
-		strcpy(newzd->zd_dataset, dataset);
-		LIST_INSERT_HEAD(list, newzd, zd_next);
-	}
-	mtx_unlock(&pr->pr_mtx);
-	return (error);
-}
-
-/*
- * Withdraw 'dataset' from the jail identified by 'jailid'.
- *
- * Returns the priv_check_cred() error when 'cred' lacks PRIV_ZFS_JAIL,
- * ENOENT when the jail or the dataset entry cannot be found, and 0 once
- * the entry has been removed and freed.
- */
-int
-zone_dataset_detach(struct ucred *cred, const char *dataset, int jailid)
-{
-	struct zone_dataset_head *list;
-	zone_dataset_t *cur;
-	struct prison *pr;
-	int error;
-
-	error = priv_check_cred(cred, PRIV_ZFS_JAIL, 0);
-	if (error != 0)
-		return (error);
-
-	sx_slock(&allprison_lock);
-	pr = prison_find(jailid);
-	sx_sunlock(&allprison_lock);
-	if (pr == NULL)
-		return (ENOENT);
-	list = prison_service_data_get(zone_prison_service, pr);
-	/* Locate the entry; cur ends up NULL when it is not on the list. */
-	LIST_FOREACH(cur, list, zd_next) {
-		if (strcmp(dataset, cur->zd_dataset) == 0)
-			break;
-	}
-	if (cur == NULL) {
-		error = ENOENT;
-	} else {
-		LIST_REMOVE(cur, zd_next);
-		free(cur, M_ZONES);
-	}
-	mtx_unlock(&pr->pr_mtx);
-	return (error);
-}
-
-/*
- * Returns true if the named dataset is visible in the current zone.
- * The 'write' parameter is set to 1 if the dataset is also writable.
- *
- * An empty name is never visible.  In the global zone everything is
- * visible and writable.  Otherwise the dataset is writable when it equals,
- * or lies underneath, a dataset exported to the caller's prison, and
- * read-only when it is a parent of such a dataset.  'write' may be NULL
- * and is left untouched when the dataset is not visible.
- */
-int
-zone_dataset_visible(const char *dataset, int *write)
-{
-	struct zone_dataset_head *head;
-	zone_dataset_t *zd;
-	struct prison *pr;
-	size_t dlen, plen, zlen;
-	int ret = 0;
-
-	if (dataset[0] == '\0')
-		return (0);
-	if (INGLOBALZONE(curproc)) {
-		if (write != NULL)
-			*write = 1;
-		return (1);
-	}
-
-	/*
-	 * Both lengths depend only on the argument, so compute them once,
-	 * before taking the prison mutex, instead of on every list entry.
-	 * dlen is at least 1 here because the empty name was rejected above.
-	 */
-	dlen = strlen(dataset);
-	plen = dlen;
-	if (dataset[plen - 1] == '/')
-		plen--;	/* Ignore trailing slash */
-
-	pr = curthread->td_ucred->cr_prison;
-	mtx_lock(&pr->pr_mtx);
-	head = prison_service_data_get(zone_prison_service, pr);
-
-	/*
-	 * Walk the list once, looking for datasets which match exactly, or
-	 * specify a dataset underneath an exported dataset.  If found, return
-	 * true and note that it is writable.
-	 */
-	LIST_FOREACH(zd, head, zd_next) {
-		zlen = strlen(zd->zd_dataset);
-		if (dlen >= zlen &&
-		    bcmp(dataset, zd->zd_dataset, zlen) == 0 &&
-		    (dataset[zlen] == '\0' || dataset[zlen] == '/' ||
-		    dataset[zlen] == '@')) {
-			if (write)
-				*write = 1;
-			ret = 1;
-			goto end;
-		}
-	}
-
-	/*
-	 * Walk the list a second time, searching for datasets which are parents
-	 * of exported datasets.  These should be visible, but read-only.
-	 *
-	 * Note that we also have to support forms such as 'pool/dataset/', with
-	 * a trailing slash.
-	 */
-	LIST_FOREACH(zd, head, zd_next) {
-		if (plen < strlen(zd->zd_dataset) &&
-		    bcmp(dataset, zd->zd_dataset, plen) == 0 &&
-		    zd->zd_dataset[plen] == '/') {
-			if (write)
-				*write = 0;
-			ret = 1;
-			goto end;
-		}
-	}
-end:
-	mtx_unlock(&pr->pr_mtx);
-	return (ret);
-}
-
-/*
- * Prison-service create hook: allocate an empty dataset list and attach
- * it to the prison as this service's private data.  Always returns 0.
- */
-static int
-zone_create(struct prison_service *psrv, struct prison *pr)
-{
-	struct zone_dataset_head *list;
-
-	/* Allocate outside the mutex; M_WAITOK may sleep. */
-	list = malloc(sizeof(*list), M_ZONES, M_WAITOK);
-	LIST_INIT(list);
-
-	mtx_lock(&pr->pr_mtx);
-	prison_service_data_set(psrv, pr, list);
-	mtx_unlock(&pr->pr_mtx);
-
-	return (0);
-}
-
-/*
- * Prison-service destroy hook: detach the dataset list from the prison
- * and free every entry along with the list head.  Always returns 0.
- */
-static int
-zone_destroy(struct prison_service *psrv, struct prison *pr)
-{
-	struct zone_dataset_head *list;
-	zone_dataset_t *entry;
-
-	mtx_lock(&pr->pr_mtx);
-	list = prison_service_data_del(psrv, pr);
-	mtx_unlock(&pr->pr_mtx);
-
-	/* The list is private to us now; drain it from the front. */
-	for (;;) {
-		entry = LIST_FIRST(list);
-		if (entry == NULL)
-			break;
-		LIST_REMOVE(entry, zd_next);
-		free(entry, M_ZONES);
-	}
-	free(list, M_ZONES);
-	return (0);
-}
-
-/*
- * Startup hook: register the "zfs" prison service with zone_create() and
- * zone_destroy() as its callbacks, and remember the returned handle.
- */
-static void
-zone_sysinit(void *arg __unused)
-{
-
- zone_prison_service = prison_service_register("zfs", zone_create,
- zone_destroy);
-}
-
-/* Shutdown hook: deregister the prison service registered at startup. */
-static void
-zone_sysuninit(void *arg __unused)
-{
-
- prison_service_deregister(zone_prison_service);
-}
-
-/* Hook the two functions above into the SI_SUB_DRIVERS stage. */
-SYSINIT(zone_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysinit, NULL);
-SYSUNINIT(zone_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, zone_sysuninit, NULL);
diff --git a/sys/compat/opensolaris/machine/endian.h b/sys/compat/opensolaris/machine/endian.h
deleted file mode 100644
index 855189f..0000000
--- a/sys/compat/opensolaris/machine/endian.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_MACHINE_ENDIAN_H_
-#define _OPENSOLARIS_MACHINE_ENDIAN_H_
-
-/* Pull in the next <machine/endian.h> on the include path first. */
-#include_next <machine/endian.h>
-
-/*
- * Solaris defines _LITTLE_ENDIAN or _BIG_ENDIAN, but never both and decides
- * which architecture it is based on which thing is defined.
- * To match that, undefine whichever macro does not correspond to
- * _BYTE_ORDER, so code testing "#ifdef _BIG_ENDIAN" behaves as on Solaris.
- */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-#undef _BIG_ENDIAN
-#else
-#undef _LITTLE_ENDIAN
-#endif
-
-#endif /* !_OPENSOLARIS_MACHINE_ENDIAN_H_ */
diff --git a/sys/compat/opensolaris/rpc/xdr.h b/sys/compat/opensolaris/rpc/xdr.h
deleted file mode 100644
index d845c27..0000000
--- a/sys/compat/opensolaris/rpc/xdr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * Portions of this source code were derived from Berkeley 4.3 BSD
- * under license from the Regents of the University of California.
- */
-
-#ifndef _OPENSOLARIS_RPC_XDR_H_
-#define _OPENSOLARIS_RPC_XDR_H_
-
-/* Pull in the next <rpc/xdr.h> on the include path, then extend it below. */
-#include_next <rpc/xdr.h>
-
-#ifndef _KERNEL
-/* NOTE(review): repeats the #include_next above; looks redundant - confirm. */
-#include_next <rpc/xdr.h>
-
-/*
- * Strangely, my glibc version (2.3.6) doesn't have xdr_control(), so
- * we have to hack it in here (source taken from OpenSolaris).
- * By the way, it is assumed the xdrmem implementation is used.
- */
-
-/* Route every xdr_control() call to the xdrmem_control() defined below. */
-#undef xdr_control
-#define xdr_control(a,b,c) xdrmem_control(a,b,c)
-
-/*
- * These are the request arguments to XDR_CONTROL.
- *
- * XDR_PEEK - returns the contents of the next XDR unit on the XDR stream.
- * XDR_SKIPBYTES - skips the next N bytes in the XDR stream.
- * XDR_RDMAGET - for xdr implementation over RDMA, gets private flags from
- * the XDR stream being moved over RDMA
- * XDR_RDMASET - for xdr implementation over RDMA, sets private flags in
- * the XDR stream moving over RDMA.
- */
-#define XDR_PEEK 2
-#define XDR_SKIPBYTES 3
-#define XDR_RDMAGET 4
-#define XDR_RDMASET 5
-
-/* FIXME: probably doesn't work */
-/*
- * Handle an XDR_CONTROL request against an xdrmem stream.
- *
- * 'request' selects the operation and 'info' points at its argument:
- * XDR_GET_BYTES_AVAIL - 'info' is an xdr_bytesrec that is filled in;
- * XDR_PEEK - 'info' is an int32_t that receives the next unit;
- * XDR_SKIPBYTES - 'info' is an int32_t holding the byte count to skip.
- *
- * Returns TRUE on success and FALSE on failure or for any other request.
- */
-static __inline bool_t
-xdrmem_control(XDR *xdrs, int request, void *info)
-{
- xdr_bytesrec *xptr;
- int32_t *int32p;
- int len;
-
- switch (request) {
-
- case XDR_GET_BYTES_AVAIL:
- /* Report x_handy as the bytes available, always as the last record. */
- xptr = (xdr_bytesrec *)info;
- xptr->xc_is_last_record = TRUE;
- xptr->xc_num_avail = xdrs->x_handy;
- return (TRUE);
-
- case XDR_PEEK:
- /*
- * Return the next 4 byte unit in the XDR stream.
- * The stream position is not advanced.
- */
- if (xdrs->x_handy < sizeof (int32_t))
- return (FALSE);
- int32p = (int32_t *)info;
- *int32p = (int32_t)ntohl((uint32_t)
- (*((int32_t *)(xdrs->x_private))));
- return (TRUE);
-
- case XDR_SKIPBYTES:
- /*
- * Skip the next N bytes in the XDR stream.
- * N is rounded with RNDUP() first. Note that x_handy is
- * decremented even when FALSE is returned.
- * NOTE(review): if x_handy is an unsigned type in the
- * underlying <rpc/xdr.h>, the "< 0" test below can never be
- * true - confirm against the XDR definition in use.
- */
- int32p = (int32_t *)info;
- len = RNDUP((int)(*int32p));
- if ((xdrs->x_handy -= len) < 0)
- return (FALSE);
- xdrs->x_private += len;
- return (TRUE);
-
- }
- return (FALSE);
-}
-#endif /* !_KERNEL */
-
-#endif /* !_OPENSOLARIS_RPC_XDR_H_ */
diff --git a/sys/compat/opensolaris/sys/acl.h b/sys/compat/opensolaris/sys/acl.h
deleted file mode 100644
index 4fba790..0000000
--- a/sys/compat/opensolaris/sys/acl.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _OPENSOLARIS_SYS_ACL_H_
-#define _OPENSOLARIS_SYS_ACL_H_
-
-#include_next <sys/acl.h>
-
-#define MAX_ACL_ENTRIES (1024) /* max entries of each type */
-
-typedef struct acl_entry aclent_t;
-#define a_type ae_tag
-#define a_id ae_id
-#define a_perm ae_perm
-
-/*
- * Access control entry (ace_t).
- *
- * Note: a_type is #defined to ae_tag just above, so after preprocessing
- * the last member below is really named ae_tag.  Code that spells it
- * a_type after including this header goes through the same macro.
- */
-typedef struct ace {
- uid_t a_who; /* uid or gid */
- uint32_t a_access_mask; /* read,write,... */
- uint16_t a_flags; /* see below */
- uint16_t a_type; /* allow or deny */
-} ace_t;
-
-#if 0
-/*
- * The following are Defined types for an aclent_t.
- */
-#define USER_OBJ (0x01) /* object owner */
-#define USER (0x02) /* additional users */
-#define GROUP_OBJ (0x04) /* owning group of the object */
-#define GROUP (0x08) /* additional groups */
-#define CLASS_OBJ (0x10) /* file group class and mask entry */
-#define OTHER_OBJ (0x20) /* other entry for the object */
-#define ACL_DEFAULT (0x1000) /* default flag */
-/* default object owner */
-#define DEF_USER_OBJ (ACL_DEFAULT | USER_OBJ)
-/* default additional users */
-#define DEF_USER (ACL_DEFAULT | USER)
-/* default owning group */
-#define DEF_GROUP_OBJ (ACL_DEFAULT | GROUP_OBJ)
-/* default additional groups */
-#define DEF_GROUP (ACL_DEFAULT | GROUP)
-/* default mask entry */
-#define DEF_CLASS_OBJ (ACL_DEFAULT | CLASS_OBJ)
-/* default other entry */
-#define DEF_OTHER_OBJ (ACL_DEFAULT | OTHER_OBJ)
-#endif
-
-/*
- * The following are defined for ace_t.
- */
-#define ACE_READ_DATA 0x00000001
-#define ACE_LIST_DIRECTORY 0x00000001
-#define ACE_WRITE_DATA 0x00000002
-#define ACE_ADD_FILE 0x00000002
-#define ACE_APPEND_DATA 0x00000004
-#define ACE_ADD_SUBDIRECTORY 0x00000004
-#define ACE_READ_NAMED_ATTRS 0x00000008
-#define ACE_WRITE_NAMED_ATTRS 0x00000010
-#define ACE_EXECUTE 0x00000020
-#define ACE_DELETE_CHILD 0x00000040
-#define ACE_READ_ATTRIBUTES 0x00000080
-#define ACE_WRITE_ATTRIBUTES 0x00000100
-#define ACE_DELETE 0x00010000
-#define ACE_READ_ACL 0x00020000
-#define ACE_WRITE_ACL 0x00040000
-#define ACE_WRITE_OWNER 0x00080000
-#define ACE_SYNCHRONIZE 0x00100000
-
-#define ACE_FILE_INHERIT_ACE 0x0001
-#define ACE_DIRECTORY_INHERIT_ACE 0x0002
-#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004
-#define ACE_INHERIT_ONLY_ACE 0x0008
-#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010
-#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020
-#define ACE_IDENTIFIER_GROUP 0x0040
-#define ACE_OWNER 0x1000
-#define ACE_GROUP 0x2000
-#define ACE_EVERYONE 0x4000
-
-#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000
-#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001
-#define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002
-#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003
-
-#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
- ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
- ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
- ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
- ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
-
-/*
- * The following flags are supported by both NFSv4 ACLs and ace_t.
- */
-#define ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \
- ACE_DIRECTORY_INHERIT_ACE | \
- ACE_NO_PROPAGATE_INHERIT_ACE | \
- ACE_INHERIT_ONLY_ACE | \
- ACE_IDENTIFIER_GROUP)
-
-#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP)
-
-#if 0
-/* cmd args to acl(2) for aclent_t */
-#define GETACL 1
-#define SETACL 2
-#define GETACLCNT 3
-#endif
-
-/* cmd's to manipulate ace acls. */
-#define ACE_GETACL 4
-#define ACE_SETACL 5
-#define ACE_GETACLCNT 6
-
-#if 0
-/* minimal acl entries from GETACLCNT */
-#define MIN_ACL_ENTRIES 4
-
-#if !defined(_KERNEL)
-
-/* acl check errors */
-#define GRP_ERROR 1
-#define USER_ERROR 2
-#define OTHER_ERROR 3
-#define CLASS_ERROR 4
-#define DUPLICATE_ERROR 5
-#define MISS_ERROR 6
-#define MEM_ERROR 7
-#define ENTRY_ERROR 8
-
-
-/*
- * similar to ufs_acl.h: changed to char type for user commands (tar, cpio)
- * Attribute types
- */
-#define UFSD_FREE ('0') /* Free entry */
-#define UFSD_ACL ('1') /* Access Control Lists */
-#define UFSD_DFACL ('2') /* reserved for future use */
-#define ACE_ACL ('3') /* ace_t style acls */
-
-/*
- * flag to [f]acl_get()
- * controls whether a trivial acl should be returned.
- */
-#define ACL_NO_TRIVIAL 0x2
-
-
-/*
- * Flags to control acl_totext()
- */
-
-#define ACL_APPEND_ID 0x1 /* append uid/gid to user/group entries */
-#define ACL_COMPACT_FMT 0x2 /* build ACL in ls -V format */
-#define ACL_NORESOLVE 0x4 /* don't do name service lookups */
-
-/*
- * Legacy aclcheck errors for aclent_t ACLs
- */
-#define EACL_GRP_ERROR GRP_ERROR
-#define EACL_USER_ERROR USER_ERROR
-#define EACL_OTHER_ERROR OTHER_ERROR
-#define EACL_CLASS_ERROR CLASS_ERROR
-#define EACL_DUPLICATE_ERROR DUPLICATE_ERROR
-#define EACL_MISS_ERROR MISS_ERROR
-#define EACL_MEM_ERROR MEM_ERROR
-#define EACL_ENTRY_ERROR ENTRY_ERROR
-
-#define EACL_INHERIT_ERROR 9 /* invalid inherit flags */
-#define EACL_FLAGS_ERROR 10 /* unknown flag value */
-#define EACL_PERM_MASK_ERROR 11 /* unknown permission */
-#define EACL_COUNT_ERROR 12 /* invalid acl count */
-
-#define EACL_INVALID_SLOT 13 /* invalid acl slot */
-#define EACL_NO_ACL_ENTRY 14 /* Entry doesn't exist */
-#define EACL_DIFF_TYPE 15 /* acls aren't same type */
-
-#define EACL_INVALID_USER_GROUP 16 /* need user/group name */
-#define EACL_INVALID_STR 17 /* invalid acl string */
-#define EACL_FIELD_NOT_BLANK 18 /* can't have blank field */
-#define EACL_INVALID_ACCESS_TYPE 19 /* invalid access type */
-#define EACL_UNKNOWN_DATA 20 /* Unrecognized data in ACL */
-#define EACL_MISSING_FIELDS 21 /* missing fields in acl */
-
-#define EACL_INHERIT_NOTDIR 22 /* Need dir for inheritance */
-
-extern int aclcheck(aclent_t *, int, int *);
-extern int acltomode(aclent_t *, int, mode_t *);
-extern int aclfrommode(aclent_t *, int, mode_t *);
-extern int aclsort(int, int, aclent_t *);
-extern char *acltotext(aclent_t *, int);
-extern aclent_t *aclfromtext(char *, int *);
-extern void acl_free(acl_t *);
-extern int acl_get(const char *, int, acl_t **);
-extern int facl_get(int, int, acl_t **);
-extern int acl_set(const char *, acl_t *acl);
-extern int facl_set(int, acl_t *acl);
-extern int acl_strip(const char *, uid_t, gid_t, mode_t);
-extern int acl_trivial(const char *);
-extern char *acl_totext(acl_t *, int);
-extern int acl_fromtext(const char *, acl_t **);
-extern int acl_check(acl_t *, int);
-
-#else /* !defined(_KERNEL) */
-
-extern void ksort(caddr_t, int, int, int (*)(void *, void *));
-extern int cmp2acls(void *, void *);
-
-#endif /* !defined(_KERNEL) */
-
-#if defined(__STDC__)
-extern int acl(const char *path, int cmd, int cnt, void *buf);
-extern int facl(int fd, int cmd, int cnt, void *buf);
-#else /* !__STDC__ */
-extern int acl();
-extern int facl();
-#endif /* defined(__STDC__) */
-
-#endif
-
-#endif /* _OPENSOLARIS_SYS_ACL_H_ */
diff --git a/sys/compat/opensolaris/sys/atomic.h b/sys/compat/opensolaris/sys/atomic.h
deleted file mode 100644
index 895f4df..0000000
--- a/sys/compat/opensolaris/sys/atomic.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_ATOMIC_H_
-#define _OPENSOLARIS_SYS_ATOMIC_H_
-
-#include <sys/types.h>
-#include <machine/atomic.h>
-
-#ifndef __LP64__
-extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
-extern void *atomic_cas_ptr(volatile void *target, void *cmp, void *newval);
-#endif
-#ifndef __sparc64__
-extern uint64_t atomic_cas_64(volatile uint64_t *target, uint64_t cmp,
- uint64_t newval);
-#endif
-extern uint64_t atomic_add_64_nv(volatile uint64_t *target, int64_t delta);
-extern uint8_t atomic_or_8_nv(volatile uint8_t *target, uint8_t value);
-extern void membar_producer(void);
-
-#if defined(__sparc64__) || defined(__powerpc__) || defined(__arm__)
-extern void atomic_or_8(volatile uint8_t *target, uint8_t value);
-#else
-/*
- * Solaris-named wrapper used on platforms without an external
- * atomic_or_8(): forwards target and value straight to atomic_set_8().
- */
-static __inline void
-atomic_or_8(volatile uint8_t *target, uint8_t value)
-{
- atomic_set_8(target, value);
-}
-#endif
-
-/*
- * Add delta to *target and return the resulting ("new") value.
- * Assumes atomic_fetchadd_32() hands back the value held before the
- * addition (see atomic(9)); delta is added again to obtain the new value.
- */
-static __inline uint32_t
-atomic_add_32_nv(volatile uint32_t *target, int32_t delta)
-{
-	uint32_t before;
-
-	before = atomic_fetchadd_32(target, delta);
-	return (before + delta);
-}
-
-/*
- * u_int flavour of atomic_add_32_nv(); simply forwards to it.
- * NOTE(review): relies on u_int and uint32_t being the same width.
- */
-static __inline u_int
-atomic_add_int_nv(volatile u_int *target, int delta)
-{
- return (atomic_add_32_nv(target, delta));
-}
-
-/* Decrement *target by one via atomic_subtract_32(); no value is returned. */
-static __inline void
-atomic_dec_32(volatile uint32_t *target)
-{
- atomic_subtract_32(target, 1);
-}
-
-/*
- * Decrement *target by one and return the resulting ("new") value.
- * Assumes atomic_fetchadd_32() hands back the value held before the
- * addition (see atomic(9)); one is subtracted to obtain the new value.
- */
-static __inline uint32_t
-atomic_dec_32_nv(volatile uint32_t *target)
-{
-	uint32_t before;
-
-	before = atomic_fetchadd_32(target, -1);
-	return (before - 1);
-}
-
-/* Increment *target by one via atomic_add_32(); no value is returned. */
-static __inline void
-atomic_inc_32(volatile uint32_t *target)
-{
- atomic_add_32(target, 1);
-}
-
-/* Increment *target by one and return whatever atomic_add_32_nv() reports. */
-static __inline uint32_t
-atomic_inc_32_nv(volatile uint32_t *target)
-{
- return (atomic_add_32_nv(target, 1));
-}
-
-/* Increment the 64-bit *target by one via atomic_add_64(); no value is returned. */
-static __inline void
-atomic_inc_64(volatile uint64_t *target)
-{
- atomic_add_64(target, 1);
-}
-
-/* Increment the 64-bit *target by one and return whatever atomic_add_64_nv() reports. */
-static __inline uint64_t
-atomic_inc_64_nv(volatile uint64_t *target)
-{
- return (atomic_add_64_nv(target, 1));
-}
-
-#ifdef __LP64__
-/*
- * Pointer compare-and-swap for LP64 platforms, expressed through
- * atomic_cas_64(): the pointers are converted to uint64_t, handed to the
- * 64-bit primitive, and its result is converted back to a pointer.
- */
-static __inline void *
-atomic_cas_ptr(volatile void *target, void *cmp, void *newval)
-{
-	volatile uint64_t *slot = (volatile uint64_t *)target;
-	uint64_t expected = (uint64_t)cmp;
-	uint64_t desired = (uint64_t)newval;
-
-	return ((void *)atomic_cas_64(slot, expected, desired));
-}
-#endif
-
-#endif /* !_OPENSOLARIS_SYS_ATOMIC_H_ */
diff --git a/sys/compat/opensolaris/sys/bitmap.h b/sys/compat/opensolaris/sys/bitmap.h
deleted file mode 100644
index d75b8bd..0000000
--- a/sys/compat/opensolaris/sys/bitmap.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_BITMAP_H
-#define _COMPAT_OPENSOLARIS_SYS_BITMAP_H
-
-#include <sys/atomic.h>
-
-/*
- * Operations on bitmaps of arbitrary size
- * A bitmap is a vector of 1 or more ulong_t's.
- * The user of the package is responsible for range checks and keeping
- * track of sizes.
- */
-
-#ifdef _LP64
-#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */
-#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL32, to extract word index */
-#else
-#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */
-#endif
-
-#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */
-#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */
-
-#ifdef _LP64
-#define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per uint_t */
-#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */
-#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */
-#else
-#define BT_ULMAXMASK 0xffffffff
-#endif
-
-/*
- * bitmap is a ulong_t *, bitindex an index_t
- *
- * The macros BT_WIM and BT_BIW are internal; there is no need
- * for users of this package to use them.
- */
-
-/*
- * word in map
- */
-#define BT_WIM(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT])
-/*
- * bit in word
- */
-#define BT_BIW(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK))
-
-#ifdef _LP64
-#define BT_WIM32(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT32])
-
-#define BT_BIW32(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK32))
-#endif
-
-/*
- * These are public macros
- *
- * BT_BITOUL == n bits to n ulong_t's
- */
-#define BT_BITOUL(nbits) \
- (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
-#define BT_SIZEOFMAP(nbits) \
- (BT_BITOUL(nbits) * sizeof (ulong_t))
-#define BT_TEST(bitmap, bitindex) \
- ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
-#define BT_SET(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
-#define BT_CLEAR(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
-
-#ifdef _LP64
-#define BT_BITOUL32(nbits) \
- (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32)
-#define BT_SIZEOFMAP32(nbits) \
- (BT_BITOUL32(nbits) * sizeof (uint_t))
-#define BT_TEST32(bitmap, bitindex) \
- ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0)
-#define BT_SET32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); }
-#define BT_CLEAR32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); }
-#endif /* _LP64 */
-
-#endif /* _COMPAT_OPENSOLARIS_SYS_BITMAP_H */
diff --git a/sys/compat/opensolaris/sys/byteorder.h b/sys/compat/opensolaris/sys/byteorder.h
deleted file mode 100644
index 08a04cd..0000000
--- a/sys/compat/opensolaris/sys/byteorder.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _OPENSOLARIS_SYS_BYTEORDER_H_
-#define _OPENSOLARIS_SYS_BYTEORDER_H_
-
-/*
- * Macros to reverse byte order
- */
-#define BSWAP_8(x) ((x) & 0xff)
-#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
-#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
-#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
-
-#define BMASK_8(x) ((x) & 0xff)
-#define BMASK_16(x) ((x) & 0xffff)
-#define BMASK_32(x) ((x) & 0xffffffff)
-#define BMASK_64(x) (x)
-
-/*
- * Macros to convert from a specific byte order to/from native byte order
- */
-#if _BYTE_ORDER == _BIG_ENDIAN
-#define LE_64(x) BSWAP_64(x)
-#else
-#define LE_64(x) BMASK_64(x)
-#endif
-
-#endif /* _OPENSOLARIS_SYS_BYTEORDER_H_ */
diff --git a/sys/compat/opensolaris/sys/callb.h b/sys/compat/opensolaris/sys/callb.h
deleted file mode 100644
index 1894beb..0000000
--- a/sys/compat/opensolaris/sys/callb.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CALLB_H
-#define _SYS_CALLB_H
-
-#pragma ident "@(#)callb.h 1.29 05/06/23 SMI"
-
-#include <sys/kcondvar.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * definitions of callback classes (c_class)
- *
- * Callbacks belong in the same class if (1) their callback routines
- * do the same kind of processing (ideally, using the same callback function)
- * and (2) they can/should be executed at the same time in a cpr
- * suspend/resume operation.
- *
- * Note: The DAEMON class, in particular, is for stopping kernel threads
- * and nothing else. The CALLB_* macros below should be used to deal
- * with kernel threads, and the callback function should be callb_generic_cpr.
- * Another idiosyncrasy of the DAEMON class is that if a suspend operation
- * fails, some of the callback functions may be called with the RESUME
- * code which were never called with SUSPEND. Not a problem currently,
- * but see bug 4201851.
- */
-#define CB_CL_CPR_DAEMON 0
-#define CB_CL_CPR_VM 1
-#define CB_CL_CPR_CALLOUT 2
-#define CB_CL_CPR_OBP 3
-#define CB_CL_CPR_FB 4
-#define CB_CL_PANIC 5
-#define CB_CL_CPR_RPC 6
-#define CB_CL_CPR_PROMPRINTF 7
-#define CB_CL_UADMIN 8
-#define CB_CL_CPR_PM 9
-#define CB_CL_HALT 10
-#define CB_CL_CPR_DMA 11
-#define CB_CL_CPR_POST_USER 12
-#define CB_CL_UADMIN_PRE_VFS 13
-#define CB_CL_MDBOOT CB_CL_UADMIN
-#define CB_CL_ENTER_DEBUGGER 14
-#define CB_CL_CPR_POST_KERNEL 15
-#define NCBCLASS 16 /* CHANGE ME if classes are added/removed */
-
-/*
- * CB_CL_CPR_DAEMON class specific definitions are given below:
- */
-
-/*
- * code for CPR callb_execute_class
- */
-#define CB_CODE_CPR_CHKPT 0
-#define CB_CODE_CPR_RESUME 1
-
-typedef void * callb_id_t;
-/*
- * Per kernel thread structure for CPR daemon callbacks.
- * Must be protected by either an existing lock in the daemon or
- * a new lock created for such a purpose.
- */
-typedef struct callb_cpr {
- kmutex_t *cc_lockp; /* lock to protect this struct */
- char cc_events; /* various events for CPR */
- callb_id_t cc_id; /* callb id address */
- kcondvar_t cc_callb_cv; /* cv for callback waiting */
- kcondvar_t cc_stop_cv; /* cv to checkpoint block */
-} callb_cpr_t;
-
-/*
- * cc_events definitions
- */
-#define CALLB_CPR_START 1 /* a checkpoint request's started */
-#define CALLB_CPR_SAFE 2 /* thread is safe for CPR */
-#define CALLB_CPR_ALWAYS_SAFE 4 /* thread is ALWAYS safe for CPR */
-
-/*
- * Used when checking that all kernel threads are stopped.
- */
-#define CALLB_MAX_RETRY 3 /* when waiting for kthread to sleep */
-#define CALLB_THREAD_DELAY 10 /* ticks allowed to reach sleep */
-#define CPR_KTHREAD_TIMEOUT_SEC 90 /* secs before callback times out -- */
- /* due to pwr mgmt of disks, make -- */
- /* big enough for worst spinup time */
-
-#ifdef _KERNEL
-/*
- *
- * CALLB_CPR_INIT macro is used by kernel threads to add their entry to
- * the callback table and perform other initialization. It automatically
- * adds the thread as being in the callback class CB_CL_CPR_DAEMON.
- *
- * cp - ptr to the callb_cpr_t structure for this kernel thread
- *
- * lockp - pointer to mutex protecting the callb_cpr_t struct
- *
- * func - pointer to the callback function for this kernel thread.
- * It has the prototype boolean_t <func>(void *arg, int code)
- * where: arg - ptr to the callb_cpr_t structure
- * code - not used for this type of callback
- * returns: B_TRUE if successful; B_FALSE if unsuccessful.
- *
- * name - a string giving the name of the kernel thread
- *
- * Note: lockp is the lock to protect the callb_cpr_t (cp) structure
- * later on. No lock held is needed for this initialization.
- */
-#define CALLB_CPR_INIT(cp, lockp, func, name) { \
- strlcpy(curthread->td_name, (name), \
- sizeof(curthread->td_name)); \
- strlcpy(curthread->td_proc->p_comm, (name), \
- sizeof(curthread->td_proc->p_comm)); \
- bzero((caddr_t)(cp), sizeof (callb_cpr_t)); \
- (cp)->cc_lockp = lockp; \
- (cp)->cc_id = callb_add(func, (void *)(cp), \
- CB_CL_CPR_DAEMON, name); \
- }
-
-#ifndef __lock_lint
-#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp));
-#else
-#define CALLB_CPR_ASSERT(cp)
-#endif
-/*
- * Some threads (like the idle threads) do not adhere to the callback
- * protocol and are always considered safe. Such threads must never exit.
- * They register their presence by calling this macro during their
- * initialization.
- *
- * Args:
- * t - thread pointer of the client kernel thread
- * name - a string giving the name of the kernel thread
- */
-#define CALLB_CPR_INIT_SAFE(t, name) { \
- (void) callb_add_thread(callb_generic_cpr_safe, \
- (void *) &callb_cprinfo_safe, CB_CL_CPR_DAEMON, \
- name, t); \
- }
-/*
- * The lock to protect cp's content must be held before
- * calling the following two macros.
- *
- * Any code region between CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END
- * is safe for checkpoint/resume.
- */
-#define CALLB_CPR_SAFE_BEGIN(cp) { \
- CALLB_CPR_ASSERT(cp) \
- (cp)->cc_events |= CALLB_CPR_SAFE; \
- if ((cp)->cc_events & CALLB_CPR_START) \
- cv_signal(&(cp)->cc_callb_cv); \
- }
-#define CALLB_CPR_SAFE_END(cp, lockp) { \
- CALLB_CPR_ASSERT(cp) \
- while ((cp)->cc_events & CALLB_CPR_START) \
- cv_wait(&(cp)->cc_stop_cv, lockp); \
- (cp)->cc_events &= ~CALLB_CPR_SAFE; \
- }
-/*
- * cv_destroy is nop right now but may be needed in the future.
- */
-#define CALLB_CPR_EXIT(cp) { \
- CALLB_CPR_ASSERT(cp) \
- (cp)->cc_events |= CALLB_CPR_SAFE; \
- if ((cp)->cc_events & CALLB_CPR_START) \
- cv_signal(&(cp)->cc_callb_cv); \
- mutex_exit((cp)->cc_lockp); \
- (void) callb_delete((cp)->cc_id); \
- cv_destroy(&(cp)->cc_callb_cv); \
- cv_destroy(&(cp)->cc_stop_cv); \
- }
-
-extern callb_cpr_t callb_cprinfo_safe;
-extern callb_id_t callb_add(boolean_t (*)(void *, int), void *, int, char *);
-extern callb_id_t callb_add_thread(boolean_t (*)(void *, int),
- void *, int, char *, kthread_id_t);
-extern int callb_delete(callb_id_t);
-extern void callb_execute(callb_id_t, int);
-extern void *callb_execute_class(int, int);
-extern boolean_t callb_generic_cpr(void *, int);
-extern boolean_t callb_generic_cpr_safe(void *, int);
-extern boolean_t callb_is_stopped(kthread_id_t, caddr_t *);
-extern void callb_lock_table(void);
-extern void callb_unlock_table(void);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CALLB_H */
diff --git a/sys/compat/opensolaris/sys/cmn_err.h b/sys/compat/opensolaris/sys/cmn_err.h
deleted file mode 100644
index b9987e8..0000000
--- a/sys/compat/opensolaris/sys/cmn_err.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_CMN_ERR_H_
-#define _OPENSOLARIS_SYS_CMN_ERR_H_
-
-#include <sys/systm.h>
-#include <machine/stdarg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Common error handling severity levels */
-
-#define CE_CONT 0 /* continuation */
-#define CE_NOTE 1 /* notice */
-#define CE_WARN 2 /* warning */
-#define CE_PANIC 3 /* panic */
-#define CE_IGNORE 4 /* print nothing */
-
-/*
- * Print a message at severity ce using a va_list.
- *
- * The caller's format string fmt is wrapped in a severity-specific prefix
- * and a trailing newline to build a new format string, which is then
- * handed to vprintf() together with adx.  CE_IGNORE prints nothing,
- * CE_PANIC additionally calls panic("ZFS") after printing, and any value
- * other than the five CE_* levels panics.
- */
-static __inline void
-vcmn_err(int ce, const char *fmt, va_list adx)
-{
-	char wrapped[256];
-
-	/* Nothing to format or print for CE_IGNORE. */
-	if (ce == CE_IGNORE)
-		return;
-
-	if (ce == CE_CONT)
-		snprintf(wrapped, sizeof(wrapped), "ZFS(cont): %s\n", fmt);
-	else if (ce == CE_NOTE)
-		snprintf(wrapped, sizeof(wrapped), "ZFS: NOTICE: %s\n", fmt);
-	else if (ce == CE_WARN)
-		snprintf(wrapped, sizeof(wrapped), "ZFS: WARNING: %s\n", fmt);
-	else if (ce == CE_PANIC)
-		snprintf(wrapped, sizeof(wrapped), "ZFS(panic): %s\n", fmt);
-	else
-		panic("unknown severity level");
-
-	vprintf(wrapped, adx);
-	if (ce == CE_PANIC)
-		panic("ZFS");
-}
-
-/*
- * Variadic front end for vcmn_err(): gathers the arguments that follow
- * fmt into a va_list and forwards them, together with the severity ce.
- */
-static __inline void
-cmn_err(int ce, const char *fmt, ...)
-{
- va_list adx;
-
- va_start(adx, fmt);
- vcmn_err(ce, fmt, adx);
- va_end(adx);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OPENSOLARIS_SYS_CMN_ERR_H_ */
diff --git a/sys/compat/opensolaris/sys/cpupart.h b/sys/compat/opensolaris/sys/cpupart.h
deleted file mode 100644
index 29e0910..0000000
--- a/sys/compat/opensolaris/sys/cpupart.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_CPUPART_H
-#define _COMPAT_OPENSOLARIS_SYS_CPUPART_H
-
-typedef int cpupartid_t;
-
-typedef struct cpupart {
- cpupartid_t cp_id; /* partition ID */
-} cpupart_t;
-
-#endif /* _COMPAT_OPENSOLARIS_SYS_CPUPART_H */
diff --git a/sys/compat/opensolaris/sys/cpuvar.h b/sys/compat/opensolaris/sys/cpuvar.h
deleted file mode 100644
index 11b591f..0000000
--- a/sys/compat/opensolaris/sys/cpuvar.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_CPUVAR_H
-#define _COMPAT_OPENSOLARIS_SYS_CPUVAR_H
-
-#include <sys/mutex.h>
-
-#ifdef _KERNEL
-#define CPU_CACHE_COHERENCE_SIZE 64
-
-/*
- * The cpu_core structure consists of per-CPU state available in any context.
- * On some architectures, this may mean that the page(s) containing the
- * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
- * is up to the platform to assure that this is performed properly. Note that
- * the structure is sized to avoid false sharing.
- */
-#define CPUC_SIZE (sizeof (uint16_t) + sizeof (uintptr_t) + \
-	sizeof (kmutex_t))
-/*
- * Parenthesized so the subtraction keeps its meaning wherever the macro
- * is expanded (e.g. CPUC_PADSIZE * 2 or x - CPUC_PADSIZE).
- */
-#define CPUC_PADSIZE (CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE)
-
-typedef struct cpu_core {
- uint16_t cpuc_dtrace_flags; /* DTrace flags */
- uint8_t cpuc_pad[CPUC_PADSIZE]; /* padding */
- uintptr_t cpuc_dtrace_illval; /* DTrace illegal value */
- kmutex_t cpuc_pid_lock; /* DTrace pid provider lock */
-} cpu_core_t;
-
-extern cpu_core_t cpu_core[];
-#endif /* _KERNEL */
-
-/*
- * DTrace flags.
- */
-#define CPU_DTRACE_NOFAULT 0x0001 /* Don't fault */
-#define CPU_DTRACE_DROP 0x0002 /* Drop this ECB */
-#define CPU_DTRACE_BADADDR 0x0004 /* DTrace fault: bad address */
-#define CPU_DTRACE_BADALIGN 0x0008 /* DTrace fault: bad alignment */
-#define CPU_DTRACE_DIVZERO 0x0010 /* DTrace fault: divide by zero */
-#define CPU_DTRACE_ILLOP 0x0020 /* DTrace fault: illegal operation */
-#define CPU_DTRACE_NOSCRATCH 0x0040 /* DTrace fault: out of scratch */
-#define CPU_DTRACE_KPRIV 0x0080 /* DTrace fault: bad kernel access */
-#define CPU_DTRACE_UPRIV 0x0100 /* DTrace fault: bad user access */
-#define CPU_DTRACE_TUPOFLOW 0x0200 /* DTrace fault: tuple stack overflow */
-#if defined(__sparc)
-#define CPU_DTRACE_FAKERESTORE 0x0400 /* pid provider hint to getreg */
-#endif
-#define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */
-#define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */
-
-#define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \
- CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \
- CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \
- CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \
- CPU_DTRACE_BADSTACK)
-#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP)
-
-typedef enum {
- CPU_INIT,
- CPU_CONFIG,
- CPU_UNCONFIG,
- CPU_ON,
- CPU_OFF,
- CPU_CPUPART_IN,
- CPU_CPUPART_OUT
-} cpu_setup_t;
-
-typedef int cpu_setup_func_t(cpu_setup_t, int, void *);
-
-
-#endif /* _COMPAT_OPENSOLARIS_SYS_CPUVAR_H */
diff --git a/sys/compat/opensolaris/sys/cred.h b/sys/compat/opensolaris/sys/cred.h
deleted file mode 100644
index 85e79db..0000000
--- a/sys/compat/opensolaris/sys/cred.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_CRED_H_
-#define _OPENSOLARIS_SYS_CRED_H_
-
-#include <sys/param.h>
-#include_next <sys/ucred.h>
-
-#ifdef _KERNEL
-
-typedef struct ucred cred_t;
-
-#define CRED() (curthread->td_ucred)
-
-/*
- * kcred is used when you need all privileges.
- */
-#define kcred (thread0.td_ucred)
-
-#define crgetuid(cred) ((cred)->cr_uid)
-#define crgetgid(cred) ((cred)->cr_gid)
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_CRED_H_ */
diff --git a/sys/compat/opensolaris/sys/cyclic.h b/sys/compat/opensolaris/sys/cyclic.h
deleted file mode 100644
index 331a28c..0000000
--- a/sys/compat/opensolaris/sys/cyclic.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_CYCLIC_H_
-#define _COMPAT_OPENSOLARIS_SYS_CYCLIC_H_
-
-#ifndef _KERNEL
-typedef void cpu_t;
-#endif
-
-#include_next <sys/cyclic.h>
-
-#endif
diff --git a/sys/compat/opensolaris/sys/debug.h b/sys/compat/opensolaris/sys/debug.h
deleted file mode 100644
index 3480462..0000000
--- a/sys/compat/opensolaris/sys/debug.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_DEBUG_H_
-#define _OPENSOLARIS_SYS_DEBUG_H_
-
-#ifdef _KERNEL
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include_next <sys/debug.h>
-
-#define assfail(a, f, l) \
- (panic("solaris assert: %s, file: %s, line: %d", (a), (f), (l)), 0)
-
-#define assfail3(a, lv, op, rv, f, l) \
- panic("solaris assert: %s (0x%jx %s 0x%jx), file: %s, line: %d", \
- (a), (uintmax_t)(lv), (op), (uintmax_t)(rv), (f), (l))
-#else /* !_KERNEL */
-#include_next <sys/debug.h>
-#endif
-
-#endif /* _OPENSOLARIS_SYS_DEBUG_H_ */
diff --git a/sys/compat/opensolaris/sys/dirent.h b/sys/compat/opensolaris/sys/dirent.h
deleted file mode 100644
index c369b04..0000000
--- a/sys/compat/opensolaris/sys/dirent.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_DIRENT_H_
-#define _OPENSOLARIS_SYS_DIRENT_H_
-
-#include_next <sys/dirent.h>
-
-typedef struct dirent dirent64_t;
-#define dirent64 dirent
-#define ino64_t ino_t
-
-#define d_ino d_fileno
-
-#define DIRENT64_RECLEN(len) ((sizeof(struct dirent) - \
- sizeof(((struct dirent *)NULL)->d_name) + \
- (len) + 1 + 3) & ~3)
-
-#endif /* !_OPENSOLARIS_SYS_DIRENT_H_ */
diff --git a/sys/compat/opensolaris/sys/dkio.h b/sys/compat/opensolaris/sys/dkio.h
deleted file mode 100644
index 89b6725..0000000
--- a/sys/compat/opensolaris/sys/dkio.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _OPENSOLARIS_SYS_DKIO_H_
-#define _OPENSOLARIS_SYS_DKIO_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Disk io control commands
- * Warning: some other ioctls with the DIOC prefix exist elsewhere.
- * The Generic DKIOC numbers are from 0 - 50.
- * The Floppy Driver uses 51 - 100.
- * The Hard Disk (except SCSI) 101 - 106. (these are obsolete)
- * The CDROM Driver 151 - 200.
- * The USCSI ioctl 201 - 250.
- */
-#define DKIOC (0x04 << 8)
-
-/*
- * The following ioctls are generic in nature and need to be
- * suported as appropriate by all disk drivers
- */
-#define DKIOCGGEOM (DKIOC|1) /* Get geometry */
-#define DKIOCINFO (DKIOC|3) /* Get info */
-#define DKIOCEJECT (DKIOC|6) /* Generic 'eject' */
-#define DKIOCGVTOC (DKIOC|11) /* Get VTOC */
-#define DKIOCSVTOC (DKIOC|12) /* Set VTOC & Write to Disk */
-
-/*
- * Disk Cache Controls. These ioctls should be supported by
- * all disk drivers.
- *
- * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl
- * argument, but it should be passed as NULL to allow for future
- * reinterpretation. From user-mode, this ioctl request is synchronous.
- *
- * When invoked from within the kernel, the arg can be NULL to indicate
- * a synchronous request or can be the address of a struct dk_callback
- * to request an asynchronous callback when the flush request is complete.
- * In this case, the flag to the ioctl must include FKIOCTL and the
- * dkc_callback field of the pointed to struct must be non-null or the
- * request is made synchronously.
- *
- * In the callback case: if the ioctl returns 0, a callback WILL be performed.
- * If the ioctl returns non-zero, a callback will NOT be performed.
- * NOTE: In some cases, the callback may be done BEFORE the ioctl call
- * returns. The caller's locking strategy should be prepared for this case.
- */
-#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
-
-struct dk_callback {
- void (*dkc_callback)(void *dkc_cookie, int error);
- void *dkc_cookie;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OPENSOLARIS_SYS_DKIO_H_ */
diff --git a/sys/compat/opensolaris/sys/dnlc.h b/sys/compat/opensolaris/sys/dnlc.h
deleted file mode 100644
index a2d4f01..0000000
--- a/sys/compat/opensolaris/sys/dnlc.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_DNLC_H_
-#define _OPENSOLARIS_SYS_DNLC_H_
-
-#define DNLC_NO_VNODE ((void *)(intptr_t)0xdeadc0de)
-
-#define dnlc_lookup(dvp, name) (NULL)
-#define dnlc_update(dvp, name, vp) do { } while (0)
-#define dnlc_remove(dvp, name) do { } while (0)
-#define dnlc_purge_vfsp(vfsp, count) (0)
-#define dnlc_reduce_cache(percent) do { } while (0)
-
-#endif /* !_OPENSOLARIS_SYS_DNLC_H_ */
diff --git a/sys/compat/opensolaris/sys/elf.h b/sys/compat/opensolaris/sys/elf.h
deleted file mode 100644
index a630f28..0000000
--- a/sys/compat/opensolaris/sys/elf.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * $FreeBSD$
- *
- * ELF compatibility definitions for OpenSolaris source.
- *
- */
-
-#ifndef _SYS__ELF_SOLARIS_H_
-#define _SYS__ELF_SOLARIS_H_
-
-#include_next <sys/elf.h>
-
-#define __sElfN(x) typedef __CONCAT(__CONCAT(__CONCAT(Elf,__ELF_WORD_SIZE),_),x) x
-
-__sElfN(Addr);
-__sElfN(Cap);
-__sElfN(Dyn);
-__sElfN(Ehdr);
-__sElfN(Move);
-__sElfN(Off);
-__sElfN(Phdr);
-__sElfN(Rel);
-__sElfN(Rela);
-__sElfN(Shdr);
-__sElfN(Sym);
-__sElfN(Syminfo);
-__sElfN(Verdaux);
-__sElfN(Verdef);
-__sElfN(Vernaux);
-__sElfN(Verneed);
-__sElfN(Versym);
-
-__sElfN(Half);
-__sElfN(Sword);
-__sElfN(Word);
-
-#if __ELF_WORD_SIZE == 32
-typedef Elf32_Word Xword; /* Xword/Sxword are 32-bits in Elf32 */
-typedef Elf32_Sword Sxword;
-#else
-typedef Elf64_Xword Xword;
-typedef Elf64_Sxword Sxword;
-#endif
-
-#define ELF_M_INFO __ELFN(M_INFO)
-#define ELF_M_SIZE __ELFN(M_SIZE)
-#define ELF_M_SYM __ELFN(M_SYM)
-
-/*
- * Elf `printf' type-cast macros. These force arguments to be a fixed size
- * so that Elf32 and Elf64 can share common format strings.
- */
-#define EC_ADDR(a) ((Elf64_Addr)(a)) /* "ull" */
-#define EC_OFF(a) ((Elf64_Off)(a)) /* "ull" */
-#define EC_HALF(a) ((Elf64_Half)(a)) /* "d" */
-#define EC_WORD(a) ((Elf64_Word)(a)) /* "u" */
-#define EC_SWORD(a) ((Elf64_Sword)(a)) /* "d" */
-#define EC_XWORD(a) ((Elf64_Xword)(a)) /* "ull" */
-#define EC_SXWORD(a) ((Elf64_Sxword)(a)) /* "ll" */
-#define EC_LWORD(a) ((Elf64_Lword)(a)) /* "ull" */
-
-#define elf_checksum __elfN(checksum)
-#define elf_fsize __elfN(fsize)
-#define elf_getehdr __elfN(getehdr)
-#define elf_getphdr __elfN(getphdr)
-#define elf_newehdr __elfN(newehdr)
-#define elf_newphdr __elfN(newphdr)
-#define elf_getshdr __elfN(getshdr)
-#define elf_xlatetof __elfN(xlatetof)
-#define elf_xlatetom __elfN(xlatetom)
-
-#define Elf_cap_entry __ElfN(cap_entry)
-#define Elf_cap_title __ElfN(cap_title)
-#define Elf_demangle_name __ElfN(demangle_name)
-#define Elf_dyn_entry __ElfN(dyn_entry)
-#define Elf_dyn_title __ElfN(dyn_title)
-#define Elf_ehdr __ElfN(ehdr)
-#define Elf_got_entry __ElfN(got_entry)
-#define Elf_got_title __ElfN(got_title)
-#define Elf_reloc_apply_reg __ElfN(reloc_apply_reg)
-#define Elf_reloc_apply_val __ElfN(reloc_apply_val)
-#define Elf_reloc_entry_1 __ElfN(reloc_entry_1)
-#define Elf_reloc_entry_2 __ElfN(reloc_entry_2)
-#define Elf_reloc_title __ElfN(reloc_title)
-#define Elf_phdr __ElfN(phdr)
-#define Elf_shdr __ElfN(shdr)
-#define Elf_syms_table_entry __ElfN(syms_table_entry)
-#define Elf_syms_table_title __ElfN(syms_table_title)
-#define Elf_ver_def_title __ElfN(ver_def_title)
-#define Elf_ver_line_1 __ElfN(ver_line_1)
-#define Elf_ver_line_2 __ElfN(ver_line_2)
-#define Elf_ver_line_3 __ElfN(ver_line_3)
-#define Elf_ver_line_4 __ElfN(ver_line_4)
-#define Elf_ver_line_5 __ElfN(ver_line_5)
-#define Elf_ver_need_title __ElfN(ver_need_title)
-
-#endif /* !_SYS__ELF_SOLARIS_H_ */
diff --git a/sys/compat/opensolaris/sys/kcondvar.h b/sys/compat/opensolaris/sys/kcondvar.h
deleted file mode 100644
index 0422ba0..0000000
--- a/sys/compat/opensolaris/sys/kcondvar.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_CONDVAR_H_
-#define _OPENSOLARIS_SYS_CONDVAR_H_
-
-#include <sys/param.h>
-#include <sys/proc.h>
-
-#ifdef _KERNEL
-
-#include <sys/mutex.h>
-#include <sys/condvar.h>
-
-typedef struct cv kcondvar_t;
-
-typedef enum {
- CV_DEFAULT,
- CV_DRIVER
-} kcv_type_t;
-
-#define zfs_cv_init(cv, name, type, arg) do { \
- const char *_name; \
- ASSERT((type) == CV_DEFAULT); \
- for (_name = #cv; *_name != '\0'; _name++) { \
- if (*_name >= 'a' && *_name <= 'z') \
- break; \
- } \
- if (*_name == '\0') \
- _name = #cv; \
- cv_init((cv), _name); \
-} while (0)
-#define cv_init(cv, name, type, arg) zfs_cv_init((cv), (name), (type), (arg))
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_CONDVAR_H_ */
diff --git a/sys/compat/opensolaris/sys/kmem.h b/sys/compat/opensolaris/sys/kmem.h
deleted file mode 100644
index 6e51874..0000000
--- a/sys/compat/opensolaris/sys/kmem.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_KMEM_H_
-#define _OPENSOLARIS_SYS_KMEM_H_
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/malloc.h>
-#include <sys/vmem.h>
-
-#include <vm/uma.h>
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-
-#define KM_SLEEP M_WAITOK
-#define KM_NOSLEEP M_NOWAIT
-#define KMC_NODEBUG 0
-
-typedef struct kmem_cache {
- char kc_name[32];
-#ifdef _KERNEL
- uma_zone_t kc_zone;
-#else
- size_t size;
-#endif
- int (*kc_constructor)(void *, void *, int);
- void (*kc_destructor)(void *, void *);
- void *kc_private;
-} kmem_cache_t;
-
-void *zfs_kmem_alloc(size_t size, int kmflags);
-void zfs_kmem_free(void *buf, size_t size);
-uint64_t kmem_size(void);
-uint64_t kmem_used(void);
-kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
- int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
- void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
-void kmem_cache_destroy(kmem_cache_t *cache);
-void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
-void kmem_cache_free(kmem_cache_t *cache, void *buf);
-void kmem_cache_reap_now(kmem_cache_t *cache);
-void kmem_reap(void);
-int kmem_debugging(void);
-void *calloc(size_t n, size_t s);
-
-#define kmem_alloc(size, kmflags) zfs_kmem_alloc((size), (kmflags))
-#define kmem_zalloc(size, kmflags) zfs_kmem_alloc((size), (kmflags) | M_ZERO)
-#define kmem_free(buf, size) zfs_kmem_free((buf), (size))
-
-#endif /* _OPENSOLARIS_SYS_KMEM_H_ */
diff --git a/sys/compat/opensolaris/sys/kobj.h b/sys/compat/opensolaris/sys/kobj.h
deleted file mode 100644
index e060ff0..0000000
--- a/sys/compat/opensolaris/sys/kobj.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_KOBJ_H_
-#define _OPENSOLARIS_SYS_KOBJ_H_
-
-#include <sys/types.h>
-#include <sys/kmem.h>
-#include_next <sys/kobj.h>
-#ifdef AT_UID
-#undef AT_UID
-#endif
-#ifdef AT_GID
-#undef AT_GID
-#endif
-#include <sys/vnode.h>
-
-#define KM_NOWAIT 0x01
-#define KM_TMP 0x02
-
-void kobj_free(void *address, size_t size);
-void *kobj_alloc(size_t size, int flag);
-void *kobj_zalloc(size_t size, int flag);
-
-struct _buf {
- void *ptr;
- int mounted;
-};
-
-struct _buf *kobj_open_file(const char *path);
-int kobj_get_filesize(struct _buf *file, uint64_t *size);
-int kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off);
-void kobj_close_file(struct _buf *file);
-
-#endif /* _OPENSOLARIS_SYS_KOBJ_H_ */
diff --git a/sys/compat/opensolaris/sys/kstat.h b/sys/compat/opensolaris/sys/kstat.h
deleted file mode 100644
index 9df4965..0000000
--- a/sys/compat/opensolaris/sys/kstat.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_KSTAT_H_
-#define _OPENSOLARIS_SYS_KSTAT_H_
-
-#include <sys/sysctl.h>
-
-#define KSTAT_TYPE_NAMED 1
-
-#define KSTAT_FLAG_VIRTUAL 0x01
-
-typedef struct kstat {
- void *ks_data;
- u_int ks_ndata;
-#ifdef _KERNEL
- struct sysctl_ctx_list ks_sysctl_ctx;
- struct sysctl_oid *ks_sysctl_root;
-#endif
-} kstat_t;
-
-typedef struct kstat_named {
-#define KSTAT_STRLEN 31
- char name[KSTAT_STRLEN];
-#define KSTAT_DATA_CHAR 0
-#define KSTAT_DATA_INT32 1
-#define KSTAT_DATA_UINT32 2
-#define KSTAT_DATA_INT64 3
-#define KSTAT_DATA_UINT64 4
- uchar_t data_type;
- union {
- uint64_t ui64;
- } value;
-} kstat_named_t;
-
-kstat_t *kstat_create(char *module, int instance, char *name, char *class,
- uchar_t type, ulong_t ndata, uchar_t flags);
-void kstat_install(kstat_t *ksp);
-void kstat_delete(kstat_t *ksp);
-
-#endif /* _OPENSOLARIS_SYS_KSTAT_H_ */
diff --git a/sys/compat/opensolaris/sys/lock.h b/sys/compat/opensolaris/sys/lock.h
deleted file mode 100644
index 51fcd67..0000000
--- a/sys/compat/opensolaris/sys/lock.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_LOCK_H_
-#define _OPENSOLARIS_SYS_LOCK_H_
-
-#include_next <sys/lock.h>
-
-#ifdef _KERNEL
-
-#define LO_ALLMASK (LO_INITIALIZED | LO_WITNESS | LO_QUIET | \
- LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE | \
- LO_DUPOK | LO_ENROLLPEND | LO_CLASSMASK | \
- LO_NOPROFILE)
-#define LO_EXPECTED (LO_INITIALIZED | LO_WITNESS | LO_RECURSABLE | \
- LO_SLEEPABLE | LO_UPGRADABLE | LO_DUPOK | \
- /* sx lock class */(2 << LO_CLASSSHIFT))
-
-#endif /* defined(_KERNEL) */
-
-#endif /* _OPENSOLARIS_SYS_LOCK_H_ */
diff --git a/sys/compat/opensolaris/sys/misc.h b/sys/compat/opensolaris/sys/misc.h
deleted file mode 100644
index a5a52b7..0000000
--- a/sys/compat/opensolaris/sys/misc.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_MISC_H_
-#define _OPENSOLARIS_SYS_MISC_H_
-
-#define _FIOFFS (INT_MIN)
-#define _FIOGDIO (INT_MIN+1)
-#define _FIOSDIO (INT_MIN+2)
-
-#define _FIO_SEEK_DATA FIOSEEKDATA
-#define _FIO_SEEK_HOLE FIOSEEKHOLE
-
-struct opensolaris_utsname {
- char *nodename;
-};
-
-extern char hw_serial[11];
-extern struct opensolaris_utsname utsname;
-#endif /* _OPENSOLARIS_SYS_MISC_H_ */
diff --git a/sys/compat/opensolaris/sys/mman.h b/sys/compat/opensolaris/sys/mman.h
deleted file mode 100644
index ca74689..0000000
--- a/sys/compat/opensolaris/sys/mman.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_MMAN_H_
-#define _COMPAT_OPENSOLARIS_SYS_MMAN_H_
-
-#include_next <sys/mman.h>
-
-#define mmap64(_a,_b,_c,_d,_e,_f) mmap(_a,_b,_c,_d,_e,_f)
-
-#endif
diff --git a/sys/compat/opensolaris/sys/mntent.h b/sys/compat/opensolaris/sys/mntent.h
deleted file mode 100644
index 426f8ee..0000000
--- a/sys/compat/opensolaris/sys/mntent.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
- * All Rights Reserved
- */
-
-#ifndef _OPENSOLARIS_SYS_MNTENT_H_
-#define _OPENSOLARIS_SYS_MNTENT_H_
-
-#include <sys/param.h>
-#include_next <sys/mount.h>
-
-#define MNTMAXSTR 128
-
-#define MNTTYPE_ZFS "zfs" /* ZFS file system */
-
-#define MNTOPT_RO "ro" /* Read only */
-#define MNTOPT_RW "rw" /* Read/write */
-#define MNTOPT_NOSUID "nosuid" /* Neither setuid nor devices allowed */
-#define MNTOPT_DEVICES "devices" /* Device-special allowed */
-#define MNTOPT_NODEVICES "nodevices" /* Device-special disallowed */
-#define MNTOPT_SETUID "setuid" /* Set uid allowed */
-#define MNTOPT_NOSETUID "nosetuid" /* Set uid not allowed */
-#define MNTOPT_REMOUNT "remount" /* Change mount options */
-#define MNTOPT_ATIME "atime" /* update atime for files */
-#define MNTOPT_NOATIME "noatime" /* do not update atime for files */
-#define MNTOPT_XATTR "xattr" /* enable extended attributes */
-#define MNTOPT_NOXATTR "noxattr" /* disable extended attributes */
-#define MNTOPT_EXEC "exec" /* enable executables */
-#define MNTOPT_NOEXEC "noexec" /* disable executables */
-#define MNTOPT_RESTRICT "restrict" /* restricted autofs mount */
-
-#endif /* !_OPENSOLARIS_SYS_MNTENT_H_ */
diff --git a/sys/compat/opensolaris/sys/mnttab.h b/sys/compat/opensolaris/sys/mnttab.h
deleted file mode 100644
index 950a074..0000000
--- a/sys/compat/opensolaris/sys/mnttab.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_MNTTAB_H_
-#define _OPENSOLARIS_SYS_MNTTAB_H_
-
-#ifndef _KERNEL
-#include <mnttab.h>
-#endif
-
-#endif /* !_OPENSOLARIS_SYS_MNTTAB_H_ */
diff --git a/sys/compat/opensolaris/sys/modctl.h b/sys/compat/opensolaris/sys/modctl.h
deleted file mode 100644
index 7af39b0..0000000
--- a/sys/compat/opensolaris/sys/modctl.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_MODCTL_H
-#define _COMPAT_OPENSOLARIS_SYS_MODCTL_H
-
-#include <sys/param.h>
-#include <sys/linker.h>
-
-typedef struct linker_file modctl_t;
-
-#endif /* _COMPAT_OPENSOLARIS_SYS_MODCTL_H */
diff --git a/sys/compat/opensolaris/sys/mount.h b/sys/compat/opensolaris/sys/mount.h
deleted file mode 100644
index d4c4039..0000000
--- a/sys/compat/opensolaris/sys/mount.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_MOUNT_H_
-#define _OPENSOLARIS_SYS_MOUNT_H_
-
-#include_next <sys/mount.h>
-
-#define MS_FORCE MNT_FORCE
-#define MS_REMOUNT MNT_UPDATE
-
-typedef struct fid fid_t;
-
-#endif /* !_OPENSOLARIS_SYS_MOUNT_H_ */
diff --git a/sys/compat/opensolaris/sys/mutex.h b/sys/compat/opensolaris/sys/mutex.h
deleted file mode 100644
index 8756cd0..0000000
--- a/sys/compat/opensolaris/sys/mutex.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_MUTEX_H_
-#define _OPENSOLARIS_SYS_MUTEX_H_
-
-#ifdef _KERNEL
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include_next <sys/mutex.h>
-#include <sys/sx.h>
-
-typedef enum {
- MUTEX_DEFAULT = 6 /* kernel default mutex */
-} kmutex_type_t;
-
-#define MUTEX_HELD(x) (mutex_owned(x))
-#define MUTEX_NOT_HELD(x) (!mutex_owned(x) || panicstr)
-
-typedef struct sx kmutex_t;
-
-#ifndef DEBUG
-#define MUTEX_FLAGS (SX_DUPOK | SX_NOWITNESS)
-#else
-#define MUTEX_FLAGS (SX_DUPOK)
-#endif
-
-#define mutex_init(lock, desc, type, arg) do { \
- const char *_name; \
- ASSERT((type) == MUTEX_DEFAULT); \
- KASSERT(((lock)->lock_object.lo_flags & LO_ALLMASK) != \
- LO_EXPECTED, ("lock %s already initialized", #lock)); \
- bzero((lock), sizeof(struct sx)); \
- for (_name = #lock; *_name != '\0'; _name++) { \
- if (*_name >= 'a' && *_name <= 'z') \
- break; \
- } \
- if (*_name == '\0') \
- _name = #lock; \
- sx_init_flags((lock), _name, MUTEX_FLAGS); \
-} while (0)
-#define mutex_destroy(lock) sx_destroy(lock)
-#define mutex_enter(lock) sx_xlock(lock)
-#define mutex_tryenter(lock) sx_try_xlock(lock)
-#define mutex_exit(lock) sx_xunlock(lock)
-#define mutex_owned(lock) sx_xlocked(lock)
-/* TODO: Change to sx_xholder() once it is moved from kern_sx.c to sx.h. */
-#define mutex_owner(lock) ((lock)->sx_lock & SX_LOCK_SHARED ? NULL : (struct thread *)SX_OWNER((lock)->sx_lock))
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_MUTEX_H_ */
diff --git a/sys/compat/opensolaris/sys/objfs.h b/sys/compat/opensolaris/sys/objfs.h
deleted file mode 100644
index 3904f4c..0000000
--- a/sys/compat/opensolaris/sys/objfs.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_OBJFS_H
-#define _COMPAT_OPENSOLARIS_SYS_OBJFS_H
-
-/*
- * Private data structure found in '.info' section
- */
-typedef struct objfs_info {
- int objfs_info_primary;
-} objfs_info_t;
-
-
-#endif /* _COMPAT_OPENSOLARIS_SYS_OBJFS_H */
diff --git a/sys/compat/opensolaris/sys/param.h b/sys/compat/opensolaris/sys/param.h
deleted file mode 100644
index 8d36a9d..0000000
--- a/sys/compat/opensolaris/sys/param.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_PARAM_H_
-#define _COMPAT_OPENSOLARIS_SYS_PARAM_H_
-
-#include_next <sys/param.h>
-
-#define PAGESIZE PAGE_SIZE
-
-#endif
diff --git a/sys/compat/opensolaris/sys/pcpu.h b/sys/compat/opensolaris/sys/pcpu.h
deleted file mode 100644
index db38de6..0000000
--- a/sys/compat/opensolaris/sys/pcpu.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_PCPU_H_
-#define _COMPAT_OPENSOLARIS_SYS_PCPU_H_
-
-#include_next <sys/pcpu.h>
-
-typedef struct pcpu cpu_t;
-
-#define cpu_id pc_cpuid
-
-#endif
diff --git a/sys/compat/opensolaris/sys/policy.h b/sys/compat/opensolaris/sys/policy.h
deleted file mode 100644
index 50d3fd8..0000000
--- a/sys/compat/opensolaris/sys/policy.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_POLICY_H_
-#define _OPENSOLARIS_SYS_POLICY_H_
-
-#include <sys/param.h>
-
-#ifdef _KERNEL
-
-struct mount;
-struct ucred;
-struct vattr;
-struct vnode;
-
-int secpolicy_zfs(struct ucred *cred);
-int secpolicy_sys_config(struct ucred *cred, int checkonly);
-int secpolicy_zinject(struct ucred *cred);
-int secpolicy_fs_unmount(struct ucred *cred, struct mount *vfsp);
-int secpolicy_basic_link(struct ucred *cred);
-int secpolicy_vnode_stky_modify(struct ucred *cred);
-int secpolicy_vnode_remove(struct ucred *cred);
-int secpolicy_vnode_access(struct ucred *cred, struct vnode *vp,
- uint64_t owner, int mode);
-int secpolicy_vnode_setdac(struct ucred *cred, uid_t owner);
-int secpolicy_vnode_setattr(struct ucred *cred, struct vnode *vp,
- struct vattr *vap, const struct vattr *ovap, int flags,
- int unlocked_access(void *, int, struct ucred *), void *node);
-int secpolicy_vnode_create_gid(struct ucred *cred);
-int secpolicy_vnode_setids_setgids(struct ucred *cred, gid_t gid);
-int secpolicy_vnode_setid_retain(struct ucred *cred, boolean_t issuidroot);
-void secpolicy_setid_clear(struct vattr *vap, struct ucred *cred);
-int secpolicy_setid_setsticky_clear(struct vnode *vp, struct vattr *vap,
- const struct vattr *ovap, struct ucred *cred);
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_POLICY_H_ */
diff --git a/sys/compat/opensolaris/sys/proc.h b/sys/compat/opensolaris/sys/proc.h
deleted file mode 100644
index 2410396..0000000
--- a/sys/compat/opensolaris/sys/proc.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_PROC_H_
-#define _OPENSOLARIS_SYS_PROC_H_
-
-#include <sys/param.h>
-#include <sys/kthread.h>
-#include_next <sys/proc.h>
-#include <sys/stdint.h>
-#include <sys/smp.h>
-#include <sys/debug.h>
-
-#ifdef _KERNEL
-
-#define CPU curcpu
-#define minclsyspri 0
-#define maxclsyspri 0
-#define max_ncpus mp_ncpus
-#define boot_max_ncpus mp_ncpus
-
-#define TS_RUN 0
-
-#define p0 proc0
-
-typedef short pri_t;
-typedef struct thread _kthread;
-typedef struct thread kthread_t;
-typedef struct thread *kthread_id_t;
-typedef struct proc proc_t;
-
-#if (KSTACK_PAGES * PAGE_SIZE) < 16384
-#define ZFS_KSTACK_PAGES (16384 / PAGE_SIZE)
-#else
-#define ZFS_KSTACK_PAGES 0
-#endif
-
-static __inline kthread_t *
-thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
- size_t len, proc_t *pp, int state, pri_t pri)
-{
- proc_t *p;
- int error;
-
- /*
- * This shim ignores stk, stksize, len and state, so assert that callers pass only NULL, 0, 0 and TS_RUN.
- */
- ASSERT(stk == NULL);
- ASSERT(stksize == 0);
- ASSERT(len == 0);
- ASSERT(state == TS_RUN);
-
- error = kproc_create(proc, arg, &p, 0, ZFS_KSTACK_PAGES,
- "solthread %p", proc);
- return (error == 0 ? FIRST_THREAD_IN_PROC(p) : NULL);
-}
-
-#define thread_exit() kproc_exit(0)
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_PROC_H_ */
diff --git a/sys/compat/opensolaris/sys/random.h b/sys/compat/opensolaris/sys/random.h
deleted file mode 100644
index 0cdea34..0000000
--- a/sys/compat/opensolaris/sys/random.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_RANDOM_H_
-#define _OPENSOLARIS_SYS_RANDOM_H_
-
-#include_next <sys/random.h>
-
-#define random_get_bytes(p, s) read_random((p), (int)(s))
-#define random_get_pseudo_bytes(p, s) read_random((p), (int)(s))
-
-#endif /* !_OPENSOLARIS_SYS_RANDOM_H_ */
diff --git a/sys/compat/opensolaris/sys/rwlock.h b/sys/compat/opensolaris/sys/rwlock.h
deleted file mode 100644
index a3e5515..0000000
--- a/sys/compat/opensolaris/sys/rwlock.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_RWLOCK_H_
-#define _OPENSOLARIS_SYS_RWLOCK_H_
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/sx.h>
-
-#ifdef _KERNEL
-
-typedef enum {
- RW_DRIVER = 2, /* driver (DDI) rwlock */
- RW_DEFAULT = 4 /* kernel default rwlock */
-} krw_type_t;
-
-typedef enum {
- RW_WRITER,
- RW_READER
-} krw_t;
-
-typedef struct sx krwlock_t;
-
-#ifndef DEBUG
-#define RW_FLAGS (SX_DUPOK | SX_NOWITNESS)
-#else
-#define RW_FLAGS (SX_DUPOK)
-#endif
-
-#define RW_READ_HELD(x) (rw_read_held((x)))
-#define RW_WRITE_HELD(x) (rw_write_held((x)))
-#define RW_LOCK_HELD(x) (rw_lock_held((x)))
-#define RW_ISWRITER(x) (rw_iswriter(x))
-
-#define rw_init(lock, desc, type, arg) do { \
- const char *_name; \
- KASSERT(((lock)->lock_object.lo_flags & LO_ALLMASK) != \
- LO_EXPECTED, ("lock %s already initialized", #lock)); \
- bzero((lock), sizeof(struct sx)); \
- for (_name = #lock; *_name != '\0'; _name++) { \
- if (*_name >= 'a' && *_name <= 'z') \
- break; \
- } \
- if (*_name == '\0') \
- _name = #lock; \
- sx_init_flags((lock), _name, RW_FLAGS); \
-} while (0)
-#define rw_destroy(lock) sx_destroy(lock)
-#define rw_enter(lock, how) do { \
- if ((how) == RW_READER) \
- sx_slock(lock); \
- else /* if ((how) == RW_WRITER) */ \
- sx_xlock(lock); \
-} while (0)
-#define rw_tryenter(lock, how) ((how) == RW_READER ? sx_try_slock(lock) : sx_try_xlock(lock))
-#define rw_exit(lock) sx_unlock(lock)
-#define rw_downgrade(lock) sx_downgrade(lock)
-#define rw_tryupgrade(lock) sx_try_upgrade(lock)
-#define rw_read_held(lock) ((lock)->sx_lock != SX_LOCK_UNLOCKED && ((lock)->sx_lock & SX_LOCK_SHARED))
-#define rw_write_held(lock) sx_xlocked(lock)
-#define rw_lock_held(lock) (rw_read_held(lock) || rw_write_held(lock))
-#define rw_iswriter(lock) sx_xlocked(lock)
-/* TODO: Change to sx_xholder() once it is moved from kern_sx.c to sx.h. */
-#define rw_owner(lock) ((lock)->sx_lock & SX_LOCK_SHARED ? NULL : (struct thread *)SX_OWNER((lock)->sx_lock))
-
-#endif /* defined(_KERNEL) */
-
-#endif /* _OPENSOLARIS_SYS_RWLOCK_H_ */
diff --git a/sys/compat/opensolaris/sys/sdt.h b/sys/compat/opensolaris/sys/sdt.h
deleted file mode 100644
index 6db45bb..0000000
--- a/sys/compat/opensolaris/sys/sdt.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_SDT_H_
-#define _OPENSOLARIS_SYS_SDT_H_
-
-#include_next <sys/sdt.h>
-
-#undef DTRACE_PROBE
-#undef DTRACE_PROBE1
-#undef DTRACE_PROBE2
-#undef DTRACE_PROBE3
-#undef DTRACE_PROBE4
-
-#define DTRACE_PROBE(name)
-#define DTRACE_PROBE1(name, type1, arg1)
-#define DTRACE_PROBE2(name, type1, arg1, type2, arg2)
-#define DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)
-#define DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
-
-#endif /* _OPENSOLARIS_SYS_SDT_H_ */
diff --git a/sys/compat/opensolaris/sys/stat.h b/sys/compat/opensolaris/sys/stat.h
deleted file mode 100644
index 5f45ebe..0000000
--- a/sys/compat/opensolaris/sys/stat.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2007 John Birrell <jb@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-#ifndef _COMPAT_OPENSOLARIS_SYS_STAT_H_
-#define _COMPAT_OPENSOLARIS_SYS_STAT_H_
-
-#include_next <sys/stat.h>
-
-#define stat64 stat
-#define fstat64 fstat
-
-#endif
diff --git a/sys/compat/opensolaris/sys/string.h b/sys/compat/opensolaris/sys/string.h
deleted file mode 100644
index aeec929..0000000
--- a/sys/compat/opensolaris/sys/string.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_STRING_H_
-#define _OPENSOLARIS_SYS_STRING_H_
-
-#include <sys/libkern.h>
-
-char *strpbrk(const char *, const char *);
-void strident_canon(char *s, size_t n);
-
-#endif /* _OPENSOLARIS_SYS_STRING_H_ */
diff --git a/sys/compat/opensolaris/sys/sunddi.h b/sys/compat/opensolaris/sys/sunddi.h
deleted file mode 100644
index 192d5a9..0000000
--- a/sys/compat/opensolaris/sys/sunddi.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_SUNDDI_H_
-#define _OPENSOLARIS_SYS_SUNDDI_H_
-
-#define ddi_copyin(from, to, size, flag) (bcopy((from), (to), (size)), 0)
-#define ddi_copyout(from, to, size, flag) (bcopy((from), (to), (size)), 0)
-int ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result);
-
-#endif /* _OPENSOLARIS_SYS_SUNDDI_H_ */
diff --git a/sys/compat/opensolaris/sys/sysmacros.h b/sys/compat/opensolaris/sys/sysmacros.h
deleted file mode 100644
index 3ce6c61..0000000
--- a/sys/compat/opensolaris/sys/sysmacros.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _OPENSOLARIS_SYS_SYSMACROS_H_
-#define _OPENSOLARIS_SYS_SYSMACROS_H_
-
-#include <sys/param.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Macro for checking power of 2 address alignment.
- */
-#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
-
-/*
- * Macro to determine if value is a power of 2
- */
-#define ISP2(x) (((x) & ((x) - 1)) == 0)
-
-/*
- * Macros for various sorts of alignment and rounding when the alignment
- * is known to be a power of 2.
- */
-#define P2ALIGN(x, align) ((x) & -(align))
-#define P2PHASE(x, align) ((x) & ((align) - 1))
-#define P2NPHASE(x, align) (-(x) & ((align) - 1))
-#define P2ROUNDUP(x, align) (-(-(x) & -(align)))
-#define P2END(x, align) (-(~(x) & -(align)))
-#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align)))
-#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
-/*
- * Determine whether two numbers have the same high-order bit.
- */
-#define P2SAMEHIGHBIT(x, y) (((x) ^ (y)) < ((x) & (y)))
-
-/*
- * Typed version of the P2* macros. These macros should be used to ensure
- * that the result is correctly calculated based on the data type of (x),
- * which is passed in as the last argument, regardless of the data
- * type of the alignment. For example, if (x) is of type uint64_t,
- * and we want to round it up to a page boundary using "PAGESIZE" as
- * the alignment, we can do either
- * P2ROUNDUP(x, (uint64_t)PAGESIZE)
- * or
- * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
- */
-#define P2ALIGN_TYPED(x, align, type) \
- ((type)(x) & -(type)(align))
-#define P2PHASE_TYPED(x, align, type) \
- ((type)(x) & ((type)(align) - 1))
-#define P2NPHASE_TYPED(x, align, type) \
- (-(type)(x) & ((type)(align) - 1))
-#define P2ROUNDUP_TYPED(x, align, type) \
- (-(-(type)(x) & -(type)(align)))
-#define P2END_TYPED(x, align, type) \
- (-(~(type)(x) & -(type)(align)))
-#define P2PHASEUP_TYPED(x, align, phase, type) \
- ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
-#define P2CROSS_TYPED(x, y, align, type) \
- (((type)(x) ^ (type)(y)) > (type)(align) - 1)
-#define P2SAMEHIGHBIT_TYPED(x, y, type) \
- (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
-
-#ifdef _KERNEL
-#define memmove(dst, src, size) bcopy((src), (dst), (size))
-#endif
-
-/*
- * Find highest one bit set.
- * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
- * High order bit is 31 (or 63 in _LP64 kernel).
- */
-static __inline int
-highbit(ulong_t i)
-{
- register int h = 1;
-
- if (i == 0)
- return (0);
-#ifdef _LP64
- if (i & 0xffffffff00000000ul) {
- h += 32; i >>= 32;
- }
-#endif
- if (i & 0xffff0000) {
- h += 16; i >>= 16;
- }
- if (i & 0xff00) {
- h += 8; i >>= 8;
- }
- if (i & 0xf0) {
- h += 4; i >>= 4;
- }
- if (i & 0xc) {
- h += 2; i >>= 2;
- }
- if (i & 0x2) {
- h += 1;
- }
- return (h);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OPENSOLARIS_SYS_SYSMACROS_H_ */
diff --git a/sys/compat/opensolaris/sys/systm.h b/sys/compat/opensolaris/sys/systm.h
deleted file mode 100644
index d4ef17c..0000000
--- a/sys/compat/opensolaris/sys/systm.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_SYSTM_H_
-#define _OPENSOLARIS_SYS_SYSTM_H_
-
-#include <sys/param.h>
-#include_next <sys/systm.h>
-
-#ifdef _KERNEL
-#include <sys/string.h>
-
-#define PAGESIZE PAGE_SIZE
-#define PAGEOFFSET (PAGESIZE - 1)
-#define PAGEMASK (~PAGEOFFSET)
-
-#define delay(x) pause("soldelay", (x))
-
-#define xcopyin(u, k, s) copyin(u, k, s)
-#define xcopyout(k, u, s) copyout(k, u, s)
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_SYSTM_H_ */
diff --git a/sys/compat/opensolaris/sys/taskq.h b/sys/compat/opensolaris/sys/taskq.h
deleted file mode 100644
index 2feefe3..0000000
--- a/sys/compat/opensolaris/sys/taskq.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_TASKQ_H
-#define _SYS_TASKQ_H
-
-#pragma ident "@(#)taskq.h 1.5 05/06/08 SMI"
-
-#include <sys/param.h>
-#include <sys/proc.h>
-#include <sys/kcondvar.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define TASKQ_NAMELEN 31
-
-typedef struct taskq taskq_t;
-typedef uintptr_t taskqid_t;
-typedef void (task_func_t)(void *);
-
-/*
- * Public flags for taskq_create(): bit range 0-15
- */
-#define TASKQ_PREPOPULATE 0x0001 /* Prepopulate with threads and data */
-#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
-#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
-
-/*
- * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as
- * KM_SLEEP/KM_NOSLEEP.
- */
-#define TQ_SLEEP 0x00 /* Can block for memory */
-#define TQ_NOSLEEP 0x01 /* cannot block for memory; may fail */
-#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
-#define TQ_NOALLOC 0x04 /* cannot allocate memory; may fail */
-
-#ifdef _KERNEL
-
-extern taskq_t *system_taskq;
-
-extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
-extern taskq_t *taskq_create_instance(const char *, int, int, pri_t, int,
- int, uint_t);
-extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
-extern void nulltask(void *);
-extern void taskq_destroy(taskq_t *);
-extern void taskq_wait(taskq_t *);
-extern void taskq_suspend(taskq_t *);
-extern int taskq_suspended(taskq_t *);
-extern void taskq_resume(taskq_t *);
-extern int taskq_member(taskq_t *, kthread_t *);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_TASKQ_H */
diff --git a/sys/compat/opensolaris/sys/taskq_impl.h b/sys/compat/opensolaris/sys/taskq_impl.h
deleted file mode 100644
index 02c5616..0000000
--- a/sys/compat/opensolaris/sys/taskq_impl.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_TASKQ_IMPL_H
-#define _SYS_TASKQ_IMPL_H
-
-#pragma ident "@(#)taskq_impl.h 1.6 05/06/08 SMI"
-
-#include <sys/mutex.h>
-#include <sys/rwlock.h>
-#include <sys/condvar.h>
-#include <sys/taskq.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct taskq_bucket taskq_bucket_t;
-
-typedef struct taskq_ent {
- struct taskq_ent *tqent_next;
- struct taskq_ent *tqent_prev;
- task_func_t *tqent_func;
- void *tqent_arg;
- taskq_bucket_t *tqent_bucket;
- kthread_t *tqent_thread;
- kcondvar_t tqent_cv;
-} taskq_ent_t;
-
-/*
- * Taskq Statistics fields are not protected by any locks.
- */
-typedef struct tqstat {
- uint_t tqs_hits;
- uint_t tqs_misses;
- uint_t tqs_overflow; /* no threads to allocate */
- uint_t tqs_tcreates; /* threads created */
- uint_t tqs_tdeaths; /* threads died */
- uint_t tqs_maxthreads; /* max # of alive threads */
- uint_t tqs_nomem; /* # of times there were no memory */
- uint_t tqs_disptcreates;
-} tqstat_t;
-
-/*
- * Per-CPU hash bucket manages taskq_bent_t structures using freelist.
- */
-struct taskq_bucket {
- kmutex_t tqbucket_lock;
- taskq_t *tqbucket_taskq; /* Enclosing taskq */
- taskq_ent_t tqbucket_freelist;
- uint_t tqbucket_nalloc; /* # of allocated entries */
- uint_t tqbucket_nfree; /* # of free entries */
- kcondvar_t tqbucket_cv;
- ushort_t tqbucket_flags;
- hrtime_t tqbucket_totaltime;
- tqstat_t tqbucket_stat;
-};
-
-/*
- * Bucket flags.
- */
-#define TQBUCKET_CLOSE 0x01
-#define TQBUCKET_SUSPEND 0x02
-
-/*
- * taskq implementation flags: bit range 16-31
- */
-#define TASKQ_ACTIVE 0x00010000
-#define TASKQ_SUSPENDED 0x00020000
-#define TASKQ_NOINSTANCE 0x00040000
-
-struct taskq {
- char tq_name[TASKQ_NAMELEN + 1];
- kmutex_t tq_lock;
- krwlock_t tq_threadlock;
- kcondvar_t tq_dispatch_cv;
- kcondvar_t tq_wait_cv;
- uint_t tq_flags;
- int tq_active;
- int tq_nthreads;
- int tq_nalloc;
- int tq_minalloc;
- int tq_maxalloc;
- taskq_ent_t *tq_freelist;
- taskq_ent_t tq_task;
- int tq_maxsize;
- pri_t tq_pri; /* Scheduling priority */
- taskq_bucket_t *tq_buckets; /* Per-cpu array of buckets */
- uint_t tq_nbuckets; /* # of buckets (2^n) */
- union {
- kthread_t *_tq_thread;
- kthread_t **_tq_threadlist;
- } tq_thr;
- /*
- * Statistics.
- */
- hrtime_t tq_totaltime; /* Time spent processing tasks */
- int tq_tasks; /* Total # of tasks posted */
- int tq_executed; /* Total # of tasks executed */
- int tq_maxtasks; /* Max number of tasks in the queue */
- int tq_tcreates;
- int tq_tdeaths;
-};
-
-#define tq_thread tq_thr._tq_thread
-#define tq_threadlist tq_thr._tq_threadlist
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_TASKQ_IMPL_H */
diff --git a/sys/compat/opensolaris/sys/time.h b/sys/compat/opensolaris/sys/time.h
deleted file mode 100644
index 770b251..0000000
--- a/sys/compat/opensolaris/sys/time.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_TIME_H_
-#define _OPENSOLARIS_SYS_TIME_H_
-
-#include_next <sys/time.h>
-
-#define SEC 1
-#define MILLISEC 1000
-#define MICROSEC 1000000
-#define NANOSEC 1000000000
-
-typedef longlong_t hrtime_t;
-
-#define LBOLT ((gethrtime() * hz) / NANOSEC)
-
-#ifdef _KERNEL
-#define lbolt64 (int64_t)(LBOLT)
-
-static __inline hrtime_t
-gethrtime(void) {
-
- struct timespec ts;
- hrtime_t nsec;
-
-#if 1
- getnanouptime(&ts);
-#else
- nanouptime(&ts);
-#endif
- nsec = (hrtime_t)ts.tv_sec * NANOSEC + ts.tv_nsec;
- return (nsec);
-}
-
-#define gethrestime_sec() (time_second)
-#define gethrestime(ts) getnanotime(ts)
-
-#else
-
-static __inline hrtime_t gethrtime(void) {
- struct timespec ts;
- clock_gettime(CLOCK_UPTIME,&ts);
- return (((u_int64_t) ts.tv_sec) * NANOSEC + ts.tv_nsec);
-}
-
-
-#endif /* _KERNEL */
-
-#endif /* !_OPENSOLARIS_SYS_TIME_H_ */
diff --git a/sys/compat/opensolaris/sys/types.h b/sys/compat/opensolaris/sys/types.h
deleted file mode 100644
index a99e1f0..0000000
--- a/sys/compat/opensolaris/sys/types.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_TYPES_H_
-#define _OPENSOLARIS_SYS_TYPES_H_
-
-/*
- * This is a bag of dirty hacks to keep things compiling.
- */
-
-#include <sys/stdint.h>
-#include_next <sys/types.h>
-
-#define MAXNAMELEN 256
-
-typedef struct timespec timestruc_t;
-typedef u_int uint_t;
-typedef u_char uchar_t;
-typedef u_short ushort_t;
-typedef u_long ulong_t;
-typedef long long longlong_t;
-typedef unsigned long long u_longlong_t;
-typedef off_t off64_t;
-typedef id_t taskid_t;
-typedef id_t projid_t;
-typedef id_t poolid_t;
-typedef id_t zoneid_t;
-typedef id_t ctid_t;
-
-#ifdef _KERNEL
-
-#define B_FALSE 0
-#define B_TRUE 1
-
-typedef short index_t;
-typedef off_t offset_t;
-typedef long ptrdiff_t; /* pointer difference */
-typedef void pathname_t;
-typedef int64_t rlim64_t;
-
-#else
-
-#if defined(__XOPEN_OR_POSIX)
-typedef enum { _B_FALSE, _B_TRUE } boolean_t;
-#else
-typedef enum { B_FALSE, B_TRUE } boolean_t;
-#endif /* defined(__XOPEN_OR_POSIX) */
-
-typedef longlong_t offset_t;
-typedef u_longlong_t u_offset_t;
-typedef uint64_t upad64_t;
-typedef struct timespec timespec_t;
-typedef short pri_t;
-typedef int32_t daddr32_t;
-typedef int32_t time32_t;
-typedef u_longlong_t diskaddr_t;
-typedef ushort_t o_mode_t; /* old file attribute type */
-
-#endif /* !_KERNEL */
-
-#endif /* !_OPENSOLARIS_SYS_TYPES_H_ */
diff --git a/sys/compat/opensolaris/sys/uio.h b/sys/compat/opensolaris/sys/uio.h
deleted file mode 100644
index 02ee1d8..0000000
--- a/sys/compat/opensolaris/sys/uio.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_UIO_H_
-#define _OPENSOLARIS_SYS_UIO_H_
-
-#include_next <sys/uio.h>
-#include <sys/debug.h>
-
-#ifndef _KERNEL
-#define FOF_OFFSET 1 /* Use the offset in uio argument */
-
-struct uio {
- struct iovec *uio_iov;
- int uio_iovcnt;
- off_t uio_offset;
- int uio_resid;
- enum uio_seg uio_segflg;
- enum uio_rw uio_rw;
- void *uio_td;
-};
-#endif
-
-typedef struct uio uio_t;
-typedef struct iovec iovec_t;
-
-#define uio_loffset uio_offset
-
-static __inline int
-zfs_uiomove(void *cp, size_t n, enum uio_rw dir, uio_t *uio)
-{
-
- ASSERT(uio->uio_rw == dir);
- return (uiomove(cp, (int)n, uio));
-}
-#define uiomove(cp, n, dir, uio) zfs_uiomove((cp), (n), (dir), (uio))
-
-#endif /* !_OPENSOLARIS_SYS_UIO_H_ */
diff --git a/sys/compat/opensolaris/sys/varargs.h b/sys/compat/opensolaris/sys/varargs.h
deleted file mode 100644
index 061edc1..0000000
--- a/sys/compat/opensolaris/sys/varargs.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_VARARGS_H_
-#define _OPENSOLARIS_SYS_VARARGS_H_
-
-#ifdef _KERNEL
-#include <machine/stdarg.h>
-#else
-#include <stdarg.h>
-#endif
-
-#endif /* !_OPENSOLARIS_SYS_VARARGS_H_ */
diff --git a/sys/compat/opensolaris/sys/vfs.h b/sys/compat/opensolaris/sys/vfs.h
deleted file mode 100644
index c2d8a6b..0000000
--- a/sys/compat/opensolaris/sys/vfs.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_VFS_H_
-#define _OPENSOLARIS_SYS_VFS_H_
-
-#include <sys/param.h>
-
-#ifdef _KERNEL
-
-#include <sys/mount.h>
-#include <sys/vnode.h>
-
-#define rootdir rootvnode
-
-typedef struct mount vfs_t;
-
-#define vfs_flag mnt_flag
-#define vfs_data mnt_data
-#define vfs_count mnt_ref
-#define vfs_fsid mnt_stat.f_fsid
-#define vfs_bsize mnt_stat.f_bsize
-
-#define v_flag v_vflag
-#define v_vfsp v_mount
-
-#define VFS_RDONLY MNT_RDONLY
-#define VFS_NOSETUID MNT_NOSUID
-#define VFS_NOEXEC MNT_NOEXEC
-
-#define VFS_HOLD(vfsp) do { \
- MNT_ILOCK(vfsp); \
- MNT_REF(vfsp); \
- MNT_IUNLOCK(vfsp); \
-} while (0)
-#define VFS_RELE(vfsp) do { \
- MNT_ILOCK(vfsp); \
- MNT_REL(vfsp); \
- MNT_IUNLOCK(vfsp); \
-} while (0)
-
-#define VROOT VV_ROOT
-
-/*
- * Structure defining a mount option for a filesystem.
- * option names are found in mntent.h
- */
-typedef struct mntopt {
- char *mo_name; /* option name */
- char **mo_cancel; /* list of options cancelled by this one */
- char *mo_arg; /* argument string for this option */
- int mo_flags; /* flags for this mount option */
- void *mo_data; /* filesystem specific data */
-} mntopt_t;
-
-/*
- * Flags that apply to mount options
- */
-
-#define MO_SET 0x01 /* option is set */
-#define MO_NODISPLAY 0x02 /* option not listed in mnttab */
-#define MO_HASVALUE 0x04 /* option takes a value */
-#define MO_IGNORE 0x08 /* option ignored by parser */
-#define MO_DEFAULT MO_SET /* option is on by default */
-#define MO_TAG 0x10 /* flags a tag set by user program */
-#define MO_EMPTY 0x20 /* empty space in option table */
-
-#define VFS_NOFORCEOPT 0x01 /* honor MO_IGNORE (don't set option) */
-#define VFS_DISPLAY 0x02 /* Turn off MO_NODISPLAY bit for opt */
-#define VFS_NODISPLAY 0x04 /* Turn on MO_NODISPLAY bit for opt */
-#define VFS_CREATEOPT 0x08 /* Create the opt if it's not there */
-
-/*
- * Structure holding mount option strings for the mounted file system.
- */
-typedef struct mntopts {
- uint_t mo_count; /* number of entries in table */
- mntopt_t *mo_list; /* list of mount options */
-} mntopts_t;
-
-void vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
- int flags __unused);
-void vfs_clearmntopt(vfs_t *vfsp, const char *name);
-int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
-int traverse(vnode_t **cvpp, int lktype);
-int domount(kthread_t *td, vnode_t *vp, const char *fstype, char *fspath,
- char *fspec, int fsflags);
-
-#endif /* _KERNEL */
-
-#endif /* _OPENSOLARIS_SYS_VFS_H_ */
diff --git a/sys/compat/opensolaris/sys/vnode.h b/sys/compat/opensolaris/sys/vnode.h
deleted file mode 100644
index a8a261c..0000000
--- a/sys/compat/opensolaris/sys/vnode.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_VNODE_H_
-#define _OPENSOLARIS_SYS_VNODE_H_
-
-#include_next <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/cred.h>
-#include <sys/fcntl.h>
-#include <sys/namei.h>
-#include <sys/proc.h>
-#include <sys/filedesc.h>
-#include <sys/syscallsubr.h>
-
-typedef struct vnode vnode_t;
-typedef struct vattr vattr_t;
-typedef void caller_context_t;
-
-typedef struct vop_vector vnodeops_t;
-#define vop_fid vop_vptofh
-#define vop_fid_args vop_vptofh_args
-#define a_fid a_fhp
-
-#define v_count v_usecount
-
-static __inline int
-vn_is_readonly(vnode_t *vp)
-{
- return (vp->v_mount->mnt_flag & MNT_RDONLY);
-}
-#define vn_vfswlock(vp) (0)
-#define vn_vfsunlock(vp) do { } while (0)
-#define vn_ismntpt(vp) ((vp)->v_type == VDIR && (vp)->v_mountedhere != NULL)
-#define vn_mountedvfs(vp) ((vp)->v_mountedhere)
-#define vn_has_cached_data(vp) ((vp)->v_object != NULL && (vp)->v_object->resident_page_count > 0)
-
-#define VN_HOLD(v) vref(v)
-#define VN_RELE(v) vrele(v)
-#define VN_URELE(v) vput(v)
-
-#define VOP_REALVP(vp, vpp) (*(vpp) = (vp), 0)
-
-#define vnevent_remove(vp) do { } while (0)
-#define vnevent_rmdir(vp) do { } while (0)
-#define vnevent_rename_src(vp) do { } while (0)
-#define vnevent_rename_dest(vp) do { } while (0)
-
-
-#define IS_DEVVP(vp) \
- ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
-
-#define MODEMASK ALLPERMS
-
-#define specvp(vp, rdev, type, cr) (VN_HOLD(vp), (vp))
-#define MANDMODE(mode) (0)
-#define chklock(vp, op, offset, size, mode, ct) (0)
-#define cleanlocks(vp, pid, foo) do { } while (0)
-#define cleanshares(vp, pid) do { } while (0)
-
-/*
- * We will use va_spare in place of Solaris' va_mask.
- * This field is initialized in zfs_setattr().
- */
-#define va_mask va_spare
-/* TODO: va_fileid is shorter than va_nodeid !!! */
-#define va_nodeid va_fileid
-/* TODO: This field needs conversion! */
-#define va_nblocks va_bytes
-#define va_blksize va_blocksize
-#define va_seq va_gen
-
-#define MAXOFFSET_T OFF_MAX
-#define EXCL 0
-
-#define AT_TYPE 0x0001
-#define AT_MODE 0x0002
-#define AT_UID 0x0004
-#define AT_GID 0x0008
-#define AT_FSID 0x0010
-#define AT_NODEID 0x0020
-#define AT_NLINK 0x0040
-#define AT_SIZE 0x0080
-#define AT_ATIME 0x0100
-#define AT_MTIME 0x0200
-#define AT_CTIME 0x0400
-#define AT_RDEV 0x0800
-#define AT_BLKSIZE 0x1000
-#define AT_NBLOCKS 0x2000
-#define AT_SEQ 0x4000
-#define AT_NOSET (AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|AT_TYPE|\
- AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
-
-#define ACCESSED (AT_ATIME)
-#define STATE_CHANGED (AT_CTIME)
-#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
-
-static __inline void
-vattr_init_mask(vattr_t *vap)
-{
-
- vap->va_mask = 0;
-
- if (vap->va_type != VNON)
- vap->va_mask |= AT_TYPE;
- if (vap->va_uid != (uid_t)VNOVAL)
- vap->va_mask |= AT_UID;
- if (vap->va_gid != (gid_t)VNOVAL)
- vap->va_mask |= AT_GID;
- if (vap->va_size != (u_quad_t)VNOVAL)
- vap->va_mask |= AT_SIZE;
- if (vap->va_atime.tv_sec != VNOVAL)
- vap->va_mask |= AT_ATIME;
- if (vap->va_mtime.tv_sec != VNOVAL)
- vap->va_mask |= AT_MTIME;
- if (vap->va_mode != (u_short)VNOVAL)
- vap->va_mask |= AT_MODE;
-}
-
-#define FCREAT O_CREAT
-#define FTRUNC O_TRUNC
-#define FDSYNC FFSYNC
-#define FRSYNC FFSYNC
-#define FSYNC FFSYNC
-#define FOFFMAX 0x00
-
-enum create { CRCREAT };
-
-static __inline int
-zfs_vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode,
- vnode_t **vpp, enum create crwhy, mode_t umask)
-{
- struct thread *td = curthread;
- struct nameidata nd;
- int error;
-
- ASSERT(seg == UIO_SYSSPACE);
- ASSERT(filemode == (FWRITE | FCREAT | FTRUNC | FOFFMAX));
- ASSERT(crwhy == CRCREAT);
- ASSERT(umask == 0);
-
- if (td->td_proc->p_fd->fd_rdir == NULL)
- td->td_proc->p_fd->fd_rdir = rootvnode;
- if (td->td_proc->p_fd->fd_cdir == NULL)
- td->td_proc->p_fd->fd_cdir = rootvnode;
-
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pnamep, td);
- error = vn_open_cred(&nd, &filemode, createmode, td->td_ucred, NULL);
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (error == 0) {
- /* We just unlock so we hold a reference. */
- VN_HOLD(nd.ni_vp);
- VOP_UNLOCK(nd.ni_vp, 0);
- *vpp = nd.ni_vp;
- }
- return (error);
-}
-#define vn_open(pnamep, seg, filemode, createmode, vpp, crwhy, umask) \
- zfs_vn_open((pnamep), (seg), (filemode), (createmode), (vpp), (crwhy), (umask))
-
-#define RLIM64_INFINITY 0
-static __inline int
-zfs_vn_rdwr(enum uio_rw rw, vnode_t *vp, caddr_t base, ssize_t len,
- offset_t offset, enum uio_seg seg, int ioflag, int ulimit, cred_t *cr,
- ssize_t *residp)
-{
- struct thread *td = curthread;
- int error, vfslocked, resid;
-
- ASSERT(rw == UIO_WRITE);
- ASSERT(ioflag == 0);
- ASSERT(ulimit == RLIM64_INFINITY);
-
- ioflag = IO_APPEND | IO_UNIT;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
- error = vn_rdwr(rw, vp, base, len, offset, seg, ioflag, cr, NOCRED,
- &resid, td);
- VFS_UNLOCK_GIANT(vfslocked);
- if (residp != NULL)
- *residp = (ssize_t)resid;
- return (error);
-}
-#define vn_rdwr(rw, vp, base, len, offset, seg, ioflag, ulimit, cr, residp) \
- zfs_vn_rdwr((rw), (vp), (base), (len), (offset), (seg), (ioflag), (ulimit), (cr), (residp))
-
-static __inline int
-zfs_vop_fsync(vnode_t *vp, int flag, cred_t *cr)
-{
- struct mount *mp;
- int error, vfslocked;
-
- ASSERT(flag == FSYNC);
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
- goto drop;
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- error = VOP_FSYNC(vp, MNT_WAIT, curthread);
- VOP_UNLOCK(vp, 0);
- vn_finished_write(mp);
-drop:
- VFS_UNLOCK_GIANT(vfslocked);
- return (error);
-}
-#define VOP_FSYNC(vp, flag, cr) zfs_vop_fsync((vp), (flag), (cr))
-
-static __inline int
-zfs_vop_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
-{
-
- ASSERT(flag == (FWRITE | FCREAT | FTRUNC | FOFFMAX));
- ASSERT(count == 1);
- ASSERT(offset == 0);
-
- return (vn_close(vp, flag, cr, curthread));
-}
-#define VOP_CLOSE(vp, oflags, count, offset, cr) \
- zfs_vop_close((vp), (oflags), (count), (offset), (cr))
-
-static __inline int
-vn_rename(char *from, char *to, enum uio_seg seg)
-{
-
- ASSERT(seg == UIO_SYSSPACE);
-
- return (kern_rename(curthread, from, to, seg));
-}
-
-enum rm { RMFILE };
-static __inline int
-vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
-{
-
- ASSERT(seg == UIO_SYSSPACE);
- ASSERT(dirflag == RMFILE);
-
- return (kern_unlink(curthread, fnamep, seg));
-}
-
-#endif /* _OPENSOLARIS_SYS_VNODE_H_ */
diff --git a/sys/compat/opensolaris/sys/zone.h b/sys/compat/opensolaris/sys/zone.h
deleted file mode 100644
index 2e47eb1..0000000
--- a/sys/compat/opensolaris/sys/zone.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*-
- * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _OPENSOLARIS_SYS_ZONE_H_
-#define _OPENSOLARIS_SYS_ZONE_H_
-
-#ifdef _KERNEL
-
-#include <sys/jail.h>
-
-/*
- * Macros to help with zone visibility restrictions.
- */
-
-/*
- * Is process in the global zone?
- */
-#define INGLOBALZONE(p) (!jailed((p)->p_ucred))
-
-/*
- * Attach the given dataset to the given jail.
- */
-extern int zone_dataset_attach(struct ucred *, const char *, int);
-
-/*
- * Detach the given dataset from the given jail.
- */
-extern int zone_dataset_detach(struct ucred *, const char *, int);
-
-/*
- * Returns true if the named pool/dataset is visible in the current zone.
- */
-extern int zone_dataset_visible(const char *, int *);
-
-#else /* !_KERNEL */
-
-#define GLOBAL_ZONEID 0
-
-extern int getzoneid(void);
-
-#endif /* _KERNEL */
-
-#endif /* !_OPENSOLARIS_SYS_ZONE_H_ */
diff --git a/sys/contrib/opensolaris/OPENSOLARIS.LICENSE b/sys/contrib/opensolaris/OPENSOLARIS.LICENSE
deleted file mode 100644
index da23621..0000000
--- a/sys/contrib/opensolaris/OPENSOLARIS.LICENSE
+++ /dev/null
@@ -1,384 +0,0 @@
-Unless otherwise noted, all files in this distribution are released
-under the Common Development and Distribution License (CDDL).
-Exceptions are noted within the associated source files.
-
---------------------------------------------------------------------
-
-
-COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0
-
-1. Definitions.
-
- 1.1. "Contributor" means each individual or entity that creates
- or contributes to the creation of Modifications.
-
- 1.2. "Contributor Version" means the combination of the Original
- Software, prior Modifications used by a Contributor (if any),
- and the Modifications made by that particular Contributor.
-
- 1.3. "Covered Software" means (a) the Original Software, or (b)
- Modifications, or (c) the combination of files containing
- Original Software with files containing Modifications, in
- each case including portions thereof.
-
- 1.4. "Executable" means the Covered Software in any form other
- than Source Code.
-
- 1.5. "Initial Developer" means the individual or entity that first
- makes Original Software available under this License.
-
- 1.6. "Larger Work" means a work which combines Covered Software or
- portions thereof with code not governed by the terms of this
- License.
-
- 1.7. "License" means this document.
-
- 1.8. "Licensable" means having the right to grant, to the maximum
- extent possible, whether at the time of the initial grant or
- subsequently acquired, any and all of the rights conveyed
- herein.
-
- 1.9. "Modifications" means the Source Code and Executable form of
- any of the following:
-
- A. Any file that results from an addition to, deletion from or
- modification of the contents of a file containing Original
- Software or previous Modifications;
-
- B. Any new file that contains any part of the Original
- Software or previous Modifications; or
-
- C. Any new file that is contributed or otherwise made
- available under the terms of this License.
-
- 1.10. "Original Software" means the Source Code and Executable
- form of computer software code that is originally released
- under this License.
-
- 1.11. "Patent Claims" means any patent claim(s), now owned or
- hereafter acquired, including without limitation, method,
- process, and apparatus claims, in any patent Licensable by
- grantor.
-
- 1.12. "Source Code" means (a) the common form of computer software
- code in which modifications are made and (b) associated
- documentation included in or with such code.
-
- 1.13. "You" (or "Your") means an individual or a legal entity
- exercising rights under, and complying with all of the terms
- of, this License. For legal entities, "You" includes any
- entity which controls, is controlled by, or is under common
- control with You. For purposes of this definition,
- "control" means (a) the power, direct or indirect, to cause
- the direction or management of such entity, whether by
- contract or otherwise, or (b) ownership of more than fifty
- percent (50%) of the outstanding shares or beneficial
- ownership of such entity.
-
-2. License Grants.
-
- 2.1. The Initial Developer Grant.
-
- Conditioned upon Your compliance with Section 3.1 below and
- subject to third party intellectual property claims, the Initial
- Developer hereby grants You a world-wide, royalty-free,
- non-exclusive license:
-
- (a) under intellectual property rights (other than patent or
- trademark) Licensable by Initial Developer, to use,
- reproduce, modify, display, perform, sublicense and
- distribute the Original Software (or portions thereof),
- with or without Modifications, and/or as part of a Larger
- Work; and
-
- (b) under Patent Claims infringed by the making, using or
- selling of Original Software, to make, have made, use,
- practice, sell, and offer for sale, and/or otherwise
- dispose of the Original Software (or portions thereof).
-
- (c) The licenses granted in Sections 2.1(a) and (b) are
- effective on the date Initial Developer first distributes
- or otherwise makes the Original Software available to a
- third party under the terms of this License.
-
- (d) Notwithstanding Section 2.1(b) above, no patent license is
- granted: (1) for code that You delete from the Original
- Software, or (2) for infringements caused by: (i) the
- modification of the Original Software, or (ii) the
- combination of the Original Software with other software
- or devices.
-
- 2.2. Contributor Grant.
-
- Conditioned upon Your compliance with Section 3.1 below and
- subject to third party intellectual property claims, each
- Contributor hereby grants You a world-wide, royalty-free,
- non-exclusive license:
-
- (a) under intellectual property rights (other than patent or
- trademark) Licensable by Contributor to use, reproduce,
- modify, display, perform, sublicense and distribute the
- Modifications created by such Contributor (or portions
- thereof), either on an unmodified basis, with other
- Modifications, as Covered Software and/or as part of a
- Larger Work; and
-
- (b) under Patent Claims infringed by the making, using, or
- selling of Modifications made by that Contributor either
- alone and/or in combination with its Contributor Version
- (or portions of such combination), to make, use, sell,
- offer for sale, have made, and/or otherwise dispose of:
- (1) Modifications made by that Contributor (or portions
- thereof); and (2) the combination of Modifications made by
- that Contributor with its Contributor Version (or portions
- of such combination).
-
- (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
- effective on the date Contributor first distributes or
- otherwise makes the Modifications available to a third
- party.
-
- (d) Notwithstanding Section 2.2(b) above, no patent license is
- granted: (1) for any code that Contributor has deleted
- from the Contributor Version; (2) for infringements caused
- by: (i) third party modifications of Contributor Version,
- or (ii) the combination of Modifications made by that
- Contributor with other software (except as part of the
- Contributor Version) or other devices; or (3) under Patent
- Claims infringed by Covered Software in the absence of
- Modifications made by that Contributor.
-
-3. Distribution Obligations.
-
- 3.1. Availability of Source Code.
-
- Any Covered Software that You distribute or otherwise make
- available in Executable form must also be made available in Source
- Code form and that Source Code form must be distributed only under
- the terms of this License. You must include a copy of this
- License with every copy of the Source Code form of the Covered
- Software You distribute or otherwise make available. You must
- inform recipients of any such Covered Software in Executable form
- as to how they can obtain such Covered Software in Source Code
- form in a reasonable manner on or through a medium customarily
- used for software exchange.
-
- 3.2. Modifications.
-
- The Modifications that You create or to which You contribute are
- governed by the terms of this License. You represent that You
- believe Your Modifications are Your original creation(s) and/or
- You have sufficient rights to grant the rights conveyed by this
- License.
-
- 3.3. Required Notices.
-
- You must include a notice in each of Your Modifications that
- identifies You as the Contributor of the Modification. You may
- not remove or alter any copyright, patent or trademark notices
- contained within the Covered Software, or any notices of licensing
- or any descriptive text giving attribution to any Contributor or
- the Initial Developer.
-
- 3.4. Application of Additional Terms.
-
- You may not offer or impose any terms on any Covered Software in
- Source Code form that alters or restricts the applicable version
- of this License or the recipients' rights hereunder. You may
- choose to offer, and to charge a fee for, warranty, support,
- indemnity or liability obligations to one or more recipients of
- Covered Software. However, you may do so only on Your own behalf,
- and not on behalf of the Initial Developer or any Contributor.
- You must make it absolutely clear that any such warranty, support,
- indemnity or liability obligation is offered by You alone, and You
- hereby agree to indemnify the Initial Developer and every
- Contributor for any liability incurred by the Initial Developer or
- such Contributor as a result of warranty, support, indemnity or
- liability terms You offer.
-
- 3.5. Distribution of Executable Versions.
-
- You may distribute the Executable form of the Covered Software
- under the terms of this License or under the terms of a license of
- Your choice, which may contain terms different from this License,
- provided that You are in compliance with the terms of this License
- and that the license for the Executable form does not attempt to
- limit or alter the recipient's rights in the Source Code form from
- the rights set forth in this License. If You distribute the
- Covered Software in Executable form under a different license, You
- must make it absolutely clear that any terms which differ from
- this License are offered by You alone, not by the Initial
- Developer or Contributor. You hereby agree to indemnify the
- Initial Developer and every Contributor for any liability incurred
- by the Initial Developer or such Contributor as a result of any
- such terms You offer.
-
- 3.6. Larger Works.
-
- You may create a Larger Work by combining Covered Software with
- other code not governed by the terms of this License and
- distribute the Larger Work as a single product. In such a case,
- You must make sure the requirements of this License are fulfilled
- for the Covered Software.
-
-4. Versions of the License.
-
- 4.1. New Versions.
-
- Sun Microsystems, Inc. is the initial license steward and may
- publish revised and/or new versions of this License from time to
- time. Each version will be given a distinguishing version number.
- Except as provided in Section 4.3, no one other than the license
- steward has the right to modify this License.
-
- 4.2. Effect of New Versions.
-
- You may always continue to use, distribute or otherwise make the
- Covered Software available under the terms of the version of the
- License under which You originally received the Covered Software.
- If the Initial Developer includes a notice in the Original
- Software prohibiting it from being distributed or otherwise made
- available under any subsequent version of the License, You must
- distribute and make the Covered Software available under the terms
- of the version of the License under which You originally received
- the Covered Software. Otherwise, You may also choose to use,
- distribute or otherwise make the Covered Software available under
- the terms of any subsequent version of the License published by
- the license steward.
-
- 4.3. Modified Versions.
-
- When You are an Initial Developer and You want to create a new
- license for Your Original Software, You may create and use a
- modified version of this License if You: (a) rename the license
- and remove any references to the name of the license steward
- (except to note that the license differs from this License); and
- (b) otherwise make it clear that the license contains terms which
- differ from this License.
-
-5. DISCLAIMER OF WARRANTY.
-
- COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS"
- BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
- INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED
- SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR
- PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
- PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY
- COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
- INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
- NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
- WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
- ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
- DISCLAIMER.
-
-6. TERMINATION.
-
- 6.1. This License and the rights granted hereunder will terminate
- automatically if You fail to comply with terms herein and fail to
- cure such breach within 30 days of becoming aware of the breach.
- Provisions which, by their nature, must remain in effect beyond
- the termination of this License shall survive.
-
- 6.2. If You assert a patent infringement claim (excluding
- declaratory judgment actions) against Initial Developer or a
- Contributor (the Initial Developer or Contributor against whom You
- assert such claim is referred to as "Participant") alleging that
- the Participant Software (meaning the Contributor Version where
- the Participant is a Contributor or the Original Software where
- the Participant is the Initial Developer) directly or indirectly
- infringes any patent, then any and all rights granted directly or
- indirectly to You by such Participant, the Initial Developer (if
- the Initial Developer is not the Participant) and all Contributors
- under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
- notice from Participant terminate prospectively and automatically
- at the expiration of such 60 day notice period, unless if within
- such 60 day period You withdraw Your claim with respect to the
- Participant Software against such Participant either unilaterally
- or pursuant to a written agreement with Participant.
-
- 6.3. In the event of termination under Sections 6.1 or 6.2 above,
- all end user licenses that have been validly granted by You or any
- distributor hereunder prior to termination (excluding licenses
- granted to You by any distributor) shall survive termination.
-
-7. LIMITATION OF LIABILITY.
-
- UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
- (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE
- INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF
- COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE
- LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR
- CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
- LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK
- STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
- COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
- INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
- LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
- INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT
- APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
- NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR
- CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT
- APPLY TO YOU.
-
-8. U.S. GOVERNMENT END USERS.
-
- The Covered Software is a "commercial item," as that term is
- defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial
- computer software" (as that term is defined at 48
- C.F.R. 252.227-7014(a)(1)) and "commercial computer software
- documentation" as such terms are used in 48 C.F.R. 12.212
- (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48
- C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
- U.S. Government End Users acquire Covered Software with only those
- rights set forth herein. This U.S. Government Rights clause is in
- lieu of, and supersedes, any other FAR, DFAR, or other clause or
- provision that addresses Government rights in computer software
- under this License.
-
-9. MISCELLANEOUS.
-
- This License represents the complete agreement concerning subject
- matter hereof. If any provision of this License is held to be
- unenforceable, such provision shall be reformed only to the extent
- necessary to make it enforceable. This License shall be governed
- by the law of the jurisdiction specified in a notice contained
- within the Original Software (except to the extent applicable law,
- if any, provides otherwise), excluding such jurisdiction's
- conflict-of-law provisions. Any litigation relating to this
- License shall be subject to the jurisdiction of the courts located
- in the jurisdiction and venue specified in a notice contained
- within the Original Software, with the losing party responsible
- for costs, including, without limitation, court costs and
- reasonable attorneys' fees and expenses. The application of the
- United Nations Convention on Contracts for the International Sale
- of Goods is expressly excluded. Any law or regulation which
- provides that the language of a contract shall be construed
- against the drafter shall not apply to this License. You agree
- that You alone are responsible for compliance with the United
- States export administration regulations (and the export control
- laws and regulation of any other countries) when You use,
- distribute or otherwise make available any Covered Software.
-
-10. RESPONSIBILITY FOR CLAIMS.
-
- As between Initial Developer and the Contributors, each party is
- responsible for claims and damages arising, directly or
- indirectly, out of its utilization of rights under this License
- and You agree to work with Initial Developer and Contributors to
- distribute such responsibility on an equitable basis. Nothing
- herein is intended or shall be deemed to constitute any admission
- of liability.
-
---------------------------------------------------------------------
-
-NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND
-DISTRIBUTION LICENSE (CDDL)
-
-For Covered Software in this distribution, this License shall
-be governed by the laws of the State of California (excluding
-conflict-of-law provisions).
-
-Any litigation relating to this License shall be subject to the
-jurisdiction of the Federal Courts of the Northern District of
-California and the state courts of the State of California, with
-venue lying in Santa Clara County, California.
diff --git a/sys/contrib/opensolaris/common/atomic/i386/atomic.S b/sys/contrib/opensolaris/common/atomic/i386/atomic.S
deleted file mode 100644
index bc7f22a..0000000
--- a/sys/contrib/opensolaris/common/atomic/i386/atomic.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
- .ident "%Z%%M% %I% %E% SMI"
-
- .file "%M%"
-
-#define _ASM
-#include <sys/asm_linkage.h>
-
- ENTRY(atomic_add_64)
- ALTENTRY(atomic_add_64_nv)
- pushl %edi
- pushl %ebx
- movl 12(%esp), %edi // %edi = target address
- movl (%edi), %eax
- movl 4(%edi), %edx // %edx:%eax = old value
-1:
- movl 16(%esp), %ebx
- movl 20(%esp), %ecx // %ecx:%ebx = delta
- addl %eax, %ebx
- adcl %edx, %ecx // %ecx:%ebx = new value
- lock
- cmpxchg8b (%edi) // try to stick it in
- jne 1b
- movl %ebx, %eax
- movl %ecx, %edx // return new value
- popl %ebx
- popl %edi
- ret
- SET_SIZE(atomic_add_64_nv)
- SET_SIZE(atomic_add_64)
-
- ENTRY(atomic_or_8_nv)
- movl 4(%esp), %edx // %edx = target address
- movb (%edx), %al // %al = old value
-1:
- movl 8(%esp), %ecx // %ecx = delta
- orb %al, %cl // %cl = new value
- lock
- cmpxchgb %cl, (%edx) // try to stick it in
- jne 1b
- movzbl %cl, %eax // return new value
- ret
- SET_SIZE(atomic_or_8_nv)
-
- ENTRY(atomic_cas_ptr)
- movl 4(%esp), %edx
- movl 8(%esp), %eax
- movl 12(%esp), %ecx
- lock
- cmpxchgl %ecx, (%edx)
- ret
- SET_SIZE(atomic_cas_ptr)
-
- ENTRY(atomic_cas_64)
- pushl %ebx
- pushl %esi
- movl 12(%esp), %esi
- movl 16(%esp), %eax
- movl 20(%esp), %edx
- movl 24(%esp), %ebx
- movl 28(%esp), %ecx
- lock
- cmpxchg8b (%esi)
- popl %esi
- popl %ebx
- ret
- SET_SIZE(atomic_cas_64)
-
- ENTRY(membar_producer)
- lock
- xorl $0, (%esp)
- ret
- SET_SIZE(membar_producer)
diff --git a/sys/contrib/opensolaris/common/atomic/ia64/atomic.S b/sys/contrib/opensolaris/common/atomic/ia64/atomic.S
deleted file mode 100644
index 409d759..0000000
--- a/sys/contrib/opensolaris/common/atomic/ia64/atomic.S
+++ /dev/null
@@ -1,82 +0,0 @@
-/*-
- * Copyright (c) 2007 Marcel Moolenaar
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <machine/asm.h>
-
- .text
-
-/*
- * uint64_t atomic_cas_64(volatile uint64_t *p, uint64_t cmp, uint64_t v)
- */
-ENTRY(atomic_cas_64, 3)
- mov ar.ccv = r33
- ;;
- cmpxchg8.acq r8 = [r32], r34, ar.ccv
- ;;
- br.ret.sptk rp
-END(atomic_cas_64)
-
-/*
- * uint64_t atomic_add_64_nv(volatile uint64_t *p, uint64_t v)
- */
-ENTRY(atomic_add_64_nv, 2)
-1:
- ld8 r16 = [r32]
- ;;
- mov ar.ccv = r16
- add r8 = r16, r33
- ;;
- cmpxchg8.acq r17 = [r32], r8, ar.ccv
- ;;
- cmp.eq p6, p7 = r16, r17
-(p6) br.ret.sptk rp
-(p7) br.cond.spnt 1b
-END(atomic_add_64_nv)
-
-/*
- * uint8_t atomic_or_8_nv(volatile uint8_t *p, uint8_t v)
- */
-ENTRY(atomic_or_8_nv, 2)
-1:
- ld8 r16 = [r32]
- ;;
- mov ar.ccv = r16
- or r8 = r16, r33
- ;;
- cmpxchg1.acq r17 = [r32], r8, ar.ccv
- ;;
- cmp.eq p6, p7 = r16, r17
-(p6) br.ret.sptk rp
-(p7) br.cond.spnt 1b
-END(atomic_or_8_nv)
-
-ENTRY(membar_producer, 0)
- mf.a
- ;;
- br.ret.sptk rp
-END(membar_producer)
diff --git a/sys/contrib/opensolaris/common/avl/avl.c b/sys/contrib/opensolaris/common/avl/avl.c
deleted file mode 100644
index 1fa2236..0000000
--- a/sys/contrib/opensolaris/common/avl/avl.c
+++ /dev/null
@@ -1,969 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-
-/*
- * AVL - generic AVL tree implementation for kernel use
- *
- * A complete description of AVL trees can be found in many CS textbooks.
- *
- * Here is a very brief overview. An AVL tree is a binary search tree that is
- * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
- * any given node, the left and right subtrees are allowed to differ in height
- * by at most 1 level.
- *
- * This relaxation from a perfectly balanced binary tree allows doing
- * insertion and deletion relatively efficiently. Searching the tree is
- * still a fast operation, roughly O(log(N)).
- *
- * The key to insertion and deletion is a set of tree maniuplations called
- * rotations, which bring unbalanced subtrees back into the semi-balanced state.
- *
- * This implementation of AVL trees has the following peculiarities:
- *
- * - The AVL specific data structures are physically embedded as fields
- * in the "using" data structures. To maintain generality the code
- * must constantly translate between "avl_node_t *" and containing
- * data structure "void *"s by adding/subracting the avl_offset.
- *
- * - Since the AVL data is always embedded in other structures, there is
- * no locking or memory allocation in the AVL routines. This must be
- * provided for by the enclosing data structure's semantics. Typically,
- * avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
- * exclusive write lock. Other operations require a read lock.
- *
- * - The implementation uses iteration instead of explicit recursion,
- * since it is intended to run on limited size kernel stacks. Since
- * there is no recursion stack present to move "up" in the tree,
- * there is an explicit "parent" link in the avl_node_t.
- *
- * - The left/right children pointers of a node are in an array.
- * In the code, variables (instead of constants) are used to represent
- * left and right indices. The implementation is written as if it only
- * dealt with left handed manipulations. By changing the value assigned
- * to "left", the code also works for right handed trees. The
- * following variables/terms are frequently used:
- *
- * int left; // 0 when dealing with left children,
- * // 1 for dealing with right children
- *
- * int left_heavy; // -1 when left subtree is taller at some node,
- * // +1 when right subtree is taller
- *
- * int right; // will be the opposite of left (0 or 1)
- * int right_heavy;// will be the opposite of left_heavy (-1 or 1)
- *
- * int direction; // 0 for "<" (ie. left child); 1 for ">" (right)
- *
- * Though it is a little more confusing to read the code, the approach
- * allows using half as much code (and hence cache footprint) for tree
- * manipulations and eliminates many conditional branches.
- *
- * - The avl_index_t is an opaque "cookie" used to find nodes at or
- * adjacent to where a new value would be inserted in the tree. The value
- * is a modified "avl_node_t *". The bottom bit (normally 0 for a
- * pointer) is set to indicate if that the new node has a value greater
- * than the value of the indicated "avl_node_t *".
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/stdint.h>
-#include <sys/debug.h>
-#include <sys/avl.h>
-
-/*
- * Small arrays to translate between balance (or diff) values and child indeces.
- *
- * Code that deals with binary tree data structures will randomly use
- * left and right children when examining a tree. C "if()" statements
- * which evaluate randomly suffer from very poor hardware branch prediction.
- * In this code we avoid some of the branch mispredictions by using the
- * following translation arrays. They replace random branches with an
- * additional memory reference. Since the translation arrays are both very
- * small the data should remain efficiently in cache.
- */
-static const int avl_child2balance[2] = {-1, 1};
-static const int avl_balance2child[] = {0, 0, 1};
-
-
-/*
- * Walk from one node to the previous valued node (ie. an infix walk
- * towards the left). At any given node we do one of 2 things:
- *
- * - If there is a left child, go to it, then to it's rightmost descendant.
- *
- * - otherwise we return thru parent nodes until we've come from a right child.
- *
- * Return Value:
- * NULL - if at the end of the nodes
- * otherwise next node
- */
-void *
-avl_walk(avl_tree_t *tree, void *oldnode, int left)
-{
- size_t off = tree->avl_offset;
- avl_node_t *node = AVL_DATA2NODE(oldnode, off);
- int right = 1 - left;
- int was_child;
-
-
- /*
- * nowhere to walk to if tree is empty
- */
- if (node == NULL)
- return (NULL);
-
- /*
- * Visit the previous valued node. There are two possibilities:
- *
- * If this node has a left child, go down one left, then all
- * the way right.
- */
- if (node->avl_child[left] != NULL) {
- for (node = node->avl_child[left];
- node->avl_child[right] != NULL;
- node = node->avl_child[right])
- ;
- /*
- * Otherwise, return thru left children as far as we can.
- */
- } else {
- for (;;) {
- was_child = AVL_XCHILD(node);
- node = AVL_XPARENT(node);
- if (node == NULL)
- return (NULL);
- if (was_child == right)
- break;
- }
- }
-
- return (AVL_NODE2DATA(node, off));
-}
-
-/*
- * Return the lowest valued node in a tree or NULL.
- * (leftmost child from root of tree)
- */
-void *
-avl_first(avl_tree_t *tree)
-{
- avl_node_t *node;
- avl_node_t *prev = NULL;
- size_t off = tree->avl_offset;
-
- for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
- prev = node;
-
- if (prev != NULL)
- return (AVL_NODE2DATA(prev, off));
- return (NULL);
-}
-
-/*
- * Return the highest valued node in a tree or NULL.
- * (rightmost child from root of tree)
- */
-void *
-avl_last(avl_tree_t *tree)
-{
- avl_node_t *node;
- avl_node_t *prev = NULL;
- size_t off = tree->avl_offset;
-
- for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
- prev = node;
-
- if (prev != NULL)
- return (AVL_NODE2DATA(prev, off));
- return (NULL);
-}
-
-/*
- * Access the node immediately before or after an insertion point.
- *
- * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
- *
- * Return value:
- * NULL: no node in the given direction
- * "void *" of the found tree node
- */
-void *
-avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
-{
- int child = AVL_INDEX2CHILD(where);
- avl_node_t *node = AVL_INDEX2NODE(where);
- void *data;
- size_t off = tree->avl_offset;
-
- if (node == NULL) {
- ASSERT(tree->avl_root == NULL);
- return (NULL);
- }
- data = AVL_NODE2DATA(node, off);
- if (child != direction)
- return (data);
-
- return (avl_walk(tree, data, direction));
-}
-
-
-/*
- * Search for the node which contains "value". The algorithm is a
- * simple binary tree search.
- *
- * return value:
- * NULL: the value is not in the AVL tree
- * *where (if not NULL) is set to indicate the insertion point
- * "void *" of the found tree node
- */
-void *
-avl_find(avl_tree_t *tree, void *value, avl_index_t *where)
-{
- avl_node_t *node;
- avl_node_t *prev = NULL;
- int child = 0;
- int diff;
- size_t off = tree->avl_offset;
-
- for (node = tree->avl_root; node != NULL;
- node = node->avl_child[child]) {
-
- prev = node;
-
- diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
- ASSERT(-1 <= diff && diff <= 1);
- if (diff == 0) {
-#ifdef DEBUG
- if (where != NULL)
- *where = 0;
-#endif
- return (AVL_NODE2DATA(node, off));
- }
- child = avl_balance2child[1 + diff];
-
- }
-
- if (where != NULL)
- *where = AVL_MKINDEX(prev, child);
-
- return (NULL);
-}
-
-
-/*
- * Perform a rotation to restore balance at the subtree given by depth.
- *
- * This routine is used by both insertion and deletion. The return value
- * indicates:
- * 0 : subtree did not change height
- * !0 : subtree was reduced in height
- *
- * The code is written as if handling left rotations, right rotations are
- * symmetric and handled by swapping values of variables right/left[_heavy]
- *
- * On input balance is the "new" balance at "node". This value is either
- * -2 or +2.
- */
-static int
-avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
-{
- int left = !(balance < 0); /* when balance = -2, left will be 0 */
- int right = 1 - left;
- int left_heavy = balance >> 1;
- int right_heavy = -left_heavy;
- avl_node_t *parent = AVL_XPARENT(node);
- avl_node_t *child = node->avl_child[left];
- avl_node_t *cright;
- avl_node_t *gchild;
- avl_node_t *gright;
- avl_node_t *gleft;
- int which_child = AVL_XCHILD(node);
- int child_bal = AVL_XBALANCE(child);
-
- /* BEGIN CSTYLED */
- /*
- * case 1 : node is overly left heavy, the left child is balanced or
- * also left heavy. This requires the following rotation.
- *
- * (node bal:-2)
- * / \
- * / \
- * (child bal:0 or -1)
- * / \
- * / \
- * cright
- *
- * becomes:
- *
- * (child bal:1 or 0)
- * / \
- * / \
- * (node bal:-1 or 0)
- * / \
- * / \
- * cright
- *
- * we detect this situation by noting that child's balance is not
- * right_heavy.
- */
- /* END CSTYLED */
- if (child_bal != right_heavy) {
-
- /*
- * compute new balance of nodes
- *
- * If child used to be left heavy (now balanced) we reduced
- * the height of this sub-tree -- used in "return...;" below
- */
- child_bal += right_heavy; /* adjust towards right */
-
- /*
- * move "cright" to be node's left child
- */
- cright = child->avl_child[right];
- node->avl_child[left] = cright;
- if (cright != NULL) {
- AVL_SETPARENT(cright, node);
- AVL_SETCHILD(cright, left);
- }
-
- /*
- * move node to be child's right child
- */
- child->avl_child[right] = node;
- AVL_SETBALANCE(node, -child_bal);
- AVL_SETCHILD(node, right);
- AVL_SETPARENT(node, child);
-
- /*
- * update the pointer into this subtree
- */
- AVL_SETBALANCE(child, child_bal);
- AVL_SETCHILD(child, which_child);
- AVL_SETPARENT(child, parent);
- if (parent != NULL)
- parent->avl_child[which_child] = child;
- else
- tree->avl_root = child;
-
- return (child_bal == 0);
- }
-
- /* BEGIN CSTYLED */
- /*
- * case 2 : When node is left heavy, but child is right heavy we use
- * a different rotation.
- *
- * (node b:-2)
- * / \
- * / \
- * / \
- * (child b:+1)
- * / \
- * / \
- * (gchild b: != 0)
- * / \
- * / \
- * gleft gright
- *
- * becomes:
- *
- * (gchild b:0)
- * / \
- * / \
- * / \
- * (child b:?) (node b:?)
- * / \ / \
- * / \ / \
- * gleft gright
- *
- * computing the new balances is more complicated. As an example:
- * if gchild was right_heavy, then child is now left heavy
- * else it is balanced
- */
- /* END CSTYLED */
- gchild = child->avl_child[right];
- gleft = gchild->avl_child[left];
- gright = gchild->avl_child[right];
-
- /*
- * move gright to left child of node and
- *
- * move gleft to right child of node
- */
- node->avl_child[left] = gright;
- if (gright != NULL) {
- AVL_SETPARENT(gright, node);
- AVL_SETCHILD(gright, left);
- }
-
- child->avl_child[right] = gleft;
- if (gleft != NULL) {
- AVL_SETPARENT(gleft, child);
- AVL_SETCHILD(gleft, right);
- }
-
- /*
- * move child to left child of gchild and
- *
- * move node to right child of gchild and
- *
- * fixup parent of all this to point to gchild
- */
- balance = AVL_XBALANCE(gchild);
- gchild->avl_child[left] = child;
- AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
- AVL_SETPARENT(child, gchild);
- AVL_SETCHILD(child, left);
-
- gchild->avl_child[right] = node;
- AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
- AVL_SETPARENT(node, gchild);
- AVL_SETCHILD(node, right);
-
- AVL_SETBALANCE(gchild, 0);
- AVL_SETPARENT(gchild, parent);
- AVL_SETCHILD(gchild, which_child);
- if (parent != NULL)
- parent->avl_child[which_child] = gchild;
- else
- tree->avl_root = gchild;
-
- return (1); /* the new tree is always shorter */
-}
-
-
-/*
- * Insert a new node into an AVL tree at the specified (from avl_find()) place.
- *
- * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
- * searches out to the leaf positions. The avl_index_t indicates the node
- * which will be the parent of the new node.
- *
- * After the node is inserted, a single rotation further up the tree may
- * be necessary to maintain an acceptable AVL balance.
- */
-void
-avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
-{
- avl_node_t *node;
- avl_node_t *parent = AVL_INDEX2NODE(where);
- int old_balance;
- int new_balance;
- int which_child = AVL_INDEX2CHILD(where);
- size_t off = tree->avl_offset;
-
- ASSERT(tree);
-#ifdef _LP64
- ASSERT(((uintptr_t)new_data & 0x7) == 0);
-#endif
-
- node = AVL_DATA2NODE(new_data, off);
-
- /*
- * First, add the node to the tree at the indicated position.
- */
- ++tree->avl_numnodes;
-
- node->avl_child[0] = NULL;
- node->avl_child[1] = NULL;
-
- AVL_SETCHILD(node, which_child);
- AVL_SETBALANCE(node, 0);
- AVL_SETPARENT(node, parent);
- if (parent != NULL) {
- ASSERT(parent->avl_child[which_child] == NULL);
- parent->avl_child[which_child] = node;
- } else {
- ASSERT(tree->avl_root == NULL);
- tree->avl_root = node;
- }
- /*
- * Now, back up the tree modifying the balance of all nodes above the
- * insertion point. If we get to a highly unbalanced ancestor, we
- * need to do a rotation. If we back out of the tree we are done.
- * If we brought any subtree into perfect balance (0), we are also done.
- */
- for (;;) {
- node = parent;
- if (node == NULL)
- return;
-
- /*
- * Compute the new balance
- */
- old_balance = AVL_XBALANCE(node);
- new_balance = old_balance + avl_child2balance[which_child];
-
- /*
- * If we introduced equal balance, then we are done immediately
- */
- if (new_balance == 0) {
- AVL_SETBALANCE(node, 0);
- return;
- }
-
- /*
- * If both old and new are not zero we went
- * from -1 to -2 balance, do a rotation.
- */
- if (old_balance != 0)
- break;
-
- AVL_SETBALANCE(node, new_balance);
- parent = AVL_XPARENT(node);
- which_child = AVL_XCHILD(node);
- }
-
- /*
- * perform a rotation to fix the tree and return
- */
- (void) avl_rotation(tree, node, new_balance);
-}
-
-/*
- * Insert "new_data" in "tree" in the given "direction" either after or
- * before (AVL_AFTER, AVL_BEFORE) the data "here".
- *
- * Insertions can only be done at empty leaf points in the tree, therefore
- * if the given child of the node is already present we move to either
- * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
- * every other node in the tree is a leaf, this always works.
- *
- * To help developers using this interface, we assert that the new node
- * is correctly ordered at every step of the way in DEBUG kernels.
- */
-void
-avl_insert_here(
- avl_tree_t *tree,
- void *new_data,
- void *here,
- int direction)
-{
- avl_node_t *node;
- int child = direction; /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
-#ifdef DEBUG
- int diff;
-#endif
-
- ASSERT(tree != NULL);
- ASSERT(new_data != NULL);
- ASSERT(here != NULL);
- ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
-
- /*
- * If corresponding child of node is not NULL, go to the neighboring
- * node and reverse the insertion direction.
- */
- node = AVL_DATA2NODE(here, tree->avl_offset);
-
-#ifdef DEBUG
- diff = tree->avl_compar(new_data, here);
- ASSERT(-1 <= diff && diff <= 1);
- ASSERT(diff != 0);
- ASSERT(diff > 0 ? child == 1 : child == 0);
-#endif
-
- if (node->avl_child[child] != NULL) {
- node = node->avl_child[child];
- child = 1 - child;
- while (node->avl_child[child] != NULL) {
-#ifdef DEBUG
- diff = tree->avl_compar(new_data,
- AVL_NODE2DATA(node, tree->avl_offset));
- ASSERT(-1 <= diff && diff <= 1);
- ASSERT(diff != 0);
- ASSERT(diff > 0 ? child == 1 : child == 0);
-#endif
- node = node->avl_child[child];
- }
-#ifdef DEBUG
- diff = tree->avl_compar(new_data,
- AVL_NODE2DATA(node, tree->avl_offset));
- ASSERT(-1 <= diff && diff <= 1);
- ASSERT(diff != 0);
- ASSERT(diff > 0 ? child == 1 : child == 0);
-#endif
- }
- ASSERT(node->avl_child[child] == NULL);
-
- avl_insert(tree, new_data, AVL_MKINDEX(node, child));
-}
-
-/*
- * Add a new node to an AVL tree.
- */
-void
-avl_add(avl_tree_t *tree, void *new_node)
-{
- avl_index_t where;
-
- /*
- * This is unfortunate. We want to call panic() here, even for
- * non-DEBUG kernels. In userland, however, we can't depend on anything
- * in libc or else the rtld build process gets confused. So, all we can
- * do in userland is resort to a normal ASSERT().
- */
- if (avl_find(tree, new_node, &where) != NULL)
-#ifdef _KERNEL
- panic("avl_find() succeeded inside avl_add()");
-#else
- ASSERT(0);
-#endif
- avl_insert(tree, new_node, where);
-}
-
-/*
- * Delete a node from the AVL tree. Deletion is similar to insertion, but
- * with 2 complications.
- *
- * First, we may be deleting an interior node. Consider the following subtree:
- *
- * d c c
- * / \ / \ / \
- * b e b e b e
- * / \ / \ /
- * a c a a
- *
- * When we are deleting node (d), we find and bring up an adjacent valued leaf
- * node, say (c), to take the interior node's place. In the code this is
- * handled by temporarily swapping (d) and (c) in the tree and then using
- * common code to delete (d) from the leaf position.
- *
- * Secondly, an interior deletion from a deep tree may require more than one
- * rotation to fix the balance. This is handled by moving up the tree through
- * parents and applying rotations as needed. The return value from
- * avl_rotation() is used to detect when a subtree did not change overall
- * height due to a rotation.
- */
-void
-avl_remove(avl_tree_t *tree, void *data)
-{
- avl_node_t *delete;
- avl_node_t *parent;
- avl_node_t *node;
- avl_node_t tmp;
- int old_balance;
- int new_balance;
- int left;
- int right;
- int which_child;
- size_t off = tree->avl_offset;
-
- ASSERT(tree);
-
- delete = AVL_DATA2NODE(data, off);
-
- /*
- * Deletion is easiest with a node that has at most 1 child.
- * We swap a node with 2 children with a sequentially valued
- * neighbor node. That node will have at most 1 child. Note this
- * has no effect on the ordering of the remaining nodes.
- *
- * As an optimization, we choose the greater neighbor if the tree
- * is right heavy, otherwise the left neighbor. This reduces the
- * number of rotations needed.
- */
- if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
-
- /*
- * choose node to swap from whichever side is taller
- */
- old_balance = AVL_XBALANCE(delete);
- left = avl_balance2child[old_balance + 1];
- right = 1 - left;
-
- /*
- * get to the previous value'd node
- * (down 1 left, as far as possible right)
- */
- for (node = delete->avl_child[left];
- node->avl_child[right] != NULL;
- node = node->avl_child[right])
- ;
-
- /*
- * create a temp placeholder for 'node'
- * move 'node' to delete's spot in the tree
- */
- tmp = *node;
-
- *node = *delete;
- if (node->avl_child[left] == node)
- node->avl_child[left] = &tmp;
-
- parent = AVL_XPARENT(node);
- if (parent != NULL)
- parent->avl_child[AVL_XCHILD(node)] = node;
- else
- tree->avl_root = node;
- AVL_SETPARENT(node->avl_child[left], node);
- AVL_SETPARENT(node->avl_child[right], node);
-
- /*
- * Put tmp where node used to be (just temporary).
- * It always has a parent and at most 1 child.
- */
- delete = &tmp;
- parent = AVL_XPARENT(delete);
- parent->avl_child[AVL_XCHILD(delete)] = delete;
- which_child = (delete->avl_child[1] != 0);
- if (delete->avl_child[which_child] != NULL)
- AVL_SETPARENT(delete->avl_child[which_child], delete);
- }
-
-
- /*
- * Here we know "delete" is at least partially a leaf node. It can
- * be easily removed from the tree.
- */
- ASSERT(tree->avl_numnodes > 0);
- --tree->avl_numnodes;
- parent = AVL_XPARENT(delete);
- which_child = AVL_XCHILD(delete);
- if (delete->avl_child[0] != NULL)
- node = delete->avl_child[0];
- else
- node = delete->avl_child[1];
-
- /*
- * Connect parent directly to node (leaving out delete).
- */
- if (node != NULL) {
- AVL_SETPARENT(node, parent);
- AVL_SETCHILD(node, which_child);
- }
- if (parent == NULL) {
- tree->avl_root = node;
- return;
- }
- parent->avl_child[which_child] = node;
-
-
- /*
- * Since the subtree is now shorter, begin adjusting parent balances
- * and performing any needed rotations.
- */
- do {
-
- /*
- * Move up the tree and adjust the balance
- *
- * Capture the parent and which_child values for the next
- * iteration before any rotations occur.
- */
- node = parent;
- old_balance = AVL_XBALANCE(node);
- new_balance = old_balance - avl_child2balance[which_child];
- parent = AVL_XPARENT(node);
- which_child = AVL_XCHILD(node);
-
- /*
- * If a node was in perfect balance but isn't anymore then
- * we can stop, since the height didn't change above this point
- * due to a deletion.
- */
- if (old_balance == 0) {
- AVL_SETBALANCE(node, new_balance);
- break;
- }
-
- /*
- * If the new balance is zero, we don't need to rotate
- * else
- * need a rotation to fix the balance.
- * If the rotation doesn't change the height
- * of the sub-tree we have finished adjusting.
- */
- if (new_balance == 0)
- AVL_SETBALANCE(node, new_balance);
- else if (!avl_rotation(tree, node, new_balance))
- break;
- } while (parent != NULL);
-}
-
-/*
- * initialize a new AVL tree
- */
-void
-avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
- size_t size, size_t offset)
-{
- ASSERT(tree);
- ASSERT(compar);
- ASSERT(size > 0);
- ASSERT(size >= offset + sizeof (avl_node_t));
-#ifdef _LP64
- ASSERT((offset & 0x7) == 0);
-#endif
-
- tree->avl_compar = compar;
- tree->avl_root = NULL;
- tree->avl_numnodes = 0;
- tree->avl_size = size;
- tree->avl_offset = offset;
-}
-
-/*
- * Delete a tree.
- */
-/* ARGSUSED */
-void
-avl_destroy(avl_tree_t *tree)
-{
- ASSERT(tree);
- ASSERT(tree->avl_numnodes == 0);
- ASSERT(tree->avl_root == NULL);
-}
-
-
-/*
- * Return the number of nodes in an AVL tree.
- */
-ulong_t
-avl_numnodes(avl_tree_t *tree)
-{
- ASSERT(tree);
- return (tree->avl_numnodes);
-}
-
-
-#define CHILDBIT (1L)
-
-/*
- * Post-order tree walk used to visit all tree nodes and destroy the tree
- * in post order. This is used for destroying a tree w/o paying any cost
- * for rebalancing it.
- *
- * example:
- *
- * void *cookie = NULL;
- * my_data_t *node;
- *
- * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
- * free(node);
- * avl_destroy(tree);
- *
- * The cookie is really an avl_node_t to the current node's parent and
- * an indication of which child you looked at last.
- *
- * On input, a cookie value of CHILDBIT indicates the tree is done.
- */
-void *
-avl_destroy_nodes(avl_tree_t *tree, void **cookie)
-{
- avl_node_t *node;
- avl_node_t *parent;
- int child;
- void *first;
- size_t off = tree->avl_offset;
-
- /*
- * Initial calls go to the first node or it's right descendant.
- */
- if (*cookie == NULL) {
- first = avl_first(tree);
-
- /*
- * deal with an empty tree
- */
- if (first == NULL) {
- *cookie = (void *)CHILDBIT;
- return (NULL);
- }
-
- node = AVL_DATA2NODE(first, off);
- parent = AVL_XPARENT(node);
- goto check_right_side;
- }
-
- /*
- * If there is no parent to return to we are done.
- */
- parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
- if (parent == NULL) {
- if (tree->avl_root != NULL) {
- ASSERT(tree->avl_numnodes == 1);
- tree->avl_root = NULL;
- tree->avl_numnodes = 0;
- }
- return (NULL);
- }
-
- /*
- * Remove the child pointer we just visited from the parent and tree.
- */
- child = (uintptr_t)(*cookie) & CHILDBIT;
- parent->avl_child[child] = NULL;
- ASSERT(tree->avl_numnodes > 1);
- --tree->avl_numnodes;
-
- /*
- * If we just did a right child or there isn't one, go up to parent.
- */
- if (child == 1 || parent->avl_child[1] == NULL) {
- node = parent;
- parent = AVL_XPARENT(parent);
- goto done;
- }
-
- /*
- * Do parent's right child, then leftmost descendent.
- */
- node = parent->avl_child[1];
- while (node->avl_child[0] != NULL) {
- parent = node;
- node = node->avl_child[0];
- }
-
- /*
- * If here, we moved to a left child. It may have one
- * child on the right (when balance == +1).
- */
-check_right_side:
- if (node->avl_child[1] != NULL) {
- ASSERT(AVL_XBALANCE(node) == 1);
- parent = node;
- node = node->avl_child[1];
- ASSERT(node->avl_child[0] == NULL &&
- node->avl_child[1] == NULL);
- } else {
- ASSERT(AVL_XBALANCE(node) <= 0);
- }
-
-done:
- if (parent == NULL) {
- *cookie = (void *)CHILDBIT;
- ASSERT(node == tree->avl_root);
- } else {
- *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
- }
-
- return (AVL_NODE2DATA(node, off));
-}
diff --git a/sys/contrib/opensolaris/common/nvpair/nvpair.c b/sys/contrib/opensolaris/common/nvpair/nvpair.c
deleted file mode 100644
index d3d5bed..0000000
--- a/sys/contrib/opensolaris/common/nvpair/nvpair.c
+++ /dev/null
@@ -1,2953 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/debug.h>
-#include <sys/nvpair.h>
-#include <sys/nvpair_impl.h>
-#include <rpc/types.h>
-#include <rpc/xdr.h>
-
-#if defined(_KERNEL) && !defined(_BOOT)
-#include <sys/varargs.h>
-#else
-#include <stdarg.h>
-#include <strings.h>
-#endif
-
-#ifndef offsetof
-#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
-#endif
-
-
-/*
- * nvpair.c - Provides kernel & userland interfaces for manipulating
- * name-value pairs.
- *
- * Overview Diagram
- *
- * +--------------+
- * | nvlist_t |
- * |--------------|
- * | nvl_version |
- * | nvl_nvflag |
- * | nvl_priv -+-+
- * | nvl_flag | |
- * | nvl_pad | |
- * +--------------+ |
- * V
- * +--------------+ last i_nvp in list
- * | nvpriv_t | +--------------------->
- * |--------------| |
- * +--+- nvp_list | | +------------+
- * | | nvp_last -+--+ + nv_alloc_t |
- * | | nvp_curr | |------------|
- * | | nvp_nva -+----> | nva_ops |
- * | | nvp_stat | | nva_arg |
- * | +--------------+ +------------+
- * |
- * +-------+
- * V
- * +---------------------+ +-------------------+
- * | i_nvp_t | +-->| i_nvp_t | +-->
- * |---------------------| | |-------------------| |
- * | nvi_next -+--+ | nvi_next -+--+
- * | nvi_prev (NULL) | <----+ nvi_prev |
- * | . . . . . . . . . . | | . . . . . . . . . |
- * | nvp (nvpair_t) | | nvp (nvpair_t) |
- * | - nvp_size | | - nvp_size |
- * | - nvp_name_sz | | - nvp_name_sz |
- * | - nvp_value_elem | | - nvp_value_elem |
- * | - nvp_type | | - nvp_type |
- * | - data ... | | - data ... |
- * +---------------------+ +-------------------+
- *
- *
- *
- * +---------------------+ +---------------------+
- * | i_nvp_t | +--> +-->| i_nvp_t (last) |
- * |---------------------| | | |---------------------|
- * | nvi_next -+--+ ... --+ | nvi_next (NULL) |
- * <-+- nvi_prev |<-- ... <----+ nvi_prev |
- * | . . . . . . . . . | | . . . . . . . . . |
- * | nvp (nvpair_t) | | nvp (nvpair_t) |
- * | - nvp_size | | - nvp_size |
- * | - nvp_name_sz | | - nvp_name_sz |
- * | - nvp_value_elem | | - nvp_value_elem |
- * | - DATA_TYPE_NVLIST | | - nvp_type |
- * | - data (embedded) | | - data ... |
- * | nvlist name | +---------------------+
- * | +--------------+ |
- * | | nvlist_t | |
- * | |--------------| |
- * | | nvl_version | |
- * | | nvl_nvflag | |
- * | | nvl_priv --+---+---->
- * | | nvl_flag | |
- * | | nvl_pad | |
- * | +--------------+ |
- * +---------------------+
- *
- *
- * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will
- * allow value to be aligned on 8 byte boundary
- *
- * name_len is the length of the name string including the null terminator
- * so it must be >= 1
- */
-/*
- * Size of an nvpair whose name occupies name_len bytes (terminator included)
- * and whose value occupies data_len bytes.  The header+name part and the
- * value part are each rounded up with NV_ALIGN.  Both arguments are
- * parenthesized so that expression arguments expand as intended.
- */
-#define	NVP_SIZE_CALC(name_len, data_len) \
-	(NV_ALIGN((sizeof (nvpair_t)) + (name_len)) + NV_ALIGN((data_len)))
-
-static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);
-static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,
- uint_t nelem, const void *data);
-
-/* nvp_stat flag given to the nvpriv_t of an nvlist embedded in an nvpair */
-#define NV_STAT_EMBEDDED 0x1
-/* View an nvpair's value area as an nvlist_t / as an array of nvlist_t * */
-#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp))
-#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp))
-
-/* Aligned size of the nvpair header plus its name */
-#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz))
-/* Recover the enclosing i_nvp_t from a pointer to its nvi_nvp member */
-#define NVPAIR2I_NVP(nvp) \
- ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp)))
-
-
-/*
- * Bind the operations vector nvo to nva and clear the allocator argument.
- * If the vector provides an nv_ao_init hook it is called with the caller's
- * variable arguments and its result is returned; otherwise 0 is returned.
- */
-int
-nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...)
-{
-	va_list ap;
-	int rc;
-
-	nva->nva_arg = NULL;
-	nva->nva_ops = nvo;
-
-	va_start(ap, nvo);
-	rc = (nva->nva_ops->nv_ao_init == NULL) ? 0 :
-	    nva->nva_ops->nv_ao_init(nva, ap);
-	va_end(ap);
-
-	return (rc);
-}
-
-/*
- * Invoke the allocator's nv_ao_reset hook, if it has one.
- */
-void
-nv_alloc_reset(nv_alloc_t *nva)
-{
-	if (nva->nva_ops->nv_ao_reset == NULL)
-		return;
-
-	nva->nva_ops->nv_ao_reset(nva);
-}
-
-/*
- * Invoke the allocator's nv_ao_fini hook, if it has one.
- */
-void
-nv_alloc_fini(nv_alloc_t *nva)
-{
-	if (nva->nva_ops->nv_ao_fini == NULL)
-		return;
-
-	nva->nva_ops->nv_ao_fini(nva);
-}
-
-/*
- * Return the allocator recorded in nvl's private data, or NULL when nvl is
- * NULL or has no private data attached.
- */
-nv_alloc_t *
-nvlist_lookup_nv_alloc(nvlist_t *nvl)
-{
-	nvpriv_t *priv;
-
-	if (nvl == NULL)
-		return (NULL);
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-
-	return (priv == NULL ? NULL : priv->nvp_nva);
-}
-
-/*
- * Allocate size bytes through the allocator recorded in nvp and zero them.
- * Returns NULL if the allocator fails.
- */
-static void *
-nv_mem_zalloc(nvpriv_t *nvp, size_t size)
-{
-	nv_alloc_t *nva = nvp->nvp_nva;
-	void *mem = nva->nva_ops->nv_ao_alloc(nva, size);
-
-	if (mem == NULL)
-		return (NULL);
-
-	bzero(mem, size);
-	return (mem);
-}
-
-/*
- * Hand a buffer of size bytes back to the allocator recorded in nvp.
- */
-static void
-nv_mem_free(nvpriv_t *nvp, void *buf, size_t size)
-{
-	nv_alloc_t *allocator = nvp->nvp_nva;
-
-	allocator->nva_ops->nv_ao_free(allocator, buf, size);
-}
-
-/*
- * Initialize a freshly allocated nvpriv_t: clear every field, then record
- * the allocator and the status flags.
- *
- * The whole structure must be cleared, not just sizeof (priv) bytes (the
- * size of the pointer): nv_priv_alloc() passes memory straight from
- * nv_ao_alloc() that has not been zeroed, so a short bzero could leave the
- * list pointers (nvp_list, nvp_last, nvp_curr) holding garbage.
- */
-static void
-nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat)
-{
-	bzero(priv, sizeof (*priv));
-
-	priv->nvp_nva = nva;
-	priv->nvp_stat = stat;
-}
-
-/*
- * Allocate and initialize the private data for a top-level nvlist using
- * allocator nva.  Returns NULL if the allocation fails.
- */
-static nvpriv_t *
-nv_priv_alloc(nv_alloc_t *nva)
-{
-	nvpriv_t *priv;
-
-	/*
-	 * nv_mem_zalloc() cannot be called here because it needs the priv
-	 * argument, which is the very object being created.
-	 */
-	priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t));
-	if (priv != NULL)
-		nv_priv_init(priv, nva, 0);
-
-	return (priv);
-}
-
-/*
- * An embedded list gets an nvpriv_t of its own.  It is allocated with, and
- * records, the allocator of the parent list's nvpriv_t and is flagged
- * NV_STAT_EMBEDDED.  Returns NULL if the allocation fails.
- */
-static nvpriv_t *
-nv_priv_alloc_embedded(nvpriv_t *priv)
-{
-	nvpriv_t *child = nv_mem_zalloc(priv, sizeof (nvpriv_t));
-
-	if (child != NULL)
-		nv_priv_init(child, priv->nvp_nva, NV_STAT_EMBEDDED);
-
-	return (child);
-}
-
-/*
- * Fill in the header of an nvlist: current version, the uniqueness bits of
- * nvflag (all other bits are dropped), and the private data pointer.
- */
-static void
-nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv)
-{
-	const uint32_t unique_bits = NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE;
-
-	nvl->nvl_version = NV_VERSION;
-	nvl->nvl_nvflag = nvflag & unique_bits;
-	nvl->nvl_priv = (uint64_t)(uintptr_t)priv;
-	nvl->nvl_pad = 0;
-	nvl->nvl_flag = 0;
-}
-
-/*
- * nvlist_alloc - Allocate nvlist.
- *
- * Picks the allocator and defers to nvlist_xalloc().  Kernel (non-boot)
- * builds use nv_alloc_sleep when kmflag is KM_SLEEP; in every other case
- * nv_alloc_nosleep is used and kmflag is ignored.
- */
-/*ARGSUSED1*/
-int
-nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag)
-{
-	nv_alloc_t *nva = nv_alloc_nosleep;
-
-#if defined(_KERNEL) && !defined(_BOOT)
-	if (kmflag == KM_SLEEP)
-		nva = nv_alloc_sleep;
-#endif
-	return (nvlist_xalloc(nvlp, nvflag, nva));
-}
-
-/*
- * Allocate an empty nvlist with allocator nva and store it in *nvlp.
- * Returns EINVAL for NULL arguments, ENOMEM if either the private data or
- * the list header cannot be allocated (*nvlp is then NULL if the header
- * allocation was the one that failed), and 0 on success.
- */
-int
-nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva)
-{
-	nvpriv_t *priv;
-	nvlist_t *nvl;
-
-	if (nvlp == NULL || nva == NULL)
-		return (EINVAL);
-
-	priv = nv_priv_alloc(nva);
-	if (priv == NULL)
-		return (ENOMEM);
-
-	nvl = nv_mem_zalloc(priv, NV_ALIGN(sizeof (nvlist_t)));
-	*nvlp = nvl;
-	if (nvl == NULL) {
-		/* the private data is freed through its own allocator */
-		nv_mem_free(priv, priv, sizeof (nvpriv_t));
-		return (ENOMEM);
-	}
-
-	nvlist_init(nvl, nvflag, priv);
-
-	return (0);
-}
-
-/*
- * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair.
- *
- * len is the size of the nvpair itself; the buffer additionally covers the
- * part of i_nvp_t that precedes nvi_nvp.  The buffer is zeroed and nvp_size
- * is set to len.  Returns the embedded nvpair, or NULL on allocation
- * failure.
- */
-static nvpair_t *
-nvp_buf_alloc(nvlist_t *nvl, size_t len)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	i_nvp_t *wrapper;
-
-	wrapper = nv_mem_zalloc(priv, len + offsetof(i_nvp_t, nvi_nvp));
-	if (wrapper == NULL)
-		return (NULL);
-
-	wrapper->nvi_nvp.nvp_size = len;
-
-	return (&wrapper->nvi_nvp);
-}
-
-/*
- * nvp_buf_free - de-Allocate an i_nvp_t.
- *
- * The size handed back to the allocator is rebuilt from nvp_size plus the
- * part of i_nvp_t that precedes nvi_nvp, matching nvp_buf_alloc().
- */
-static void
-nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	i_nvp_t *wrapper = NVPAIR2I_NVP(nvp);
-	size_t total = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp);
-
-	nv_mem_free(priv, wrapper, total);
-}
-
-/*
- * nvp_buf_link - link a new nv pair into the nvlist.
- *
- * The pair always becomes the last element of the list.
- */
-static void
-nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	i_nvp_t *node = NVPAIR2I_NVP(nvp);
-
-	if (priv->nvp_list == NULL) {
-		/* empty list: the new node is also the head */
-		priv->nvp_list = node;
-	} else {
-		i_nvp_t *tail = priv->nvp_last;
-
-		node->nvi_prev = tail;
-		tail->nvi_next = node;
-	}
-	priv->nvp_last = node;
-}
-
-/*
- * nvp_buf_unlink - unlink a removed nvpair from the nvlist.
- *
- * Only the list linkage is updated; the pair itself is not freed.
- */
-static void
-nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	i_nvp_t *node = NVPAIR2I_NVP(nvp);
-	i_nvp_t *next = node->nvi_next;
-	i_nvp_t *prev = node->nvi_prev;
-
-	/*
-	 * Move the iteration cursor off the node so nvlist_next_nvpair()
-	 * does not walk on freed memory.
-	 */
-	if (priv->nvp_curr == node)
-		priv->nvp_curr = next;
-
-	if (priv->nvp_list == node)
-		priv->nvp_list = next;
-	else
-		prev->nvi_next = next;
-
-	if (priv->nvp_last == node)
-		priv->nvp_last = prev;
-	else
-		next->nvi_prev = prev;
-}
-
-/*
- * Take an nvpair type and a number of elements and make sure they are
- * valid together: DATA_TYPE_BOOLEAN carries no element, every other
- * non-array type carries exactly one, and array types accept any count.
- * Returns 0 when valid, EINVAL for a bad count or an unknown type.
- */
-static int
-i_validate_type_nelem(data_type_t type, uint_t nelem)
-{
-	int err = 0;
-
-	switch (type) {
-	case DATA_TYPE_BOOLEAN:
-		if (nelem != 0)
-			err = EINVAL;
-		break;
-	case DATA_TYPE_BOOLEAN_VALUE:
-	case DATA_TYPE_BYTE:
-	case DATA_TYPE_INT8:
-	case DATA_TYPE_UINT8:
-	case DATA_TYPE_INT16:
-	case DATA_TYPE_UINT16:
-	case DATA_TYPE_INT32:
-	case DATA_TYPE_UINT32:
-	case DATA_TYPE_INT64:
-	case DATA_TYPE_UINT64:
-	case DATA_TYPE_STRING:
-	case DATA_TYPE_HRTIME:
-	case DATA_TYPE_NVLIST:
-		if (nelem != 1)
-			err = EINVAL;
-		break;
-	case DATA_TYPE_BOOLEAN_ARRAY:
-	case DATA_TYPE_BYTE_ARRAY:
-	case DATA_TYPE_INT8_ARRAY:
-	case DATA_TYPE_UINT8_ARRAY:
-	case DATA_TYPE_INT16_ARRAY:
-	case DATA_TYPE_UINT16_ARRAY:
-	case DATA_TYPE_INT32_ARRAY:
-	case DATA_TYPE_UINT32_ARRAY:
-	case DATA_TYPE_INT64_ARRAY:
-	case DATA_TYPE_UINT64_ARRAY:
-	case DATA_TYPE_STRING_ARRAY:
-	case DATA_TYPE_NVLIST_ARRAY:
-		/* arrays with 0 elements are allowed */
-		break;
-	default:
-		err = EINVAL;
-		break;
-	}
-
-	return (err);
-}
-
-/*
- * Verify nvp_name_sz and check the name string length.
- *
- * The name size must be positive and fit within nvp_size, the last name
- * byte must be the terminator, and no earlier terminator may appear.
- * Returns 0 if all of that holds, EFAULT otherwise.
- */
-static int
-i_validate_nvpair_name(nvpair_t *nvp)
-{
-	const char *name;
-
-	if (nvp->nvp_name_sz <= 0)
-		return (EFAULT);
-
-	if (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))
-		return (EFAULT);
-
-	/* verify the name string, make sure it is terminated */
-	name = NVP_NAME(nvp);
-	if (name[nvp->nvp_name_sz - 1] != '\0')
-		return (EFAULT);
-
-	if (strlen(name) != nvp->nvp_name_sz - 1)
-		return (EFAULT);
-
-	return (0);
-}
-
-/*
- * Check the value of a pair before it is stored or trusted.  Only boolean
- * values are inspected: a DATA_TYPE_BOOLEAN_VALUE, and each of the nelem
- * entries of a DATA_TYPE_BOOLEAN_ARRAY, must be B_TRUE or B_FALSE.  Every
- * other type is accepted as is.  Returns 0 if acceptable, EINVAL otherwise.
- *
- * The index is unsigned like nelem, so the loop bound is never a mixed
- * signed/unsigned comparison, and data is read through a const pointer
- * rather than casting its qualifier away.
- */
-static int
-i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data)
-{
-	const boolean_t *bp = data;
-	uint_t i;
-
-	switch (type) {
-	case DATA_TYPE_BOOLEAN_VALUE:
-		if (*bp != B_TRUE && *bp != B_FALSE)
-			return (EINVAL);
-		break;
-	case DATA_TYPE_BOOLEAN_ARRAY:
-		for (i = 0; i < nelem; i++) {
-			if (bp[i] != B_TRUE && bp[i] != B_FALSE)
-				return (EINVAL);
-		}
-		break;
-	default:
-		break;
-	}
-
-	return (0);
-}
-
-/*
- * This function takes a pointer to what should be an nvpair and verifies
- * that all the nvpair fields make sense and can be trusted.  This function
- * is used when decoding packed nvpairs.  Returns 0 if the pair is
- * consistent, EFAULT otherwise.
- */
-static int
-i_validate_nvpair(nvpair_t *nvp)
-{
-	data_type_t type = NVP_TYPE(nvp);
-	int stored, expected;
-
-	/* verify nvp_name_sz, check the name string length */
-	if (i_validate_nvpair_name(nvp) != 0)
-		return (EFAULT);
-
-	if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0)
-		return (EFAULT);
-
-	/*
-	 * verify nvp_type, nvp_value_elem, and also possibly
-	 * verify string values and get the value size.
-	 */
-	expected = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));
-	if (expected < 0)
-		return (EFAULT);
-
-	/* the space left after header and name must match exactly */
-	stored = nvp->nvp_size - NVP_VALOFF(nvp);
-
-	return (stored == NV_ALIGN(expected) ? 0 : EFAULT);
-}
-
-/*
- * Add a copy of every pair of snvl to dnvl, in list order.  Stops at the
- * first failing nvlist_add_common() and returns its error; returns EINVAL
- * if snvl has no private data and 0 when all pairs were added.
- */
-static int
-nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv;
-	i_nvp_t *node;
-	int err = 0;
-
-	if (priv == NULL)
-		return (EINVAL);
-
-	for (node = priv->nvp_list; node != NULL; node = node->nvi_next) {
-		nvpair_t *nvp = &node->nvi_nvp;
-
-		err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp),
-		    NVP_NELEM(nvp), NVP_VALUE(nvp));
-		if (err != 0)
-			break;
-	}
-
-	return (err);
-}
-
-/*
- * Frees all memory allocated for an nvpair (like embedded lists) with
- * the exception of the nvpair buffer itself.  Pairs that are neither an
- * nvlist nor an nvlist array own nothing extra and are left untouched.
- */
-static void
-nvpair_free(nvpair_t *nvp)
-{
-	data_type_t type = NVP_TYPE(nvp);
-
-	if (type == DATA_TYPE_NVLIST) {
-		nvlist_free(EMBEDDED_NVL(nvp));
-	} else if (type == DATA_TYPE_NVLIST_ARRAY) {
-		nvlist_t **lists = EMBEDDED_NVL_ARRAY(nvp);
-		int idx;
-
-		for (idx = 0; idx < NVP_NELEM(nvp); idx++) {
-			if (lists[idx] != NULL)
-				nvlist_free(lists[idx]);
-		}
-	}
-}
-
-/*
- * nvlist_free - free an unpacked nvlist
- *
- * Releases every pair (including what the pairs own), then the private
- * data.  The list header is freed too unless the list is embedded in a
- * pair, in which case only its nvl_priv is cleared.  A NULL list or a
- * list without private data is ignored.
- */
-void
-nvlist_free(nvlist_t *nvl)
-{
-	nvpriv_t *priv;
-	i_nvp_t *node, *next;
-
-	if (nvl == NULL)
-		return;
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	if (priv == NULL)
-		return;
-
-	/*
-	 * Unpacked nvlists are linked through i_nvp_t; fetch the successor
-	 * before the node is released.
-	 */
-	for (node = priv->nvp_list; node != NULL; node = next) {
-		next = node->nvi_next;
-
-		nvpair_free(&node->nvi_nvp);
-		nvp_buf_free(nvl, &node->nvi_nvp);
-	}
-
-	if (priv->nvp_stat & NV_STAT_EMBEDDED)
-		nvl->nvl_priv = 0;
-	else
-		nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t)));
-
-	nv_mem_free(priv, priv, sizeof (nvpriv_t));
-}
-
-/*
- * Return 1 if nvp is one of the pairs currently linked into nvl (compared
- * by address), 0 otherwise or when nvp is NULL.
- */
-static int
-nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp)
-{
-	nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	i_nvp_t *node;
-
-	if (nvp == NULL)
-		return (0);
-
-	node = priv->nvp_list;
-	while (node != NULL) {
-		if (nvp == &node->nvi_nvp)
-			return (1);
-		node = node->nvi_next;
-	}
-
-	return (0);
-}
-
-/*
- * Make a copy of nvlist
- *
- * Picks the allocator and defers to nvlist_xdup().  Kernel (non-boot)
- * builds use nv_alloc_sleep when kmflag is KM_SLEEP; in every other case
- * nv_alloc_nosleep is used and kmflag is ignored.
- */
-/*ARGSUSED1*/
-int
-nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag)
-{
-	nv_alloc_t *nva = nv_alloc_nosleep;
-
-#if defined(_KERNEL) && !defined(_BOOT)
-	if (kmflag == KM_SLEEP)
-		nva = nv_alloc_sleep;
-#endif
-	return (nvlist_xdup(nvl, nvlp, nva));
-}
-
-/*
- * Duplicate nvl, including its flags, into a new list allocated with nva.
- * *nvlp is written only on success.  Returns EINVAL for NULL arguments,
- * otherwise the error from nvlist_xalloc() or from copying the pairs, or
- * 0 on success.
- */
-int
-nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva)
-{
-	nvlist_t *copy;
-	int err;
-
-	if (nvl == NULL || nvlp == NULL)
-		return (EINVAL);
-
-	err = nvlist_xalloc(&copy, nvl->nvl_nvflag, nva);
-	if (err != 0)
-		return (err);
-
-	err = nvlist_copy_pairs(nvl, copy);
-	if (err != 0) {
-		/* drop the partially filled copy */
-		nvlist_free(copy);
-		return (err);
-	}
-
-	*nvlp = copy;
-	return (0);
-}
-
-/*
- * Remove all with matching name
- *
- * Returns 0 if at least one pair was removed, ENOENT if none matched and
- * EINVAL for NULL arguments or a list without private data.
- */
-int
-nvlist_remove_all(nvlist_t *nvl, const char *name)
-{
-	nvpriv_t *priv;
-	i_nvp_t *node, *next;
-	int rc = ENOENT;
-
-	if (nvl == NULL || name == NULL)
-		return (EINVAL);
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	if (priv == NULL)
-		return (EINVAL);
-
-	for (node = priv->nvp_list; node != NULL; node = next) {
-		nvpair_t *nvp = &node->nvi_nvp;
-
-		/* grab the successor first; the node may be freed below */
-		next = node->nvi_next;
-
-		if (strcmp(name, NVP_NAME(nvp)) == 0) {
-			nvp_buf_unlink(nvl, nvp);
-			nvpair_free(nvp);
-			nvp_buf_free(nvl, nvp);
-			rc = 0;
-		}
-	}
-
-	return (rc);
-}
-
-/*
- * Remove first one with matching name and type
- *
- * Returns 0 once a pair has been removed, ENOENT if none matched and
- * EINVAL for NULL arguments or a list without private data.
- */
-int
-nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
-{
-	nvpriv_t *priv;
-	i_nvp_t *node;
-
-	if (nvl == NULL || name == NULL)
-		return (EINVAL);
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	if (priv == NULL)
-		return (EINVAL);
-
-	for (node = priv->nvp_list; node != NULL; node = node->nvi_next) {
-		nvpair_t *nvp = &node->nvi_nvp;
-
-		if (strcmp(name, NVP_NAME(nvp)) != 0 || NVP_TYPE(nvp) != type)
-			continue;
-
-		nvp_buf_unlink(nvl, nvp);
-		nvpair_free(nvp);
-		nvp_buf_free(nvl, nvp);
-
-		return (0);
-	}
-
-	return (ENOENT);
-}
-
-/*
- * This function calculates the size of an nvpair value.
- *
- * The data argument controls the behavior in case of the data types
- *	DATA_TYPE_STRING	and
- *	DATA_TYPE_STRING_ARRAY
- * If data == NULL then the size of the string(s) is excluded.
- *
- * Returns the size in bytes, or -1 if type/nelem are not valid together,
- * if a string array holds a NULL entry, or if the size exceeds INT32_MAX.
- */
-static int
-i_get_value_size(data_type_t type, const void *data, uint_t nelem)
-{
-	const uint64_t count = nelem;
-	uint64_t size;
-
-	if (i_validate_type_nelem(type, nelem) != 0)
-		return (-1);
-
-	/* Calculate required size for holding value */
-	switch (type) {
-	case DATA_TYPE_BOOLEAN:
-		size = 0;
-		break;
-	case DATA_TYPE_BOOLEAN_VALUE:
-		size = sizeof (boolean_t);
-		break;
-	case DATA_TYPE_BYTE:
-		size = sizeof (uchar_t);
-		break;
-	case DATA_TYPE_INT8:
-		size = sizeof (int8_t);
-		break;
-	case DATA_TYPE_UINT8:
-		size = sizeof (uint8_t);
-		break;
-	case DATA_TYPE_INT16:
-		size = sizeof (int16_t);
-		break;
-	case DATA_TYPE_UINT16:
-		size = sizeof (uint16_t);
-		break;
-	case DATA_TYPE_INT32:
-		size = sizeof (int32_t);
-		break;
-	case DATA_TYPE_UINT32:
-		size = sizeof (uint32_t);
-		break;
-	case DATA_TYPE_INT64:
-		size = sizeof (int64_t);
-		break;
-	case DATA_TYPE_UINT64:
-		size = sizeof (uint64_t);
-		break;
-	case DATA_TYPE_HRTIME:
-		size = sizeof (hrtime_t);
-		break;
-	case DATA_TYPE_NVLIST:
-		size = NV_ALIGN(sizeof (nvlist_t));
-		break;
-	case DATA_TYPE_STRING:
-		size = 0;
-		if (data != NULL)
-			size = strlen(data) + 1;
-		break;
-	case DATA_TYPE_BOOLEAN_ARRAY:
-		size = count * sizeof (boolean_t);
-		break;
-	case DATA_TYPE_BYTE_ARRAY:
-		size = count * sizeof (uchar_t);
-		break;
-	case DATA_TYPE_INT8_ARRAY:
-		size = count * sizeof (int8_t);
-		break;
-	case DATA_TYPE_UINT8_ARRAY:
-		size = count * sizeof (uint8_t);
-		break;
-	case DATA_TYPE_INT16_ARRAY:
-		size = count * sizeof (int16_t);
-		break;
-	case DATA_TYPE_UINT16_ARRAY:
-		size = count * sizeof (uint16_t);
-		break;
-	case DATA_TYPE_INT32_ARRAY:
-		size = count * sizeof (int32_t);
-		break;
-	case DATA_TYPE_UINT32_ARRAY:
-		size = count * sizeof (uint32_t);
-		break;
-	case DATA_TYPE_INT64_ARRAY:
-		size = count * sizeof (int64_t);
-		break;
-	case DATA_TYPE_UINT64_ARRAY:
-		size = count * sizeof (uint64_t);
-		break;
-	case DATA_TYPE_STRING_ARRAY: {
-		char *const *strs = data;
-		uint_t idx;
-
-		/* one 64-bit slot per string for the pointer array ... */
-		size = count * sizeof (uint64_t);
-
-		/* ... followed by the strings, which need no alignment */
-		for (idx = 0; strs != NULL && idx < nelem; idx++) {
-			if (strs[idx] == NULL)
-				return (-1);
-			size += strlen(strs[idx]) + 1;
-		}
-		break;
-	}
-	case DATA_TYPE_NVLIST_ARRAY:
-		/* a 64-bit slot plus an aligned list header per element */
-		size = count * sizeof (uint64_t) +
-		    count * NV_ALIGN(sizeof (nvlist_t));
-		break;
-	default:
-		return (-1);
-	}
-
-	if (size > INT32_MAX)
-		return (-1);
-
-	return ((int)size);
-}
-
-/*
- * Build, in the storage emb_nvl, a copy of onvl that is to live embedded in
- * a pair of nvl.  The copy gets its own private data, allocated through
- * nvl's allocator.  Returns ENOMEM if that allocation fails, otherwise the
- * result of copying the pairs; on a copy failure the partial list is freed
- * and emb_nvl->nvl_priv is cleared.
- */
-static int
-nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl)
-{
-	nvpriv_t *parent = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	nvpriv_t *priv = nv_priv_alloc_embedded(parent);
-	int err;
-
-	if (priv == NULL)
-		return (ENOMEM);
-
-	nvlist_init(emb_nvl, onvl->nvl_nvflag, priv);
-
-	err = nvlist_copy_pairs(onvl, emb_nvl);
-	if (err == 0)
-		return (0);
-
-	nvlist_free(emb_nvl);
-	emb_nvl->nvl_priv = 0;
-
-	return (err);
-}
-
-/*
- * nvlist_add_common - Add new <name,value> pair to nvlist
- *
- * The name and the value are copied into a newly allocated pair (nvlists
- * are copied pair by pair into embedded lists), which is appended to nvl.
- * If nvl has NV_UNIQUE_NAME set, all existing pairs with the same name are
- * removed first; with NV_UNIQUE_NAME_TYPE the first pair with the same name
- * and type is removed.
- *
- * Returns 0 on success, EINVAL for invalid arguments (NULL list or name,
- * missing data, bad type/nelem combination, invalid boolean, a name too
- * long for nvp_name_sz, a NULL nvlist or an attempt to add nvl to itself),
- * ENOMEM if the pair cannot be allocated, or the error from copying an
- * embedded nvlist.
- */
-static int
-nvlist_add_common(nvlist_t *nvl, const char *name,
-    data_type_t type, uint_t nelem, const void *data)
-{
-	nvpair_t *nvp;
-	uint_t i;
-	size_t name_len;
-
-	int nvp_sz, name_sz, value_sz;
-	int err = 0;
-
-	if (name == NULL || nvl == NULL || nvl->nvl_priv == 0)
-		return (EINVAL);
-
-	if (nelem != 0 && data == NULL)
-		return (EINVAL);
-
-	/*
-	 * Verify type and nelem and get the value size.
-	 * For the data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
-	 * the size of the string(s) is included.
-	 */
-	if ((value_sz = i_get_value_size(type, data, nelem)) < 0)
-		return (EINVAL);
-
-	if (i_validate_nvpair_value(type, nelem, data) != 0)
-		return (EINVAL);
-
-	/*
-	 * If we're adding an nvlist or nvlist array, ensure that we are not
-	 * adding the input nvlist to itself, which would cause recursion,
-	 * and ensure that no NULL nvlist pointers are present.
-	 */
-	switch (type) {
-	case DATA_TYPE_NVLIST:
-		if (data == nvl || data == NULL)
-			return (EINVAL);
-		break;
-	case DATA_TYPE_NVLIST_ARRAY: {
-		nvlist_t **onvlp = (nvlist_t **)data;
-		for (i = 0; i < nelem; i++) {
-			if (onvlp[i] == nvl || onvlp[i] == NULL)
-				return (EINVAL);
-		}
-		break;
-	}
-	default:
-		break;
-	}
-
-	/*
-	 * Calculate sizes of the nvpair elements and the nvpair itself.
-	 * The name size (terminator included) has to be representable in
-	 * nvp_name_sz, which is treated as a signed field elsewhere in this
-	 * file (it is tested with <= 0); a longer name would be silently
-	 * truncated when stored and yield a corrupt pair, so reject it.
-	 */
-	name_len = strlen(name) + 1;
-	if ((unsigned long long)name_len >=
-	    (1ULL << (sizeof (nvp->nvp_name_sz) * 8 - 1)))
-		return (EINVAL);
-	name_sz = (int)name_len;
-
-	nvp_sz = NVP_SIZE_CALC(name_sz, value_sz);
-
-	if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL)
-		return (ENOMEM);
-
-	ASSERT(nvp->nvp_size == nvp_sz);
-	nvp->nvp_name_sz = name_sz;
-	nvp->nvp_value_elem = nelem;
-	nvp->nvp_type = type;
-	bcopy(name, NVP_NAME(nvp), name_sz);
-
-	switch (type) {
-	case DATA_TYPE_BOOLEAN:
-		break;
-	case DATA_TYPE_STRING_ARRAY: {
-		char *const *strs = data;
-		char *buf = NVP_VALUE(nvp);
-		char **cstrs = (void *)buf;
-
-		/* skip pre-allocated space for pointer array */
-		buf += nelem * sizeof (uint64_t);
-		for (i = 0; i < nelem; i++) {
-			int slen = strlen(strs[i]) + 1;
-			bcopy(strs[i], buf, slen);
-			cstrs[i] = buf;
-			buf += slen;
-		}
-		break;
-	}
-	case DATA_TYPE_NVLIST: {
-		nvlist_t *nnvl = EMBEDDED_NVL(nvp);
-		nvlist_t *onvl = (nvlist_t *)data;
-
-		if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) {
-			nvp_buf_free(nvl, nvp);
-			return (err);
-		}
-		break;
-	}
-	case DATA_TYPE_NVLIST_ARRAY: {
-		nvlist_t **onvlp = (nvlist_t **)data;
-		nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
-		/* the list headers follow the array of 64-bit slots */
-		nvlist_t *embedded = (nvlist_t *)
-		    ((uintptr_t)nvlp + nelem * sizeof (uint64_t));
-
-		for (i = 0; i < nelem; i++) {
-			if ((err = nvlist_copy_embedded(nvl,
-			    onvlp[i], embedded)) != 0) {
-				/*
-				 * Free any successfully created lists
-				 */
-				nvpair_free(nvp);
-				nvp_buf_free(nvl, nvp);
-				return (err);
-			}
-
-			nvlp[i] = embedded++;
-		}
-		break;
-	}
-	default:
-		bcopy(data, NVP_VALUE(nvp), value_sz);
-	}
-
-	/* if unique name, remove before add */
-	if (nvl->nvl_nvflag & NV_UNIQUE_NAME)
-		(void) nvlist_remove_all(nvl, name);
-	else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE)
-		(void) nvlist_remove(nvl, name, type);
-
-	nvp_buf_link(nvl, nvp);
-
-	return (0);
-}
-
-/*
- * Add a DATA_TYPE_BOOLEAN pair called name to nvl.  The pair carries no
- * value (zero elements, no data).  Returns the nvlist_add_common() result.
- */
-int
-nvlist_add_boolean(nvlist_t *nvl, const char *name)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_BOOLEAN_VALUE pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_BYTE pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_INT8 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_UINT8 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_INT16 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_UINT16 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_INT32 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_UINT32 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_INT64 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_UINT64 pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val));
-}
-
-/*
- * Add a copy of the string val to nvl as a DATA_TYPE_STRING pair called
- * name.  A NULL val is rejected with EINVAL by nvlist_add_common().
- */
-int
-nvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_BOOLEAN_ARRAY
- * pair called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_boolean_array(nvlist_t *nvl, const char *name,
- boolean_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_BYTE_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_INT8_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_UINT8_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_INT16_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_UINT16_ARRAY
- * pair called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_INT32_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_UINT32_ARRAY
- * pair called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_INT64_ARRAY pair
- * called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
-}
-
-/*
- * Add a copy of the n elements of a to nvl as a DATA_TYPE_UINT64_ARRAY
- * pair called name.  n may be 0; a must be non-NULL when n is not 0.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
-}
-
-/*
- * Add copies of the n strings in a to nvl as a DATA_TYPE_STRING_ARRAY pair
- * called name.  n may be 0; a NULL entry in a is rejected with EINVAL.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_string_array(nvlist_t *nvl, const char *name,
- char *const *a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
-}
-
-/*
- * Add a copy of val to nvl as a DATA_TYPE_HRTIME pair called name.
- * Returns the nvlist_add_common() result (0 on success).
- */
-int
-nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val));
-}
-
-/*
- * Add a copy of the nvlist val to nvl as a DATA_TYPE_NVLIST pair called
- * name.  nvlist_add_common() rejects a NULL val and val == nvl with EINVAL.
- */
-int
-nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val));
-}
-
-/*
- * Add copies of the n nvlists in a to nvl as a DATA_TYPE_NVLIST_ARRAY pair
- * called name.  n may be 0; an entry that is NULL or is nvl itself is
- * rejected with EINVAL by nvlist_add_common().
- */
-int
-nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n)
-{
- return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
-}
-
-/* reading name-value pairs */
-/*
- * Step through the pairs of nvl.  A NULL nvp yields the first pair;
- * otherwise the pair following nvp is returned.  The position is cached in
- * the list's private data.  Returns NULL at the end of the list, for a
- * NULL list or a list without private data, and when nvp is not the
- * cached position and is not found in nvl.
- */
-nvpair_t *
-nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
-{
-	nvpriv_t *priv;
-	i_nvp_t *node;
-
-	if (nvl == NULL)
-		return (NULL);
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	if (priv == NULL)
-		return (NULL);
-
-	if (nvp == NULL) {
-		node = priv->nvp_list;
-	} else {
-		node = NVPAIR2I_NVP(nvp);
-
-		/*
-		 * Ensure that nvp is a valid pointer: either it is the
-		 * cached position, or it must be found in the list.
-		 */
-		if (priv->nvp_curr == node)
-			node = node->nvi_next;
-		else if (nvlist_contains_nvp(nvl, nvp) == 0)
-			node = NULL;
-	}
-
-	priv->nvp_curr = node;
-
-	if (node == NULL)
-		return (NULL);
-
-	return (&node->nvi_nvp);
-}
-
-/*
- * Return the name stored in nvp, as given by NVP_NAME().  nvp is not
- * checked for NULL.
- */
-char *
-nvpair_name(nvpair_t *nvp)
-{
- return (NVP_NAME(nvp));
-}
-
-/*
- * Return the data type stored in nvp, as given by NVP_TYPE().  nvp is not
- * checked for NULL.
- */
-data_type_t
-nvpair_type(nvpair_t *nvp)
-{
- return (NVP_TYPE(nvp));
-}
-
-/*
- * Fetch the value of nvp, which must be of the given type.
- *
- * For non-array types the value is copied into *data; for strings, nvlists
- * and arrays *data is set to point at the value stored inside the pair
- * (NULL for an array with zero elements).  *nelem receives the element
- * count; it is optional except for array types.
- *
- * Returns 0 on success, EINVAL for a NULL or mistyped pair or a missing
- * output argument, and ENOTSUP for an unknown type.
- */
-static int
-nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data)
-{
-	uint_t count;
-
-	if (nvp == NULL || nvpair_type(nvp) != type)
-		return (EINVAL);
-
-	switch (type) {
-	case DATA_TYPE_BOOLEAN:
-		count = 0;
-		break;
-
-	case DATA_TYPE_BOOLEAN_VALUE:
-	case DATA_TYPE_BYTE:
-	case DATA_TYPE_INT8:
-	case DATA_TYPE_UINT8:
-	case DATA_TYPE_INT16:
-	case DATA_TYPE_UINT16:
-	case DATA_TYPE_INT32:
-	case DATA_TYPE_UINT32:
-	case DATA_TYPE_INT64:
-	case DATA_TYPE_UINT64:
-	case DATA_TYPE_HRTIME:
-		/* fixed-size scalar: copy it out */
-		if (data == NULL)
-			return (EINVAL);
-		bcopy(NVP_VALUE(nvp), data,
-		    (size_t)i_get_value_size(type, NULL, 1));
-		count = 1;
-		break;
-
-	case DATA_TYPE_NVLIST:
-	case DATA_TYPE_STRING:
-		/* hand out a pointer into the pair */
-		if (data == NULL)
-			return (EINVAL);
-		*(void **)data = (void *)NVP_VALUE(nvp);
-		count = 1;
-		break;
-
-	case DATA_TYPE_BOOLEAN_ARRAY:
-	case DATA_TYPE_BYTE_ARRAY:
-	case DATA_TYPE_INT8_ARRAY:
-	case DATA_TYPE_UINT8_ARRAY:
-	case DATA_TYPE_INT16_ARRAY:
-	case DATA_TYPE_UINT16_ARRAY:
-	case DATA_TYPE_INT32_ARRAY:
-	case DATA_TYPE_UINT32_ARRAY:
-	case DATA_TYPE_INT64_ARRAY:
-	case DATA_TYPE_UINT64_ARRAY:
-	case DATA_TYPE_STRING_ARRAY:
-	case DATA_TYPE_NVLIST_ARRAY:
-		/* arrays need both outputs; the count is stored first */
-		if (nelem == NULL || data == NULL)
-			return (EINVAL);
-		*nelem = NVP_NELEM(nvp);
-		if (*nelem == 0)
-			*(void **)data = NULL;
-		else
-			*(void **)data = (void *)NVP_VALUE(nvp);
-		return (0);
-
-	default:
-		return (ENOTSUP);
-	}
-
-	if (nelem != NULL)
-		*nelem = count;
-
-	return (0);
-}
-
-/*
- * Find the first pair of nvl with the given name and type and fetch its
- * value through nvpair_value_common().
- *
- * Returns EINVAL for NULL arguments or a list without private data,
- * ENOTSUP if the list was created with neither NV_UNIQUE_NAME nor
- * NV_UNIQUE_NAME_TYPE, ENOENT if no pair matches, and otherwise the
- * nvpair_value_common() result.
- */
-static int
-nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type,
-    uint_t *nelem, void *data)
-{
-	const uint32_t unique_bits = NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE;
-	nvpriv_t *priv;
-	i_nvp_t *node;
-
-	if (name == NULL || nvl == NULL)
-		return (EINVAL);
-
-	priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
-	if (priv == NULL)
-		return (EINVAL);
-
-	if ((nvl->nvl_nvflag & unique_bits) == 0)
-		return (ENOTSUP);
-
-	for (node = priv->nvp_list; node != NULL; node = node->nvi_next) {
-		nvpair_t *nvp = &node->nvi_nvp;
-
-		if (NVP_TYPE(nvp) != type)
-			continue;
-		if (strcmp(name, NVP_NAME(nvp)) != 0)
-			continue;
-
-		return (nvpair_value_common(nvp, type, nelem, data));
-	}
-
-	return (ENOENT);
-}
-
-/*
- * Check for a DATA_TYPE_BOOLEAN pair called name in nvl; no value is
- * returned.  Returns the nvlist_lookup_common() result: 0 if found,
- * ENOENT if not, EINVAL or ENOTSUP for an unusable list or name.
- */
-int
-nvlist_lookup_boolean(nvlist_t *nvl, const char *name)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL));
-}
-
-/*
- * Look up the DATA_TYPE_BOOLEAN_VALUE pair called name and copy its value
- * into *val.  Returns the nvlist_lookup_common() result (0 on success,
- * ENOENT if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val)
-{
- return (nvlist_lookup_common(nvl, name,
- DATA_TYPE_BOOLEAN_VALUE, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_BYTE pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_INT8 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_UINT8 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_INT16 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_UINT16 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_INT32 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_UINT32 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_INT64 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_UINT64 pair called name and copy its value into
- * *val.  Returns the nvlist_lookup_common() result (0 on success, ENOENT
- * if absent, EINVAL for a NULL val).
- */
-int
-nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_STRING pair called name and set *val to point at
- * the string stored inside the pair (it is not copied).  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_NVLIST pair called name and set *val to point at
- * the nvlist embedded in the pair (it is not copied).  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val));
-}
-
-/*
- * Look up the DATA_TYPE_BOOLEAN_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name,
- boolean_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name,
- DATA_TYPE_BOOLEAN_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_BYTE_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_byte_array(nvlist_t *nvl, const char *name,
- uchar_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_INT8_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_UINT8_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name,
- uint8_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_INT16_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_int16_array(nvlist_t *nvl, const char *name,
- int16_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_UINT16_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name,
- uint16_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a));
-}
-
-/*
- * Look up the DATA_TYPE_INT32_ARRAY pair called name.  *n receives the
- * element count and *a a pointer to the array stored in the pair (NULL
- * when the count is 0).  a and n must both be non-NULL.  Returns the
- * nvlist_lookup_common() result (0 on success, ENOENT if absent).
- */
-int
-nvlist_lookup_int32_array(nvlist_t *nvl, const char *name,
- int32_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name,
- uint32_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_int64_array(nvlist_t *nvl, const char *name,
- int64_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name,
- uint64_t **a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_string_array(nvlist_t *nvl, const char *name,
- char ***a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name,
- nvlist_t ***a, uint_t *n)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a));
-}
-
-int
-nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val)
-{
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val));
-}
-
-int
-nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...)
-{
- va_list ap;
- char *name;
- int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0);
- int ret = 0;
-
- va_start(ap, flag);
- while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
- data_type_t type;
- void *val;
- uint_t *nelem;
-
- switch (type = va_arg(ap, data_type_t)) {
- case DATA_TYPE_BOOLEAN:
- ret = nvlist_lookup_common(nvl, name, type, NULL, NULL);
- break;
-
- case DATA_TYPE_BOOLEAN_VALUE:
- case DATA_TYPE_BYTE:
- case DATA_TYPE_INT8:
- case DATA_TYPE_UINT8:
- case DATA_TYPE_INT16:
- case DATA_TYPE_UINT16:
- case DATA_TYPE_INT32:
- case DATA_TYPE_UINT32:
- case DATA_TYPE_INT64:
- case DATA_TYPE_UINT64:
- case DATA_TYPE_HRTIME:
- case DATA_TYPE_STRING:
- case DATA_TYPE_NVLIST:
- val = va_arg(ap, void *);
- ret = nvlist_lookup_common(nvl, name, type, NULL, val);
- break;
-
- case DATA_TYPE_BYTE_ARRAY:
- case DATA_TYPE_BOOLEAN_ARRAY:
- case DATA_TYPE_INT8_ARRAY:
- case DATA_TYPE_UINT8_ARRAY:
- case DATA_TYPE_INT16_ARRAY:
- case DATA_TYPE_UINT16_ARRAY:
- case DATA_TYPE_INT32_ARRAY:
- case DATA_TYPE_UINT32_ARRAY:
- case DATA_TYPE_INT64_ARRAY:
- case DATA_TYPE_UINT64_ARRAY:
- case DATA_TYPE_STRING_ARRAY:
- case DATA_TYPE_NVLIST_ARRAY:
- val = va_arg(ap, void *);
- nelem = va_arg(ap, uint_t *);
- ret = nvlist_lookup_common(nvl, name, type, nelem, val);
- break;
-
- default:
- ret = EINVAL;
- }
-
- if (ret == ENOENT && noentok)
- ret = 0;
- }
- va_end(ap);
-
- return (ret);
-}
-
-int
-nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val));
-}
-
-int
-nvpair_value_byte(nvpair_t *nvp, uchar_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val));
-}
-
-int
-nvpair_value_int8(nvpair_t *nvp, int8_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val));
-}
-
-int
-nvpair_value_uint8(nvpair_t *nvp, uint8_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val));
-}
-
-int
-nvpair_value_int16(nvpair_t *nvp, int16_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val));
-}
-
-int
-nvpair_value_uint16(nvpair_t *nvp, uint16_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val));
-}
-
-int
-nvpair_value_int32(nvpair_t *nvp, int32_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val));
-}
-
-int
-nvpair_value_uint32(nvpair_t *nvp, uint32_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val));
-}
-
-int
-nvpair_value_int64(nvpair_t *nvp, int64_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val));
-}
-
-int
-nvpair_value_uint64(nvpair_t *nvp, uint64_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val));
-}
-
-int
-nvpair_value_string(nvpair_t *nvp, char **val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val));
-}
-
-int
-nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val));
-}
-
-int
-nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val));
-}
-
-int
-nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val)
-{
- return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val));
-}
-
-/*
- * Add specified pair to the list.
- */
-int
-nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp)
-{
- if (nvl == NULL || nvp == NULL)
- return (EINVAL);
-
- return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp),
- NVP_NELEM(nvp), NVP_VALUE(nvp)));
-}
-
-/*
- * Merge the supplied nvlists and put the result in dst.
- * The merged list will contain all names specified in both lists,
- * the values are taken from nvl in the case of duplicates.
- * Return 0 on success.
- */
-/*ARGSUSED*/
-int
-nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag)
-{
- if (nvl == NULL || dst == NULL)
- return (EINVAL);
-
- if (dst != nvl)
- return (nvlist_copy_pairs(nvl, dst));
-
- return (0);
-}
-
-/*
- * Encoding related routines
- */
-#define NVS_OP_ENCODE 0
-#define NVS_OP_DECODE 1
-#define NVS_OP_GETSIZE 2
-
-typedef struct nvs_ops nvs_ops_t;
-
-typedef struct {
- int nvs_op;
- const nvs_ops_t *nvs_ops;
- void *nvs_private;
- nvpriv_t *nvs_priv;
-} nvstream_t;
-
-/*
- * nvs operations are:
- * - nvs_nvlist
- * encoding / decoding of a nvlist header (nvlist_t)
- * calculates the size used for header and end detection
- *
- * - nvs_nvpair
- * responsible for the first part of encoding / decoding of an nvpair
- * calculates the decoded size of an nvpair
- *
- * - nvs_nvp_op
- * second part of encoding / decoding of an nvpair
- *
- * - nvs_nvp_size
- * calculates the encoding size of an nvpair
- *
- * - nvs_nvl_fini
- * encodes the end detection mark (zeros).
- */
-struct nvs_ops {
- int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *);
- int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *);
- int (*nvs_nvp_op)(nvstream_t *, nvpair_t *);
- int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *);
- int (*nvs_nvl_fini)(nvstream_t *);
-};
-
-typedef struct {
- char nvh_encoding; /* nvs encoding method */
- char nvh_endian; /* nvs endian */
- char nvh_reserved1; /* reserved for future use */
- char nvh_reserved2; /* reserved for future use */
-} nvs_header_t;
-
-static int
-nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl)
-{
- nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
- i_nvp_t *curr;
-
- /*
- * Walk nvpair in list and encode each nvpair
- */
- for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next)
- if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0)
- return (EFAULT);
-
- return (nvs->nvs_ops->nvs_nvl_fini(nvs));
-}
-
-static int
-nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl)
-{
- nvpair_t *nvp;
- size_t nvsize;
- int err;
-
- /*
- * Get decoded size of next pair in stream, alloc
- * memory for nvpair_t, then decode the nvpair
- */
- while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) {
- if (nvsize == 0) /* end of list */
- break;
-
- /* make sure len makes sense */
- if (nvsize < NVP_SIZE_CALC(1, 0))
- return (EFAULT);
-
- if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL)
- return (ENOMEM);
-
- if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) {
- nvp_buf_free(nvl, nvp);
- return (err);
- }
-
- if (i_validate_nvpair(nvp) != 0) {
- nvpair_free(nvp);
- nvp_buf_free(nvl, nvp);
- return (EFAULT);
- }
-
- nvp_buf_link(nvl, nvp);
- }
- return (err);
-}
-
-static int
-nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
-{
- nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
- i_nvp_t *curr;
- uint64_t nvsize = *buflen;
- size_t size;
-
- /*
- * Get encoded size of nvpairs in nvlist
- */
- for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
- if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0)
- return (EINVAL);
-
- if ((nvsize += size) > INT32_MAX)
- return (EINVAL);
- }
-
- *buflen = nvsize;
- return (0);
-}
-
-static int
-nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen)
-{
- int err;
-
- if (nvl->nvl_priv == 0)
- return (EFAULT);
-
- /*
- * Perform the operation, starting with header, then each nvpair
- */
- if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0)
- return (err);
-
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- err = nvs_encode_pairs(nvs, nvl);
- break;
-
- case NVS_OP_DECODE:
- err = nvs_decode_pairs(nvs, nvl);
- break;
-
- case NVS_OP_GETSIZE:
- err = nvs_getsize_pairs(nvs, nvl, buflen);
- break;
-
- default:
- err = EINVAL;
- }
-
- return (err);
-}
-
-static int
-nvs_embedded(nvstream_t *nvs, nvlist_t *embedded)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- return (nvs_operation(nvs, embedded, NULL));
-
- case NVS_OP_DECODE: {
- nvpriv_t *priv;
- int err;
-
- if (embedded->nvl_version != NV_VERSION)
- return (ENOTSUP);
-
- if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL)
- return (ENOMEM);
-
- nvlist_init(embedded, embedded->nvl_nvflag, priv);
-
- if ((err = nvs_operation(nvs, embedded, NULL)) != 0)
- nvlist_free(embedded);
- return (err);
- }
- default:
- break;
- }
-
- return (EINVAL);
-}
-
-static int
-nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
-{
- size_t nelem = NVP_NELEM(nvp);
- nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp);
- int i;
-
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- for (i = 0; i < nelem; i++)
- if (nvs_embedded(nvs, nvlp[i]) != 0)
- return (EFAULT);
- break;
-
- case NVS_OP_DECODE: {
- size_t len = nelem * sizeof (uint64_t);
- nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len);
-
- bzero(nvlp, len); /* don't trust packed data */
- for (i = 0; i < nelem; i++) {
- if (nvs_embedded(nvs, embedded) != 0) {
- nvpair_free(nvp);
- return (EFAULT);
- }
-
- nvlp[i] = embedded++;
- }
- break;
- }
- case NVS_OP_GETSIZE: {
- uint64_t nvsize = 0;
-
- for (i = 0; i < nelem; i++) {
- size_t nvp_sz = 0;
-
- if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0)
- return (EINVAL);
-
- if ((nvsize += nvp_sz) > INT32_MAX)
- return (EINVAL);
- }
-
- *size = nvsize;
- break;
- }
- default:
- return (EINVAL);
- }
-
- return (0);
-}
-
-static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *);
-static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *);
-
-/*
- * Common routine for nvlist operations:
- * encode, decode, getsize (encoded size).
- */
-static int
-nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding,
- int nvs_op)
-{
- int err = 0;
- nvstream_t nvs;
- int nvl_endian;
-#if BYTE_ORDER == _LITTLE_ENDIAN
- int host_endian = 1;
-#else
- int host_endian = 0;
-#endif /* _LITTLE_ENDIAN */
- nvs_header_t *nvh = (void *)buf;
-
- if (buflen == NULL || nvl == NULL ||
- (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
- return (EINVAL);
-
- nvs.nvs_op = nvs_op;
-
- /*
- * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and
- * a buffer is allocated. The first 4 bytes in the buffer are
- * used for encoding method and host endian.
- */
- switch (nvs_op) {
- case NVS_OP_ENCODE:
- if (buf == NULL || *buflen < sizeof (nvs_header_t))
- return (EINVAL);
-
- nvh->nvh_encoding = encoding;
- nvh->nvh_endian = nvl_endian = host_endian;
- nvh->nvh_reserved1 = 0;
- nvh->nvh_reserved2 = 0;
- break;
-
- case NVS_OP_DECODE:
- if (buf == NULL || *buflen < sizeof (nvs_header_t))
- return (EINVAL);
-
- /* get method of encoding from first byte */
- encoding = nvh->nvh_encoding;
- nvl_endian = nvh->nvh_endian;
- break;
-
- case NVS_OP_GETSIZE:
- nvl_endian = host_endian;
-
- /*
- * add the size for encoding
- */
- *buflen = sizeof (nvs_header_t);
- break;
-
- default:
- return (ENOTSUP);
- }
-
- /*
- * Create an nvstream with proper encoding method
- */
- switch (encoding) {
- case NV_ENCODE_NATIVE:
- /*
- * check endianness, in case we are unpacking
- * from a file
- */
- if (nvl_endian != host_endian)
- return (ENOTSUP);
- err = nvs_native(&nvs, nvl, buf, buflen);
- break;
- case NV_ENCODE_XDR:
- err = nvs_xdr(&nvs, nvl, buf, buflen);
- break;
- default:
- err = ENOTSUP;
- break;
- }
-
- return (err);
-}
-
-int
-nvlist_size(nvlist_t *nvl, size_t *size, int encoding)
-{
- return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE));
-}
-
-/*
- * Pack nvlist into contiguous memory
- */
-/*ARGSUSED1*/
-int
-nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
- int kmflag)
-{
-#if defined(_KERNEL) && !defined(_BOOT)
- return (nvlist_xpack(nvl, bufp, buflen, encoding,
- (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
-#else
- return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep));
-#endif
-}
-
-int
-nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
- nv_alloc_t *nva)
-{
- nvpriv_t nvpriv;
- size_t alloc_size;
- char *buf;
- int err;
-
- if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL)
- return (EINVAL);
-
- if (*bufp != NULL)
- return (nvlist_common(nvl, *bufp, buflen, encoding,
- NVS_OP_ENCODE));
-
- /*
- * Here is a difficult situation:
- * 1. The nvlist has fixed allocator properties.
- * All other nvlist routines (like nvlist_add_*, ...) use
- * these properties.
- * 2. When using nvlist_pack() the user can specify his own
- * allocator properties (e.g. by using KM_NOSLEEP).
- *
- * We use the user specified properties (2). A clearer solution
- * will be to remove the kmflag from nvlist_pack(), but we will
- * not change the interface.
- */
- nv_priv_init(&nvpriv, nva, 0);
-
- if (err = nvlist_size(nvl, &alloc_size, encoding))
- return (err);
-
- if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL)
- return (ENOMEM);
-
- if ((err = nvlist_common(nvl, buf, &alloc_size, encoding,
- NVS_OP_ENCODE)) != 0) {
- nv_mem_free(&nvpriv, buf, alloc_size);
- } else {
- *buflen = alloc_size;
- *bufp = buf;
- }
-
- return (err);
-}
-
-/*
- * Unpack buf into an nvlist_t
- */
-/*ARGSUSED1*/
-int
-nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag)
-{
-#if defined(_KERNEL) && !defined(_BOOT)
- return (nvlist_xunpack(buf, buflen, nvlp,
- (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
-#else
- return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep));
-#endif
-}
-
-int
-nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva)
-{
- nvlist_t *nvl;
- int err;
-
- if (nvlp == NULL)
- return (EINVAL);
-
- if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0)
- return (err);
-
- if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0)
- nvlist_free(nvl);
- else
- *nvlp = nvl;
-
- return (err);
-}
-
-/*
- * Native encoding functions
- */
-typedef struct {
- /*
- * This structure is used when decoding a packed nvpair in
- * the native format. n_base points to a buffer containing the
- * packed nvpair. n_end is a pointer to the end of the buffer.
- * (n_end actually points to the first byte past the end of the
- * buffer.) n_curr is a pointer that lies between n_base and n_end.
- * It points to the current data that we are decoding.
- * The amount of data left in the buffer is equal to n_end - n_curr.
- * n_flag is used to recognize a packed embedded list.
- */
- caddr_t n_base;
- caddr_t n_end;
- caddr_t n_curr;
- uint_t n_flag;
-} nvs_native_t;
-
-static int
-nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf,
- size_t buflen)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- case NVS_OP_DECODE:
- nvs->nvs_private = native;
- native->n_curr = native->n_base = buf;
- native->n_end = buf + buflen;
- native->n_flag = 0;
- return (0);
-
- case NVS_OP_GETSIZE:
- nvs->nvs_private = native;
- native->n_curr = native->n_base = native->n_end = NULL;
- native->n_flag = 0;
- return (0);
- default:
- return (EINVAL);
- }
-}
-
-/*ARGSUSED*/
-static void
-nvs_native_destroy(nvstream_t *nvs)
-{
-}
-
-static int
-native_cp(nvstream_t *nvs, void *buf, size_t size)
-{
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
-
- if (native->n_curr + size > native->n_end)
- return (EFAULT);
-
- /*
- * The bcopy() below eliminates alignment requirement
- * on the buffer (stream) and is preferred over direct access.
- */
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- bcopy(buf, native->n_curr, size);
- break;
- case NVS_OP_DECODE:
- bcopy(native->n_curr, buf, size);
- break;
- default:
- return (EINVAL);
- }
-
- native->n_curr += size;
- return (0);
-}
-
-/*
- * operate on nvlist_t header
- */
-static int
-nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
-{
- nvs_native_t *native = nvs->nvs_private;
-
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- case NVS_OP_DECODE:
- if (native->n_flag)
- return (0); /* packed embedded list */
-
- native->n_flag = 1;
-
- /* copy version and nvflag of the nvlist_t */
- if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 ||
- native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0)
- return (EFAULT);
-
- return (0);
-
- case NVS_OP_GETSIZE:
- /*
- * if calculate for packed embedded list
- * 4 for end of the embedded list
- * else
- * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag
- * and 4 for end of the entire list
- */
- if (native->n_flag) {
- *size += 4;
- } else {
- native->n_flag = 1;
- *size += 2 * sizeof (int32_t) + 4;
- }
-
- return (0);
-
- default:
- return (EINVAL);
- }
-}
-
-static int
-nvs_native_nvl_fini(nvstream_t *nvs)
-{
- if (nvs->nvs_op == NVS_OP_ENCODE) {
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
- /*
- * Add 4 zero bytes at end of nvlist. They are used
- * for end detection by the decode routine.
- */
- if (native->n_curr + sizeof (int) > native->n_end)
- return (EFAULT);
-
- bzero(native->n_curr, sizeof (int));
- native->n_curr += sizeof (int);
- }
-
- return (0);
-}
-
-static int
-nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp)
-{
- if (nvs->nvs_op == NVS_OP_ENCODE) {
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
- nvlist_t *packed = (void *)
- (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
- /*
- * Null out the pointer that is meaningless in the packed
- * structure. The address may not be aligned, so we have
- * to use bzero.
- */
- bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
- }
-
- return (nvs_embedded(nvs, EMBEDDED_NVL(nvp)));
-}
-
-static int
-nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp)
-{
- if (nvs->nvs_op == NVS_OP_ENCODE) {
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
- char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp);
- size_t len = NVP_NELEM(nvp) * sizeof (uint64_t);
- nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len);
- int i;
- /*
- * Null out pointers that are meaningless in the packed
- * structure. The addresses may not be aligned, so we have
- * to use bzero.
- */
- bzero(value, len);
-
- for (i = 0; i < NVP_NELEM(nvp); i++, packed++)
- /*
- * Null out the pointer that is meaningless in the
- * packed structure. The address may not be aligned,
- * so we have to use bzero.
- */
- bzero(&packed->nvl_priv, sizeof (packed->nvl_priv));
- }
-
- return (nvs_embedded_nvl_array(nvs, nvp, NULL));
-}
-
-static void
-nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE: {
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
- uint64_t *strp = (void *)
- (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp));
- /*
- * Null out pointers that are meaningless in the packed
- * structure. The addresses may not be aligned, so we have
- * to use bzero.
- */
- bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t));
- break;
- }
- case NVS_OP_DECODE: {
- char **strp = (void *)NVP_VALUE(nvp);
- char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t));
- int i;
-
- for (i = 0; i < NVP_NELEM(nvp); i++) {
- strp[i] = buf;
- buf += strlen(buf) + 1;
- }
- break;
- }
- }
-}
-
-static int
-nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
-{
- data_type_t type;
- int value_sz;
- int ret = 0;
-
- /*
- * We do the initial bcopy of the data before we look at
- * the nvpair type, because when we're decoding, we won't
- * have the correct values for the pair until we do the bcopy.
- */
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- case NVS_OP_DECODE:
- if (native_cp(nvs, nvp, nvp->nvp_size) != 0)
- return (EFAULT);
- break;
- default:
- return (EINVAL);
- }
-
- /* verify nvp_name_sz, check the name string length */
- if (i_validate_nvpair_name(nvp) != 0)
- return (EFAULT);
-
- type = NVP_TYPE(nvp);
-
- /*
- * Verify type and nelem and get the value size.
- * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
- * is the size of the string(s) excluded.
- */
- if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)
- return (EFAULT);
-
- if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)
- return (EFAULT);
-
- switch (type) {
- case DATA_TYPE_NVLIST:
- ret = nvpair_native_embedded(nvs, nvp);
- break;
- case DATA_TYPE_NVLIST_ARRAY:
- ret = nvpair_native_embedded_array(nvs, nvp);
- break;
- case DATA_TYPE_STRING_ARRAY:
- nvpair_native_string_array(nvs, nvp);
- break;
- default:
- break;
- }
-
- return (ret);
-}
-
-static int
-nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
-{
- uint64_t nvp_sz = nvp->nvp_size;
-
- switch (NVP_TYPE(nvp)) {
- case DATA_TYPE_NVLIST: {
- size_t nvsize = 0;
-
- if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0)
- return (EINVAL);
-
- nvp_sz += nvsize;
- break;
- }
- case DATA_TYPE_NVLIST_ARRAY: {
- size_t nvsize;
-
- if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0)
- return (EINVAL);
-
- nvp_sz += nvsize;
- break;
- }
- default:
- break;
- }
-
- if (nvp_sz > INT32_MAX)
- return (EINVAL);
-
- *size = nvp_sz;
-
- return (0);
-}
-
-static int
-nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- return (nvs_native_nvp_op(nvs, nvp));
-
- case NVS_OP_DECODE: {
- nvs_native_t *native = (nvs_native_t *)nvs->nvs_private;
- int32_t decode_len;
-
- /* try to read the size value from the stream */
- if (native->n_curr + sizeof (int32_t) > native->n_end)
- return (EFAULT);
- bcopy(native->n_curr, &decode_len, sizeof (int32_t));
-
- /* sanity check the size value */
- if (decode_len < 0 ||
- decode_len > native->n_end - native->n_curr)
- return (EFAULT);
-
- *size = decode_len;
-
- /*
- * If at the end of the stream then move the cursor
- * forward, otherwise nvpair_native_op() will read
- * the entire nvpair at the same cursor position.
- */
- if (*size == 0)
- native->n_curr += sizeof (int32_t);
- break;
- }
-
- default:
- return (EINVAL);
- }
-
- return (0);
-}
-
-static const nvs_ops_t nvs_native_ops = {
- nvs_native_nvlist,
- nvs_native_nvpair,
- nvs_native_nvp_op,
- nvs_native_nvp_size,
- nvs_native_nvl_fini
-};
-
-static int
-nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
-{
- nvs_native_t native;
- int err;
-
- nvs->nvs_ops = &nvs_native_ops;
-
- if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t),
- *buflen - sizeof (nvs_header_t))) != 0)
- return (err);
-
- err = nvs_operation(nvs, nvl, buflen);
-
- nvs_native_destroy(nvs);
-
- return (err);
-}
-
-/*
- * XDR encoding functions
- *
- * An xdr packed nvlist is encoded as:
- *
- * - encoding methode and host endian (4 bytes)
- * - nvl_version (4 bytes)
- * - nvl_nvflag (4 bytes)
- *
- * - encoded nvpairs, the format of one xdr encoded nvpair is:
- * - encoded size of the nvpair (4 bytes)
- * - decoded size of the nvpair (4 bytes)
- * - name string, (4 + sizeof(NV_ALIGN4(string))
- * a string is coded as size (4 bytes) and data
- * - data type (4 bytes)
- * - number of elements in the nvpair (4 bytes)
- * - data
- *
- * - 2 zero's for end of the entire list (8 bytes)
- */
-static int
-nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen)
-{
- /* xdr data must be 4 byte aligned */
- if ((ulong_t)buf % 4 != 0)
- return (EFAULT);
-
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE);
- nvs->nvs_private = xdr;
- return (0);
- case NVS_OP_DECODE:
- xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE);
- nvs->nvs_private = xdr;
- return (0);
- case NVS_OP_GETSIZE:
- nvs->nvs_private = NULL;
- return (0);
- default:
- return (EINVAL);
- }
-}
-
-static void
-nvs_xdr_destroy(nvstream_t *nvs)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- case NVS_OP_DECODE:
- xdr_destroy((XDR *)nvs->nvs_private);
- break;
- default:
- break;
- }
-}
-
-static int
-nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size)
-{
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE:
- case NVS_OP_DECODE: {
- XDR *xdr = nvs->nvs_private;
-
- if (!xdr_int(xdr, &nvl->nvl_version) ||
- !xdr_u_int(xdr, &nvl->nvl_nvflag))
- return (EFAULT);
- break;
- }
- case NVS_OP_GETSIZE: {
- /*
- * 2 * 4 for nvl_version + nvl_nvflag
- * and 8 for end of the entire list
- */
- *size += 2 * 4 + 8;
- break;
- }
- default:
- return (EINVAL);
- }
- return (0);
-}
-
-static int
-nvs_xdr_nvl_fini(nvstream_t *nvs)
-{
- if (nvs->nvs_op == NVS_OP_ENCODE) {
- XDR *xdr = nvs->nvs_private;
- int zero = 0;
-
- if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero))
- return (EFAULT);
- }
-
- return (0);
-}
-
-/*
- * The format of xdr encoded nvpair is:
- * encode_size, decode_size, name string, data type, nelem, data
- */
-static int
-nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
-{
- data_type_t type;
- char *buf;
- char *buf_end = (char *)nvp + nvp->nvp_size;
- int value_sz;
- uint_t nelem, buflen;
- bool_t ret = FALSE;
- XDR *xdr = nvs->nvs_private;
-
- ASSERT(xdr != NULL && nvp != NULL);
-
- /* name string */
- if ((buf = NVP_NAME(nvp)) >= buf_end)
- return (EFAULT);
- buflen = buf_end - buf;
-
- if (!xdr_string(xdr, &buf, buflen - 1))
- return (EFAULT);
- nvp->nvp_name_sz = strlen(buf) + 1;
-
- /* type and nelem */
- if (!xdr_int(xdr, (int *)&nvp->nvp_type) ||
- !xdr_int(xdr, &nvp->nvp_value_elem))
- return (EFAULT);
-
- type = NVP_TYPE(nvp);
- nelem = nvp->nvp_value_elem;
-
- /*
- * Verify type and nelem and get the value size.
- * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
- * is the size of the string(s) excluded.
- */
- if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)
- return (EFAULT);
-
- /* if there is no data to extract then return */
- if (nelem == 0)
- return (0);
-
- /* value */
- if ((buf = NVP_VALUE(nvp)) >= buf_end)
- return (EFAULT);
- buflen = buf_end - buf;
-
- if (buflen < value_sz)
- return (EFAULT);
-
- switch (type) {
- case DATA_TYPE_NVLIST:
- if (nvs_embedded(nvs, (void *)buf) == 0)
- return (0);
- break;
-
- case DATA_TYPE_NVLIST_ARRAY:
- if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0)
- return (0);
- break;
-
- case DATA_TYPE_BOOLEAN:
- ret = TRUE;
- break;
-
- case DATA_TYPE_BYTE:
- case DATA_TYPE_INT8:
- case DATA_TYPE_UINT8:
- ret = xdr_char(xdr, buf);
- break;
-
- case DATA_TYPE_INT16:
- ret = xdr_short(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_UINT16:
- ret = xdr_u_short(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_BOOLEAN_VALUE:
- case DATA_TYPE_INT32:
- ret = xdr_int(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_UINT32:
- ret = xdr_u_int(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_INT64:
- ret = xdr_longlong_t(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_UINT64:
- ret = xdr_u_longlong_t(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_HRTIME:
- /*
- * NOTE: must expose the definition of hrtime_t here
- */
- ret = xdr_longlong_t(xdr, (void *)buf);
- break;
-
- case DATA_TYPE_STRING:
- ret = xdr_string(xdr, &buf, buflen - 1);
- break;
-
- case DATA_TYPE_BYTE_ARRAY:
- ret = xdr_opaque(xdr, buf, nelem);
- break;
-
- case DATA_TYPE_INT8_ARRAY:
- case DATA_TYPE_UINT8_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t),
- (xdrproc_t)xdr_char);
- break;
-
- case DATA_TYPE_INT16_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t),
- sizeof (int16_t), (xdrproc_t)xdr_short);
- break;
-
- case DATA_TYPE_UINT16_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t),
- sizeof (uint16_t), (xdrproc_t)xdr_u_short);
- break;
-
- case DATA_TYPE_BOOLEAN_ARRAY:
- case DATA_TYPE_INT32_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t),
- sizeof (int32_t), (xdrproc_t)xdr_int);
- break;
-
- case DATA_TYPE_UINT32_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t),
- sizeof (uint32_t), (xdrproc_t)xdr_u_int);
- break;
-
- case DATA_TYPE_INT64_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t),
- sizeof (int64_t), (xdrproc_t)xdr_longlong_t);
- break;
-
- case DATA_TYPE_UINT64_ARRAY:
- ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t),
- sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t);
- break;
-
- case DATA_TYPE_STRING_ARRAY: {
- size_t len = nelem * sizeof (uint64_t);
- char **strp = (void *)buf;
- int i;
-
- if (nvs->nvs_op == NVS_OP_DECODE)
- bzero(buf, len); /* don't trust packed data */
-
- for (i = 0; i < nelem; i++) {
- if (buflen <= len)
- return (EFAULT);
-
- buf += len;
- buflen -= len;
-
- if (xdr_string(xdr, &buf, buflen - 1) != TRUE)
- return (EFAULT);
-
- if (nvs->nvs_op == NVS_OP_DECODE)
- strp[i] = buf;
- len = strlen(buf) + 1;
- }
- ret = TRUE;
- break;
- }
- default:
- break;
- }
-
- return (ret == TRUE ? 0 : EFAULT);
-}
-
-static int
-nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
-{
- data_type_t type = NVP_TYPE(nvp);
- /*
- * encode_size + decode_size + name string size + data type + nelem
- * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp)))
- */
- uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4;
-
- switch (type) {
- case DATA_TYPE_BOOLEAN:
- break;
-
- case DATA_TYPE_BOOLEAN_VALUE:
- case DATA_TYPE_BYTE:
- case DATA_TYPE_INT8:
- case DATA_TYPE_UINT8:
- case DATA_TYPE_INT16:
- case DATA_TYPE_UINT16:
- case DATA_TYPE_INT32:
- case DATA_TYPE_UINT32:
- nvp_sz += 4; /* 4 is the minimum xdr unit */
- break;
-
- case DATA_TYPE_INT64:
- case DATA_TYPE_UINT64:
- case DATA_TYPE_HRTIME:
- nvp_sz += 8;
- break;
-
- case DATA_TYPE_STRING:
- nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp)));
- break;
-
- case DATA_TYPE_BYTE_ARRAY:
- nvp_sz += NV_ALIGN4(NVP_NELEM(nvp));
- break;
-
- case DATA_TYPE_BOOLEAN_ARRAY:
- case DATA_TYPE_INT8_ARRAY:
- case DATA_TYPE_UINT8_ARRAY:
- case DATA_TYPE_INT16_ARRAY:
- case DATA_TYPE_UINT16_ARRAY:
- case DATA_TYPE_INT32_ARRAY:
- case DATA_TYPE_UINT32_ARRAY:
- nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp);
- break;
-
- case DATA_TYPE_INT64_ARRAY:
- case DATA_TYPE_UINT64_ARRAY:
- nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp);
- break;
-
- case DATA_TYPE_STRING_ARRAY: {
- int i;
- char **strs = (void *)NVP_VALUE(nvp);
-
- for (i = 0; i < NVP_NELEM(nvp); i++)
- nvp_sz += 4 + NV_ALIGN4(strlen(strs[i]));
-
- break;
- }
-
- case DATA_TYPE_NVLIST:
- case DATA_TYPE_NVLIST_ARRAY: {
- size_t nvsize = 0;
- int old_nvs_op = nvs->nvs_op;
- int err;
-
- nvs->nvs_op = NVS_OP_GETSIZE;
- if (type == DATA_TYPE_NVLIST)
- err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize);
- else
- err = nvs_embedded_nvl_array(nvs, nvp, &nvsize);
- nvs->nvs_op = old_nvs_op;
-
- if (err != 0)
- return (EINVAL);
-
- nvp_sz += nvsize;
- break;
- }
-
- default:
- return (EINVAL);
- }
-
- if (nvp_sz > INT32_MAX)
- return (EINVAL);
-
- *size = nvp_sz;
-
- return (0);
-}
-
-
-/*
- * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates
- * the largest nvpair that could be encoded in the buffer.
- *
- * See comments above nvpair_xdr_op() for the format of xdr encoding.
- * The size of a xdr packed nvpair without any data is 5 words.
- *
- * Using the size of the data directly as an estimate would be ok
- * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY
- * then the actual nvpair has space for an array of pointers to index
- * the strings. These pointers are not encoded into the packed xdr buffer.
- *
- * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are
- * of length 0, then each string is endcoded in xdr format as a single word.
- * Therefore when expanded to an nvpair there will be 2.25 word used for
- * each string. (a int64_t allocated for pointer usage, and a single char
- * for the null termination.)
- *
- * This is the calculation performed by the NVS_XDR_MAX_LEN macro.
- */
-#define NVS_XDR_HDR_LEN ((size_t)(5 * 4))
-#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ? \
- 0 : ((size_t)(y) - NVS_XDR_HDR_LEN))
-#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \
- (NVS_XDR_DATA_LEN(x) * 2) + \
- NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4)))
-
-static int
-nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size)
-{
- XDR *xdr = nvs->nvs_private;
- int32_t encode_len, decode_len;
-
- switch (nvs->nvs_op) {
- case NVS_OP_ENCODE: {
- size_t nvsize;
-
- if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0)
- return (EFAULT);
-
- decode_len = nvp->nvp_size;
- encode_len = nvsize;
- if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
- return (EFAULT);
-
- return (nvs_xdr_nvp_op(nvs, nvp));
- }
- case NVS_OP_DECODE: {
- struct xdr_bytesrec bytesrec;
-
- /* get the encode and decode size */
- if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len))
- return (EFAULT);
- *size = decode_len;
-
- /* are we at the end of the stream? */
- if (*size == 0)
- return (0);
-
- /* sanity check the size parameter */
- if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec))
- return (EFAULT);
-
- if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail))
- return (EFAULT);
- break;
- }
-
- default:
- return (EINVAL);
- }
- return (0);
-}
-
-static const struct nvs_ops nvs_xdr_ops = {
- nvs_xdr_nvlist,
- nvs_xdr_nvpair,
- nvs_xdr_nvp_op,
- nvs_xdr_nvp_size,
- nvs_xdr_nvl_fini
-};
-
-static int
-nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen)
-{
- XDR xdr;
- int err;
-
- nvs->nvs_ops = &nvs_xdr_ops;
-
- if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t),
- *buflen - sizeof (nvs_header_t))) != 0)
- return (err);
-
- err = nvs_operation(nvs, nvl, buflen);
-
- nvs_xdr_destroy(nvs);
-
- return (err);
-}
diff --git a/sys/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c b/sys/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c
deleted file mode 100644
index 620171e..0000000
--- a/sys/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/nvpair.h>
-#include <sys/sysmacros.h>
-#if defined(_KERNEL) && !defined(_BOOT)
-#include <sys/varargs.h>
-#else
-#include <stdarg.h>
-#include <strings.h>
-#endif
-
-/*
- * This allocator is very simple.
- * - it uses a pre-allocated buffer for memory allocations.
- * - it does _not_ free memory in the pre-allocated buffer.
- *
- * The reason for the selected implemention is simplicity.
- * This allocator is designed for the usage in interrupt context when
- * the caller may not wait for free memory.
- */
-
-/* pre-allocated buffer for memory allocations */
-typedef struct nvbuf {
- uintptr_t nvb_buf; /* address of pre-allocated buffer */
- uintptr_t nvb_lim; /* limit address in the buffer */
- uintptr_t nvb_cur; /* current address in the buffer */
-} nvbuf_t;
-
-/*
- * Initialize the pre-allocated buffer allocator. The caller needs to supply
- *
- * buf address of pre-allocated buffer
- * bufsz size of pre-allocated buffer
- *
- * nv_fixed_init() calculates the remaining members of nvbuf_t.
- */
-static int
-nv_fixed_init(nv_alloc_t *nva, va_list valist)
-{
- uintptr_t base = va_arg(valist, uintptr_t);
- uintptr_t lim = base + va_arg(valist, size_t);
- nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t));
-
- if (base == 0 || (uintptr_t)&nvb[1] > lim)
- return (EINVAL);
-
- nvb->nvb_buf = (uintptr_t)&nvb[0];
- nvb->nvb_cur = (uintptr_t)&nvb[1];
- nvb->nvb_lim = lim;
- nva->nva_arg = nvb;
-
- return (0);
-}
-
-static void *
-nv_fixed_alloc(nv_alloc_t *nva, size_t size)
-{
- nvbuf_t *nvb = nva->nva_arg;
- uintptr_t new = nvb->nvb_cur;
-
- if (size == 0 || new + size > nvb->nvb_lim)
- return (NULL);
-
- nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t));
-
- return ((void *)new);
-}
-
-/*ARGSUSED*/
-static void
-nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size)
-{
- /* don't free memory in the pre-allocated buffer */
-}
-
-static void
-nv_fixed_reset(nv_alloc_t *nva)
-{
- nvbuf_t *nvb = nva->nva_arg;
-
- nvb->nvb_cur = (uintptr_t)&nvb[1];
-}
-
-const nv_alloc_ops_t nv_fixed_ops_def = {
- nv_fixed_init, /* nv_ao_init() */
- NULL, /* nv_ao_fini() */
- nv_fixed_alloc, /* nv_ao_alloc() */
- nv_fixed_free, /* nv_ao_free() */
- nv_fixed_reset /* nv_ao_reset() */
-};
-
-const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def;
diff --git a/sys/contrib/opensolaris/common/zfs/zfs_namecheck.c b/sys/contrib/opensolaris/common/zfs/zfs_namecheck.c
deleted file mode 100644
index 2004d86..0000000
--- a/sys/contrib/opensolaris/common/zfs/zfs_namecheck.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Common name validation routines for ZFS. These routines are shared by the
- * userland code as well as the ioctl() layer to ensure that we don't
- * inadvertently expose a hole through direct ioctl()s that never gets tested.
- * In userland, however, we want significantly more information about _why_ the
- * name is invalid. In the kernel, we only care whether it's valid or not.
- * Each routine therefore takes a 'namecheck_err_t' which describes exactly why
- * the name failed to validate.
- *
- * Each function returns 0 on success, -1 on error.
- */
-
-#if defined(_KERNEL)
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-
-#include <sys/param.h>
-#include "zfs_namecheck.h"
-
-static int
-valid_char(char c)
-{
- return ((c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- c == '-' || c == '_' || c == '.' || c == ':');
-}
-
-/*
- * Snapshot names must be made up of alphanumeric characters plus the following
- * characters:
- *
- * [-_.:]
- */
-int
-snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
-{
- const char *loc;
-
- if (strlen(path) >= MAXNAMELEN) {
- if (why)
- *why = NAME_ERR_TOOLONG;
- return (-1);
- }
-
- if (path[0] == '\0') {
- if (why)
- *why = NAME_ERR_EMPTY_COMPONENT;
- return (-1);
- }
-
- for (loc = path; *loc; loc++) {
- if (!valid_char(*loc)) {
- if (why) {
- *why = NAME_ERR_INVALCHAR;
- *what = *loc;
- }
- return (-1);
- }
- }
- return (0);
-}
-
-/*
- * Dataset names must be of the following form:
- *
- * [component][/]*[component][@component]
- *
- * Where each component is made up of alphanumeric characters plus the following
- * characters:
- *
- * [-_.:]
- */
-int
-dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
-{
- const char *loc, *end;
- int found_snapshot;
-
- /*
- * Make sure the name is not too long.
- *
- * ZFS_MAXNAMELEN is the maximum dataset length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- */
- if (strlen(path) >= MAXNAMELEN) {
- if (why)
- *why = NAME_ERR_TOOLONG;
- return (-1);
- }
-
- /* Explicitly check for a leading slash. */
- if (path[0] == '/') {
- if (why)
- *why = NAME_ERR_LEADING_SLASH;
- return (-1);
- }
-
- if (path[0] == '\0') {
- if (why)
- *why = NAME_ERR_EMPTY_COMPONENT;
- return (-1);
- }
-
- loc = path;
- found_snapshot = 0;
- for (;;) {
- /* Find the end of this component */
- end = loc;
- while (*end != '/' && *end != '@' && *end != '\0')
- end++;
-
- if (*end == '\0' && end[-1] == '/') {
- /* trailing slashes are not allowed */
- if (why)
- *why = NAME_ERR_TRAILING_SLASH;
- return (-1);
- }
-
- /* Zero-length components are not allowed */
- if (loc == end) {
- if (why) {
- /*
- * Make sure this is really a zero-length
- * component and not a '@@'.
- */
- if (*end == '@' && found_snapshot) {
- *why = NAME_ERR_MULTIPLE_AT;
- } else {
- *why = NAME_ERR_EMPTY_COMPONENT;
- }
- }
-
- return (-1);
- }
-
- /* Validate the contents of this component */
- while (loc != end) {
- if (!valid_char(*loc)) {
- if (why) {
- *why = NAME_ERR_INVALCHAR;
- *what = *loc;
- }
- return (-1);
- }
- loc++;
- }
-
- /* If we've reached the end of the string, we're OK */
- if (*end == '\0')
- return (0);
-
- if (*end == '@') {
- /*
- * If we've found an @ symbol, indicate that we're in
- * the snapshot component, and report a second '@'
- * character as an error.
- */
- if (found_snapshot) {
- if (why)
- *why = NAME_ERR_MULTIPLE_AT;
- return (-1);
- }
-
- found_snapshot = 1;
- }
-
- /*
- * If there is a '/' in a snapshot name
- * then report an error
- */
- if (*end == '/' && found_snapshot) {
- if (why)
- *why = NAME_ERR_TRAILING_SLASH;
- return (-1);
- }
-
- /* Update to the next component */
- loc = end + 1;
- }
-}
-
-/*
- * For pool names, we have the same set of valid characters as described in
- * dataset names, with the additional restriction that the pool name must begin
- * with a letter. The pool names 'raidz' and 'mirror' are also reserved names
- * that cannot be used.
- */
-int
-pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
-{
- const char *c;
-
- /*
- * Make sure the name is not too long.
- *
- * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- */
- if (strlen(pool) >= MAXNAMELEN) {
- if (why)
- *why = NAME_ERR_TOOLONG;
- return (-1);
- }
-
- c = pool;
- while (*c != '\0') {
- if (!valid_char(*c)) {
- if (why) {
- *why = NAME_ERR_INVALCHAR;
- *what = *c;
- }
- return (-1);
- }
- c++;
- }
-
- if (!(*pool >= 'a' && *pool <= 'z') &&
- !(*pool >= 'A' && *pool <= 'Z')) {
- if (why)
- *why = NAME_ERR_NOLETTER;
- return (-1);
- }
-
- if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) {
- if (why)
- *why = NAME_ERR_RESERVED;
- return (-1);
- }
-
- if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) {
- if (why)
- *why = NAME_ERR_DISKLIKE;
- return (-1);
- }
-
- return (0);
-}
-
-/*
- * Check if the dataset name is private for internal usage.
- * '$' is reserved for internal dataset names. e.g. "$MOS"
- *
- * Return 1 if the given name is used internally.
- * Return 0 if it is not.
- */
-int
-dataset_name_hidden(const char *name)
-{
- if (strchr(name, '$') != NULL)
- return (1);
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/common/zfs/zfs_namecheck.h b/sys/contrib/opensolaris/common/zfs/zfs_namecheck.h
deleted file mode 100644
index 7e0cda9..0000000
--- a/sys/contrib/opensolaris/common/zfs/zfs_namecheck.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZFS_NAMECHECK_H
-#define _ZFS_NAMECHECK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
- NAME_ERR_LEADING_SLASH, /* name begins with leading slash */
- NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */
- NAME_ERR_TRAILING_SLASH, /* name ends with a slash */
- NAME_ERR_INVALCHAR, /* invalid character found */
- NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */
- NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */
- NAME_ERR_RESERVED, /* entire name is reserved */
- NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */
- NAME_ERR_TOOLONG, /* name is too long */
-} namecheck_err_t;
-
-int pool_namecheck(const char *, namecheck_err_t *, char *);
-int dataset_namecheck(const char *, namecheck_err_t *, char *);
-int dataset_name_hidden(const char *);
-int snapshot_namecheck(const char *, namecheck_err_t *, char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZFS_NAMECHECK_H */
diff --git a/sys/contrib/opensolaris/common/zfs/zfs_prop.c b/sys/contrib/opensolaris/common/zfs/zfs_prop.c
deleted file mode 100644
index 7125619..0000000
--- a/sys/contrib/opensolaris/common/zfs/zfs_prop.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Master property table.
- *
- * This table keeps track of all the properties supported by ZFS, and their
- * various attributes. Not all of these are needed by the kernel, and several
- * are only used by a single libzfs client. But having them here centralizes
- * all property information in one location.
- *
- * name The human-readable string representing this property
- * proptype Basic type (string, boolean, number)
- * default Default value for the property. Sadly, C only allows
- * you to initialize the first member of a union, so we
- * have two default members for each property.
- * attr Attributes (readonly, inheritable) for the property
- * types Valid dataset types to which this applies
- * values String describing acceptable values for the property
- * colname The column header for 'zfs list'
- * colfmt The column formatting for 'zfs list'
- *
- * This table must match the order of property types in libzfs.h.
- */
-
-#include <sys/zio.h>
-#include <sys/spa.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_ioctl.h>
-
-#include "zfs_prop.h"
-
-#if defined(_KERNEL)
-#include <sys/systm.h>
-#else
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#endif
-
-typedef enum {
- prop_default,
- prop_readonly,
- prop_inherit
-} prop_attr_t;
-
-typedef struct {
- const char *pd_name;
- zfs_proptype_t pd_proptype;
- uint64_t pd_numdefault;
- const char *pd_strdefault;
- prop_attr_t pd_attr;
- int pd_types;
- const char *pd_values;
- const char *pd_colname;
- boolean_t pd_rightalign;
- boolean_t pd_visible;
-} prop_desc_t;
-
-static prop_desc_t zfs_prop_table[] = {
- { "type", prop_type_string, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, "filesystem | volume | snapshot", "TYPE", B_TRUE,
- B_TRUE },
- { "creation", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, "<date>", "CREATION", B_FALSE, B_TRUE },
- { "used", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, "<size>", "USED", B_TRUE, B_TRUE },
- { "available", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL", B_TRUE,
- B_TRUE },
- { "referenced", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY,
- "<size>", "REFER", B_TRUE, B_TRUE },
- { "compressratio", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, "<1.00x or higher if compressed>", "RATIO", B_TRUE,
- B_TRUE },
- { "mounted", prop_type_boolean, 0, NULL, prop_readonly,
- ZFS_TYPE_FILESYSTEM, "yes | no | -", "MOUNTED", B_TRUE, B_TRUE },
- { "origin", prop_type_string, 0, NULL, prop_readonly,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN",
- B_FALSE, B_TRUE },
- { "quota", prop_type_number, 0, NULL, prop_default,
- ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA", B_TRUE, B_TRUE },
- { "reservation", prop_type_number, 0, NULL, prop_default,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "<size> | none", "RESERV", B_TRUE, B_TRUE },
- { "volsize", prop_type_number, 0, NULL, prop_default,
- ZFS_TYPE_VOLUME, "<size>", "VOLSIZE", B_TRUE, B_TRUE },
- { "volblocksize", prop_type_number, 8192, NULL, prop_readonly,
- ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_TRUE,
- B_TRUE },
- { "recordsize", prop_type_number, SPA_MAXBLOCKSIZE, NULL,
- prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "512 to 128k, power of 2", "RECSIZE", B_TRUE, B_TRUE },
- { "mountpoint", prop_type_string, 0, "/", prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "<path> | legacy | none", "MOUNTPOINT", B_FALSE, B_TRUE },
- { "sharenfs", prop_type_string, 0, "off", prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "on | off | exports(5) options", "SHARENFS", B_FALSE, B_TRUE },
- { "checksum", prop_type_index, ZIO_CHECKSUM_DEFAULT, "on",
- prop_inherit, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", B_TRUE,
- B_TRUE },
- { "compression", prop_type_index, ZIO_COMPRESS_DEFAULT, "off",
- prop_inherit, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "on | off | lzjb | gzip | gzip-[1-9]", "COMPRESS", B_TRUE, B_TRUE },
- { "atime", prop_type_boolean, 1, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "on | off", "ATIME", B_TRUE, B_TRUE },
- { "devices", prop_type_boolean, 1, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "on | off", "DEVICES", B_TRUE, B_TRUE },
- { "exec", prop_type_boolean, 1, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "on | off", "EXEC", B_TRUE, B_TRUE },
- { "setuid", prop_type_boolean, 1, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID",
- B_TRUE, B_TRUE },
- { "readonly", prop_type_boolean, 0, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "on | off", "RDONLY", B_TRUE, B_TRUE },
- { "jailed", prop_type_boolean, 0, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "on | off", "JAILED", B_TRUE, B_TRUE },
- { "snapdir", prop_type_index, ZFS_SNAPDIR_HIDDEN, "hidden",
- prop_inherit,
- ZFS_TYPE_FILESYSTEM,
- "hidden | visible", "SNAPDIR", B_TRUE, B_TRUE },
- { "aclmode", prop_type_index, ZFS_ACL_GROUPMASK, "groupmask",
- prop_inherit, ZFS_TYPE_FILESYSTEM,
- "discard | groupmask | passthrough", "ACLMODE", B_TRUE, B_TRUE },
- { "aclinherit", prop_type_index, ZFS_ACL_SECURE, "secure",
- prop_inherit, ZFS_TYPE_FILESYSTEM,
- "discard | noallow | secure | passthrough", "ACLINHERIT", B_TRUE,
- B_TRUE },
- { "createtxg", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, NULL, NULL, B_FALSE, B_FALSE },
- { "name", prop_type_string, 0, NULL, prop_readonly,
- ZFS_TYPE_ANY, NULL, "NAME", B_FALSE, B_FALSE },
- { "canmount", prop_type_boolean, 1, NULL, prop_default,
- ZFS_TYPE_FILESYSTEM,
- "on | off", "CANMOUNT", B_TRUE, B_TRUE },
- { "shareiscsi", prop_type_string, 0, "off", prop_inherit,
- ZFS_TYPE_ANY,
- "on | off | type=<type>", "SHAREISCSI", B_FALSE, B_TRUE },
- { "iscsioptions", prop_type_string, 0, NULL, prop_inherit,
- ZFS_TYPE_VOLUME, NULL, "ISCSIOPTIONS", B_FALSE, B_FALSE },
- { "xattr", prop_type_boolean, 1, NULL, prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
- "on | off", "XATTR", B_TRUE, B_TRUE },
- { "numclones", prop_type_number, 0, NULL, prop_readonly,
- ZFS_TYPE_SNAPSHOT, NULL, NULL, B_FALSE, B_FALSE },
- { "copies", prop_type_index, 1, "1", prop_inherit,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "1 | 2 | 3", "COPIES", B_TRUE, B_TRUE },
- { "bootfs", prop_type_string, 0, NULL, prop_default,
- ZFS_TYPE_POOL, "<filesystem>", "BOOTFS", B_FALSE, B_TRUE },
-};
-
-#define ZFS_PROP_COUNT ((sizeof (zfs_prop_table))/(sizeof (prop_desc_t)))
-
-/*
- * Returns TRUE if the property applies to the given dataset types.
- */
-int
-zfs_prop_valid_for_type(zfs_prop_t prop, int types)
-{
- return ((zfs_prop_table[prop].pd_types & types) != 0);
-}
-
-/*
- * Determine if the specified property is visible or not.
- */
-boolean_t
-zfs_prop_is_visible(zfs_prop_t prop)
-{
- if (prop < 0)
- return (B_FALSE);
-
- return (zfs_prop_table[prop].pd_visible);
-}
-
-/*
- * Iterate over all properties, calling back into the specified function
- * for each property. We will continue to iterate until we either
- * reach the end or the callback function something other than
- * ZFS_PROP_CONT.
- */
-zfs_prop_t
-zfs_prop_iter_common(zfs_prop_f func, void *cb, zfs_type_t type,
- boolean_t show_all)
-{
- int i;
-
- for (i = 0; i < ZFS_PROP_COUNT; i++) {
- if (zfs_prop_valid_for_type(i, type) &&
- (zfs_prop_is_visible(i) || show_all)) {
- if (func(i, cb) != ZFS_PROP_CONT)
- return (i);
- }
- }
- return (ZFS_PROP_CONT);
-}
-
-zfs_prop_t
-zfs_prop_iter(zfs_prop_f func, void *cb, boolean_t show_all)
-{
- return (zfs_prop_iter_common(func, cb, ZFS_TYPE_ANY, show_all));
-}
-
-zpool_prop_t
-zpool_prop_iter(zpool_prop_f func, void *cb, boolean_t show_all)
-{
- return (zfs_prop_iter_common(func, cb, ZFS_TYPE_POOL, show_all));
-}
-
-zfs_proptype_t
-zfs_prop_get_type(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_proptype);
-}
-
-static boolean_t
-propname_match(const char *p, zfs_prop_t prop, size_t len)
-{
- const char *propname = zfs_prop_table[prop].pd_name;
-#ifndef _KERNEL
- const char *colname = zfs_prop_table[prop].pd_colname;
- int c;
-#endif
-
-#ifndef _KERNEL
- if (colname == NULL)
- return (B_FALSE);
-#endif
-
- if (len == strlen(propname) &&
- strncmp(p, propname, len) == 0)
- return (B_TRUE);
-
-#ifndef _KERNEL
- if (len != strlen(colname))
- return (B_FALSE);
-
- for (c = 0; c < len; c++)
- if (p[c] != tolower(colname[c]))
- break;
-
- return (colname[c] == '\0');
-#else
- return (B_FALSE);
-#endif
-}
-
-zfs_prop_t
-zfs_name_to_prop_cb(zfs_prop_t prop, void *cb_data)
-{
- const char *propname = cb_data;
-
- if (propname_match(propname, prop, strlen(propname)))
- return (prop);
-
- return (ZFS_PROP_CONT);
-}
-
-/*
- * Given a property name and its type, returns the corresponding property ID.
- */
-zfs_prop_t
-zfs_name_to_prop_common(const char *propname, zfs_type_t type)
-{
- zfs_prop_t prop;
-
- prop = zfs_prop_iter_common(zfs_name_to_prop_cb, (void *)propname,
- type, B_TRUE);
- return (prop == ZFS_PROP_CONT ? ZFS_PROP_INVAL : prop);
-}
-
-/*
- * Given a zfs dataset property name, returns the corresponding property ID.
- */
-zfs_prop_t
-zfs_name_to_prop(const char *propname)
-{
- return (zfs_name_to_prop_common(propname, ZFS_TYPE_ANY));
-}
-
-/*
- * Given a pool property name, returns the corresponding property ID.
- */
-zpool_prop_t
-zpool_name_to_prop(const char *propname)
-{
- return (zfs_name_to_prop_common(propname, ZFS_TYPE_POOL));
-}
-
-/*
- * For user property names, we allow all lowercase alphanumeric characters, plus
- * a few useful punctuation characters.
- */
-static int
-valid_char(char c)
-{
- return ((c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '9') ||
- c == '-' || c == '_' || c == '.' || c == ':');
-}
-
-/*
- * Returns true if this is a valid user-defined property (one with a ':').
- */
-boolean_t
-zfs_prop_user(const char *name)
-{
- int i;
- char c;
- boolean_t foundsep = B_FALSE;
-
- for (i = 0; i < strlen(name); i++) {
- c = name[i];
- if (!valid_char(c))
- return (B_FALSE);
- if (c == ':')
- foundsep = B_TRUE;
- }
-
- if (!foundsep)
- return (B_FALSE);
-
- return (B_TRUE);
-}
-
-/*
- * Return the default value for the given property.
- */
-const char *
-zfs_prop_default_string(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_strdefault);
-}
-
-uint64_t
-zfs_prop_default_numeric(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_numdefault);
-}
-
-/*
- * Returns TRUE if the property is readonly.
- */
-int
-zfs_prop_readonly(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_attr == prop_readonly);
-}
-
-/*
- * Given a dataset property ID, returns the corresponding name.
- * Assuming the zfs dataset propety ID is valid.
- */
-const char *
-zfs_prop_to_name(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_name);
-}
-
-/*
- * Given a pool property ID, returns the corresponding name.
- * Assuming the pool propety ID is valid.
- */
-const char *
-zpool_prop_to_name(zpool_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_name);
-}
-
-/*
- * Returns TRUE if the property is inheritable.
- */
-int
-zfs_prop_inheritable(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_attr == prop_inherit);
-}
-
-typedef struct zfs_index {
- const char *name;
- uint64_t index;
-} zfs_index_t;
-
-static zfs_index_t checksum_table[] = {
- { "on", ZIO_CHECKSUM_ON },
- { "off", ZIO_CHECKSUM_OFF },
- { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 },
- { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 },
- { "sha256", ZIO_CHECKSUM_SHA256 },
- { NULL }
-};
-
-static zfs_index_t compress_table[] = {
- { "on", ZIO_COMPRESS_ON },
- { "off", ZIO_COMPRESS_OFF },
- { "lzjb", ZIO_COMPRESS_LZJB },
- { "gzip", ZIO_COMPRESS_GZIP_6 }, /* the default gzip level */
- { "gzip-1", ZIO_COMPRESS_GZIP_1 },
- { "gzip-2", ZIO_COMPRESS_GZIP_2 },
- { "gzip-3", ZIO_COMPRESS_GZIP_3 },
- { "gzip-4", ZIO_COMPRESS_GZIP_4 },
- { "gzip-5", ZIO_COMPRESS_GZIP_5 },
- { "gzip-6", ZIO_COMPRESS_GZIP_6 },
- { "gzip-7", ZIO_COMPRESS_GZIP_7 },
- { "gzip-8", ZIO_COMPRESS_GZIP_8 },
- { "gzip-9", ZIO_COMPRESS_GZIP_9 },
- { NULL }
-};
-
-static zfs_index_t snapdir_table[] = {
- { "hidden", ZFS_SNAPDIR_HIDDEN },
- { "visible", ZFS_SNAPDIR_VISIBLE },
- { NULL }
-};
-
-static zfs_index_t acl_mode_table[] = {
- { "discard", ZFS_ACL_DISCARD },
- { "groupmask", ZFS_ACL_GROUPMASK },
- { "passthrough", ZFS_ACL_PASSTHROUGH },
- { NULL }
-};
-
-static zfs_index_t acl_inherit_table[] = {
- { "discard", ZFS_ACL_DISCARD },
- { "noallow", ZFS_ACL_NOALLOW },
- { "secure", ZFS_ACL_SECURE },
- { "passthrough", ZFS_ACL_PASSTHROUGH },
- { NULL }
-};
-
-static zfs_index_t copies_table[] = {
- { "1", 1 },
- { "2", 2 },
- { "3", 3 },
- { NULL }
-};
-
-static zfs_index_t *
-zfs_prop_index_table(zfs_prop_t prop)
-{
- switch (prop) {
- case ZFS_PROP_CHECKSUM:
- return (checksum_table);
- case ZFS_PROP_COMPRESSION:
- return (compress_table);
- case ZFS_PROP_SNAPDIR:
- return (snapdir_table);
- case ZFS_PROP_ACLMODE:
- return (acl_mode_table);
- case ZFS_PROP_ACLINHERIT:
- return (acl_inherit_table);
- case ZFS_PROP_COPIES:
- return (copies_table);
- default:
- return (NULL);
- }
-}
-
-
-/*
- * Tables of index types, plus functions to convert between the user view
- * (strings) and internal representation (uint64_t).
- */
-int
-zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index)
-{
- zfs_index_t *table;
- int i;
-
- if ((table = zfs_prop_index_table(prop)) == NULL)
- return (-1);
-
- for (i = 0; table[i].name != NULL; i++) {
- if (strcmp(string, table[i].name) == 0) {
- *index = table[i].index;
- return (0);
- }
- }
-
- return (-1);
-}
-
-int
-zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string)
-{
- zfs_index_t *table;
- int i;
-
- if ((table = zfs_prop_index_table(prop)) == NULL)
- return (-1);
-
- for (i = 0; table[i].name != NULL; i++) {
- if (table[i].index == index) {
- *string = table[i].name;
- return (0);
- }
- }
-
- return (-1);
-}
-
-#ifndef _KERNEL
-
-/*
- * Returns a string describing the set of acceptable values for the given
- * zfs property, or NULL if it cannot be set.
- */
-const char *
-zfs_prop_values(zfs_prop_t prop)
-{
- if (zfs_prop_table[prop].pd_types == ZFS_TYPE_POOL)
- return (NULL);
-
- return (zfs_prop_table[prop].pd_values);
-}
-
-/*
- * Returns a string describing the set of acceptable values for the given
- * zpool property, or NULL if it cannot be set.
- */
-const char *
-zpool_prop_values(zfs_prop_t prop)
-{
- if (zfs_prop_table[prop].pd_types != ZFS_TYPE_POOL)
- return (NULL);
-
- return (zfs_prop_table[prop].pd_values);
-}
-
-/*
- * Returns TRUE if this property is a string type. Note that index types
- * (compression, checksum) are treated as strings in userland, even though they
- * are stored numerically on disk.
- */
-int
-zfs_prop_is_string(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_proptype == prop_type_string ||
- zfs_prop_table[prop].pd_proptype == prop_type_index);
-}
-
-/*
- * Returns the column header for the given property. Used only in
- * 'zfs list -o', but centralized here with the other property information.
- */
-const char *
-zfs_prop_column_name(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_colname);
-}
-
-/*
- * Returns whether the given property should be displayed right-justified for
- * 'zfs list'.
- */
-boolean_t
-zfs_prop_align_right(zfs_prop_t prop)
-{
- return (zfs_prop_table[prop].pd_rightalign);
-}
-
-/*
- * Determines the minimum width for the column, and indicates whether it's fixed
- * or not. Only string columns are non-fixed.
- */
-size_t
-zfs_prop_width(zfs_prop_t prop, boolean_t *fixed)
-{
- prop_desc_t *pd = &zfs_prop_table[prop];
- zfs_index_t *idx;
- size_t ret;
- int i;
-
- *fixed = B_TRUE;
-
- /*
- * Start with the width of the column name.
- */
- ret = strlen(pd->pd_colname);
-
- /*
- * For fixed-width values, make sure the width is large enough to hold
- * any possible value.
- */
- switch (pd->pd_proptype) {
- case prop_type_number:
- /*
- * The maximum length of a human-readable number is 5 characters
- * ("20.4M", for example).
- */
- if (ret < 5)
- ret = 5;
- /*
- * 'creation' is handled specially because it's a number
- * internally, but displayed as a date string.
- */
- if (prop == ZFS_PROP_CREATION)
- *fixed = B_FALSE;
- break;
- case prop_type_boolean:
- /*
- * The maximum length of a boolean value is 3 characters, for
- * "off".
- */
- if (ret < 3)
- ret = 3;
- break;
- case prop_type_index:
- idx = zfs_prop_index_table(prop);
- for (i = 0; idx[i].name != NULL; i++) {
- if (strlen(idx[i].name) > ret)
- ret = strlen(idx[i].name);
- }
- break;
-
- case prop_type_string:
- *fixed = B_FALSE;
- break;
- }
-
- return (ret);
-}
-
-#endif
diff --git a/sys/contrib/opensolaris/common/zfs/zfs_prop.h b/sys/contrib/opensolaris/common/zfs/zfs_prop.h
deleted file mode 100644
index 133e740..0000000
--- a/sys/contrib/opensolaris/common/zfs/zfs_prop.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZFS_PROP_H
-#define _ZFS_PROP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/fs/zfs.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * For index types (e.g. compression and checksum), we want the numeric value
- * in the kernel, but the string value in userland.
- */
-typedef enum {
- prop_type_number, /* numeric value */
- prop_type_string, /* string value */
- prop_type_boolean, /* boolean value */
- prop_type_index /* numeric value indexed by string */
-} zfs_proptype_t;
-
-zfs_proptype_t zfs_prop_get_type(zfs_prop_t);
-size_t zfs_prop_width(zfs_prop_t, boolean_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZFS_PROP_H */
diff --git a/sys/contrib/opensolaris/uts/common/Makefile.files b/sys/contrib/opensolaris/uts/common/Makefile.files
deleted file mode 100644
index 1800e79..0000000
--- a/sys/contrib/opensolaris/uts/common/Makefile.files
+++ /dev/null
@@ -1,101 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# ident "%Z%%M% %I% %E% SMI"
-#
-# This Makefile defines all file modules for the directory uts/common
-# and its children. These are the source files which may be considered
-# common to all SunOS systems.
-
-ZFS_COMMON_OBJS += \
- arc.o \
- bplist.o \
- dbuf.o \
- dmu.o \
- dmu_send.o \
- dmu_object.o \
- dmu_objset.o \
- dmu_traverse.o \
- dmu_tx.o \
- dnode.o \
- dnode_sync.o \
- dsl_dir.o \
- dsl_dataset.o \
- dsl_pool.o \
- dsl_synctask.o \
- dmu_zfetch.o \
- dsl_prop.o \
- fletcher.o \
- gzip.o \
- lzjb.o \
- metaslab.o \
- refcount.o \
- sha256.o \
- spa.o \
- spa_config.o \
- spa_errlog.o \
- spa_history.o \
- spa_misc.o \
- space_map.o \
- txg.o \
- uberblock.o \
- unique.o \
- vdev.o \
- vdev_cache.o \
- vdev_label.o \
- vdev_mirror.o \
- vdev_missing.o \
- vdev_queue.o \
- vdev_raidz.o \
- vdev_root.o \
- zap.o \
- zap_leaf.o \
- zap_micro.o \
- zfs_byteswap.o \
- zfs_fm.o \
- zfs_znode.o \
- zil.o \
- zio.o \
- zio_checksum.o \
- zio_compress.o \
- zio_inject.o
-
-ZFS_SHARED_OBJS += \
- zfs_namecheck.o \
- zfs_prop.o
-
-ZFS_OBJS += \
- $(ZFS_COMMON_OBJS) \
- $(ZFS_SHARED_OBJS) \
- zfs_acl.o \
- zfs_ctldir.o \
- zfs_dir.o \
- zfs_ioctl.o \
- zfs_log.o \
- zfs_replay.o \
- zfs_rlock.o \
- zfs_vfsops.o \
- zfs_vnops.o \
- zvol.o
diff --git a/sys/contrib/opensolaris/uts/common/fs/gfs.c b/sys/contrib/opensolaris/uts/common/fs/gfs.c
deleted file mode 100644
index 738c9d4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/gfs.c
+++ /dev/null
@@ -1,884 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Portions Copyright 2007 Shivakumar GN */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/cmn_err.h>
-#include <sys/debug.h>
-#include <sys/dirent.h>
-#include <sys/kmem.h>
-#include <sys/mman.h>
-#include <sys/mutex.h>
-#include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/uio.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/cred.h>
-#include <sys/kdb.h>
-
-#include <sys/gfs.h>
-
-/*
- * Generic pseudo-filesystem routines.
- *
- * There are significant similarities between the implementation of certain file
- * system entry points across different filesystems. While one could attempt to
- * "choke up on the bat" and incorporate common functionality into a VOP
- * preamble or postamble, such an approach is limited in the benefit it can
- * provide. In this file we instead define a toolkit of routines which can be
- * called from a filesystem (with in-kernel pseudo-filesystems being the focus
- * of the exercise) in a more component-like fashion.
- *
- * There are three basic classes of routines:
- *
- * 1) Lowlevel support routines
- *
- * These routines are designed to play a support role for existing
- * pseudo-filesystems (such as procfs). They simplify common tasks,
- * without enforcing the filesystem to hand over management to GFS. The
- * routines covered are:
- *
- * gfs_readdir_init()
- * gfs_readdir_emit()
- * gfs_readdir_emitn()
- * gfs_readdir_pred()
- * gfs_readdir_fini()
- * gfs_lookup_dot()
- *
- * 2) Complete GFS management
- *
- * These routines take a more active role in management of the
- * pseudo-filesystem. They handle the relationship between vnode private
- * data and VFS data, as well as the relationship between vnodes in the
- * directory hierarchy.
- *
- * In order to use these interfaces, the first member of every private
- * v_data must be a gfs_file_t or a gfs_dir_t. This hands over all control
- * to GFS.
- *
- * gfs_file_create()
- * gfs_dir_create()
- * gfs_root_create()
- *
- * gfs_file_inactive()
- * gfs_dir_inactive()
- * gfs_dir_lookup()
- * gfs_dir_readdir()
- *
- * gfs_vop_inactive()
- * gfs_vop_lookup()
- * gfs_vop_readdir()
- * gfs_vop_map()
- *
- * 3) Single File pseudo-filesystems
- *
- * This routine creates a rooted file to be overlayed ontop of another
- * file in the physical filespace.
- *
- * Note that the parent is NULL (actually the vfs), but there is nothing
- * technically keeping such a file from utilizing the "Complete GFS
- * management" set of routines.
- *
- * gfs_root_create_file()
- */
-
-/*
- * Low level directory routines
- *
- * These routines provide some simple abstractions for reading directories.
- * They are designed to be used by existing pseudo filesystems (namely procfs)
- * that already have a complicated management infrastructure.
- */
-
-/*
- * gfs_readdir_init: initiate a generic readdir
- * st - a pointer to an uninitialized gfs_readdir_state_t structure
- * name_max - the directory's maximum file name length
- * ureclen - the exported file-space record length (1 for non-legacy FSs)
- * uiop - the uiop passed to readdir
- * parent - the parent directory's inode
- * self - this directory's inode
- *
- * Returns 0 or a non-zero errno.
- *
- * Typical VOP_READDIR usage of gfs_readdir_*:
- *
- * if ((error = gfs_readdir_init(...)) != 0)
- * return (error);
- * eof = 0;
- * while ((error = gfs_readdir_pred(..., &voffset)) != 0) {
- * if (!consumer_entry_at(voffset))
- * voffset = consumer_next_entry(voffset);
- * if (consumer_eof(voffset)) {
- * eof = 1
- * break;
- * }
- * if ((error = gfs_readdir_emit(..., voffset,
- * consumer_ino(voffset), consumer_name(voffset))) != 0)
- * break;
- * }
- * return (gfs_readdir_fini(..., error, eofp, eof));
- *
- * As you can see, a zero result from gfs_readdir_pred() or
- * gfs_readdir_emit() indicates that processing should continue,
- * whereas a non-zero result indicates that the loop should terminate.
- * Most consumers need do nothing more than let gfs_readdir_fini()
- * determine what the cause of failure was and return the appropriate
- * value.
- */
-int
-gfs_readdir_init(gfs_readdir_state_t *st, int name_max, int ureclen,
- uio_t *uiop, ino64_t parent, ino64_t self)
-{
- if (uiop->uio_loffset < 0 || uiop->uio_resid <= 0 ||
- (uiop->uio_loffset % ureclen) != 0)
- return (EINVAL);
-
- st->grd_ureclen = ureclen;
- st->grd_oresid = uiop->uio_resid;
- st->grd_namlen = name_max;
- st->grd_dirent = kmem_zalloc(DIRENT64_RECLEN(st->grd_namlen), KM_SLEEP);
- st->grd_parent = parent;
- st->grd_self = self;
-
- return (0);
-}
-
-/*
- * gfs_readdir_emit_int: internal routine to emit directory entry
- *
- * st - the current readdir state, which must have d_ino and d_name
- * set
- * uiop - caller-supplied uio pointer
- * next - the offset of the next entry
- */
-static int
-gfs_readdir_emit_int(gfs_readdir_state_t *st, uio_t *uiop, offset_t next,
- int *ncookies, u_long **cookies)
-{
- int reclen, namlen;
-
- namlen = strlen(st->grd_dirent->d_name);
- reclen = DIRENT64_RECLEN(namlen);
-
- if (reclen > uiop->uio_resid) {
- /*
- * Error if no entries were returned yet
- */
- if (uiop->uio_resid == st->grd_oresid)
- return (EINVAL);
- return (-1);
- }
-
- /* XXX: This can change in the future. */
- st->grd_dirent->d_type = DT_DIR;
- st->grd_dirent->d_reclen = (ushort_t)reclen;
- st->grd_dirent->d_namlen = namlen;
-
- if (uiomove((caddr_t)st->grd_dirent, reclen, UIO_READ, uiop))
- return (EFAULT);
-
- uiop->uio_loffset = next;
- if (*cookies != NULL) {
- **cookies = next;
- (*cookies)++;
- (*ncookies)--;
- KASSERT(*ncookies >= 0, ("ncookies=%d", *ncookies));
- }
-
- return (0);
-}
-
-/*
- * gfs_readdir_emit: emit a directory entry
- * voff - the virtual offset (obtained from gfs_readdir_pred)
- * ino - the entry's inode
- * name - the entry's name
- *
- * Returns a 0 on success, a non-zero errno on failure, or -1 if the
- * readdir loop should terminate. A non-zero result (either errno or
- * -1) from this function is typically passed directly to
- * gfs_readdir_fini().
- */
-int
-gfs_readdir_emit(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff,
- ino64_t ino, const char *name, int *ncookies, u_long **cookies)
-{
- offset_t off = (voff + 2) * st->grd_ureclen;
-
- st->grd_dirent->d_ino = ino;
- (void) strncpy(st->grd_dirent->d_name, name, st->grd_namlen);
-
- /*
- * Inter-entry offsets are invalid, so we assume a record size of
- * grd_ureclen and explicitly set the offset appropriately.
- */
- return (gfs_readdir_emit_int(st, uiop, off + st->grd_ureclen, ncookies,
- cookies));
-}
-
-/*
- * gfs_readdir_pred: readdir loop predicate
- * voffp - a pointer in which the next virtual offset should be stored
- *
- * Returns a 0 on success, a non-zero errno on failure, or -1 if the
- * readdir loop should terminate. A non-zero result (either errno or
- * -1) from this function is typically passed directly to
- * gfs_readdir_fini().
- */
-int
-gfs_readdir_pred(gfs_readdir_state_t *st, uio_t *uiop, offset_t *voffp,
- int *ncookies, u_long **cookies)
-{
- offset_t off, voff;
- int error;
-
-top:
- if (uiop->uio_resid <= 0)
- return (-1);
-
- off = uiop->uio_loffset / st->grd_ureclen;
- voff = off - 2;
- if (off == 0) {
- if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_self,
- ".", ncookies, cookies)) == 0)
- goto top;
- } else if (off == 1) {
- if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_parent,
- "..", ncookies, cookies)) == 0)
- goto top;
- } else {
- *voffp = voff;
- return (0);
- }
-
- return (error);
-}
-
-/*
- * gfs_readdir_fini: generic readdir cleanup
- * error - if positive, an error to return
- * eofp - the eofp passed to readdir
- * eof - the eof value
- *
- * Returns a 0 on success, a non-zero errno on failure. This result
- * should be returned from readdir.
- */
-int
-gfs_readdir_fini(gfs_readdir_state_t *st, int error, int *eofp, int eof)
-{
- kmem_free(st->grd_dirent, DIRENT64_RECLEN(st->grd_namlen));
- if (error > 0)
- return (error);
- if (eofp)
- *eofp = eof;
- return (0);
-}
-
-/*
- * gfs_lookup_dot
- *
- * Performs a basic check for "." and ".." directory entries.
- */
-int
-gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm)
-{
- if (*nm == '\0' || strcmp(nm, ".") == 0) {
- VN_HOLD(dvp);
- *vpp = dvp;
- return (0);
- } else if (strcmp(nm, "..") == 0) {
- if (pvp == NULL) {
- ASSERT(dvp->v_flag & VROOT);
- VN_HOLD(dvp);
- *vpp = dvp;
- } else {
- VN_HOLD(pvp);
- *vpp = pvp;
- }
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- return (0);
- }
-
- return (-1);
-}
-
-/*
- * gfs_file_create(): create a new GFS file
- *
- * size - size of private data structure (v_data)
- * pvp - parent vnode (GFS directory)
- * ops - vnode operations vector
- *
- * In order to use this interface, the parent vnode must have been created by
- * gfs_dir_create(), and the private data stored in v_data must have a
- * 'gfs_file_t' as its first field.
- *
- * Given these constraints, this routine will automatically:
- *
- * - Allocate v_data for the vnode
- * - Initialize necessary fields in the vnode
- * - Hold the parent
- */
-vnode_t *
-gfs_file_create(size_t size, vnode_t *pvp, vfs_t *vfsp, vnodeops_t *ops)
-{
- gfs_file_t *fp;
- vnode_t *vp;
- int error;
-
- /*
- * Allocate vnode and internal data structure
- */
- fp = kmem_zalloc(size, KM_SLEEP);
- error = getnewvnode("zfs", vfsp, ops, &vp);
- ASSERT(error == 0);
- vp->v_data = (caddr_t)fp;
-
- /*
- * Set up various pointers
- */
- fp->gfs_vnode = vp;
- fp->gfs_parent = pvp;
- fp->gfs_size = size;
- fp->gfs_type = GFS_FILE;
-
- error = insmntque(vp, vfsp);
- KASSERT(error == 0, ("insmntque() failed: error %d", error));
-
- /*
- * Initialize vnode and hold parent.
- */
- if (pvp)
- VN_HOLD(pvp);
-
- return (vp);
-}
-
-/*
- * gfs_dir_create: creates a new directory in the parent
- *
- * size - size of private data structure (v_data)
- * pvp - parent vnode (GFS directory)
- * ops - vnode operations vector
- * entries - NULL-terminated list of static entries (if any)
- * maxlen - maximum length of a directory entry
- * readdir_cb - readdir callback (see gfs_dir_readdir)
- * inode_cb - inode callback (see gfs_dir_readdir)
- * lookup_cb - lookup callback (see gfs_dir_lookup)
- *
- * In order to use this function, the first member of the private vnode
- * structure (v_data) must be a gfs_dir_t. For each directory, there are
- * static entries, defined when the structure is initialized, and dynamic
- * entries, retrieved through callbacks.
- *
- * If a directory has static entries, then it must supply a inode callback,
- * which will compute the inode number based on the parent and the index.
- * For a directory with dynamic entries, the caller must supply a readdir
- * callback and a lookup callback. If a static lookup fails, we fall back to
- * the supplied lookup callback, if any.
- *
- * This function also performs the same initialization as gfs_file_create().
- */
-vnode_t *
-gfs_dir_create(size_t struct_size, vnode_t *pvp, vfs_t *vfsp, vnodeops_t *ops,
- gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen,
- gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb)
-{
- vnode_t *vp;
- gfs_dir_t *dp;
- gfs_dirent_t *de;
-
- vp = gfs_file_create(struct_size, pvp, vfsp, ops);
- vp->v_type = VDIR;
-
- dp = vp->v_data;
- dp->gfsd_file.gfs_type = GFS_DIR;
- dp->gfsd_maxlen = maxlen;
-
- if (entries != NULL) {
- for (de = entries; de->gfse_name != NULL; de++)
- dp->gfsd_nstatic++;
-
- dp->gfsd_static = kmem_alloc(
- dp->gfsd_nstatic * sizeof (gfs_dirent_t), KM_SLEEP);
- bcopy(entries, dp->gfsd_static,
- dp->gfsd_nstatic * sizeof (gfs_dirent_t));
- }
-
- dp->gfsd_readdir = readdir_cb;
- dp->gfsd_lookup = lookup_cb;
- dp->gfsd_inode = inode_cb;
-
- mutex_init(&dp->gfsd_lock, NULL, MUTEX_DEFAULT, NULL);
-
- return (vp);
-}
-
-/*
- * gfs_root_create(): create a root vnode for a GFS filesystem
- *
- * Similar to gfs_dir_create(), this creates a root vnode for a filesystem. The
- * only difference is that it takes a vfs_t instead of a vnode_t as its parent.
- */
-vnode_t *
-gfs_root_create(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino,
- gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen,
- gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb)
-{
- vnode_t *vp;
-
- VFS_HOLD(vfsp);
- vp = gfs_dir_create(size, NULL, vfsp, ops, entries, inode_cb,
- maxlen, readdir_cb, lookup_cb);
- /* Manually set the inode */
- ((gfs_file_t *)vp->v_data)->gfs_ino = ino;
- vp->v_flag |= VROOT;
-
- return (vp);
-}
-
-/*
- * gfs_file_inactive()
- *
- * Called from the VOP_INACTIVE() routine. If necessary, this routine will
- * remove the given vnode from the parent directory and clean up any references
- * in the VFS layer.
- *
- * If the vnode was not removed (due to a race with vget), then NULL is
- * returned. Otherwise, a pointer to the private data is returned.
- */
-void *
-gfs_file_inactive(vnode_t *vp)
-{
- int i;
- gfs_dirent_t *ge = NULL;
- gfs_file_t *fp = vp->v_data;
- gfs_dir_t *dp = NULL;
- void *data;
-
- if (fp->gfs_parent == NULL)
- goto found;
-
- dp = fp->gfs_parent->v_data;
-
- /*
- * First, see if this vnode is cached in the parent.
- */
- gfs_dir_lock(dp);
-
- /*
- * Find it in the set of static entries.
- */
- for (i = 0; i < dp->gfsd_nstatic; i++) {
- ge = &dp->gfsd_static[i];
-
- if (ge->gfse_vnode == vp)
- goto found;
- }
-
- /*
- * If 'ge' is NULL, then it is a dynamic entry.
- */
- ge = NULL;
-
-found:
- VI_LOCK(vp);
- ASSERT(vp->v_count < 2);
- /*
- * Really remove this vnode
- */
- data = vp->v_data;
- if (ge != NULL) {
- /*
- * If this was a statically cached entry, simply set the
- * cached vnode to NULL.
- */
- ge->gfse_vnode = NULL;
- }
- if (vp->v_count == 1) {
- vp->v_usecount--;
- vdropl(vp);
- } else {
- VI_UNLOCK(vp);
- }
-
- /*
- * Free vnode and release parent
- */
- if (fp->gfs_parent) {
- gfs_dir_unlock(dp);
- VI_LOCK(fp->gfs_parent);
- fp->gfs_parent->v_usecount--;
- VI_UNLOCK(fp->gfs_parent);
- } else {
- ASSERT(vp->v_vfsp != NULL);
- VFS_RELE(vp->v_vfsp);
- }
-
- return (data);
-}
-
-/*
- * gfs_dir_inactive()
- *
- * Same as above, but for directories.
- */
-void *
-gfs_dir_inactive(vnode_t *vp)
-{
- gfs_dir_t *dp;
-
- ASSERT(vp->v_type == VDIR);
-
- if ((dp = gfs_file_inactive(vp)) != NULL) {
- mutex_destroy(&dp->gfsd_lock);
- if (dp->gfsd_nstatic)
- kmem_free(dp->gfsd_static,
- dp->gfsd_nstatic * sizeof (gfs_dirent_t));
- }
-
- return (dp);
-}
-
-/*
- * gfs_dir_lookup()
- *
- * Looks up the given name in the directory and returns the corresponding vnode,
- * if found.
- *
- * First, we search statically defined entries, if any. If a match is found,
- * and GFS_CACHE_VNODE is set and the vnode exists, we simply return the
- * existing vnode. Otherwise, we call the static entry's callback routine,
- * caching the result if necessary.
- *
- * If no static entry is found, we invoke the lookup callback, if any. The
- * arguments to this callback are:
- *
- * int gfs_lookup_cb(vnode_t *pvp, const char *nm, vnode_t **vpp);
- *
- * pvp - parent vnode
- * nm - name of entry
- * vpp - pointer to resulting vnode
- *
- * Returns 0 on success, non-zero on error.
- */
-int
-gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp)
-{
- int i;
- gfs_dirent_t *ge;
- vnode_t *vp;
- gfs_dir_t *dp = dvp->v_data;
- int ret = 0;
-
- ASSERT(dvp->v_type == VDIR);
-
- if (gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm) == 0)
- return (0);
-
- gfs_dir_lock(dp);
-
- /*
- * Search static entries.
- */
- for (i = 0; i < dp->gfsd_nstatic; i++) {
- ge = &dp->gfsd_static[i];
-
- if (strcmp(ge->gfse_name, nm) == 0) {
- if (ge->gfse_vnode) {
- ASSERT(ge->gfse_flags & GFS_CACHE_VNODE);
- vp = ge->gfse_vnode;
- VN_HOLD(vp);
- goto out;
- }
-
- /*
- * We drop the directory lock, as the constructor will
- * need to do KM_SLEEP allocations. If we return from
- * the constructor only to find that a parallel
- * operation has completed, and GFS_CACHE_VNODE is set
- * for this entry, we discard the result in favor of the
- * cached vnode.
- */
- gfs_dir_unlock(dp);
- vp = ge->gfse_ctor(dvp);
- gfs_dir_lock(dp);
-
- ((gfs_file_t *)vp->v_data)->gfs_index = i;
-
- /* Set the inode according to the callback. */
- ((gfs_file_t *)vp->v_data)->gfs_ino =
- dp->gfsd_inode(dvp, i);
-
- if (ge->gfse_flags & GFS_CACHE_VNODE) {
- if (ge->gfse_vnode == NULL) {
- ge->gfse_vnode = vp;
- } else {
- /*
- * A parallel constructor beat us to it;
- * return existing vnode. We have to be
- * careful because we can't release the
- * current vnode while holding the
- * directory lock; its inactive routine
- * will try to lock this directory.
- */
- vnode_t *oldvp = vp;
- vp = ge->gfse_vnode;
- VN_HOLD(vp);
-
- gfs_dir_unlock(dp);
- VN_RELE(oldvp);
- gfs_dir_lock(dp);
- }
- }
-
- goto out;
- }
- }
-
- /*
- * See if there is a dynamic constructor.
- */
- if (dp->gfsd_lookup) {
- ino64_t ino;
- gfs_file_t *fp;
-
- /*
- * Once again, drop the directory lock, as the lookup routine
- * will need to allocate memory, or otherwise deadlock on this
- * directory.
- */
- gfs_dir_unlock(dp);
- ret = dp->gfsd_lookup(dvp, nm, &vp, &ino);
- gfs_dir_lock(dp);
- if (ret != 0)
- goto out;
-
- fp = (gfs_file_t *)vp->v_data;
- fp->gfs_index = -1;
- fp->gfs_ino = ino;
- } else {
- /*
- * No static entry found, and there is no lookup callback, so
- * return ENOENT.
- */
- ret = ENOENT;
- }
-
-out:
- gfs_dir_unlock(dp);
-
- if (ret == 0)
- *vpp = vp;
- else
- *vpp = NULL;
-
- return (ret);
-}
-
-/*
- * gfs_dir_readdir: does a readdir() on the given directory
- *
- * dvp - directory vnode
- * uiop - uio structure
- * eofp - eof pointer
- * data - arbitrary data passed to readdir callback
- *
- * This routine does all the readdir() dirty work. Even so, the caller must
- * supply two callbacks in order to get full compatibility.
- *
- * If the directory contains static entries, an inode callback must be
- * specified. This avoids having to create every vnode and call VOP_GETATTR()
- * when reading the directory. This function has the following arguments:
- *
- * ino_t gfs_inode_cb(vnode_t *vp, int index);
- *
- * vp - vnode for the directory
- * index - index in original gfs_dirent_t array
- *
- * Returns the inode number for the given entry.
- *
- * For directories with dynamic entries, a readdir callback must be provided.
- * This is significantly more complex, thanks to the particulars of
- * VOP_READDIR().
- *
- * int gfs_readdir_cb(vnode_t *vp, struct dirent64 *dp, int *eofp,
- * offset_t *off, offset_t *nextoff, void *data)
- *
- * vp - directory vnode
- * dp - directory entry, sized according to maxlen given to
- * gfs_dir_create(). callback must fill in d_name and
- * d_ino.
- * eofp - callback must set to 1 when EOF has been reached
- * off - on entry, the last offset read from the directory. Callback
- * must set to the offset of the current entry, typically left
- * untouched.
- * nextoff - callback must set to offset of next entry. Typically
- * (off + 1)
- * data - caller-supplied data
- *
- * Return 0 on success, or error on failure.
- */
-int
-gfs_dir_readdir(vnode_t *dvp, uio_t *uiop, int *eofp, int *ncookies,
- u_long **cookies, void *data)
-{
- gfs_readdir_state_t gstate;
- int error, eof = 0;
- ino64_t ino, pino;
- offset_t off, next;
- gfs_dir_t *dp = dvp->v_data;
-
- ino = dp->gfsd_file.gfs_ino;
-
- if (dp->gfsd_file.gfs_parent == NULL)
- pino = ino; /* root of filesystem */
- else
- pino = ((gfs_file_t *)
- (dp->gfsd_file.gfs_parent->v_data))->gfs_ino;
-
- if ((error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1, uiop,
- pino, ino)) != 0)
- return (error);
-
- while ((error = gfs_readdir_pred(&gstate, uiop, &off, ncookies,
- cookies)) == 0 && !eof) {
-
- if (off >= 0 && off < dp->gfsd_nstatic) {
- ino = dp->gfsd_inode(dvp, off);
-
- if ((error = gfs_readdir_emit(&gstate, uiop,
- off, ino, dp->gfsd_static[off].gfse_name, ncookies,
- cookies)) != 0)
- break;
-
- } else if (dp->gfsd_readdir) {
- off -= dp->gfsd_nstatic;
-
- if ((error = dp->gfsd_readdir(dvp,
- gstate.grd_dirent, &eof, &off, &next,
- data)) != 0 || eof)
- break;
-
- off += dp->gfsd_nstatic + 2;
- next += dp->gfsd_nstatic + 2;
-
- if ((error = gfs_readdir_emit_int(&gstate, uiop,
- next, ncookies, cookies)) != 0)
- break;
- } else {
- /*
- * Offset is beyond the end of the static entries, and
- * we have no dynamic entries. Set EOF.
- */
- eof = 1;
- }
- }
-
- return (gfs_readdir_fini(&gstate, error, eofp, eof));
-}
-
-/*
- * gfs_vop_readdir: VOP_READDIR() entry point
- *
- * For use directly in vnode ops table. Given a GFS directory, calls
- * gfs_dir_readdir() as necessary.
- */
-/* ARGSUSED */
-int
-gfs_vop_readdir(ap)
- struct vop_readdir_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- struct ucred *a_cred;
- int *a_eofflag;
- int *ncookies;
- u_long **a_cookies;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- uio_t *uiop = ap->a_uio;
- int *eofp = ap->a_eofflag;
- int ncookies = 0;
- u_long *cookies = NULL;
- int error;
-
- if (ap->a_ncookies) {
- /*
- * Minimum entry size is dirent size and 1 byte for a file name.
- */
- ncookies = uiop->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
- cookies = malloc(ncookies * sizeof(u_long), M_TEMP, M_WAITOK);
- *ap->a_cookies = cookies;
- *ap->a_ncookies = ncookies;
- }
-
- error = gfs_dir_readdir(vp, uiop, eofp, &ncookies, &cookies, NULL);
-
- if (error == 0) {
- /* Subtract unused cookies */
- if (ap->a_ncookies)
- *ap->a_ncookies -= ncookies;
- } else if (ap->a_ncookies) {
- free(*ap->a_cookies, M_TEMP);
- *ap->a_cookies = NULL;
- *ap->a_ncookies = 0;
- }
-
- return (error);
-}
-
-/*
- * gfs_vop_inactive: VOP_INACTIVE() entry point
- *
- * Given a vnode that is a GFS file or directory, call gfs_file_inactive() or
- * gfs_dir_inactive() as necessary, and kmem_free()s associated private data.
- */
-/* ARGSUSED */
-int
-gfs_vop_inactive(ap)
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- gfs_file_t *fp = vp->v_data;
- void *data;
-
- if (fp->gfs_type == GFS_DIR)
- data = gfs_dir_inactive(vp);
- else
- data = gfs_file_inactive(vp);
-
- if (data != NULL)
- kmem_free(data, fp->gfs_size);
- vp->v_data = NULL;
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/contrib/opensolaris/uts/common/fs/zfs/arc.c
deleted file mode 100644
index 420f802..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ /dev/null
@@ -1,2859 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * DVA-based Adjustable Replacement Cache
- *
- * While much of the theory of operation used here is
- * based on the self-tuning, low overhead replacement cache
- * presented by Megiddo and Modha at FAST 2003, there are some
- * significant differences:
- *
- * 1. The Megiddo and Modha model assumes any page is evictable.
- * Pages in its cache cannot be "locked" into memory. This makes
- * the eviction algorithm simple: evict the last page in the list.
- * This also make the performance characteristics easy to reason
- * about. Our cache is not so simple. At any given moment, some
- * subset of the blocks in the cache are un-evictable because we
- * have handed out a reference to them. Blocks are only evictable
- * when there are no external references active. This makes
- * eviction far more problematic: we choose to evict the evictable
- * blocks that are the "lowest" in the list.
- *
- * There are times when it is not possible to evict the requested
- * space. In these circumstances we are unable to adjust the cache
- * size. To prevent the cache growing unbounded at these times we
- * implement a "cache throttle" that slowes the flow of new data
- * into the cache until we can make space avaiable.
- *
- * 2. The Megiddo and Modha model assumes a fixed cache size.
- * Pages are evicted when the cache is full and there is a cache
- * miss. Our model has a variable sized cache. It grows with
- * high use, but also tries to react to memory preasure from the
- * operating system: decreasing its size when system memory is
- * tight.
- *
- * 3. The Megiddo and Modha model assumes a fixed page size. All
- * elements of the cache are therefor exactly the same size. So
- * when adjusting the cache size following a cache miss, its simply
- * a matter of choosing a single page to evict. In our model, we
- * have variable sized cache blocks (rangeing from 512 bytes to
- * 128K bytes). We therefor choose a set of blocks to evict to make
- * space for a cache miss that approximates as closely as possible
- * the space used by the new block.
- *
- * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache"
- * by N. Megiddo & D. Modha, FAST 2003
- */
-
-/*
- * The locking model:
- *
- * A new reference to a cache buffer can be obtained in two
- * ways: 1) via a hash table lookup using the DVA as a key,
- * or 2) via one of the ARC lists. The arc_read() inerface
- * uses method 1, while the internal arc algorithms for
- * adjusting the cache use method 2. We therefor provide two
- * types of locks: 1) the hash table lock array, and 2) the
- * arc list locks.
- *
- * Buffers do not have their own mutexs, rather they rely on the
- * hash table mutexs for the bulk of their protection (i.e. most
- * fields in the arc_buf_hdr_t are protected by these mutexs).
- *
- * buf_hash_find() returns the appropriate mutex (held) when it
- * locates the requested buffer in the hash table. It returns
- * NULL for the mutex if the buffer was not in the table.
- *
- * buf_hash_remove() expects the appropriate hash mutex to be
- * already held before it is invoked.
- *
- * Each arc state also has a mutex which is used to protect the
- * buffer list associated with the state. When attempting to
- * obtain a hash table lock while holding an arc list lock you
- * must use: mutex_tryenter() to avoid deadlock. Also note that
- * the active state mutex must be held before the ghost state mutex.
- *
- * Arc buffers may have an associated eviction callback function.
- * This function will be invoked prior to removing the buffer (e.g.
- * in arc_do_user_evicts()). Note however that the data associated
- * with the buffer may be evicted prior to the callback. The callback
- * must be made with *no locks held* (to prevent deadlock). Additionally,
- * the users of callbacks must ensure that their private data is
- * protected from simultaneous callbacks from arc_buf_evict()
- * and arc_do_user_evicts().
- *
- * Note that the majority of the performance stats are manipulated
- * with atomic operations.
- */
-
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zfs_context.h>
-#include <sys/arc.h>
-#include <sys/refcount.h>
-#ifdef _KERNEL
-#include <sys/dnlc.h>
-#endif
-#include <sys/callb.h>
-#include <sys/kstat.h>
-#include <sys/sdt.h>
-
-static kmutex_t arc_reclaim_thr_lock;
-static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
-static uint8_t arc_thread_exit;
-
-#define ARC_REDUCE_DNLC_PERCENT 3
-uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT;
-
-typedef enum arc_reclaim_strategy {
- ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */
- ARC_RECLAIM_CONS /* Conservative reclaim strategy */
-} arc_reclaim_strategy_t;
-
-/* number of seconds before growing cache again */
-static int arc_grow_retry = 60;
-
-/*
- * minimum lifespan of a prefetch block in clock ticks
- * (initialized in arc_init())
- */
-static int arc_min_prefetch_lifespan;
-
-static int arc_dead;
-
-/*
- * These tunables are for performance analysis.
- */
-u_long zfs_arc_max;
-u_long zfs_arc_min;
-TUNABLE_ULONG("vfs.zfs.arc_max", &zfs_arc_max);
-TUNABLE_ULONG("vfs.zfs.arc_min", &zfs_arc_min);
-SYSCTL_DECL(_vfs_zfs);
-SYSCTL_ULONG(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
- "Maximum ARC size");
-SYSCTL_ULONG(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0,
- "Minimum ARC size");
-
-/*
- * Note that buffers can be on one of 5 states:
- * ARC_anon - anonymous (discussed below)
- * ARC_mru - recently used, currently cached
- * ARC_mru_ghost - recentely used, no longer in cache
- * ARC_mfu - frequently used, currently cached
- * ARC_mfu_ghost - frequently used, no longer in cache
- * When there are no active references to the buffer, they
- * are linked onto one of the lists in arc. These are the
- * only buffers that can be evicted or deleted.
- *
- * Anonymous buffers are buffers that are not associated with
- * a DVA. These are buffers that hold dirty block copies
- * before they are written to stable storage. By definition,
- * they are "ref'd" and are considered part of arc_mru
- * that cannot be freed. Generally, they will aquire a DVA
- * as they are written and migrate onto the arc_mru list.
- */
-
-typedef struct arc_state {
- list_t arcs_list; /* linked list of evictable buffer in state */
- uint64_t arcs_lsize; /* total size of buffers in the linked list */
- uint64_t arcs_size; /* total size of all buffers in this state */
- kmutex_t arcs_mtx;
-} arc_state_t;
-
-/* The 5 states: */
-static arc_state_t ARC_anon;
-static arc_state_t ARC_mru;
-static arc_state_t ARC_mru_ghost;
-static arc_state_t ARC_mfu;
-static arc_state_t ARC_mfu_ghost;
-
-typedef struct arc_stats {
- kstat_named_t arcstat_hits;
- kstat_named_t arcstat_misses;
- kstat_named_t arcstat_demand_data_hits;
- kstat_named_t arcstat_demand_data_misses;
- kstat_named_t arcstat_demand_metadata_hits;
- kstat_named_t arcstat_demand_metadata_misses;
- kstat_named_t arcstat_prefetch_data_hits;
- kstat_named_t arcstat_prefetch_data_misses;
- kstat_named_t arcstat_prefetch_metadata_hits;
- kstat_named_t arcstat_prefetch_metadata_misses;
- kstat_named_t arcstat_mru_hits;
- kstat_named_t arcstat_mru_ghost_hits;
- kstat_named_t arcstat_mfu_hits;
- kstat_named_t arcstat_mfu_ghost_hits;
- kstat_named_t arcstat_deleted;
- kstat_named_t arcstat_recycle_miss;
- kstat_named_t arcstat_mutex_miss;
- kstat_named_t arcstat_evict_skip;
- kstat_named_t arcstat_hash_elements;
- kstat_named_t arcstat_hash_elements_max;
- kstat_named_t arcstat_hash_collisions;
- kstat_named_t arcstat_hash_chains;
- kstat_named_t arcstat_hash_chain_max;
- kstat_named_t arcstat_p;
- kstat_named_t arcstat_c;
- kstat_named_t arcstat_c_min;
- kstat_named_t arcstat_c_max;
- kstat_named_t arcstat_size;
-} arc_stats_t;
-
-static arc_stats_t arc_stats = {
- { "hits", KSTAT_DATA_UINT64 },
- { "misses", KSTAT_DATA_UINT64 },
- { "demand_data_hits", KSTAT_DATA_UINT64 },
- { "demand_data_misses", KSTAT_DATA_UINT64 },
- { "demand_metadata_hits", KSTAT_DATA_UINT64 },
- { "demand_metadata_misses", KSTAT_DATA_UINT64 },
- { "prefetch_data_hits", KSTAT_DATA_UINT64 },
- { "prefetch_data_misses", KSTAT_DATA_UINT64 },
- { "prefetch_metadata_hits", KSTAT_DATA_UINT64 },
- { "prefetch_metadata_misses", KSTAT_DATA_UINT64 },
- { "mru_hits", KSTAT_DATA_UINT64 },
- { "mru_ghost_hits", KSTAT_DATA_UINT64 },
- { "mfu_hits", KSTAT_DATA_UINT64 },
- { "mfu_ghost_hits", KSTAT_DATA_UINT64 },
- { "deleted", KSTAT_DATA_UINT64 },
- { "recycle_miss", KSTAT_DATA_UINT64 },
- { "mutex_miss", KSTAT_DATA_UINT64 },
- { "evict_skip", KSTAT_DATA_UINT64 },
- { "hash_elements", KSTAT_DATA_UINT64 },
- { "hash_elements_max", KSTAT_DATA_UINT64 },
- { "hash_collisions", KSTAT_DATA_UINT64 },
- { "hash_chains", KSTAT_DATA_UINT64 },
- { "hash_chain_max", KSTAT_DATA_UINT64 },
- { "p", KSTAT_DATA_UINT64 },
- { "c", KSTAT_DATA_UINT64 },
- { "c_min", KSTAT_DATA_UINT64 },
- { "c_max", KSTAT_DATA_UINT64 },
- { "size", KSTAT_DATA_UINT64 }
-};
-
-#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
-
-#define ARCSTAT_INCR(stat, val) \
- atomic_add_64(&arc_stats.stat.value.ui64, (val));
-
-#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1)
-#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
-
-#define ARCSTAT_MAX(stat, val) { \
- uint64_t m; \
- while ((val) > (m = arc_stats.stat.value.ui64) && \
- (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \
- continue; \
-}
-
-#define ARCSTAT_MAXSTAT(stat) \
- ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
-
-/*
- * We define a macro to allow ARC hits/misses to be easily broken down by
- * two separate conditions, giving a total of four different subtypes for
- * each of hits and misses (so eight statistics total).
- */
-#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \
- if (cond1) { \
- if (cond2) { \
- ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \
- } else { \
- ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \
- } \
- } else { \
- if (cond2) { \
- ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \
- } else { \
- ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\
- } \
- }
-
-kstat_t *arc_ksp;
-static arc_state_t *arc_anon;
-static arc_state_t *arc_mru;
-static arc_state_t *arc_mru_ghost;
-static arc_state_t *arc_mfu;
-static arc_state_t *arc_mfu_ghost;
-
-/*
- * There are several ARC variables that are critical to export as kstats --
- * but we don't want to have to grovel around in the kstat whenever we wish to
- * manipulate them. For these variables, we therefore define them to be in
- * terms of the statistic variable. This assures that we are not introducing
- * the possibility of inconsistency by having shadow copies of the variables,
- * while still allowing the code to be readable.
- */
-#define arc_size ARCSTAT(arcstat_size) /* actual total arc size */
-#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */
-#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
-#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
-#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
-
-static int arc_no_grow; /* Don't try to grow cache size */
-static uint64_t arc_tempreserve;
-
-typedef struct arc_callback arc_callback_t;
-
-struct arc_callback {
- void *acb_private;
- arc_done_func_t *acb_done;
- arc_byteswap_func_t *acb_byteswap;
- arc_buf_t *acb_buf;
- zio_t *acb_zio_dummy;
- arc_callback_t *acb_next;
-};
-
-typedef struct arc_write_callback arc_write_callback_t;
-
-struct arc_write_callback {
- void *awcb_private;
- arc_done_func_t *awcb_ready;
- arc_done_func_t *awcb_done;
- arc_buf_t *awcb_buf;
-};
-
-struct arc_buf_hdr {
- /* protected by hash lock */
- dva_t b_dva;
- uint64_t b_birth;
- uint64_t b_cksum0;
-
- kmutex_t b_freeze_lock;
- zio_cksum_t *b_freeze_cksum;
-
- arc_buf_hdr_t *b_hash_next;
- arc_buf_t *b_buf;
- uint32_t b_flags;
- uint32_t b_datacnt;
-
- arc_callback_t *b_acb;
- kcondvar_t b_cv;
-
- /* immutable */
- arc_buf_contents_t b_type;
- uint64_t b_size;
- spa_t *b_spa;
-
- /* protected by arc state mutex */
- arc_state_t *b_state;
- list_node_t b_arc_node;
-
- /* updated atomically */
- clock_t b_arc_access;
-
- /* self protecting */
- refcount_t b_refcnt;
-};
-
-static arc_buf_t *arc_eviction_list;
-static kmutex_t arc_eviction_mtx;
-static arc_buf_hdr_t arc_eviction_hdr;
-static void arc_get_data_buf(arc_buf_t *buf);
-static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
-
-#define GHOST_STATE(state) \
- ((state) == arc_mru_ghost || (state) == arc_mfu_ghost)
-
-/*
- * Private ARC flags. These flags are private ARC only flags that will show up
- * in b_flags in the arc_hdr_buf_t. Some flags are publicly declared, and can
- * be passed in as arc_flags in things like arc_read. However, these flags
- * should never be passed and should only be set by ARC code. When adding new
- * public flags, make sure not to smash the private ones.
- */
-
-#define ARC_IN_HASH_TABLE (1 << 9) /* this buffer is hashed */
-#define ARC_IO_IN_PROGRESS (1 << 10) /* I/O in progress for buf */
-#define ARC_IO_ERROR (1 << 11) /* I/O failed for buf */
-#define ARC_FREED_IN_READ (1 << 12) /* buf freed while in read */
-#define ARC_BUF_AVAILABLE (1 << 13) /* block not in active use */
-#define ARC_INDIRECT (1 << 14) /* this is an indirect block */
-
-#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE)
-#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS)
-#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_IO_ERROR)
-#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FREED_IN_READ)
-#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE)
-
-/*
- * Hash table routines
- */
-
-#define HT_LOCK_PAD 128
-
-struct ht_lock {
- kmutex_t ht_lock;
-#ifdef _KERNEL
- unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
-#endif
-};
-
-#define BUF_LOCKS 256
-typedef struct buf_hash_table {
- uint64_t ht_mask;
- arc_buf_hdr_t **ht_table;
- struct ht_lock ht_locks[BUF_LOCKS];
-} buf_hash_table_t;
-
-static buf_hash_table_t buf_hash_table;
-
-#define BUF_HASH_INDEX(spa, dva, birth) \
- (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
-#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
-#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
-#define HDR_LOCK(buf) \
- (BUF_HASH_LOCK(BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth)))
-
-uint64_t zfs_crc64_table[256];
-
-static uint64_t
-buf_hash(spa_t *spa, dva_t *dva, uint64_t birth)
-{
- uintptr_t spav = (uintptr_t)spa;
- uint8_t *vdva = (uint8_t *)dva;
- uint64_t crc = -1ULL;
- int i;
-
- ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
-
- for (i = 0; i < sizeof (dva_t); i++)
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF];
-
- crc ^= (spav>>8) ^ birth;
-
- return (crc);
-}
-
-#define BUF_EMPTY(buf) \
- ((buf)->b_dva.dva_word[0] == 0 && \
- (buf)->b_dva.dva_word[1] == 0 && \
- (buf)->b_birth == 0)
-
-#define BUF_EQUAL(spa, dva, birth, buf) \
- ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
- ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \
- ((buf)->b_birth == birth) && ((buf)->b_spa == spa)
-
-static arc_buf_hdr_t *
-buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t **lockp)
-{
- uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
- kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
- arc_buf_hdr_t *buf;
-
- mutex_enter(hash_lock);
- for (buf = buf_hash_table.ht_table[idx]; buf != NULL;
- buf = buf->b_hash_next) {
- if (BUF_EQUAL(spa, dva, birth, buf)) {
- *lockp = hash_lock;
- return (buf);
- }
- }
- mutex_exit(hash_lock);
- *lockp = NULL;
- return (NULL);
-}
-
-/*
- * Insert an entry into the hash table. If there is already an element
- * equal to elem in the hash table, then the already existing element
- * will be returned and the new element will not be inserted.
- * Otherwise returns NULL.
- */
-static arc_buf_hdr_t *
-buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
-{
- uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
- kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
- arc_buf_hdr_t *fbuf;
- uint32_t i;
-
- ASSERT(!HDR_IN_HASH_TABLE(buf));
- *lockp = hash_lock;
- mutex_enter(hash_lock);
- for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL;
- fbuf = fbuf->b_hash_next, i++) {
- if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf))
- return (fbuf);
- }
-
- buf->b_hash_next = buf_hash_table.ht_table[idx];
- buf_hash_table.ht_table[idx] = buf;
- buf->b_flags |= ARC_IN_HASH_TABLE;
-
- /* collect some hash table performance data */
- if (i > 0) {
- ARCSTAT_BUMP(arcstat_hash_collisions);
- if (i == 1)
- ARCSTAT_BUMP(arcstat_hash_chains);
-
- ARCSTAT_MAX(arcstat_hash_chain_max, i);
- }
-
- ARCSTAT_BUMP(arcstat_hash_elements);
- ARCSTAT_MAXSTAT(arcstat_hash_elements);
-
- return (NULL);
-}
-
-static void
-buf_hash_remove(arc_buf_hdr_t *buf)
-{
- arc_buf_hdr_t *fbuf, **bufp;
- uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
-
- ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx)));
- ASSERT(HDR_IN_HASH_TABLE(buf));
-
- bufp = &buf_hash_table.ht_table[idx];
- while ((fbuf = *bufp) != buf) {
- ASSERT(fbuf != NULL);
- bufp = &fbuf->b_hash_next;
- }
- *bufp = buf->b_hash_next;
- buf->b_hash_next = NULL;
- buf->b_flags &= ~ARC_IN_HASH_TABLE;
-
- /* collect some hash table performance data */
- ARCSTAT_BUMPDOWN(arcstat_hash_elements);
-
- if (buf_hash_table.ht_table[idx] &&
- buf_hash_table.ht_table[idx]->b_hash_next == NULL)
- ARCSTAT_BUMPDOWN(arcstat_hash_chains);
-}
-
-/*
- * Global data structures and functions for the buf kmem cache.
- */
-static kmem_cache_t *hdr_cache;
-static kmem_cache_t *buf_cache;
-
-static void
-buf_fini(void)
-{
- int i;
-
- kmem_free(buf_hash_table.ht_table,
- (buf_hash_table.ht_mask + 1) * sizeof (void *));
- for (i = 0; i < BUF_LOCKS; i++)
- mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
- kmem_cache_destroy(hdr_cache);
- kmem_cache_destroy(buf_cache);
-}
-
-/*
- * Constructor callback - called when the cache is empty
- * and a new buf is requested.
- */
-/* ARGSUSED */
-static int
-hdr_cons(void *vbuf, void *unused, int kmflag)
-{
- arc_buf_hdr_t *buf = vbuf;
-
- bzero(buf, sizeof (arc_buf_hdr_t));
- refcount_create(&buf->b_refcnt);
- cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
- return (0);
-}
-
-/*
- * Destructor callback - called when a cached buf is
- * no longer required.
- */
-/* ARGSUSED */
-static void
-hdr_dest(void *vbuf, void *unused)
-{
- arc_buf_hdr_t *buf = vbuf;
-
- refcount_destroy(&buf->b_refcnt);
- cv_destroy(&buf->b_cv);
-}
-
-/*
- * Reclaim callback -- invoked when memory is low.
- */
-/* ARGSUSED */
-static void
-hdr_recl(void *unused)
-{
- dprintf("hdr_recl called\n");
- /*
- * umem calls the reclaim func when we destroy the buf cache,
- * which is after we do arc_fini().
- */
- if (!arc_dead)
- cv_signal(&arc_reclaim_thr_cv);
-}
-
-static void
-buf_init(void)
-{
- uint64_t *ct;
- uint64_t hsize = 1ULL << 12;
- int i, j;
-
- /*
- * The hash table is big enough to fill all of physical memory
- * with an average 64K block size. The table will take up
- * totalmem*sizeof(void*)/64K (eg. 128KB/GB with 8-byte pointers).
- */
- while (hsize * 65536 < (uint64_t)physmem * PAGESIZE)
- hsize <<= 1;
-retry:
- buf_hash_table.ht_mask = hsize - 1;
- buf_hash_table.ht_table =
- kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP);
- if (buf_hash_table.ht_table == NULL) {
- ASSERT(hsize > (1ULL << 8));
- hsize >>= 1;
- goto retry;
- }
-
- hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
- 0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
- buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
- 0, NULL, NULL, NULL, NULL, NULL, 0);
-
- for (i = 0; i < 256; i++)
- for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
- *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
-
- for (i = 0; i < BUF_LOCKS; i++) {
- mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
- NULL, MUTEX_DEFAULT, NULL);
- }
-}
-
-#define ARC_MINTIME (hz>>4) /* 62 ms */
-
-static void
-arc_cksum_verify(arc_buf_t *buf)
-{
- zio_cksum_t zc;
-
- if (!(zfs_flags & ZFS_DEBUG_MODIFY))
- return;
-
- mutex_enter(&buf->b_hdr->b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum == NULL ||
- (buf->b_hdr->b_flags & ARC_IO_ERROR)) {
- mutex_exit(&buf->b_hdr->b_freeze_lock);
- return;
- }
- fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
- if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
- panic("buffer modified while frozen!");
- mutex_exit(&buf->b_hdr->b_freeze_lock);
-}
-
-static void
-arc_cksum_compute(arc_buf_t *buf)
-{
- if (!(zfs_flags & ZFS_DEBUG_MODIFY))
- return;
-
- mutex_enter(&buf->b_hdr->b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum != NULL) {
- mutex_exit(&buf->b_hdr->b_freeze_lock);
- return;
- }
- buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
- fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
- buf->b_hdr->b_freeze_cksum);
- mutex_exit(&buf->b_hdr->b_freeze_lock);
-}
-
-void
-arc_buf_thaw(arc_buf_t *buf)
-{
- if (!(zfs_flags & ZFS_DEBUG_MODIFY))
- return;
-
- if (buf->b_hdr->b_state != arc_anon)
- panic("modifying non-anon buffer!");
- if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS)
- panic("modifying buffer while i/o in progress!");
- arc_cksum_verify(buf);
- mutex_enter(&buf->b_hdr->b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum != NULL) {
- kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
- buf->b_hdr->b_freeze_cksum = NULL;
- }
- mutex_exit(&buf->b_hdr->b_freeze_lock);
-}
-
-void
-arc_buf_freeze(arc_buf_t *buf)
-{
- if (!(zfs_flags & ZFS_DEBUG_MODIFY))
- return;
-
- ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
- buf->b_hdr->b_state == arc_anon);
- arc_cksum_compute(buf);
-}
-
-static void
-add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
-{
- ASSERT(MUTEX_HELD(hash_lock));
-
- if ((refcount_add(&ab->b_refcnt, tag) == 1) &&
- (ab->b_state != arc_anon)) {
- uint64_t delta = ab->b_size * ab->b_datacnt;
-
- ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx));
- mutex_enter(&ab->b_state->arcs_mtx);
- ASSERT(list_link_active(&ab->b_arc_node));
- list_remove(&ab->b_state->arcs_list, ab);
- if (GHOST_STATE(ab->b_state)) {
- ASSERT3U(ab->b_datacnt, ==, 0);
- ASSERT3P(ab->b_buf, ==, NULL);
- delta = ab->b_size;
- }
- ASSERT(delta > 0);
- ASSERT3U(ab->b_state->arcs_lsize, >=, delta);
- atomic_add_64(&ab->b_state->arcs_lsize, -delta);
- mutex_exit(&ab->b_state->arcs_mtx);
- /* remove the prefetch flag is we get a reference */
- if (ab->b_flags & ARC_PREFETCH)
- ab->b_flags &= ~ARC_PREFETCH;
- }
-}
-
-static int
-remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
-{
- int cnt;
- arc_state_t *state = ab->b_state;
-
- ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
- ASSERT(!GHOST_STATE(state));
-
- if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) &&
- (state != arc_anon)) {
- ASSERT(!MUTEX_HELD(&state->arcs_mtx));
- mutex_enter(&state->arcs_mtx);
- ASSERT(!list_link_active(&ab->b_arc_node));
- list_insert_head(&state->arcs_list, ab);
- ASSERT(ab->b_datacnt > 0);
- atomic_add_64(&state->arcs_lsize, ab->b_size * ab->b_datacnt);
- ASSERT3U(state->arcs_size, >=, state->arcs_lsize);
- mutex_exit(&state->arcs_mtx);
- }
- return (cnt);
-}
-
-/*
- * Move the supplied buffer to the indicated state. The mutex
- * for the buffer must be held by the caller.
- */
-static void
-arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock)
-{
- arc_state_t *old_state = ab->b_state;
- int64_t refcnt = refcount_count(&ab->b_refcnt);
- uint64_t from_delta, to_delta;
-
- ASSERT(MUTEX_HELD(hash_lock));
- ASSERT(new_state != old_state);
- ASSERT(refcnt == 0 || ab->b_datacnt > 0);
- ASSERT(ab->b_datacnt == 0 || !GHOST_STATE(new_state));
-
- from_delta = to_delta = ab->b_datacnt * ab->b_size;
-
- /*
- * If this buffer is evictable, transfer it from the
- * old state list to the new state list.
- */
- if (refcnt == 0) {
- if (old_state != arc_anon) {
- int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
-
- if (use_mutex)
- mutex_enter(&old_state->arcs_mtx);
-
- ASSERT(list_link_active(&ab->b_arc_node));
- list_remove(&old_state->arcs_list, ab);
-
- /*
- * If prefetching out of the ghost cache,
- * we will have a non-null datacnt.
- */
- if (GHOST_STATE(old_state) && ab->b_datacnt == 0) {
- /* ghost elements have a ghost size */
- ASSERT(ab->b_buf == NULL);
- from_delta = ab->b_size;
- }
- ASSERT3U(old_state->arcs_lsize, >=, from_delta);
- atomic_add_64(&old_state->arcs_lsize, -from_delta);
-
- if (use_mutex)
- mutex_exit(&old_state->arcs_mtx);
- }
- if (new_state != arc_anon) {
- int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
-
- if (use_mutex)
- mutex_enter(&new_state->arcs_mtx);
-
- list_insert_head(&new_state->arcs_list, ab);
-
- /* ghost elements have a ghost size */
- if (GHOST_STATE(new_state)) {
- ASSERT(ab->b_datacnt == 0);
- ASSERT(ab->b_buf == NULL);
- to_delta = ab->b_size;
- }
- atomic_add_64(&new_state->arcs_lsize, to_delta);
- ASSERT3U(new_state->arcs_size + to_delta, >=,
- new_state->arcs_lsize);
-
- if (use_mutex)
- mutex_exit(&new_state->arcs_mtx);
- }
- }
-
- ASSERT(!BUF_EMPTY(ab));
- if (new_state == arc_anon && old_state != arc_anon) {
- buf_hash_remove(ab);
- }
-
- /* adjust state sizes */
- if (to_delta)
- atomic_add_64(&new_state->arcs_size, to_delta);
- if (from_delta) {
- ASSERT3U(old_state->arcs_size, >=, from_delta);
- atomic_add_64(&old_state->arcs_size, -from_delta);
- }
- ab->b_state = new_state;
-}
-
-arc_buf_t *
-arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
-{
- arc_buf_hdr_t *hdr;
- arc_buf_t *buf;
-
- ASSERT3U(size, >, 0);
- hdr = kmem_cache_alloc(hdr_cache, KM_SLEEP);
- ASSERT(BUF_EMPTY(hdr));
- hdr->b_size = size;
- hdr->b_type = type;
- hdr->b_spa = spa;
- hdr->b_state = arc_anon;
- hdr->b_arc_access = 0;
- mutex_init(&hdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
- buf = kmem_cache_alloc(buf_cache, KM_SLEEP);
- buf->b_hdr = hdr;
- buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = NULL;
- hdr->b_buf = buf;
- arc_get_data_buf(buf);
- hdr->b_datacnt = 1;
- hdr->b_flags = 0;
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- (void) refcount_add(&hdr->b_refcnt, tag);
-
- return (buf);
-}
-
-static arc_buf_t *
-arc_buf_clone(arc_buf_t *from)
-{
- arc_buf_t *buf;
- arc_buf_hdr_t *hdr = from->b_hdr;
- uint64_t size = hdr->b_size;
-
- buf = kmem_cache_alloc(buf_cache, KM_SLEEP);
- buf->b_hdr = hdr;
- buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = hdr->b_buf;
- hdr->b_buf = buf;
- arc_get_data_buf(buf);
- bcopy(from->b_data, buf->b_data, size);
- hdr->b_datacnt += 1;
- return (buf);
-}
-
-void
-arc_buf_add_ref(arc_buf_t *buf, void* tag)
-{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_lock;
-
- /*
- * Check to see if this buffer is currently being evicted via
- * arc_do_user_evicts().
- */
- mutex_enter(&arc_eviction_mtx);
- hdr = buf->b_hdr;
- if (hdr == NULL) {
- mutex_exit(&arc_eviction_mtx);
- return;
- }
- hash_lock = HDR_LOCK(hdr);
- mutex_exit(&arc_eviction_mtx);
-
- mutex_enter(hash_lock);
- if (buf->b_data == NULL) {
- /*
- * This buffer is evicted.
- */
- mutex_exit(hash_lock);
- return;
- }
-
- ASSERT(buf->b_hdr == hdr);
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
- add_reference(hdr, hash_lock, tag);
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
- ARCSTAT_BUMP(arcstat_hits);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
- data, metadata, hits);
-}
-
-static void
-arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
-{
- arc_buf_t **bufp;
-
- /* free up data associated with the buf */
- if (buf->b_data) {
- arc_state_t *state = buf->b_hdr->b_state;
- uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = buf->b_hdr->b_type;
-
- arc_cksum_verify(buf);
- if (!recycle) {
- if (type == ARC_BUFC_METADATA) {
- zio_buf_free(buf->b_data, size);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- zio_data_buf_free(buf->b_data, size);
- }
- atomic_add_64(&arc_size, -size);
- }
- if (list_link_active(&buf->b_hdr->b_arc_node)) {
- ASSERT(refcount_is_zero(&buf->b_hdr->b_refcnt));
- ASSERT(state != arc_anon);
- ASSERT3U(state->arcs_lsize, >=, size);
- atomic_add_64(&state->arcs_lsize, -size);
- }
- ASSERT3U(state->arcs_size, >=, size);
- atomic_add_64(&state->arcs_size, -size);
- buf->b_data = NULL;
- ASSERT(buf->b_hdr->b_datacnt > 0);
- buf->b_hdr->b_datacnt -= 1;
- }
-
- /* only remove the buf if requested */
- if (!all)
- return;
-
- /* remove the buf from the hdr list */
- for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next)
- continue;
- *bufp = buf->b_next;
-
- ASSERT(buf->b_efunc == NULL);
-
- /* clean up the buf */
- buf->b_hdr = NULL;
- kmem_cache_free(buf_cache, buf);
-}
-
-static void
-arc_hdr_destroy(arc_buf_hdr_t *hdr)
-{
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- ASSERT3P(hdr->b_state, ==, arc_anon);
- ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-
- if (!BUF_EMPTY(hdr)) {
- ASSERT(!HDR_IN_HASH_TABLE(hdr));
- bzero(&hdr->b_dva, sizeof (dva_t));
- hdr->b_birth = 0;
- hdr->b_cksum0 = 0;
- }
- while (hdr->b_buf) {
- arc_buf_t *buf = hdr->b_buf;
-
- if (buf->b_efunc) {
- mutex_enter(&arc_eviction_mtx);
- ASSERT(buf->b_hdr != NULL);
- arc_buf_destroy(hdr->b_buf, FALSE, FALSE);
- hdr->b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- mutex_exit(&arc_eviction_mtx);
- } else {
- arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
- }
- }
- if (hdr->b_freeze_cksum != NULL) {
- kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
- hdr->b_freeze_cksum = NULL;
- }
- mutex_destroy(&hdr->b_freeze_lock);
-
- ASSERT(!list_link_active(&hdr->b_arc_node));
- ASSERT3P(hdr->b_hash_next, ==, NULL);
- ASSERT3P(hdr->b_acb, ==, NULL);
- kmem_cache_free(hdr_cache, hdr);
-}
-
-void
-arc_buf_free(arc_buf_t *buf, void *tag)
-{
- arc_buf_hdr_t *hdr = buf->b_hdr;
- int hashed = hdr->b_state != arc_anon;
-
- ASSERT(buf->b_efunc == NULL);
- ASSERT(buf->b_data != NULL);
-
- if (hashed) {
- kmutex_t *hash_lock = HDR_LOCK(hdr);
-
- mutex_enter(hash_lock);
- (void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_datacnt > 1)
- arc_buf_destroy(buf, FALSE, TRUE);
- else
- hdr->b_flags |= ARC_BUF_AVAILABLE;
- mutex_exit(hash_lock);
- } else if (HDR_IO_IN_PROGRESS(hdr)) {
- int destroy_hdr;
- /*
- * We are in the middle of an async write. Don't destroy
- * this buffer unless the write completes before we finish
- * decrementing the reference count.
- */
- mutex_enter(&arc_eviction_mtx);
- (void) remove_reference(hdr, NULL, tag);
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- destroy_hdr = !HDR_IO_IN_PROGRESS(hdr);
- mutex_exit(&arc_eviction_mtx);
- if (destroy_hdr)
- arc_hdr_destroy(hdr);
- } else {
- if (remove_reference(hdr, NULL, tag) > 0) {
- ASSERT(HDR_IO_ERROR(hdr));
- arc_buf_destroy(buf, FALSE, TRUE);
- } else {
- arc_hdr_destroy(hdr);
- }
- }
-}
-
-int
-arc_buf_remove_ref(arc_buf_t *buf, void* tag)
-{
- arc_buf_hdr_t *hdr = buf->b_hdr;
- kmutex_t *hash_lock = HDR_LOCK(hdr);
- int no_callback = (buf->b_efunc == NULL);
-
- if (hdr->b_state == arc_anon) {
- arc_buf_free(buf, tag);
- return (no_callback);
- }
-
- mutex_enter(hash_lock);
- ASSERT(hdr->b_state != arc_anon);
- ASSERT(buf->b_data != NULL);
-
- (void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_datacnt > 1) {
- if (no_callback)
- arc_buf_destroy(buf, FALSE, TRUE);
- } else if (no_callback) {
- ASSERT(hdr->b_buf == buf && buf->b_next == NULL);
- hdr->b_flags |= ARC_BUF_AVAILABLE;
- }
- ASSERT(no_callback || hdr->b_datacnt > 1 ||
- refcount_is_zero(&hdr->b_refcnt));
- mutex_exit(hash_lock);
- return (no_callback);
-}
-
-int
-arc_buf_size(arc_buf_t *buf)
-{
- return (buf->b_hdr->b_size);
-}
-
-/*
- * Evict buffers from list until we've removed the specified number of
- * bytes. Move the removed buffers to the appropriate evict state.
- * If the recycle flag is set, then attempt to "recycle" a buffer:
- * - look for a buffer to evict that is `bytes' long.
- * - return the data block from this buffer rather than freeing it.
- * This flag is used by callers that are trying to make space for a
- * new buffer in a full arc cache.
- */
-static void *
-arc_evict(arc_state_t *state, int64_t bytes, boolean_t recycle,
- arc_buf_contents_t type)
-{
- arc_state_t *evicted_state;
- uint64_t bytes_evicted = 0, skipped = 0, missed = 0;
- arc_buf_hdr_t *ab, *ab_prev = NULL;
- kmutex_t *hash_lock;
- boolean_t have_lock;
- void *stolen = NULL;
-
- ASSERT(state == arc_mru || state == arc_mfu);
-
- evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
-
- mutex_enter(&state->arcs_mtx);
- mutex_enter(&evicted_state->arcs_mtx);
-
- for (ab = list_tail(&state->arcs_list); ab; ab = ab_prev) {
- ab_prev = list_prev(&state->arcs_list, ab);
- /* prefetch buffers have a minimum lifespan */
- if (HDR_IO_IN_PROGRESS(ab) ||
- (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) &&
- LBOLT - ab->b_arc_access < arc_min_prefetch_lifespan)) {
- skipped++;
- continue;
- }
- /* "lookahead" for better eviction candidate */
- if (recycle && ab->b_size != bytes &&
- ab_prev && ab_prev->b_size == bytes)
- continue;
- hash_lock = HDR_LOCK(ab);
- have_lock = MUTEX_HELD(hash_lock);
- if (have_lock || mutex_tryenter(hash_lock)) {
- ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0);
- ASSERT(ab->b_datacnt > 0);
- while (ab->b_buf) {
- arc_buf_t *buf = ab->b_buf;
- if (buf->b_data) {
- bytes_evicted += ab->b_size;
- if (recycle && ab->b_type == type &&
- ab->b_size == bytes) {
- stolen = buf->b_data;
- recycle = FALSE;
- }
- }
- if (buf->b_efunc) {
- mutex_enter(&arc_eviction_mtx);
- arc_buf_destroy(buf,
- buf->b_data == stolen, FALSE);
- ab->b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- mutex_exit(&arc_eviction_mtx);
- } else {
- arc_buf_destroy(buf,
- buf->b_data == stolen, TRUE);
- }
- }
- ASSERT(ab->b_datacnt == 0);
- arc_change_state(evicted_state, ab, hash_lock);
- ASSERT(HDR_IN_HASH_TABLE(ab));
- ab->b_flags = ARC_IN_HASH_TABLE;
- DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, ab);
- if (!have_lock)
- mutex_exit(hash_lock);
- if (bytes >= 0 && bytes_evicted >= bytes)
- break;
- } else {
- missed += 1;
- }
- }
-
- mutex_exit(&evicted_state->arcs_mtx);
- mutex_exit(&state->arcs_mtx);
-
- if (bytes_evicted < bytes)
- dprintf("only evicted %lld bytes from %x",
- (longlong_t)bytes_evicted, state);
-
- if (skipped)
- ARCSTAT_INCR(arcstat_evict_skip, skipped);
-
- if (missed)
- ARCSTAT_INCR(arcstat_mutex_miss, missed);
-
- return (stolen);
-}
-
-/*
- * Remove buffers from list until we've removed the specified number of
- * bytes. Destroy the buffers that are removed.
- */
-static void
-arc_evict_ghost(arc_state_t *state, int64_t bytes)
-{
- arc_buf_hdr_t *ab, *ab_prev;
- kmutex_t *hash_lock;
- uint64_t bytes_deleted = 0;
- uint64_t bufs_skipped = 0;
-
- ASSERT(GHOST_STATE(state));
-top:
- mutex_enter(&state->arcs_mtx);
- for (ab = list_tail(&state->arcs_list); ab; ab = ab_prev) {
- ab_prev = list_prev(&state->arcs_list, ab);
- hash_lock = HDR_LOCK(ab);
- if (mutex_tryenter(hash_lock)) {
- ASSERT(!HDR_IO_IN_PROGRESS(ab));
- ASSERT(ab->b_buf == NULL);
- arc_change_state(arc_anon, ab, hash_lock);
- mutex_exit(hash_lock);
- ARCSTAT_BUMP(arcstat_deleted);
- bytes_deleted += ab->b_size;
- arc_hdr_destroy(ab);
- DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab);
- if (bytes >= 0 && bytes_deleted >= bytes)
- break;
- } else {
- if (bytes < 0) {
- mutex_exit(&state->arcs_mtx);
- mutex_enter(hash_lock);
- mutex_exit(hash_lock);
- goto top;
- }
- bufs_skipped += 1;
- }
- }
- mutex_exit(&state->arcs_mtx);
-
- if (bufs_skipped) {
- ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped);
- ASSERT(bytes >= 0);
- }
-
- if (bytes_deleted < bytes)
- dprintf("only deleted %lld bytes from %p",
- (longlong_t)bytes_deleted, state);
-}
-
-static void
-arc_adjust(void)
-{
- int64_t top_sz, mru_over, arc_over, todelete;
-
- top_sz = arc_anon->arcs_size + arc_mru->arcs_size;
-
- if (top_sz > arc_p && arc_mru->arcs_lsize > 0) {
- int64_t toevict = MIN(arc_mru->arcs_lsize, top_sz - arc_p);
- (void) arc_evict(arc_mru, toevict, FALSE, ARC_BUFC_UNDEF);
- top_sz = arc_anon->arcs_size + arc_mru->arcs_size;
- }
-
- mru_over = top_sz + arc_mru_ghost->arcs_size - arc_c;
-
- if (mru_over > 0) {
- if (arc_mru_ghost->arcs_lsize > 0) {
- todelete = MIN(arc_mru_ghost->arcs_lsize, mru_over);
- arc_evict_ghost(arc_mru_ghost, todelete);
- }
- }
-
- if ((arc_over = arc_size - arc_c) > 0) {
- int64_t tbl_over;
-
- if (arc_mfu->arcs_lsize > 0) {
- int64_t toevict = MIN(arc_mfu->arcs_lsize, arc_over);
- (void) arc_evict(arc_mfu, toevict, FALSE,
- ARC_BUFC_UNDEF);
- }
-
- tbl_over = arc_size + arc_mru_ghost->arcs_lsize +
- arc_mfu_ghost->arcs_lsize - arc_c*2;
-
- if (tbl_over > 0 && arc_mfu_ghost->arcs_lsize > 0) {
- todelete = MIN(arc_mfu_ghost->arcs_lsize, tbl_over);
- arc_evict_ghost(arc_mfu_ghost, todelete);
- }
- }
-}
-
-static void
-arc_do_user_evicts(void)
-{
- mutex_enter(&arc_eviction_mtx);
- while (arc_eviction_list != NULL) {
- arc_buf_t *buf = arc_eviction_list;
- arc_eviction_list = buf->b_next;
- buf->b_hdr = NULL;
- mutex_exit(&arc_eviction_mtx);
-
- if (buf->b_efunc != NULL)
- VERIFY(buf->b_efunc(buf) == 0);
-
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- kmem_cache_free(buf_cache, buf);
- mutex_enter(&arc_eviction_mtx);
- }
- mutex_exit(&arc_eviction_mtx);
-}
-
-/*
- * Flush all *evictable* data from the cache.
- * NOTE: this will not touch "active" (i.e. referenced) data.
- */
-void
-arc_flush(void)
-{
- while (list_head(&arc_mru->arcs_list))
- (void) arc_evict(arc_mru, -1, FALSE, ARC_BUFC_UNDEF);
- while (list_head(&arc_mfu->arcs_list))
- (void) arc_evict(arc_mfu, -1, FALSE, ARC_BUFC_UNDEF);
-
- arc_evict_ghost(arc_mru_ghost, -1);
- arc_evict_ghost(arc_mfu_ghost, -1);
-
- mutex_enter(&arc_reclaim_thr_lock);
- arc_do_user_evicts();
- mutex_exit(&arc_reclaim_thr_lock);
- ASSERT(arc_eviction_list == NULL);
-}
-
-int arc_shrink_shift = 5; /* log2(fraction of arc to reclaim) */
-
-void
-arc_shrink(void)
-{
- if (arc_c > arc_c_min) {
- uint64_t to_free;
-
-#ifdef _KERNEL
- to_free = arc_c >> arc_shrink_shift;
-#else
- to_free = arc_c >> arc_shrink_shift;
-#endif
- if (arc_c > arc_c_min + to_free)
- atomic_add_64(&arc_c, -to_free);
- else
- arc_c = arc_c_min;
-
- atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift));
- if (arc_c > arc_size)
- arc_c = MAX(arc_size, arc_c_min);
- if (arc_p > arc_c)
- arc_p = (arc_c >> 1);
- ASSERT(arc_c >= arc_c_min);
- ASSERT((int64_t)arc_p >= 0);
- }
-
- if (arc_size > arc_c)
- arc_adjust();
-}
-
-static int zfs_needfree = 0;
-
-static int
-arc_reclaim_needed(void)
-{
-#if 0
- uint64_t extra;
-#endif
-
-#ifdef _KERNEL
-
- if (zfs_needfree)
- return (1);
-
-#if 0
- /*
- * check to make sure that swapfs has enough space so that anon
- * reservations can still succeeed. anon_resvmem() checks that the
- * availrmem is greater than swapfs_minfree, and the number of reserved
- * swap pages. We also add a bit of extra here just to prevent
- * circumstances from getting really dire.
- */
- if (availrmem < swapfs_minfree + swapfs_reserve + extra)
- return (1);
-
- /*
- * If zio data pages are being allocated out of a separate heap segment,
- * then check that the size of available vmem for this area remains
- * above 1/4th free. This needs to be done when the size of the
- * non-default segment is smaller than physical memory, so we could
- * conceivably run out of VA in that segment before running out of
- * physical memory.
- */
- if (zio_arena != NULL) {
- size_t arc_ziosize =
- btop(vmem_size(zio_arena, VMEM_FREE | VMEM_ALLOC));
-
- if ((physmem > arc_ziosize) &&
- (btop(vmem_size(zio_arena, VMEM_FREE)) < arc_ziosize >> 2))
- return (1);
- }
-
-#if defined(__i386)
- /*
- * If we're on an i386 platform, it's possible that we'll exhaust the
- * kernel heap space before we ever run out of available physical
- * memory. Most checks of the size of the heap_area compare against
- * tune.t_minarmem, which is the minimum available real memory that we
- * can have in the system. However, this is generally fixed at 25 pages
- * which is so low that it's useless. In this comparison, we seek to
- * calculate the total heap-size, and reclaim if more than 3/4ths of the
- * heap is allocated. (Or, in the caclulation, if less than 1/4th is
- * free)
- */
- if (btop(vmem_size(heap_arena, VMEM_FREE)) <
- (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
- return (1);
-#endif
-#else
- if (kmem_used() > (kmem_size() * 3) / 4)
- return (1);
-#endif
-
-#else
- if (spa_get_random(100) == 0)
- return (1);
-#endif
- return (0);
-}
-
-static void
-arc_kmem_reap_now(arc_reclaim_strategy_t strat)
-{
-#ifdef ZIO_USE_UMA
- size_t i;
- kmem_cache_t *prev_cache = NULL;
- kmem_cache_t *prev_data_cache = NULL;
- extern kmem_cache_t *zio_buf_cache[];
- extern kmem_cache_t *zio_data_buf_cache[];
-#endif
-
-#ifdef _KERNEL
- /*
- * First purge some DNLC entries, in case the DNLC is using
- * up too much memory.
- */
- dnlc_reduce_cache((void *)(uintptr_t)arc_reduce_dnlc_percent);
-
-#if defined(__i386)
- /*
- * Reclaim unused memory from all kmem caches.
- */
- kmem_reap();
-#endif
-#endif
-
- /*
- * An agressive reclamation will shrink the cache size as well as
- * reap free buffers from the arc kmem caches.
- */
- if (strat == ARC_RECLAIM_AGGR)
- arc_shrink();
-
-#ifdef ZIO_USE_UMA
- for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
- if (zio_buf_cache[i] != prev_cache) {
- prev_cache = zio_buf_cache[i];
- kmem_cache_reap_now(zio_buf_cache[i]);
- }
- if (zio_data_buf_cache[i] != prev_data_cache) {
- prev_data_cache = zio_data_buf_cache[i];
- kmem_cache_reap_now(zio_data_buf_cache[i]);
- }
- }
-#endif
- kmem_cache_reap_now(buf_cache);
- kmem_cache_reap_now(hdr_cache);
-}
-
-static void
-arc_reclaim_thread(void *dummy __unused)
-{
- clock_t growtime = 0;
- arc_reclaim_strategy_t last_reclaim = ARC_RECLAIM_CONS;
- callb_cpr_t cpr;
-
- CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
-
- mutex_enter(&arc_reclaim_thr_lock);
- while (arc_thread_exit == 0) {
- if (arc_reclaim_needed()) {
-
- if (arc_no_grow) {
- if (last_reclaim == ARC_RECLAIM_CONS) {
- last_reclaim = ARC_RECLAIM_AGGR;
- } else {
- last_reclaim = ARC_RECLAIM_CONS;
- }
- } else {
- arc_no_grow = TRUE;
- last_reclaim = ARC_RECLAIM_AGGR;
- membar_producer();
- }
-
- /* reset the growth delay for every reclaim */
- growtime = LBOLT + (arc_grow_retry * hz);
- ASSERT(growtime > 0);
-
- if (zfs_needfree && last_reclaim == ARC_RECLAIM_CONS) {
- /*
- * If zfs_needfree is TRUE our vm_lowmem hook
- * was called and in that case we must free some
- * memory, so switch to aggressive mode.
- */
- arc_no_grow = TRUE;
- last_reclaim = ARC_RECLAIM_AGGR;
- }
- arc_kmem_reap_now(last_reclaim);
- } else if ((growtime > 0) && ((growtime - LBOLT) <= 0)) {
- arc_no_grow = FALSE;
- }
-
- if (zfs_needfree ||
- (2 * arc_c < arc_size +
- arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size))
- arc_adjust();
-
- if (arc_eviction_list != NULL)
- arc_do_user_evicts();
-
- if (arc_reclaim_needed()) {
- zfs_needfree = 0;
-#ifdef _KERNEL
- wakeup(&zfs_needfree);
-#endif
- }
-
- /* block until needed, or one second, whichever is shorter */
- CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait(&arc_reclaim_thr_cv,
- &arc_reclaim_thr_lock, hz);
- CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock);
- }
-
- arc_thread_exit = 0;
- cv_broadcast(&arc_reclaim_thr_cv);
- CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */
- thread_exit();
-}
-
-/*
- * Adapt arc info given the number of bytes we are trying to add and
- * the state that we are comming from. This function is only called
- * when we are adding new content to the cache.
- */
-static void
-arc_adapt(int bytes, arc_state_t *state)
-{
- int mult;
-
- ASSERT(bytes > 0);
- /*
- * Adapt the target size of the MRU list:
- * - if we just hit in the MRU ghost list, then increase
- * the target size of the MRU list.
- * - if we just hit in the MFU ghost list, then increase
- * the target size of the MFU list by decreasing the
- * target size of the MRU list.
- */
- if (state == arc_mru_ghost) {
- mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
- 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size));
-
- arc_p = MIN(arc_c, arc_p + bytes * mult);
- } else if (state == arc_mfu_ghost) {
- mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
- 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size));
-
- arc_p = MAX(0, (int64_t)arc_p - bytes * mult);
- }
- ASSERT((int64_t)arc_p >= 0);
-
- if (arc_reclaim_needed()) {
- cv_signal(&arc_reclaim_thr_cv);
- return;
- }
-
- if (arc_no_grow)
- return;
-
- if (arc_c >= arc_c_max)
- return;
-
- /*
- * If we're within (2 * maxblocksize) bytes of the target
- * cache size, increment the target cache size
- */
- if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
- atomic_add_64(&arc_c, (int64_t)bytes);
- if (arc_c > arc_c_max)
- arc_c = arc_c_max;
- else if (state == arc_anon)
- atomic_add_64(&arc_p, (int64_t)bytes);
- if (arc_p > arc_c)
- arc_p = arc_c;
- }
- ASSERT((int64_t)arc_p >= 0);
-}
-
-/*
- * Check if the cache has reached its limits and eviction is required
- * prior to insert.
- */
-static int
-arc_evict_needed()
-{
- if (arc_reclaim_needed())
- return (1);
-
- return (arc_size > arc_c);
-}
-
-/*
- * The buffer, supplied as the first argument, needs a data block.
- * So, if we are at cache max, determine which cache should be victimized.
- * We have the following cases:
- *
- * 1. Insert for MRU, p > sizeof(arc_anon + arc_mru) ->
- * In this situation if we're out of space, but the resident size of the MFU is
- * under the limit, victimize the MFU cache to satisfy this insertion request.
- *
- * 2. Insert for MRU, p <= sizeof(arc_anon + arc_mru) ->
- * Here, we've used up all of the available space for the MRU, so we need to
- * evict from our own cache instead. Evict from the set of resident MRU
- * entries.
- *
- * 3. Insert for MFU (c - p) > sizeof(arc_mfu) ->
- * c minus p represents the MFU space in the cache, since p is the size of the
- * cache that is dedicated to the MRU. In this situation there's still space on
- * the MFU side, so the MRU side needs to be victimized.
- *
- * 4. Insert for MFU (c - p) < sizeof(arc_mfu) ->
- * MFU's resident set is consuming more space than it has been allotted. In
- * this situation, we must victimize our own cache, the MFU, for this insertion.
- */
-static void
-arc_get_data_buf(arc_buf_t *buf)
-{
- arc_state_t *state = buf->b_hdr->b_state;
- uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = buf->b_hdr->b_type;
-
- arc_adapt(size, state);
-
- /*
- * We have not yet reached cache maximum size,
- * just allocate a new buffer.
- */
- if (!arc_evict_needed()) {
- if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
- }
- atomic_add_64(&arc_size, size);
- goto out;
- }
-
- /*
- * If we are prefetching from the mfu ghost list, this buffer
- * will end up on the mru list; so steal space from there.
- */
- if (state == arc_mfu_ghost)
- state = buf->b_hdr->b_flags & ARC_PREFETCH ? arc_mru : arc_mfu;
- else if (state == arc_mru_ghost)
- state = arc_mru;
-
- if (state == arc_mru || state == arc_anon) {
- uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size;
- state = (arc_p > mru_used) ? arc_mfu : arc_mru;
- } else {
- /* MFU cases */
- uint64_t mfu_space = arc_c - arc_p;
- state = (mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
- }
- if ((buf->b_data = arc_evict(state, size, TRUE, type)) == NULL) {
- if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
- }
- atomic_add_64(&arc_size, size);
- ARCSTAT_BUMP(arcstat_recycle_miss);
- }
- ASSERT(buf->b_data != NULL);
-out:
- /*
- * Update the state size. Note that ghost states have a
- * "ghost size" and so don't need to be updated.
- */
- if (!GHOST_STATE(buf->b_hdr->b_state)) {
- arc_buf_hdr_t *hdr = buf->b_hdr;
-
- atomic_add_64(&hdr->b_state->arcs_size, size);
- if (list_link_active(&hdr->b_arc_node)) {
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- atomic_add_64(&hdr->b_state->arcs_lsize, size);
- }
- /*
- * If we are growing the cache, and we are adding anonymous
- * data, and we have outgrown arc_p, update arc_p
- */
- if (arc_size < arc_c && hdr->b_state == arc_anon &&
- arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
- arc_p = MIN(arc_c, arc_p + size);
- }
-}
-
-/*
- * This routine is called whenever a buffer is accessed.
- * NOTE: the hash lock is dropped in this function.
- */
-static void
-arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
-{
- ASSERT(MUTEX_HELD(hash_lock));
-
- if (buf->b_state == arc_anon) {
- /*
- * This buffer is not in the cache, and does not
- * appear in our "ghost" list. Add the new buffer
- * to the MRU state.
- */
-
- ASSERT(buf->b_arc_access == 0);
- buf->b_arc_access = LBOLT;
- DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf);
- arc_change_state(arc_mru, buf, hash_lock);
-
- } else if (buf->b_state == arc_mru) {
- /*
- * If this buffer is here because of a prefetch, then either:
- * - clear the flag if this is a "referencing" read
- * (any subsequent access will bump this into the MFU state).
- * or
- * - move the buffer to the head of the list if this is
- * another prefetch (to make it less likely to be evicted).
- */
- if ((buf->b_flags & ARC_PREFETCH) != 0) {
- if (refcount_count(&buf->b_refcnt) == 0) {
- ASSERT(list_link_active(&buf->b_arc_node));
- mutex_enter(&arc_mru->arcs_mtx);
- list_remove(&arc_mru->arcs_list, buf);
- list_insert_head(&arc_mru->arcs_list, buf);
- mutex_exit(&arc_mru->arcs_mtx);
- } else {
- buf->b_flags &= ~ARC_PREFETCH;
- ARCSTAT_BUMP(arcstat_mru_hits);
- }
- buf->b_arc_access = LBOLT;
- return;
- }
-
- /*
- * This buffer has been "accessed" only once so far,
- * but it is still in the cache. Move it to the MFU
- * state.
- */
- if (LBOLT > buf->b_arc_access + ARC_MINTIME) {
- /*
- * More than 125ms have passed since we
- * instantiated this buffer. Move it to the
- * most frequently used state.
- */
- buf->b_arc_access = LBOLT;
- DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
- arc_change_state(arc_mfu, buf, hash_lock);
- }
- ARCSTAT_BUMP(arcstat_mru_hits);
- } else if (buf->b_state == arc_mru_ghost) {
- arc_state_t *new_state;
- /*
- * This buffer has been "accessed" recently, but
- * was evicted from the cache. Move it to the
- * MFU state.
- */
-
- if (buf->b_flags & ARC_PREFETCH) {
- new_state = arc_mru;
- if (refcount_count(&buf->b_refcnt) > 0)
- buf->b_flags &= ~ARC_PREFETCH;
- DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf);
- } else {
- new_state = arc_mfu;
- DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
- }
-
- buf->b_arc_access = LBOLT;
- arc_change_state(new_state, buf, hash_lock);
-
- ARCSTAT_BUMP(arcstat_mru_ghost_hits);
- } else if (buf->b_state == arc_mfu) {
- /*
- * This buffer has been accessed more than once and is
- * still in the cache. Keep it in the MFU state.
- *
- * NOTE: an add_reference() that occurred when we did
- * the arc_read() will have kicked this off the list.
- * If it was a prefetch, we will explicitly move it to
- * the head of the list now.
- */
- if ((buf->b_flags & ARC_PREFETCH) != 0) {
- ASSERT(refcount_count(&buf->b_refcnt) == 0);
- ASSERT(list_link_active(&buf->b_arc_node));
- mutex_enter(&arc_mfu->arcs_mtx);
- list_remove(&arc_mfu->arcs_list, buf);
- list_insert_head(&arc_mfu->arcs_list, buf);
- mutex_exit(&arc_mfu->arcs_mtx);
- }
- ARCSTAT_BUMP(arcstat_mfu_hits);
- buf->b_arc_access = LBOLT;
- } else if (buf->b_state == arc_mfu_ghost) {
- arc_state_t *new_state = arc_mfu;
- /*
- * This buffer has been accessed more than once but has
- * been evicted from the cache. Move it back to the
- * MFU state.
- */
-
- if (buf->b_flags & ARC_PREFETCH) {
- /*
- * This is a prefetch access...
- * move this block back to the MRU state.
- */
- ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0);
- new_state = arc_mru;
- }
-
- buf->b_arc_access = LBOLT;
- DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
- arc_change_state(new_state, buf, hash_lock);
-
- ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
- } else {
- ASSERT(!"invalid arc state");
- }
-}
-
-/* a generic arc_done_func_t which you can use */
-/* ARGSUSED */
-void
-arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
-{
- bcopy(buf->b_data, arg, buf->b_hdr->b_size);
- VERIFY(arc_buf_remove_ref(buf, arg) == 1);
-}
-
-/* a generic arc_done_func_t which you can use */
-void
-arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
-{
- arc_buf_t **bufp = arg;
- if (zio && zio->io_error) {
- VERIFY(arc_buf_remove_ref(buf, arg) == 1);
- *bufp = NULL;
- } else {
- *bufp = buf;
- }
-}
-
-static void
-arc_read_done(zio_t *zio)
-{
- arc_buf_hdr_t *hdr, *found;
- arc_buf_t *buf;
- arc_buf_t *abuf; /* buffer we're assigning to callback */
- kmutex_t *hash_lock;
- arc_callback_t *callback_list, *acb;
- int freeable = FALSE;
-
- buf = zio->io_private;
- hdr = buf->b_hdr;
-
- /*
- * The hdr was inserted into hash-table and removed from lists
- * prior to starting I/O. We should find this header, since
- * it's in the hash table, and it should be legit since it's
- * not possible to evict it during the I/O. The only possible
- * reason for it not to be found is if we were freed during the
- * read.
- */
- found = buf_hash_find(zio->io_spa, &hdr->b_dva, hdr->b_birth,
- &hash_lock);
-
- ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) ||
- (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))));
-
- /* byteswap if necessary */
- callback_list = hdr->b_acb;
- ASSERT(callback_list != NULL);
- if (BP_SHOULD_BYTESWAP(zio->io_bp) && callback_list->acb_byteswap)
- callback_list->acb_byteswap(buf->b_data, hdr->b_size);
-
- arc_cksum_compute(buf);
-
- /* create copies of the data buffer for the callers */
- abuf = buf;
- for (acb = callback_list; acb; acb = acb->acb_next) {
- if (acb->acb_done) {
- if (abuf == NULL)
- abuf = arc_buf_clone(buf);
- acb->acb_buf = abuf;
- abuf = NULL;
- }
- }
- hdr->b_acb = NULL;
- hdr->b_flags &= ~ARC_IO_IN_PROGRESS;
- ASSERT(!HDR_BUF_AVAILABLE(hdr));
- if (abuf == buf)
- hdr->b_flags |= ARC_BUF_AVAILABLE;
-
- ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL);
-
- if (zio->io_error != 0) {
- hdr->b_flags |= ARC_IO_ERROR;
- if (hdr->b_state != arc_anon)
- arc_change_state(arc_anon, hdr, hash_lock);
- if (HDR_IN_HASH_TABLE(hdr))
- buf_hash_remove(hdr);
- freeable = refcount_is_zero(&hdr->b_refcnt);
- /* convert checksum errors into IO errors */
- if (zio->io_error == ECKSUM)
- zio->io_error = EIO;
- }
-
- /*
- * Broadcast before we drop the hash_lock to avoid the possibility
- * that the hdr (and hence the cv) might be freed before we get to
- * the cv_broadcast().
- */
- cv_broadcast(&hdr->b_cv);
-
- if (hash_lock) {
- /*
- * Only call arc_access on anonymous buffers. This is because
- * if we've issued an I/O for an evicted buffer, we've already
- * called arc_access (to prevent any simultaneous readers from
- * getting confused).
- */
- if (zio->io_error == 0 && hdr->b_state == arc_anon)
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
- } else {
- /*
- * This block was freed while we waited for the read to
- * complete. It has been removed from the hash table and
- * moved to the anonymous state (so that it won't show up
- * in the cache).
- */
- ASSERT3P(hdr->b_state, ==, arc_anon);
- freeable = refcount_is_zero(&hdr->b_refcnt);
- }
-
- /* execute each callback and free its structure */
- while ((acb = callback_list) != NULL) {
- if (acb->acb_done)
- acb->acb_done(zio, acb->acb_buf, acb->acb_private);
-
- if (acb->acb_zio_dummy != NULL) {
- acb->acb_zio_dummy->io_error = zio->io_error;
- zio_nowait(acb->acb_zio_dummy);
- }
-
- callback_list = acb->acb_next;
- kmem_free(acb, sizeof (arc_callback_t));
- }
-
- if (freeable)
- arc_hdr_destroy(hdr);
-}
-
-/*
- * "Read" the block block at the specified DVA (in bp) via the
- * cache. If the block is found in the cache, invoke the provided
- * callback immediately and return. Note that the `zio' parameter
- * in the callback will be NULL in this case, since no IO was
- * required. If the block is not in the cache pass the read request
- * on to the spa with a substitute callback function, so that the
- * requested block will be added to the cache.
- *
- * If a read request arrives for a block that has a read in-progress,
- * either wait for the in-progress read to complete (and return the
- * results); or, if this is a read with a "done" func, add a record
- * to the read to invoke the "done" func when the read completes,
- * and return; or just return.
- *
- * arc_read_done() will invoke all the requested "done" functions
- * for readers of this block.
- */
-int
-arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
- arc_done_func_t *done, void *private, int priority, int flags,
- uint32_t *arc_flags, zbookmark_t *zb)
-{
- arc_buf_hdr_t *hdr;
- arc_buf_t *buf;
- kmutex_t *hash_lock;
- zio_t *rzio;
-
-top:
- hdr = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
- if (hdr && hdr->b_datacnt > 0) {
-
- *arc_flags |= ARC_CACHED;
-
- if (HDR_IO_IN_PROGRESS(hdr)) {
-
- if (*arc_flags & ARC_WAIT) {
- cv_wait(&hdr->b_cv, hash_lock);
- mutex_exit(hash_lock);
- goto top;
- }
- ASSERT(*arc_flags & ARC_NOWAIT);
-
- if (done) {
- arc_callback_t *acb = NULL;
-
- acb = kmem_zalloc(sizeof (arc_callback_t),
- KM_SLEEP);
- acb->acb_done = done;
- acb->acb_private = private;
- acb->acb_byteswap = swap;
- if (pio != NULL)
- acb->acb_zio_dummy = zio_null(pio,
- spa, NULL, NULL, flags);
-
- ASSERT(acb->acb_done != NULL);
- acb->acb_next = hdr->b_acb;
- hdr->b_acb = acb;
- add_reference(hdr, hash_lock, private);
- mutex_exit(hash_lock);
- return (0);
- }
- mutex_exit(hash_lock);
- return (0);
- }
-
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
-
- if (done) {
- add_reference(hdr, hash_lock, private);
- /*
- * If this block is already in use, create a new
- * copy of the data so that we will be guaranteed
- * that arc_release() will always succeed.
- */
- buf = hdr->b_buf;
- ASSERT(buf);
- ASSERT(buf->b_data);
- if (HDR_BUF_AVAILABLE(hdr)) {
- ASSERT(buf->b_efunc == NULL);
- hdr->b_flags &= ~ARC_BUF_AVAILABLE;
- } else {
- buf = arc_buf_clone(buf);
- }
- } else if (*arc_flags & ARC_PREFETCH &&
- refcount_count(&hdr->b_refcnt) == 0) {
- hdr->b_flags |= ARC_PREFETCH;
- }
- DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
- ARCSTAT_BUMP(arcstat_hits);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
- data, metadata, hits);
-
- if (done)
- done(NULL, buf, private);
- } else {
- uint64_t size = BP_GET_LSIZE(bp);
- arc_callback_t *acb;
-
- if (hdr == NULL) {
- /* this block is not in the cache */
- arc_buf_hdr_t *exists;
- arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
- buf = arc_buf_alloc(spa, size, private, type);
- hdr = buf->b_hdr;
- hdr->b_dva = *BP_IDENTITY(bp);
- hdr->b_birth = bp->blk_birth;
- hdr->b_cksum0 = bp->blk_cksum.zc_word[0];
- exists = buf_hash_insert(hdr, &hash_lock);
- if (exists) {
- /* somebody beat us to the hash insert */
- mutex_exit(hash_lock);
- bzero(&hdr->b_dva, sizeof (dva_t));
- hdr->b_birth = 0;
- hdr->b_cksum0 = 0;
- (void) arc_buf_remove_ref(buf, private);
- goto top; /* restart the IO request */
- }
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_PREFETCH) {
- (void) remove_reference(hdr, hash_lock,
- private);
- hdr->b_flags |= ARC_PREFETCH;
- }
- if (BP_GET_LEVEL(bp) > 0)
- hdr->b_flags |= ARC_INDIRECT;
- } else {
- /* this block is in the ghost cache */
- ASSERT(GHOST_STATE(hdr->b_state));
- ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0);
- ASSERT(hdr->b_buf == NULL);
-
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_PREFETCH)
- hdr->b_flags |= ARC_PREFETCH;
- else
- add_reference(hdr, hash_lock, private);
- buf = kmem_cache_alloc(buf_cache, KM_SLEEP);
- buf->b_hdr = hdr;
- buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = NULL;
- hdr->b_buf = buf;
- arc_get_data_buf(buf);
- ASSERT(hdr->b_datacnt == 0);
- hdr->b_datacnt = 1;
-
- }
-
- acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
- acb->acb_done = done;
- acb->acb_private = private;
- acb->acb_byteswap = swap;
-
- ASSERT(hdr->b_acb == NULL);
- hdr->b_acb = acb;
- hdr->b_flags |= ARC_IO_IN_PROGRESS;
-
- /*
- * If the buffer has been evicted, migrate it to a present state
- * before issuing the I/O. Once we drop the hash-table lock,
- * the header will be marked as I/O in progress and have an
- * attached buffer. At this point, anybody who finds this
- * buffer ought to notice that it's legit but has a pending I/O.
- */
-
- if (GHOST_STATE(hdr->b_state))
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
-
- ASSERT3U(hdr->b_size, ==, size);
- DTRACE_PROBE3(arc__miss, blkptr_t *, bp, uint64_t, size,
- zbookmark_t *, zb);
- ARCSTAT_BUMP(arcstat_misses);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
- data, metadata, misses);
-
- rzio = zio_read(pio, spa, bp, buf->b_data, size,
- arc_read_done, buf, priority, flags, zb);
-
- if (*arc_flags & ARC_WAIT)
- return (zio_wait(rzio));
-
- ASSERT(*arc_flags & ARC_NOWAIT);
- zio_nowait(rzio);
- }
- return (0);
-}
-
-/*
- * arc_read() variant to support pool traversal. If the block is already
- * in the ARC, make a copy of it; otherwise, the caller will do the I/O.
- * The idea is that we don't want pool traversal filling up memory, but
- * if the ARC already has the data anyway, we shouldn't pay for the I/O.
- */
-int
-arc_tryread(spa_t *spa, blkptr_t *bp, void *data)
-{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_mtx;
- int rc = 0;
-
- hdr = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_mtx);
-
- if (hdr && hdr->b_datacnt > 0 && !HDR_IO_IN_PROGRESS(hdr)) {
- arc_buf_t *buf = hdr->b_buf;
-
- ASSERT(buf);
- while (buf->b_data == NULL) {
- buf = buf->b_next;
- ASSERT(buf);
- }
- bcopy(buf->b_data, data, hdr->b_size);
- } else {
- rc = ENOENT;
- }
-
- if (hash_mtx)
- mutex_exit(hash_mtx);
-
- return (rc);
-}
-
-void
-arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
-{
- ASSERT(buf->b_hdr != NULL);
- ASSERT(buf->b_hdr->b_state != arc_anon);
- ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt) || func == NULL);
- buf->b_efunc = func;
- buf->b_private = private;
-}
-
-/*
- * This is used by the DMU to let the ARC know that a buffer is
- * being evicted, so the ARC should clean up. If this arc buf
- * is not yet in the evicted state, it will be put there.
- *
- * Returns 0 when the buf no longer has a header (the cases the inline
- * comments attribute to arc_do_user_evicts()), and 1 when this call
- * invoked the buf's b_efunc callback itself.
- */
-int
-arc_buf_evict(arc_buf_t *buf)
-{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_lock;
- arc_buf_t **bufp;
-
- mutex_enter(&arc_eviction_mtx);
- hdr = buf->b_hdr;
- if (hdr == NULL) {
- /*
- * We are in arc_do_user_evicts().
- */
- ASSERT(buf->b_data == NULL);
- mutex_exit(&arc_eviction_mtx);
- return (0);
- }
- hash_lock = HDR_LOCK(hdr);	/* looked up while arc_eviction_mtx is held */
- mutex_exit(&arc_eviction_mtx);
-
- mutex_enter(hash_lock);
-
- if (buf->b_data == NULL) {
- /*
- * We are on the eviction list.
- */
- mutex_exit(hash_lock);
- mutex_enter(&arc_eviction_mtx);
- if (buf->b_hdr == NULL) {	/* re-check: b_hdr may have been cleared meanwhile */
- /*
- * We are already in arc_do_user_evicts().
- */
- mutex_exit(&arc_eviction_mtx);
- return (0);
- } else {
- arc_buf_t copy = *buf; /* structure assignment */
- /*
- * Process this buffer now
- * but let arc_do_user_evicts() do the reaping.
- */
- buf->b_efunc = NULL;
- mutex_exit(&arc_eviction_mtx);
- VERIFY(copy.b_efunc(&copy) == 0);	/* callback runs on the stack copy, no locks held */
- return (1);
- }
- }
-
- ASSERT(buf->b_hdr == hdr);
- ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
-
- /*
- * Pull this buffer off of the hdr
- */
- bufp = &hdr->b_buf;
- while (*bufp != buf)
- bufp = &(*bufp)->b_next;
- *bufp = buf->b_next;
-
- ASSERT(buf->b_data != NULL);
- arc_buf_destroy(buf, FALSE, FALSE);
-
- if (hdr->b_datacnt == 0) {	/* that was the header's last buf */
- arc_state_t *old_state = hdr->b_state;
- arc_state_t *evicted_state;
-
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
-
- evicted_state =
- (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
-
- mutex_enter(&old_state->arcs_mtx);
- mutex_enter(&evicted_state->arcs_mtx);
-
- arc_change_state(evicted_state, hdr, hash_lock);
- ASSERT(HDR_IN_HASH_TABLE(hdr));
- hdr->b_flags = ARC_IN_HASH_TABLE;	/* plain assignment: every other flag is dropped */
-
- mutex_exit(&evicted_state->arcs_mtx);
- mutex_exit(&old_state->arcs_mtx);
- }
- mutex_exit(hash_lock);
-
- VERIFY(buf->b_efunc(buf) == 0);	/* user callback is invoked after hash_lock is dropped */
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_hdr = NULL;
- kmem_cache_free(buf_cache, buf);
- return (1);
-}
-
-/*
- * Release this buffer from the cache. This must be done
- * after a read and prior to modifying the buffer contents.
- * If the buffer has more than one reference, we must
- * make a new hdr for the buffer.
- *
- * On return the buf belongs to a header in the arc_anon state and
- * its eviction callback and private pointer have been cleared.
- */
-void
-arc_release(arc_buf_t *buf, void *tag)
-{
- arc_buf_hdr_t *hdr = buf->b_hdr;
- kmutex_t *hash_lock = HDR_LOCK(hdr);
-
- /* this buffer is not on any list */
- ASSERT(refcount_count(&hdr->b_refcnt) > 0);
-
- if (hdr->b_state == arc_anon) {
- /* this buffer is already released */
- ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 1);
- ASSERT(BUF_EMPTY(hdr));
- ASSERT(buf->b_efunc == NULL);
- arc_buf_thaw(buf);
- return;
- }
-
- mutex_enter(hash_lock);
-
- /*
- * Do we have more than one buf?
- */
- if (hdr->b_buf != buf || buf->b_next != NULL) {
- arc_buf_hdr_t *nhdr;
- arc_buf_t **bufp;
- uint64_t blksz = hdr->b_size;	/* copied out now; used after hash_lock is dropped */
- spa_t *spa = hdr->b_spa;
- arc_buf_contents_t type = hdr->b_type;
-
- ASSERT(hdr->b_datacnt > 1);
- /*
- * Pull the data off of this buf and attach it to
- * a new anonymous buf.
- */
- (void) remove_reference(hdr, hash_lock, tag);
- bufp = &hdr->b_buf;
- while (*bufp != buf)
- bufp = &(*bufp)->b_next;
- *bufp = (*bufp)->b_next;	/* unlink buf from the old header's chain */
- buf->b_next = NULL;
-
- ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size);
- atomic_add_64(&hdr->b_state->arcs_size, -hdr->b_size);	/* old state stops counting this buf */
- if (refcount_is_zero(&hdr->b_refcnt)) {
- ASSERT3U(hdr->b_state->arcs_lsize, >=, hdr->b_size);
- atomic_add_64(&hdr->b_state->arcs_lsize, -hdr->b_size);
- }
- hdr->b_datacnt -= 1;
- arc_cksum_verify(buf);
-
- mutex_exit(hash_lock);
-
- nhdr = kmem_cache_alloc(hdr_cache, KM_SLEEP);
- nhdr->b_size = blksz;
- nhdr->b_spa = spa;
- nhdr->b_type = type;
- nhdr->b_buf = buf;
- nhdr->b_state = arc_anon;
- nhdr->b_arc_access = 0;
- nhdr->b_flags = 0;
- nhdr->b_datacnt = 1;
- nhdr->b_freeze_cksum = NULL;
- mutex_init(&nhdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
- (void) refcount_add(&nhdr->b_refcnt, tag);	/* the caller's hold moves to the new header */
- buf->b_hdr = nhdr;
- atomic_add_64(&arc_anon->arcs_size, blksz);	/* arc_anon starts counting this buf */
-
- hdr = nhdr;
- } else {
- ASSERT(refcount_count(&hdr->b_refcnt) == 1);
- ASSERT(!list_link_active(&hdr->b_arc_node));
- ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- arc_change_state(arc_anon, hdr, hash_lock);
- hdr->b_arc_access = 0;
- mutex_exit(hash_lock);
- bzero(&hdr->b_dva, sizeof (dva_t));	/* clear the block identity: dva, birth, cksum0 */
- hdr->b_birth = 0;
- hdr->b_cksum0 = 0;
- arc_buf_thaw(buf);
- }
- buf->b_efunc = NULL;
- buf->b_private = NULL;
-}
-
-/*
- * Return non-zero when the buf still has data and its header is in
- * the anonymous state.
- */
-int
-arc_released(arc_buf_t *buf)
-{
-	if (buf->b_data == NULL)
-		return (0);
-
-	return (buf->b_hdr->b_state == arc_anon);
-}
-
-/*
- * Return 1 when an eviction callback is installed on the buf, else 0.
- */
-int
-arc_has_callback(arc_buf_t *buf)
-{
-	return (buf->b_efunc == NULL ? 0 : 1);
-}
-
-#ifdef ZFS_DEBUG
-/*
- * Debug-only helper: report the reference count of the buf's header.
- */
-int
-arc_referenced(arc_buf_t *buf)
-{
-	arc_buf_hdr_t *hdr = buf->b_hdr;
-
-	return (refcount_count(&hdr->b_refcnt));
-}
-#endif
-
-/*
- * "Ready" callback handed to zio_write() by arc_write().  Forwards to
- * the caller-supplied ready callback, if there is one, and then
- * computes the checksum of the buffer.
- */
-static void
-arc_write_ready(zio_t *zio)
-{
-	arc_write_callback_t *cb = zio->io_private;
-	arc_buf_t *buf = cb->awcb_buf;
-
-	if (cb->awcb_ready != NULL) {
-		ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt));
-		cb->awcb_ready(zio, buf, cb->awcb_private);
-	}
-
-	arc_cksum_compute(buf);
-}
-
-static void
-arc_write_done(zio_t *zio)	/* "done" callback that arc_write() hands to zio_write() */
-{
- arc_write_callback_t *callback = zio->io_private;
- arc_buf_t *buf = callback->awcb_buf;
- arc_buf_hdr_t *hdr = buf->b_hdr;
-
- hdr->b_acb = NULL;
-
- /* this buffer is on no lists and is not in the hash table */
- ASSERT3P(hdr->b_state, ==, arc_anon);
-
- hdr->b_dva = *BP_IDENTITY(zio->io_bp);	/* adopt the identity of the block just written */
- hdr->b_birth = zio->io_bp->blk_birth;
- hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
- /*
- * If the block to be written was all-zero, we may have
- * compressed it away. In this case no write was performed
- * so there will be no dva/birth-date/checksum. The buffer
- * must therefore remain anonymous (and uncached).
- */
- if (!BUF_EMPTY(hdr)) {
- arc_buf_hdr_t *exists;
- kmutex_t *hash_lock;
-
- arc_cksum_verify(buf);
-
- exists = buf_hash_insert(hdr, &hash_lock);
- if (exists) {
- /*
- * This can only happen if we overwrite for
- * sync-to-convergence, because we remove
- * buffers from the hash table when we arc_free().
- */
- ASSERT(DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig),
- BP_IDENTITY(zio->io_bp)));
- ASSERT3U(zio->io_bp_orig.blk_birth, ==,
- zio->io_bp->blk_birth);
-
- ASSERT(refcount_is_zero(&exists->b_refcnt));
- arc_change_state(arc_anon, exists, hash_lock);
- mutex_exit(hash_lock);
- arc_hdr_destroy(exists);
- exists = buf_hash_insert(hdr, &hash_lock);	/* retry now that the stale header is gone */
- ASSERT3P(exists, ==, NULL);
- }
- hdr->b_flags &= ~ARC_IO_IN_PROGRESS;
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
- } else if (callback->awcb_done == NULL) {
- int destroy_hdr;
- /*
- * This is an anonymous buffer with no user callback,
- * destroy it if there are no active references.
- */
- mutex_enter(&arc_eviction_mtx);
- destroy_hdr = refcount_is_zero(&hdr->b_refcnt);	/* sampled under arc_eviction_mtx */
- hdr->b_flags &= ~ARC_IO_IN_PROGRESS;
- mutex_exit(&arc_eviction_mtx);
- if (destroy_hdr)
- arc_hdr_destroy(hdr);
- } else {
- hdr->b_flags &= ~ARC_IO_IN_PROGRESS;
- }
-
- if (callback->awcb_done) {
- ASSERT(!refcount_is_zero(&hdr->b_refcnt));
- callback->awcb_done(zio, buf, callback->awcb_private);
- }
-
- kmem_free(callback, sizeof (arc_write_callback_t));	/* allocated by arc_write() */
-}
-
-/*
- * Build (but do not issue) a write zio for an anonymous ARC buffer.
- * The caller's ready/done callbacks and private pointer are stashed in
- * an arc_write_callback_t that arc_write_ready() and arc_write_done()
- * receive through the zio; arc_write_done() frees it.  The header is
- * flagged ARC_IO_IN_PROGRESS before the zio is created.
- */
-zio_t *
-arc_write(zio_t *pio, spa_t *spa, int checksum, int compress, int ncopies,
-    uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
-    arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
-    int flags, zbookmark_t *zb)
-{
-	arc_buf_hdr_t *hdr = buf->b_hdr;
-	arc_write_callback_t *cb;
-
-	/* this is a private buffer - no locking required */
-	ASSERT3P(hdr->b_state, ==, arc_anon);
-	ASSERT(BUF_EMPTY(hdr));
-	ASSERT(!HDR_IO_ERROR(hdr));
-	ASSERT((hdr->b_flags & ARC_IO_IN_PROGRESS) == 0);
-	ASSERT(hdr->b_acb == 0);
-
-	cb = kmem_zalloc(sizeof (*cb), KM_SLEEP);
-	cb->awcb_ready = ready;
-	cb->awcb_done = done;
-	cb->awcb_private = private;
-	cb->awcb_buf = buf;
-
-	hdr->b_flags |= ARC_IO_IN_PROGRESS;
-
-	return (zio_write(pio, spa, checksum, compress, ncopies, txg, bp,
-	    buf->b_data, hdr->b_size, arc_write_ready, arc_write_done, cb,
-	    priority, flags, zb));
-}
-
-int	/* zio_wait() result when ARC_WAIT is set in arc_flags, otherwise 0 */
-arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private, uint32_t arc_flags)
-{
- arc_buf_hdr_t *ab;
- kmutex_t *hash_lock;
- zio_t *zio;
-
- /*
- * If this buffer is in the cache, release it, so it
- * can be re-used.
- */
- ab = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
- if (ab != NULL) {
- /*
- * The checksum of blocks to free is not always
- * preserved (eg. on the deadlist). However, if it is
- * nonzero, it should match what we have in the cache.
- */
- ASSERT(bp->blk_cksum.zc_word[0] == 0 ||
- ab->b_cksum0 == bp->blk_cksum.zc_word[0]);
- if (ab->b_state != arc_anon)
- arc_change_state(arc_anon, ab, hash_lock);
- if (HDR_IO_IN_PROGRESS(ab)) {
- /*
- * This should only happen when we prefetch.
- */
- ASSERT(ab->b_flags & ARC_PREFETCH);
- ASSERT3U(ab->b_datacnt, ==, 1);
- ab->b_flags |= ARC_FREED_IN_READ;	/* mark the in-flight read's block as freed */
- if (HDR_IN_HASH_TABLE(ab))
- buf_hash_remove(ab);
- ab->b_arc_access = 0;
- bzero(&ab->b_dva, sizeof (dva_t));	/* clear the block identity: dva, birth, cksum0 */
- ab->b_birth = 0;
- ab->b_cksum0 = 0;
- ab->b_buf->b_efunc = NULL;
- ab->b_buf->b_private = NULL;
- mutex_exit(hash_lock);
- } else if (refcount_is_zero(&ab->b_refcnt)) {
- mutex_exit(hash_lock);	/* dropped before the header is destroyed */
- arc_hdr_destroy(ab);
- ARCSTAT_BUMP(arcstat_deleted);
- } else {
- /*
- * We still have an active reference on this
- * buffer. This can happen, e.g., from
- * dbuf_unoverride().
- */
- ASSERT(!HDR_IN_HASH_TABLE(ab));
- ab->b_arc_access = 0;
- bzero(&ab->b_dva, sizeof (dva_t));	/* same identity reset as the in-progress case */
- ab->b_birth = 0;
- ab->b_cksum0 = 0;
- ab->b_buf->b_efunc = NULL;
- ab->b_buf->b_private = NULL;
- mutex_exit(hash_lock);
- }
- }
-
- zio = zio_free(pio, spa, txg, bp, done, private);	/* issued whether or not the block was cached */
-
- if (arc_flags & ARC_WAIT)
- return (zio_wait(zio));
-
- ASSERT(arc_flags & ARC_NOWAIT);
- zio_nowait(zio);
-
- return (0);
-}
-
-/*
- * Give back a temporary reservation previously added to arc_tempreserve
- * by arc_tempreserve_space().
- */
-void
-arc_tempreserve_clear(uint64_t tempreserve)
-{
-	atomic_add_64(&arc_tempreserve, -tempreserve);
-
-	/* The running total must never be driven below zero. */
-	ASSERT(!((int64_t)arc_tempreserve < 0));
-}
-
-/*
- * Try to reserve `tempreserve' bytes of ARC space for dirty data.
- *
- * Returns 0 and adds the amount to arc_tempreserve on success, ENOMEM
- * when the request is larger than the (possibly just grown) cache
- * target arc_c, and ERESTART when the write should be throttled.
- * ZFS_DEBUG builds also return ERESTART at random so that callers'
- * retry paths get exercised.
- */
-int
-arc_tempreserve_space(uint64_t tempreserve)
-{
-#ifdef ZFS_DEBUG
-	/*
-	 * Once in a while, fail for no reason. Everything should cope.
-	 */
-	if (spa_get_random(10000) == 0) {
-		dprintf("forcing random failure\n");
-		return (ERESTART);
-	}
-#endif
-	/* Grow the target to four times the request, capped at arc_c_max. */
-	if (tempreserve > arc_c/4 && !arc_no_grow)
-		arc_c = MIN(arc_c_max, tempreserve * 4);
-	if (tempreserve > arc_c)
-		return (ENOMEM);
-
-	/*
-	 * Throttle writes when the amount of dirty data in the cache
-	 * gets too large. We try to keep the cache less than half full
-	 * of dirty blocks so that our sync times don't grow too large.
-	 * Note: if two requests come in concurrently, we might let them
-	 * both succeed, when one of them should fail. Not a huge deal.
-	 *
-	 * XXX The limit should be adjusted dynamically to keep the time
-	 * to sync a dataset fixed (around 1-5 seconds?).
-	 */
-
-	if (tempreserve + arc_tempreserve + arc_anon->arcs_size > arc_c / 2 &&
-	    arc_tempreserve + arc_anon->arcs_size > arc_c / 4) {
-		/*
-		 * Report the same anon figure the test above used
-		 * (arcs_size), and cast every argument so that it
-		 * matches %llu regardless of how uint64_t is defined.
-		 */
-		dprintf("failing, arc_tempreserve=%lluK anon=%lluK "
-		    "tempreserve=%lluK arc_c=%lluK\n",
-		    (unsigned long long)(arc_tempreserve >> 10),
-		    (unsigned long long)(arc_anon->arcs_size >> 10),
-		    (unsigned long long)(tempreserve >> 10),
-		    (unsigned long long)(arc_c >> 10));
-		return (ERESTART);
-	}
-	atomic_add_64(&arc_tempreserve, tempreserve);
-	return (0);
-}
-
-static kmutex_t arc_lowmem_lock;
-#ifdef _KERNEL
-static eventhandler_tag arc_event_lowmem = NULL;
-
-/*
- * vm_lowmem event handler (registered in arc_init()).  Raises
- * zfs_needfree, signals the reclaim thread's condition variable and
- * then sleeps in hz / 5 tick slices until zfs_needfree reads as zero
- * again.  NOTE(review): nothing in this function clears zfs_needfree;
- * presumably the reclaim thread does - confirm.
- */
-static void
-arc_lowmem(void *arg __unused, int howto __unused)
-{
-
-	/* Serialize access via arc_lowmem_lock. */
-	mutex_enter(&arc_lowmem_lock);
-
-	zfs_needfree = 1;
-	cv_signal(&arc_reclaim_thr_cv);
-
-	while (zfs_needfree != 0) {
-		tsleep(&zfs_needfree, 0, "zfs:lowmem", hz / 5);
-	}
-
-	mutex_exit(&arc_lowmem_lock);
-}
-#endif
-
-void
-arc_init(void)	/* one-time ARC setup: size limits, state lists, kstats, reclaim thread */
-{
- mutex_init(&arc_reclaim_thr_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&arc_lowmem_lock, NULL, MUTEX_DEFAULT, NULL);
-
- /* Convert seconds to clock ticks */
- arc_min_prefetch_lifespan = 1 * hz;
-
- /* Start out with 1/8 of kmem_size() */
- arc_c = kmem_size() / 8;
-#if 0
-#ifdef _KERNEL
- /*
- * On architectures where the physical memory can be larger
- * than the addressable space (intel in 32-bit mode), we may
- * need to limit the cache to 1/8 of VM size.
- */
- arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
-#endif
-#endif
- /* set min cache to 1/32 of kmem_size(), or 16MB, whichever is more */
- arc_c_min = MAX(arc_c / 4, 64<<18);
- /*
-  * set max to 5/8 of kmem_size() (arc_c * 5), or all but 1GB of it,
-  * whichever is more
-  */
- if (arc_c * 8 >= 1<<30)
- arc_c_max = (arc_c * 8) - (1<<30);
- else
- arc_c_max = arc_c_min;
- arc_c_max = MAX(arc_c * 5, arc_c_max);
-#ifdef _KERNEL
- /*
- * Allow the tunables to override our calculations if they are
- * reasonable (ie. over 16MB)
- */
- if (zfs_arc_max >= 64<<18 && zfs_arc_max < kmem_size())
- arc_c_max = zfs_arc_max;
- if (zfs_arc_min >= 64<<18 && zfs_arc_min <= arc_c_max)
- arc_c_min = zfs_arc_min;
-#endif
- arc_c = arc_c_max;	/* the initial target is the maximum */
- arc_p = (arc_c >> 1);
-
- /* if kmem_flags are set, lets try to use less memory */
- if (kmem_debugging())
- arc_c = arc_c / 2;
- if (arc_c < arc_c_min)
- arc_c = arc_c_min;
-
- zfs_arc_min = arc_c_min;	/* write the values actually in effect back to the tunables */
- zfs_arc_max = arc_c_max;
-
- arc_anon = &ARC_anon;
- arc_mru = &ARC_mru;
- arc_mru_ghost = &ARC_mru_ghost;
- arc_mfu = &ARC_mfu;
- arc_mfu_ghost = &ARC_mfu_ghost;
- arc_size = 0;
-
- mutex_init(&arc_anon->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mru->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mru_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mfu->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mfu_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
-
- list_create(&arc_mru->arcs_list, sizeof (arc_buf_hdr_t),	/* no list is created for arc_anon */
- offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mru_ghost->arcs_list, sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mfu->arcs_list, sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mfu_ghost->arcs_list, sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_arc_node));
-
- buf_init();
-
- arc_thread_exit = 0;
- arc_eviction_list = NULL;
- mutex_init(&arc_eviction_mtx, NULL, MUTEX_DEFAULT, NULL);
- bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));
-
- arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
- sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
-
- if (arc_ksp != NULL) {	/* a failed kstat_create() is tolerated */
- arc_ksp->ks_data = &arc_stats;
- kstat_install(arc_ksp);
- }
-
- (void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
- TS_RUN, minclsyspri);
-
-#ifdef _KERNEL
- arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
- EVENTHANDLER_PRI_FIRST);
-#endif
-
- arc_dead = FALSE;
-
-#ifdef _KERNEL
- /* Warn about ZFS memory and address space requirements. */
- if (((uint64_t)physmem * PAGESIZE) < (256 + 128 + 64) * (1 << 20)) {	/* threshold is 448MB */
- printf("ZFS WARNING: Recommended minimum RAM size is 512MB; "
- "expect unstable behavior.\n");
- }
- if (kmem_size() < 512 * (1 << 20)) {
- printf("ZFS WARNING: Recommended minimum kmem_size is 512MB; "
- "expect unstable behavior.\n");
- printf(" Consider tuning vm.kmem_size and "
- "vm.kmem_size_max\n");
- printf(" in /boot/loader.conf.\n");
- }
-#endif
-}
-
-/*
- * Tear down everything arc_init() set up.
- *
- * The vm_lowmem handler is unhooked before anything else.  arc_lowmem()
- * takes arc_lowmem_lock, signals the reclaim thread and then sleeps
- * until zfs_needfree is cleared, so it must not be able to run once the
- * reclaim thread has been told to exit or once arc_lowmem_lock has been
- * destroyed.  Deregistering last, as this function used to, left a
- * window in which a low-memory event could use a destroyed mutex or
- * wait on a thread that no longer exists.
- */
-void
-arc_fini(void)
-{
-#ifdef _KERNEL
-	if (arc_event_lowmem != NULL) {
-		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
-		arc_event_lowmem = NULL;
-	}
-#endif
-
-	/* Ask the reclaim thread to exit and wait until it acknowledges. */
-	mutex_enter(&arc_reclaim_thr_lock);
-	arc_thread_exit = 1;
-	cv_signal(&arc_reclaim_thr_cv);
-	while (arc_thread_exit != 0)
-		cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock);
-	mutex_exit(&arc_reclaim_thr_lock);
-
-	arc_flush();
-
-	arc_dead = TRUE;
-
-	if (arc_ksp != NULL) {
-		kstat_delete(arc_ksp);
-		arc_ksp = NULL;
-	}
-
-	mutex_destroy(&arc_eviction_mtx);
-	mutex_destroy(&arc_reclaim_thr_lock);
-	cv_destroy(&arc_reclaim_thr_cv);
-
-	list_destroy(&arc_mru->arcs_list);
-	list_destroy(&arc_mru_ghost->arcs_list);
-	list_destroy(&arc_mfu->arcs_list);
-	list_destroy(&arc_mfu_ghost->arcs_list);
-
-	mutex_destroy(&arc_anon->arcs_mtx);
-	mutex_destroy(&arc_mru->arcs_mtx);
-	mutex_destroy(&arc_mru_ghost->arcs_mtx);
-	mutex_destroy(&arc_mfu->arcs_mtx);
-	mutex_destroy(&arc_mfu_ghost->arcs_mtx);
-
-	buf_fini();
-
-	/* Safe now: the only user of this lock was unhooked above. */
-	mutex_destroy(&arc_lowmem_lock);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/bplist.c b/sys/contrib/opensolaris/uts/common/fs/zfs/bplist.c
deleted file mode 100644
index 4442b1f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/bplist.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/bplist.h>
-#include <sys/zfs_context.h>
-
-/*
- * Make sure the bplist's bonus buffer is held and bpl_phys points at
- * its data.  The caller must hold bpl_lock.  Returns 0, or the error
- * from dmu_bonus_hold().
- */
-static int
-bplist_hold(bplist_t *bpl)
-{
-	int err;
-
-	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
-
-	/* Already held: nothing to do. */
-	if (bpl->bpl_dbuf != NULL)
-		return (0);
-
-	err = dmu_bonus_hold(bpl->bpl_mos, bpl->bpl_object, bpl,
-	    &bpl->bpl_dbuf);
-	if (err != 0)
-		return (err);
-
-	bpl->bpl_phys = bpl->bpl_dbuf->db_data;
-	return (0);
-}
-
-/*
- * Allocate a new bplist object in `mos' and return its object number.
- * Pools older than ZFS_VERSION_BPLIST_ACCOUNT get the smaller V0 bonus
- * header; newer pools get a full bplist_phys_t.
- */
-uint64_t
-bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
-{
-	int size;
-
-	if (spa_version(dmu_objset_spa(mos)) < ZFS_VERSION_BPLIST_ACCOUNT)
-		size = BPLIST_SIZE_V0;
-	else
-		size = sizeof (bplist_phys_t);
-
-	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
-	    DMU_OT_BPLIST_HDR, size, tx));
-}
-
-/*
- * Free the bplist object; the free is required to succeed.
- */
-void
-bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
-{
-	VERIFY(0 == dmu_object_free(mos, object, tx));
-}
-
-/*
- * Bind an in-core bplist to the on-disk object `object' in `mos' and
- * derive the block and block-pointer shifts from the object's data
- * block size.  No buffers are held here; that happens on first use.
- * Returns 0, or the error from dmu_object_info().
- */
-int
-bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
-{
-	dmu_object_info_t doi;
-	int err = dmu_object_info(mos, object, &doi);
-
-	if (err != 0)
-		return (err);
-
-	mutex_enter(&bpl->bpl_lock);
-
-	/* The bplist must be closed and idle. */
-	ASSERT(bpl->bpl_dbuf == NULL);
-	ASSERT(bpl->bpl_phys == NULL);
-	ASSERT(bpl->bpl_cached_dbuf == NULL);
-	ASSERT(bpl->bpl_queue == NULL);
-
-	/* The object must really be a bplist. */
-	ASSERT(object != 0);
-	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
-	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
-
-	bpl->bpl_mos = mos;
-	bpl->bpl_object = object;
-	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
-	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
-	/* Only a full-size bonus header carries the comp/uncomp totals. */
-	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
-
-	mutex_exit(&bpl->bpl_lock);
-	return (0);
-}
-
-/*
- * Drop whatever buffer holds the bplist has accumulated (the cached
- * data block and the bonus buffer).  The deferred queue must be empty.
- */
-void
-bplist_close(bplist_t *bpl)
-{
-	mutex_enter(&bpl->bpl_lock);
-
-	ASSERT(bpl->bpl_queue == NULL);
-
-	if (bpl->bpl_cached_dbuf != NULL) {
-		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
-		bpl->bpl_cached_dbuf = NULL;
-	}
-
-	if (bpl->bpl_dbuf != NULL) {
-		dmu_buf_rele(bpl->bpl_dbuf, bpl);
-		bpl->bpl_dbuf = NULL;
-		/* bpl_phys pointed into the buffer just released. */
-		bpl->bpl_phys = NULL;
-	}
-
-	mutex_exit(&bpl->bpl_lock);
-}
-
-/*
- * Report whether the bplist has no entries.  A bplist with no backing
- * object is empty by definition; otherwise the on-disk entry count
- * decides.  Failure to hold the bonus buffer is fatal.
- */
-boolean_t
-bplist_empty(bplist_t *bpl)
-{
-	boolean_t is_empty;
-
-	if (bpl->bpl_object == 0)
-		return (B_TRUE);
-
-	mutex_enter(&bpl->bpl_lock);
-	VERIFY(0 == bplist_hold(bpl)); /* XXX */
-	is_empty = (bpl->bpl_phys->bpl_entries == 0);
-	mutex_exit(&bpl->bpl_lock);
-
-	return (is_empty);
-}
-
-/*
- * Make bpl_cached_dbuf refer to data block `blkid' of the bplist
- * object.  A block that is already cached is reused; otherwise any
- * previously cached block is released and the requested one is held.
- * Returns 0, or the error from dmu_buf_hold().
- */
-static int
-bplist_cache(bplist_t *bpl, uint64_t blkid)
-{
-	uint64_t offset = blkid << bpl->bpl_blockshift;
-	int err;
-
-	if (bpl->bpl_cached_dbuf != NULL) {
-		if (bpl->bpl_cached_dbuf->db_offset == offset)
-			return (0);
-		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
-	}
-
-	err = dmu_buf_hold(bpl->bpl_mos, bpl->bpl_object, offset,
-	    bpl, &bpl->bpl_cached_dbuf);
-	ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
-	    1ULL << bpl->bpl_blockshift);
-
-	return (err);
-}
-
-/*
- * Copy entry number *itorp of the bplist into *bp and advance the
- * iterator.  Returns 0 on success, ENOENT once the iterator has
- * reached the end of the list, or an error from the DMU.
- */
-int
-bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
-{
-	uint64_t blk, off;
-	blkptr_t *bparray;
-	int err;
-
-	mutex_enter(&bpl->bpl_lock);
-
-	err = bplist_hold(bpl);
-	if (err != 0)
-		goto out;
-
-	if (*itorp >= bpl->bpl_phys->bpl_entries) {
-		err = ENOENT;
-		goto out;
-	}
-
-	/* Split the entry index into a block number and a slot within it. */
-	blk = *itorp >> bpl->bpl_bpshift;
-	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
-
-	err = bplist_cache(bpl, blk);
-	if (err != 0)
-		goto out;
-
-	bparray = bpl->bpl_cached_dbuf->db_data;
-	*bp = bparray[off];
-	(*itorp)++;
-out:
-	mutex_exit(&bpl->bpl_lock);
-	return (err);
-}
-
-/*
- * Append block pointer `bp' to the on-disk bplist within transaction
- * `tx' and update the accounting kept in the bonus header.
- *
- * Returns 0 on success or the error from bplist_hold()/bplist_cache().
- * bpl_lock is released on every return path; the bplist_hold() failure
- * path used to return with the lock still held.
- */
-int
-bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
-{
-	uint64_t blk, off;
-	blkptr_t *bparray;
-	int err;
-
-	ASSERT(!BP_IS_HOLE(bp));
-	mutex_enter(&bpl->bpl_lock);
-	err = bplist_hold(bpl);
-	if (err) {
-		mutex_exit(&bpl->bpl_lock);
-		return (err);
-	}
-
-	/* The new entry goes in the slot just past the current last one. */
-	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
-	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
-
-	err = bplist_cache(bpl, blk);
-	if (err) {
-		mutex_exit(&bpl->bpl_lock);
-		return (err);
-	}
-
-	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
-	bparray = bpl->bpl_cached_dbuf->db_data;
-	bparray[off] = *bp;
-
-	/* We never need the fill count. */
-	bparray[off].blk_fill = 0;
-
-	/* The bplist will compress better if we can leave off the checksum */
-	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
-
-	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
-	bpl->bpl_phys->bpl_entries++;
-	bpl->bpl_phys->bpl_bytes +=
-	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
-	if (bpl->bpl_havecomp) {
-		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
-		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
-	}
-	mutex_exit(&bpl->bpl_lock);
-
-	return (0);
-}
-
-/*
- * Queue a copy of `bp' in memory, at the head of bpl_queue;
- * bplist_sync() writes queued entries out later.
- */
-void
-bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
-{
-	bplist_q_t *node;
-
-	node = kmem_alloc(sizeof (*node), KM_SLEEP);
-
-	ASSERT(!BP_IS_HOLE(bp));
-
-	mutex_enter(&bpl->bpl_lock);
-	node->bpq_blk = *bp;
-	node->bpq_next = bpl->bpl_queue;
-	bpl->bpl_queue = node;
-	mutex_exit(&bpl->bpl_lock);
-}
-
-/*
- * Drain the deferred queue: pop one entry at a time under bpl_lock,
- * then write it with bplist_enqueue() (which takes the lock itself)
- * and free the queue node.  Enqueue failures are fatal.
- */
-void
-bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
-{
-	bplist_q_t *node;
-
-	for (;;) {
-		mutex_enter(&bpl->bpl_lock);
-		node = bpl->bpl_queue;
-		if (node != NULL)
-			bpl->bpl_queue = node->bpq_next;
-		mutex_exit(&bpl->bpl_lock);
-
-		if (node == NULL)
-			break;
-
-		VERIFY(0 == bplist_enqueue(bpl, &node->bpq_blk, tx));
-		kmem_free(node, sizeof (*node));
-	}
-}
-
-/*
- * Empty the bplist: free the whole data range of the object and reset
- * the counters in the bonus header.  The deferred queue must already
- * be empty; DMU failures are fatal.
- */
-void
-bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
-{
-	mutex_enter(&bpl->bpl_lock);
-
-	ASSERT3P(bpl->bpl_queue, ==, NULL);
-	VERIFY(0 == bplist_hold(bpl));
-
-	/* Dirty the bonus buffer before touching the header stored in it. */
-	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
-	VERIFY(0 == dmu_free_range(bpl->bpl_mos, bpl->bpl_object,
-	    0, -1ULL, tx));
-
-	bpl->bpl_phys->bpl_entries = 0;
-	bpl->bpl_phys->bpl_bytes = 0;
-	if (bpl->bpl_havecomp) {
-		bpl->bpl_phys->bpl_comp = 0;
-		bpl->bpl_phys->bpl_uncomp = 0;
-	}
-
-	mutex_exit(&bpl->bpl_lock);
-}
-
-/*
- * Report the space accounted to the bplist.  *usedp always comes from
- * the bonus header.  The compressed/uncompressed totals come from the
- * header when it carries them (bpl_havecomp); otherwise they are
- * computed by walking every entry.  Returns 0 or a DMU error.
- */
-int
-bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
-{
-	uint64_t itor = 0, comp = 0, uncomp = 0;
-	blkptr_t bp;
-	int err;
-
-	mutex_enter(&bpl->bpl_lock);
-
-	err = bplist_hold(bpl);
-	if (err != 0) {
-		mutex_exit(&bpl->bpl_lock);
-		return (err);
-	}
-
-	*usedp = bpl->bpl_phys->bpl_bytes;
-	if (bpl->bpl_havecomp) {
-		*compp = bpl->bpl_phys->bpl_comp;
-		*uncompp = bpl->bpl_phys->bpl_uncomp;
-	}
-	mutex_exit(&bpl->bpl_lock);
-
-	if (bpl->bpl_havecomp)
-		return (0);
-
-	/* Old-format header: add the sizes up entry by entry. */
-	for (;;) {
-		err = bplist_iterate(bpl, &itor, &bp);
-		if (err != 0)
-			break;
-		comp += BP_GET_PSIZE(&bp);
-		uncomp += BP_GET_UCSIZE(&bp);
-	}
-	/* Running off the end of the list is the normal way out. */
-	if (err == ENOENT)
-		err = 0;
-
-	*compp = comp;
-	*uncompp = uncomp;
-
-	return (err);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
deleted file mode 100644
index 94c6308..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ /dev/null
@@ -1,2247 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dmu.h>
-#include <sys/dmu_impl.h>
-#include <sys/dbuf.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dmu_tx.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu_zfetch.h>
-
-static void dbuf_destroy(dmu_buf_impl_t *db);
-static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum,
- int compress, dmu_tx_t *tx);
-static arc_done_func_t dbuf_write_ready;
-static arc_done_func_t dbuf_write_done;
-
-int zfs_mdcomp_disable = 0;	/* non-zero disables metadata compression (see the sysctl description below) */
-SYSCTL_DECL(_vfs_zfs);
-TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);	/* loader tunable for the same variable */
-SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
- &zfs_mdcomp_disable, 0, "Disable metadata compression");
-
-/*
- * Global data structures and functions for the dbuf cache.
- *
- * dbuf_cache is the kmem cache of dmu_buf_impl_t objects; it is
- * created in dbuf_init() and destroyed in dbuf_fini().
- */
-static kmem_cache_t *dbuf_cache;
-
-/*
- * kmem cache constructor for dmu_buf_impl_t: zero the object and set
- * up its mutex, condition variable and hold count.  Always returns 0.
- */
-/* ARGSUSED */
-static int
-dbuf_cons(void *vdb, void *unused, int kmflag)
-{
-	dmu_buf_impl_t *db = vdb;
-
-	bzero(db, sizeof (*db));
-
-	mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
-	refcount_create(&db->db_holds);
-
-	return (0);
-}
-
-/*
- * kmem cache destructor for dmu_buf_impl_t: tear down what dbuf_cons()
- * set up, in the reverse order.
- */
-/* ARGSUSED */
-static void
-dbuf_dest(void *vdb, void *unused)
-{
-	dmu_buf_impl_t *db = vdb;
-
-	refcount_destroy(&db->db_holds);
-	cv_destroy(&db->db_changed);
-	mutex_destroy(&db->db_mtx);
-}
-
-/*
- * dbuf hash table routines
- */
-static dbuf_hash_table_t dbuf_hash_table;
-
-static uint64_t dbuf_hash_count;
-
-/*
- * Hash an (objset, object, level, blkid) tuple.  Six key bytes are run
- * through the CRC64 table one at a time; the remaining high-order bits
- * of the objset pointer, object number and block id are then XORed in.
- */
-static uint64_t
-dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
-{
-	uintptr_t osv = (uintptr_t)os;
-	uint64_t crc = -1ULL;
-	uint8_t key[6];
-	int i;
-
-	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
-
-	/* Only the low byte of each term takes part in a CRC step. */
-	key[0] = lvl;
-	key[1] = (uint8_t)(osv >> 6);
-	key[2] = (uint8_t)(obj >> 0);
-	key[3] = (uint8_t)(obj >> 8);
-	key[4] = (uint8_t)(blkid >> 0);
-	key[5] = (uint8_t)(blkid >> 8);
-
-	for (i = 0; i < (int)(sizeof (key) / sizeof (key[0])); i++)
-		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ key[i]) & 0xFF];
-
-	crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16);
-
-	return (crc);
-}
-
-/*
- * DBUF_HASH() expands to a plain expression.  It must not end in a
- * semicolon: every user writes "hv = DBUF_HASH(...);", and a second
- * semicolon would leave an empty statement between declarations.
- */
-#define	DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid)
-
-/* True when `dbuf' is the dbuf for exactly this (os, obj, level, blkid). */
-#define	DBUF_EQUAL(dbuf, os, obj, level, blkid) \
-	((dbuf)->db.db_object == (obj) && \
-	(dbuf)->db_objset == (os) && \
-	(dbuf)->db_level == (level) && \
-	(dbuf)->db_blkid == (blkid))
-
-/*
- * Look up the dbuf for (dn, level, blkid) in the hash table.  A match
- * that is not being evicted is returned with its db_mtx held; dbufs in
- * the DB_EVICTING state are skipped.  Returns NULL when nothing usable
- * is found.
- */
-dmu_buf_impl_t *
-dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
-{
-	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = dn->dn_objset;
-	uint64_t obj = dn->dn_object;
-	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
-	uint64_t idx = hv & h->hash_table_mask;
-	dmu_buf_impl_t *db;
-
-	mutex_enter(DBUF_HASH_MUTEX(h, idx));
-	for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
-		if (!DBUF_EQUAL(db, os, obj, level, blkid))
-			continue;
-
-		mutex_enter(&db->db_mtx);
-		if (db->db_state != DB_EVICTING)
-			break;		/* found: leave with db_mtx held */
-		mutex_exit(&db->db_mtx);
-	}
-	mutex_exit(DBUF_HASH_MUTEX(h, idx));
-
-	/* db is NULL here when the chain was exhausted. */
-	return (db);
-}
-
-/*
- * Add `db' to the hash table unless an equal dbuf that is not being
- * evicted is already there.  In that case the existing dbuf is
- * returned with its db_mtx held and `db' is not inserted.  Otherwise
- * `db' is linked at the head of its chain, its db_mtx is left held,
- * and NULL is returned.
- */
-static dmu_buf_impl_t *
-dbuf_hash_insert(dmu_buf_impl_t *db)
-{
-	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = db->db_objset;
-	uint64_t obj = db->db.db_object;
-	int level = db->db_level;
-	uint64_t blkid = db->db_blkid;
-	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
-	uint64_t idx = hv & h->hash_table_mask;
-	dmu_buf_impl_t *dbf;
-
-	mutex_enter(DBUF_HASH_MUTEX(h, idx));
-
-	for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) {
-		if (!DBUF_EQUAL(dbf, os, obj, level, blkid))
-			continue;
-
-		mutex_enter(&dbf->db_mtx);
-		if (dbf->db_state != DB_EVICTING) {
-			mutex_exit(DBUF_HASH_MUTEX(h, idx));
-			return (dbf);
-		}
-		mutex_exit(&dbf->db_mtx);
-	}
-
-	/* No live duplicate: push db onto the front of the chain. */
-	mutex_enter(&db->db_mtx);
-	db->db_hash_next = h->hash_table[idx];
-	h->hash_table[idx] = db;
-	mutex_exit(DBUF_HASH_MUTEX(h, idx));
-
-	atomic_add_64(&dbuf_hash_count, 1);
-
-	return (NULL);
-}
-
-/*
- * Remove an entry from the hash table.  The dbuf must have no holds,
- * must be in the DB_EVICTING state and must be present in the table;
- * all three are asserted rather than reported to the caller.
- */
-static void
-dbuf_hash_remove(dmu_buf_impl_t *db)
-{
-	dbuf_hash_table_t *h = &dbuf_hash_table;
-	uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object,
-	    db->db_level, db->db_blkid);
-	uint64_t idx = hv & h->hash_table_mask;
-	dmu_buf_impl_t *dbf, **dbp;
-
-	/*
-	 * We mustn't hold db_mtx to maintain lock ordering:
-	 * DBUF_HASH_MUTEX > db_mtx.
-	 */
-	ASSERT(refcount_is_zero(&db->db_holds));
-	ASSERT(db->db_state == DB_EVICTING);
-	ASSERT(!MUTEX_HELD(&db->db_mtx));
-
-	mutex_enter(DBUF_HASH_MUTEX(h, idx));
-	dbp = &h->hash_table[idx];
-	while ((dbf = *dbp) != db) {
-		/*
-		 * Check for the end of the chain before using dbf; the
-		 * assertion used to come after &dbf->db_hash_next.
-		 */
-		ASSERT(dbf != NULL);
-		dbp = &dbf->db_hash_next;
-	}
-	*dbp = db->db_hash_next;
-	db->db_hash_next = NULL;
-	mutex_exit(DBUF_HASH_MUTEX(h, idx));
-	atomic_add_64(&dbuf_hash_count, -1);
-}
-
-static arc_evict_func_t dbuf_do_evict;
-
-/*
- * Run the user's eviction callback on a level-0 dbuf, if one is
- * registered, and then forget the user's pointers.  The caller must
- * hold db_mtx.
- */
-static void
-dbuf_evict_user(dmu_buf_impl_t *db)
-{
-	ASSERT(MUTEX_HELD(&db->db_mtx));
-
-	if (db->db_level == 0 && db->db_evict_func != NULL) {
-		/* Let the user's data pointer track the buffer one last time. */
-		if (db->db_user_data_ptr_ptr != NULL)
-			*db->db_user_data_ptr_ptr = db->db.db_data;
-
-		db->db_evict_func(&db->db, db->db_user_ptr);
-
-		db->db_user_ptr = NULL;
-		db->db_user_data_ptr_ptr = NULL;
-		db->db_evict_func = NULL;
-	}
-}
-
-/*
- * Clear and then destroy a dbuf.  The caller holds db_mtx, and the
- * dbuf must have neither an ARC buffer nor a pending dirty record.
- */
-void
-dbuf_evict(dmu_buf_impl_t *db)
-{
-	ASSERT(MUTEX_HELD(&db->db_mtx));
-	ASSERT(db->db_buf == NULL);
-	ASSERT(db->db_data_pending == NULL);
-
-	dbuf_clear(db);
-
-	dbuf_destroy(db);
-}
-
-/*
- * Set up the dbuf hash table, the dmu_buf_impl_t kmem cache and the
- * hash-chain mutexes.
- */
-void
-dbuf_init(void)
-{
-	dbuf_hash_table_t *h = &dbuf_hash_table;
-	uint64_t hsize = 1ULL << 16;
-	int i;
-
-	/*
-	 * The hash table is big enough to fill all of physical memory
-	 * with an average 4K block size. The table will take up
-	 * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers).
-	 */
-	while (hsize * 4096 < (uint64_t)physmem * PAGESIZE)
-		hsize <<= 1;
-
-	/* Halve the table size until the allocation succeeds. */
-	for (;;) {
-		h->hash_table_mask = hsize - 1;
-		h->hash_table = kmem_zalloc(hsize * sizeof (void *),
-		    KM_NOSLEEP);
-		if (h->hash_table != NULL)
-			break;
-
-		/* XXX - we should really return an error instead of assert */
-		ASSERT(hsize > (1ULL << 10));
-		hsize >>= 1;
-	}
-
-	dbuf_cache = kmem_cache_create("dmu_buf_impl_t",
-	    sizeof (dmu_buf_impl_t),
-	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
-
-	for (i = 0; i < DBUF_MUTEXES; i++)
-		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
-}
-
-/*
- * Undo dbuf_init(): destroy the hash mutexes, free the hash table and
- * destroy the dbuf kmem cache.
- */
-void
-dbuf_fini(void)
-{
-	dbuf_hash_table_t *h = &dbuf_hash_table;
-	size_t tblsize = (h->hash_table_mask + 1) * sizeof (void *);
-	int i;
-
-	for (i = 0; i < DBUF_MUTEXES; i++)
-		mutex_destroy(&h->hash_mutexes[i]);
-
-	kmem_free(h->hash_table, tblsize);
-	kmem_cache_destroy(dbuf_cache);
-}
-
-/*
- * Other stuff.
- */
-
-#ifdef ZFS_DEBUG
-static void
-dbuf_verify(dmu_buf_impl_t *db)
-{
- dnode_t *dn = db->db_dnode;
-
- ASSERT(MUTEX_HELD(&db->db_mtx));
-
- if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY))
- return;
-
- ASSERT(db->db_objset != NULL);
- if (dn == NULL) {
- ASSERT(db->db_parent == NULL);
- ASSERT(db->db_blkptr == NULL);
- } else {
- ASSERT3U(db->db.db_object, ==, dn->dn_object);
- ASSERT3P(db->db_objset, ==, dn->dn_objset);
- ASSERT3U(db->db_level, <, dn->dn_nlevels);
- ASSERT(db->db_blkid == DB_BONUS_BLKID ||
- list_head(&dn->dn_dbufs));
- }
- if (db->db_blkid == DB_BONUS_BLKID) {
- ASSERT(dn != NULL);
- ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen);
- ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
- } else {
- ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
- }
-
- if (db->db_level == 0) {
- /* we can be momentarily larger in dnode_set_blksz() */
- if (db->db_blkid != DB_BONUS_BLKID && dn) {
- ASSERT3U(db->db.db_size, >=, dn->dn_datablksz);
- }
- if (db->db.db_object == DMU_META_DNODE_OBJECT) {
- dbuf_dirty_record_t *dr = db->db_data_pending;
- /*
- * it should only be modified in syncing
- * context, so make sure we only have
- * one copy of the data.
- */
- ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
- }
- }
-
- /* verify db->db_blkptr */
- if (db->db_blkptr) {
- if (db->db_parent == dn->dn_dbuf) {
- /* db is pointed to by the dnode */
- /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
- if (db->db.db_object == DMU_META_DNODE_OBJECT)
- ASSERT(db->db_parent == NULL);
- else
- ASSERT(db->db_parent != NULL);
- ASSERT3P(db->db_blkptr, ==,
- &dn->dn_phys->dn_blkptr[db->db_blkid]);
- } else {
- /* db is pointed to by an indirect block */
- int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT;
- ASSERT3U(db->db_parent->db_level, ==, db->db_level+1);
- ASSERT3U(db->db_parent->db.db_object, ==,
- db->db.db_object);
- /*
- * dnode_grow_indblksz() can make this fail if we don't
- * have the struct_rwlock. XXX indblksz no longer
- * grows. safe to do this now?
- */
- if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock)) {
- ASSERT3P(db->db_blkptr, ==,
- ((blkptr_t *)db->db_parent->db.db_data +
- db->db_blkid % epb));
- }
- }
- }
- if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) &&
- db->db.db_data && db->db_blkid != DB_BONUS_BLKID &&
- db->db_state != DB_FILL && !dn->dn_free_txg) {
- /*
- * If the blkptr isn't set but they have nonzero data,
- * it had better be dirty, otherwise we'll lose that
- * data when we evict this buffer.
- */
- if (db->db_dirtycnt == 0) {
- uint64_t *buf = db->db.db_data;
- int i;
-
- for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
- }
- }
- }
-}
-#endif
-
-static void
-dbuf_update_data(dmu_buf_impl_t *db)
-{
- ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_level == 0 && db->db_user_data_ptr_ptr) {
- ASSERT(!refcount_is_zero(&db->db_holds));
- *db->db_user_data_ptr_ptr = db->db.db_data;
- }
-}
-
-static void
-dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
-{
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
- db->db_buf = buf;
- if (buf != NULL) {
- ASSERT(buf->b_data != NULL);
- db->db.db_data = buf->b_data;
- if (!arc_released(buf))
- arc_set_callback(buf, dbuf_do_evict, db);
- dbuf_update_data(db);
- } else {
- dbuf_evict_user(db);
- db->db.db_data = NULL;
- db->db_state = DB_UNCACHED;
- }
-}
-
-uint64_t
-dbuf_whichblock(dnode_t *dn, uint64_t offset)
-{
- if (dn->dn_datablkshift) {
- return (offset >> dn->dn_datablkshift);
- } else {
- ASSERT3U(offset, <, dn->dn_datablksz);
- return (0);
- }
-}
-
-static void
-dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
-{
- dmu_buf_impl_t *db = vdb;
-
- mutex_enter(&db->db_mtx);
- ASSERT3U(db->db_state, ==, DB_READ);
- /*
- * All reads are synchronous, so we must have a hold on the dbuf
- */
- ASSERT(refcount_count(&db->db_holds) > 0);
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
- if (db->db_level == 0 && db->db_freed_in_flight) {
- /* we were freed in flight; disregard any error */
- arc_release(buf, db);
- bzero(buf->b_data, db->db.db_size);
- arc_buf_freeze(buf);
- db->db_freed_in_flight = FALSE;
- dbuf_set_data(db, buf);
- db->db_state = DB_CACHED;
- } else if (zio == NULL || zio->io_error == 0) {
- dbuf_set_data(db, buf);
- db->db_state = DB_CACHED;
- } else {
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- ASSERT3P(db->db_buf, ==, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
- db->db_state = DB_UNCACHED;
- }
- cv_broadcast(&db->db_changed);
- mutex_exit(&db->db_mtx);
- dbuf_rele(db, NULL);
-}
-
-static void
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
-{
- blkptr_t *bp;
- zbookmark_t zb;
- uint32_t aflags = ARC_NOWAIT;
-
- ASSERT(!refcount_is_zero(&db->db_holds));
- /* We need the struct_rwlock to prevent db_blkptr from changing. */
- ASSERT(RW_LOCK_HELD(&db->db_dnode->dn_struct_rwlock));
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_state == DB_UNCACHED);
- ASSERT(db->db_buf == NULL);
-
- if (db->db_blkid == DB_BONUS_BLKID) {
- ASSERT3U(db->db_dnode->dn_bonuslen, ==, db->db.db_size);
- db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
- if (db->db.db_size < DN_MAX_BONUSLEN)
- bzero(db->db.db_data, DN_MAX_BONUSLEN);
- bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data,
- db->db.db_size);
- dbuf_update_data(db);
- db->db_state = DB_CACHED;
- mutex_exit(&db->db_mtx);
- return;
- }
-
- if (db->db_level == 0 && dnode_block_freed(db->db_dnode, db->db_blkid))
- bp = NULL;
- else
- bp = db->db_blkptr;
-
- if (bp == NULL)
- dprintf_dbuf(db, "blkptr: %s\n", "NULL");
- else
- dprintf_dbuf_bp(db, bp, "%s", "blkptr:");
-
- if (bp == NULL || BP_IS_HOLE(bp)) {
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
- ASSERT(bp == NULL || BP_IS_HOLE(bp));
- dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
- db->db.db_size, db, type));
- bzero(db->db.db_data, db->db.db_size);
- db->db_state = DB_CACHED;
- *flags |= DB_RF_CACHED;
- mutex_exit(&db->db_mtx);
- return;
- }
-
- db->db_state = DB_READ;
- mutex_exit(&db->db_mtx);
-
- zb.zb_objset = db->db_objset->os_dsl_dataset ?
- db->db_objset->os_dsl_dataset->ds_object : 0;
- zb.zb_object = db->db.db_object;
- zb.zb_level = db->db_level;
- zb.zb_blkid = db->db_blkid;
-
- dbuf_add_ref(db, NULL);
- /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
- ASSERT3U(db->db_dnode->dn_type, <, DMU_OT_NUMTYPES);
- (void) arc_read(zio, db->db_dnode->dn_objset->os_spa, bp,
- db->db_level > 0 ? byteswap_uint64_array :
- dmu_ot[db->db_dnode->dn_type].ot_byteswap,
- dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
- (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
- &aflags, &zb);
- if (aflags & ARC_CACHED)
- *flags |= DB_RF_CACHED;
-}
-
-int
-dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
-{
- int err = 0;
- int havepzio = (zio != NULL);
- int prefetch;
-
- /*
- * We don't have to hold the mutex to check db_state because it
- * can't be freed while we have a hold on the buffer.
- */
- ASSERT(!refcount_is_zero(&db->db_holds));
-
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
-
- prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
- (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL;
-
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_CACHED) {
- mutex_exit(&db->db_mtx);
- if (prefetch)
- dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&db->db_dnode->dn_struct_rwlock);
- } else if (db->db_state == DB_UNCACHED) {
- if (zio == NULL) {
- zio = zio_root(db->db_dnode->dn_objset->os_spa,
- NULL, NULL, ZIO_FLAG_CANFAIL);
- }
- dbuf_read_impl(db, zio, &flags);
-
- /* dbuf_read_impl has dropped db_mtx for us */
-
- if (prefetch)
- dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
- db->db.db_size, flags & DB_RF_CACHED);
-
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&db->db_dnode->dn_struct_rwlock);
-
- if (!havepzio)
- err = zio_wait(zio);
- } else {
- mutex_exit(&db->db_mtx);
- if (prefetch)
- dmu_zfetch(&db->db_dnode->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
- if ((flags & DB_RF_HAVESTRUCT) == 0)
- rw_exit(&db->db_dnode->dn_struct_rwlock);
-
- mutex_enter(&db->db_mtx);
- if ((flags & DB_RF_NEVERWAIT) == 0) {
- while (db->db_state == DB_READ ||
- db->db_state == DB_FILL) {
- ASSERT(db->db_state == DB_READ ||
- (flags & DB_RF_HAVESTRUCT) == 0);
- cv_wait(&db->db_changed, &db->db_mtx);
- }
- if (db->db_state == DB_UNCACHED)
- err = EIO;
- }
- mutex_exit(&db->db_mtx);
- }
-
- ASSERT(err || havepzio || db->db_state == DB_CACHED);
- return (err);
-}
-
-static void
-dbuf_noread(dmu_buf_impl_t *db)
-{
- ASSERT(!refcount_is_zero(&db->db_holds));
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- mutex_enter(&db->db_mtx);
- while (db->db_state == DB_READ || db->db_state == DB_FILL)
- cv_wait(&db->db_changed, &db->db_mtx);
- if (db->db_state == DB_UNCACHED) {
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db.db_data == NULL);
- dbuf_set_data(db, arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
- db->db.db_size, db, type));
- db->db_state = DB_FILL;
- } else {
- ASSERT3U(db->db_state, ==, DB_CACHED);
- }
- mutex_exit(&db->db_mtx);
-}
-
-/*
- * This is our just-in-time copy function. It makes a copy of
- * buffers, that have been modified in a previous transaction
- * group, before we modify them in the current active group.
- *
- * This function is used in two places: when we are dirtying a
- * buffer for the first time in a txg, and when we are freeing
- * a range in a dnode that includes this buffer.
- *
- * Note that when we are called from dbuf_free_range() we do
- * not put a hold on the buffer, we just traverse the active
- * dbuf list for the dnode.
- */
-static void
-dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
-{
- dbuf_dirty_record_t *dr = db->db_last_dirty;
-
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db.db_data != NULL);
- ASSERT(db->db_level == 0);
- ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
-
- if (dr == NULL ||
- (dr->dt.dl.dr_data !=
- ((db->db_blkid == DB_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
- return;
-
- /*
- * If the last dirty record for this dbuf has not yet synced
- * and its referencing the dbuf data, either:
- * reset the reference to point to a new copy,
- * or (if there a no active holders)
- * just null out the current db_data pointer.
- */
- ASSERT(dr->dr_txg >= txg - 2);
- if (db->db_blkid == DB_BONUS_BLKID) {
- /* Note that the data bufs here are zio_bufs */
- dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
- bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
- } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
- int size = db->db.db_size;
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- dr->dt.dl.dr_data = arc_buf_alloc(
- db->db_dnode->dn_objset->os_spa, size, db, type);
- bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
- } else {
- dbuf_set_data(db, NULL);
- }
-}
-
-void
-dbuf_unoverride(dbuf_dirty_record_t *dr)
-{
- dmu_buf_impl_t *db = dr->dr_dbuf;
- uint64_t txg = dr->dr_txg;
-
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
- ASSERT(db->db_level == 0);
-
- if (db->db_blkid == DB_BONUS_BLKID ||
- dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
- return;
-
- /* free this block */
- if (!BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) {
- /* XXX can get silent EIO here */
- (void) arc_free(NULL, db->db_dnode->dn_objset->os_spa,
- txg, &dr->dt.dl.dr_overridden_by, NULL, NULL, ARC_WAIT);
- }
- dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
- /*
- * Release the already-written buffer, so we leave it in
- * a consistent dirty state. Note that all callers are
- * modifying the buffer, so they will immediately do
- * another (redundant) arc_release(). Therefore, leave
- * the buf thawed to save the effort of freezing &
- * immediately re-thawing it.
- */
- arc_release(dr->dt.dl.dr_data, db);
-}
-
-void
-dbuf_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db, *db_next;
- uint64_t txg = tx->tx_txg;
-
- dprintf_dnode(dn, "blkid=%llu nblks=%llu\n", blkid, nblks);
- mutex_enter(&dn->dn_dbufs_mtx);
- for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
- db_next = list_next(&dn->dn_dbufs, db);
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- if (db->db_level != 0)
- continue;
- dprintf_dbuf(db, "found buf %s\n", "");
- if (db->db_blkid < blkid ||
- db->db_blkid >= blkid+nblks)
- continue;
-
- /* found a level 0 buffer in the range */
- if (dbuf_undirty(db, tx))
- continue;
-
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_UNCACHED ||
- db->db_state == DB_EVICTING) {
- ASSERT(db->db.db_data == NULL);
- mutex_exit(&db->db_mtx);
- continue;
- }
- if (db->db_state == DB_READ || db->db_state == DB_FILL) {
- /* will be handled in dbuf_read_done or dbuf_rele */
- db->db_freed_in_flight = TRUE;
- mutex_exit(&db->db_mtx);
- continue;
- }
- if (refcount_count(&db->db_holds) == 0) {
- ASSERT(db->db_buf);
- dbuf_clear(db);
- continue;
- }
- /* The dbuf is referenced */
-
- if (db->db_last_dirty != NULL) {
- dbuf_dirty_record_t *dr = db->db_last_dirty;
-
- if (dr->dr_txg == txg) {
- /*
- * This buffer is "in-use", re-adjust the file
- * size to reflect that this buffer may
- * contain new data when we sync.
- */
- if (db->db_blkid > dn->dn_maxblkid)
- dn->dn_maxblkid = db->db_blkid;
- dbuf_unoverride(dr);
- } else {
- /*
- * This dbuf is not dirty in the open context.
- * Either uncache it (if its not referenced in
- * the open context) or reset its contents to
- * empty.
- */
- dbuf_fix_old_data(db, txg);
- }
- }
- /* clear the contents if its cached */
- if (db->db_state == DB_CACHED) {
- ASSERT(db->db.db_data != NULL);
- arc_release(db->db_buf, db);
- bzero(db->db.db_data, db->db.db_size);
- arc_buf_freeze(db->db_buf);
- }
-
- mutex_exit(&db->db_mtx);
- }
- mutex_exit(&dn->dn_dbufs_mtx);
-}
-
-static int
-dbuf_new_block(dmu_buf_impl_t *db)
-{
- dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
- uint64_t birth_txg = 0;
-
- /* Don't count meta-objects */
- if (ds == NULL)
- return (FALSE);
-
- /*
- * We don't need any locking to protect db_blkptr:
- * If it's syncing, then db_last_dirty will be set
- * so we'll ignore db_blkptr.
- */
- ASSERT(MUTEX_HELD(&db->db_mtx));
- /* If we have been dirtied since the last snapshot, its not new */
- if (db->db_last_dirty)
- birth_txg = db->db_last_dirty->dr_txg;
- else if (db->db_blkptr)
- birth_txg = db->db_blkptr->blk_birth;
-
- if (birth_txg)
- return (!dsl_dataset_block_freeable(ds, birth_txg));
- else
- return (TRUE);
-}
-
-void
-dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
-{
- arc_buf_t *buf, *obuf;
- int osize = db->db.db_size;
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
-
- /* XXX does *this* func really need the lock? */
- ASSERT(RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock));
-
- /*
- * This call to dbuf_will_dirty() with the dn_struct_rwlock held
- * is OK, because there can be no other references to the db
- * when we are changing its size, so no concurrent DB_FILL can
- * be happening.
- */
- /*
- * XXX we should be doing a dbuf_read, checking the return
- * value and returning that up to our callers
- */
- dbuf_will_dirty(db, tx);
-
- /* create the data buffer for the new block */
- buf = arc_buf_alloc(db->db_dnode->dn_objset->os_spa, size, db, type);
-
- /* copy old block data to the new block */
- obuf = db->db_buf;
- bcopy(obuf->b_data, buf->b_data, MIN(osize, size));
- /* zero the remainder */
- if (size > osize)
- bzero((uint8_t *)buf->b_data + osize, size - osize);
-
- mutex_enter(&db->db_mtx);
- dbuf_set_data(db, buf);
- VERIFY(arc_buf_remove_ref(obuf, db) == 1);
- db->db.db_size = size;
-
- if (db->db_level == 0) {
- ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
- db->db_last_dirty->dt.dl.dr_data = buf;
- }
- mutex_exit(&db->db_mtx);
-
- dnode_willuse_space(db->db_dnode, size-osize, tx);
-}
-
-dbuf_dirty_record_t *
-dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
-{
- dnode_t *dn = db->db_dnode;
- objset_impl_t *os = dn->dn_objset;
- dbuf_dirty_record_t **drp, *dr;
- int drop_struct_lock = FALSE;
- int txgoff = tx->tx_txg & TXG_MASK;
-
- ASSERT(tx->tx_txg != 0);
- ASSERT(!refcount_is_zero(&db->db_holds));
- DMU_TX_DIRTY_BUF(tx, db);
-
- /*
- * Shouldn't dirty a regular buffer in syncing context. Private
- * objects may be dirtied in syncing context, but only if they
- * were already pre-dirtied in open context.
- * XXX We may want to prohibit dirtying in syncing context even
- * if they did pre-dirty.
- */
- ASSERT(!dmu_tx_is_syncing(tx) ||
- BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
- dn->dn_object == DMU_META_DNODE_OBJECT ||
- dn->dn_objset->os_dsl_dataset == NULL ||
- dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir));
-
- /*
- * We make this assert for private objects as well, but after we
- * check if we're already dirty. They are allowed to re-dirty
- * in syncing context.
- */
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
- dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
-
- mutex_enter(&db->db_mtx);
- /*
- * XXX make this true for indirects too? The problem is that
- * transactions created with dmu_tx_create_assigned() from
- * syncing context don't bother holding ahead.
- */
- ASSERT(db->db_level != 0 ||
- db->db_state == DB_CACHED || db->db_state == DB_FILL);
-
- mutex_enter(&dn->dn_mtx);
- /*
- * Don't set dirtyctx to SYNC if we're just modifying this as we
- * initialize the objset.
- */
- if (dn->dn_dirtyctx == DN_UNDIRTIED &&
- !BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
- dn->dn_dirtyctx =
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
- ASSERT(dn->dn_dirtyctx_firstset == NULL);
- dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
- }
- mutex_exit(&dn->dn_mtx);
-
- /*
- * If this buffer is already dirty, we're done.
- */
- drp = &db->db_last_dirty;
- ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg ||
- db->db.db_object == DMU_META_DNODE_OBJECT);
- while (*drp && (*drp)->dr_txg > tx->tx_txg)
- drp = &(*drp)->dr_next;
- if (*drp && (*drp)->dr_txg == tx->tx_txg) {
- if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
- /*
- * If this buffer has already been written out,
- * we now need to reset its state.
- */
- dbuf_unoverride(*drp);
- if (db->db.db_object != DMU_META_DNODE_OBJECT)
- arc_buf_thaw(db->db_buf);
- }
- mutex_exit(&db->db_mtx);
- return (*drp);
- }
-
- /*
- * Only valid if not already dirty.
- */
- ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
-
- ASSERT3U(dn->dn_nlevels, >, db->db_level);
- ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
- dn->dn_phys->dn_nlevels > db->db_level ||
- dn->dn_next_nlevels[txgoff] > db->db_level ||
- dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
- dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
-
- /*
- * We should only be dirtying in syncing context if it's the
- * mos, a spa os, or we're initializing the os. However, we are
- * allowed to dirty in syncing context provided we already
- * dirtied it in open context. Hence we must make this
- * assertion only if we're not already dirty.
- */
- ASSERT(!dmu_tx_is_syncing(tx) ||
- os->os_dsl_dataset == NULL ||
- !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
- !BP_IS_HOLE(os->os_rootbp));
- ASSERT(db->db.db_size != 0);
-
- dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
-
- /*
- * If this buffer is dirty in an old transaction group we need
- * to make a copy of it so that the changes we make in this
- * transaction group won't leak out when we sync the older txg.
- */
- dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
- if (db->db_level == 0) {
- void *data_old = db->db_buf;
-
- if (db->db_blkid == DB_BONUS_BLKID) {
- dbuf_fix_old_data(db, tx->tx_txg);
- data_old = db->db.db_data;
- } else if (db->db.db_object != DMU_META_DNODE_OBJECT) {
- /*
- * Release the data buffer from the cache so that we
- * can modify it without impacting possible other users
- * of this cached data block. Note that indirect
- * blocks and private objects are not released until the
- * syncing state (since they are only modified then).
- */
- arc_release(db->db_buf, db);
- dbuf_fix_old_data(db, tx->tx_txg);
- data_old = db->db_buf;
- }
- ASSERT(data_old != NULL);
- dr->dt.dl.dr_data = data_old;
- } else {
- mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL);
- list_create(&dr->dt.di.dr_children,
- sizeof (dbuf_dirty_record_t),
- offsetof(dbuf_dirty_record_t, dr_dirty_node));
- }
- dr->dr_dbuf = db;
- dr->dr_txg = tx->tx_txg;
- dr->dr_next = *drp;
- *drp = dr;
-
- /*
- * We could have been freed_in_flight between the dbuf_noread
- * and dbuf_dirty. We win, as though the dbuf_noread() had
- * happened after the free.
- */
- if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
- mutex_exit(&dn->dn_mtx);
- db->db_freed_in_flight = FALSE;
- }
-
- if (db->db_blkid != DB_BONUS_BLKID) {
- /*
- * Update the accounting.
- */
- if (!dbuf_new_block(db) && db->db_blkptr) {
- /*
- * This is only a guess -- if the dbuf is dirty
- * in a previous txg, we don't know how much
- * space it will use on disk yet. We should
- * really have the struct_rwlock to access
- * db_blkptr, but since this is just a guess,
- * it's OK if we get an odd answer.
- */
- dnode_willuse_space(dn,
- -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
- }
- dnode_willuse_space(dn, db->db.db_size, tx);
- }
-
- /*
- * This buffer is now part of this txg
- */
- dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg);
- db->db_dirtycnt += 1;
- ASSERT3U(db->db_dirtycnt, <=, 3);
-
- mutex_exit(&db->db_mtx);
-
- if (db->db_blkid == DB_BONUS_BLKID) {
- mutex_enter(&dn->dn_mtx);
- ASSERT(!list_link_active(&dr->dr_dirty_node));
- list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
- mutex_exit(&dn->dn_mtx);
- dnode_setdirty(dn, tx);
- return (dr);
- }
-
- if (db->db_level == 0) {
- dnode_new_blkid(dn, db->db_blkid, tx);
- ASSERT(dn->dn_maxblkid >= db->db_blkid);
- }
-
- if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- drop_struct_lock = TRUE;
- }
-
- if (db->db_level+1 < dn->dn_nlevels) {
- dmu_buf_impl_t *parent = db->db_parent;
- dbuf_dirty_record_t *di;
- int parent_held = FALSE;
-
- if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) {
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-
- parent = dbuf_hold_level(dn, db->db_level+1,
- db->db_blkid >> epbs, FTAG);
- parent_held = TRUE;
- }
- if (drop_struct_lock)
- rw_exit(&dn->dn_struct_rwlock);
- ASSERT3U(db->db_level+1, ==, parent->db_level);
- di = dbuf_dirty(parent, tx);
- if (parent_held)
- dbuf_rele(parent, FTAG);
-
- mutex_enter(&db->db_mtx);
- /* possible race with dbuf_undirty() */
- if (db->db_last_dirty == dr ||
- dn->dn_object == DMU_META_DNODE_OBJECT) {
- mutex_enter(&di->dt.di.dr_mtx);
- ASSERT3U(di->dr_txg, ==, tx->tx_txg);
- ASSERT(!list_link_active(&dr->dr_dirty_node));
- list_insert_tail(&di->dt.di.dr_children, dr);
- mutex_exit(&di->dt.di.dr_mtx);
- dr->dr_parent = di;
- }
- mutex_exit(&db->db_mtx);
- } else {
- ASSERT(db->db_level+1 == dn->dn_nlevels);
- ASSERT(db->db_blkid < dn->dn_nblkptr);
- ASSERT(db->db_parent == NULL ||
- db->db_parent == db->db_dnode->dn_dbuf);
- mutex_enter(&dn->dn_mtx);
- ASSERT(!list_link_active(&dr->dr_dirty_node));
- list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
- mutex_exit(&dn->dn_mtx);
- if (drop_struct_lock)
- rw_exit(&dn->dn_struct_rwlock);
- }
-
- dnode_setdirty(dn, tx);
- return (dr);
-}
-
-static int
-dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
-{
- dnode_t *dn = db->db_dnode;
- uint64_t txg = tx->tx_txg;
- dbuf_dirty_record_t *dr;
-
- ASSERT(txg != 0);
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
-
- mutex_enter(&db->db_mtx);
-
- /*
- * If this buffer is not dirty, we're done.
- */
- for (dr = db->db_last_dirty; dr; dr = dr->dr_next)
- if (dr->dr_txg <= txg)
- break;
- if (dr == NULL || dr->dr_txg < txg) {
- mutex_exit(&db->db_mtx);
- return (0);
- }
- ASSERT(dr->dr_txg == txg);
-
- /*
- * If this buffer is currently held, we cannot undirty
- * it, since one of the current holders may be in the
- * middle of an update. Note that users of dbuf_undirty()
- * should not place a hold on the dbuf before the call.
- */
- if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
- mutex_exit(&db->db_mtx);
- /* Make sure we don't toss this buffer at sync phase */
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
- mutex_exit(&dn->dn_mtx);
- return (0);
- }
-
- dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
-
- ASSERT(db->db.db_size != 0);
-
- /* XXX would be nice to fix up dn_towrite_space[] */
-
- db->db_last_dirty = dr->dr_next;
-
- if (dr->dr_parent) {
- mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
- list_remove(&dr->dr_parent->dt.di.dr_children, dr);
- mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
- } else if (db->db_level+1 == dn->dn_nlevels) {
- ASSERT3P(db->db_parent, ==, dn->dn_dbuf);
- mutex_enter(&dn->dn_mtx);
- list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
- mutex_exit(&dn->dn_mtx);
- }
-
- if (db->db_level == 0) {
- dbuf_unoverride(dr);
-
- ASSERT(db->db_buf != NULL);
- ASSERT(dr->dt.dl.dr_data != NULL);
- if (dr->dt.dl.dr_data != db->db_buf)
- VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
- } else {
- ASSERT(db->db_buf != NULL);
- ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- list_destroy(&dr->dt.di.dr_children);
- mutex_destroy(&dr->dt.di.dr_mtx);
- }
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
-
- ASSERT(db->db_dirtycnt > 0);
- db->db_dirtycnt -= 1;
-
- if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
- arc_buf_t *buf = db->db_buf;
-
- ASSERT(arc_released(buf));
- dbuf_set_data(db, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
- dbuf_evict(db);
- return (1);
- }
-
- mutex_exit(&db->db_mtx);
- return (0);
-}
-
-#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
-void
-dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
-{
- int rf = DB_RF_MUST_SUCCEED;
-
- ASSERT(tx->tx_txg != 0);
- ASSERT(!refcount_is_zero(&db->db_holds));
-
- if (RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock))
- rf |= DB_RF_HAVESTRUCT;
- (void) dbuf_read(db, NULL, rf);
- (void) dbuf_dirty(db, tx);
-}
-
-void
-dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- ASSERT(tx->tx_txg != 0);
- ASSERT(db->db_level == 0);
- ASSERT(!refcount_is_zero(&db->db_holds));
-
- ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
- dmu_tx_private_ok(tx));
-
- dbuf_noread(db);
- (void) dbuf_dirty(db, tx);
-}
-
-#pragma weak dmu_buf_fill_done = dbuf_fill_done
-/* ARGSUSED */
-void
-dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
-{
- mutex_enter(&db->db_mtx);
- DBUF_VERIFY(db);
-
- if (db->db_state == DB_FILL) {
- if (db->db_level == 0 && db->db_freed_in_flight) {
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- /* we were freed while filling */
- /* XXX dbuf_undirty? */
- bzero(db->db.db_data, db->db.db_size);
- db->db_freed_in_flight = FALSE;
- }
- db->db_state = DB_CACHED;
- cv_broadcast(&db->db_changed);
- }
- mutex_exit(&db->db_mtx);
-}
-
-/*
- * "Clear" the contents of this dbuf. This will mark the dbuf
- * EVICTING and clear *most* of its references. Unfortunetely,
- * when we are not holding the dn_dbufs_mtx, we can't clear the
- * entry in the dn_dbufs list. We have to wait until dbuf_destroy()
- * in this case. For callers from the DMU we will usually see:
- * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
- * For the arc callback, we will usually see:
- * dbuf_do_evict()->dbuf_clear();dbuf_destroy()
- * Sometimes, though, we will get a mix of these two:
- * DMU: dbuf_clear()->arc_buf_evict()
- * ARC: dbuf_do_evict()->dbuf_destroy()
- */
-void
-dbuf_clear(dmu_buf_impl_t *db)
-{
- dnode_t *dn = db->db_dnode;
- dmu_buf_impl_t *parent = db->db_parent;
- dmu_buf_impl_t *dndb = dn->dn_dbuf;
- int dbuf_gone = FALSE;
-
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(refcount_is_zero(&db->db_holds));
-
- dbuf_evict_user(db);
-
- if (db->db_state == DB_CACHED) {
- ASSERT(db->db.db_data != NULL);
- if (db->db_blkid == DB_BONUS_BLKID)
- zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
- db->db.db_data = NULL;
- db->db_state = DB_UNCACHED;
- }
-
- ASSERT3U(db->db_state, ==, DB_UNCACHED);
- ASSERT(db->db_data_pending == NULL);
-
- db->db_state = DB_EVICTING;
- db->db_blkptr = NULL;
-
- if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
- list_remove(&dn->dn_dbufs, db);
- dnode_rele(dn, db);
- }
-
- if (db->db_buf)
- dbuf_gone = arc_buf_evict(db->db_buf);
-
- if (!dbuf_gone)
- mutex_exit(&db->db_mtx);
-
- /*
- * If this dbuf is referened from an indirect dbuf,
- * decrement the ref count on the indirect dbuf.
- */
- if (parent && parent != dndb)
- dbuf_rele(parent, db);
-}
-
-static int
-dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
- dmu_buf_impl_t **parentp, blkptr_t **bpp)
-{
- int nlevels, epbs;
-
- *parentp = NULL;
- *bpp = NULL;
-
- ASSERT(blkid != DB_BONUS_BLKID);
-
- if (dn->dn_phys->dn_nlevels == 0)
- nlevels = 1;
- else
- nlevels = dn->dn_phys->dn_nlevels;
-
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-
- ASSERT3U(level * epbs, <, 64);
- ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
- if (level >= nlevels ||
- (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
- /* the buffer has no parent yet */
- return (ENOENT);
- } else if (level < nlevels-1) {
- /* this block is referenced from an indirect block */
- int err = dbuf_hold_impl(dn, level+1,
- blkid >> epbs, fail_sparse, NULL, parentp);
- if (err)
- return (err);
- err = dbuf_read(*parentp, NULL,
- (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL));
- if (err) {
- dbuf_rele(*parentp, NULL);
- *parentp = NULL;
- return (err);
- }
- *bpp = ((blkptr_t *)(*parentp)->db.db_data) +
- (blkid & ((1ULL << epbs) - 1));
- return (0);
- } else {
- /* the block is referenced from the dnode */
- ASSERT3U(level, ==, nlevels-1);
- ASSERT(dn->dn_phys->dn_nblkptr == 0 ||
- blkid < dn->dn_phys->dn_nblkptr);
- if (dn->dn_dbuf) {
- dbuf_add_ref(dn->dn_dbuf, NULL);
- *parentp = dn->dn_dbuf;
- }
- *bpp = &dn->dn_phys->dn_blkptr[blkid];
- return (0);
- }
-}
-
-/*
- * Allocate and initialize a dbuf for block <level, blkid> of dnode dn.
- *
- * The caller must hold dn_struct_rwlock (asserted below).  A bonus dbuf
- * (blkid == DB_BONUS_BLKID) is returned right after initialization and is
- * never entered in the hash table.  Any other dbuf is inserted into the
- * dbuf hash table and onto dn->dn_dbufs; if dbuf_hash_insert() reports that
- * a matching dbuf is already present, the new one is freed and the existing
- * one is returned instead.  A successfully inserted dbuf takes a reference
- * on its parent (unless the parent is the dnode's own dbuf) and a hold on
- * the dnode.
- */
-static dmu_buf_impl_t *
-dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
- dmu_buf_impl_t *parent, blkptr_t *blkptr)
-{
- objset_impl_t *os = dn->dn_objset;
- dmu_buf_impl_t *db, *odb;
-
- ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
- ASSERT(dn->dn_type != DMU_OT_NONE);
-
- db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
-
- /* identity and position of this dbuf within the object */
- db->db_objset = os;
- db->db.db_object = dn->dn_object;
- db->db_level = level;
- db->db_blkid = blkid;
- db->db_last_dirty = NULL;
- db->db_dirtycnt = 0;
- db->db_dnode = dn;
- db->db_parent = parent;
- db->db_blkptr = blkptr;
-
- /* no user data or eviction callback is attached yet */
- db->db_user_ptr = NULL;
- db->db_user_data_ptr_ptr = NULL;
- db->db_evict_func = NULL;
- db->db_immediate_evict = 0;
- db->db_freed_in_flight = 0;
-
- if (blkid == DB_BONUS_BLKID) {
- ASSERT3P(parent, ==, dn->dn_dbuf);
- db->db.db_size = dn->dn_bonuslen;
- db->db.db_offset = DB_BONUS_BLKID;
- db->db_state = DB_UNCACHED;
- /* the bonus dbuf is not placed in the hash table */
- return (db);
- } else {
- /* indirect blocks use the indirect block size, data blocks dn_datablksz */
- int blocksize =
- db->db_level ? 1<<dn->dn_indblkshift : dn->dn_datablksz;
- db->db.db_size = blocksize;
- db->db.db_offset = db->db_blkid * blocksize;
- }
-
- /*
- * Hold the dn_dbufs_mtx while we get the new dbuf
- * in the hash table *and* added to the dbufs list.
- * This prevents a possible deadlock with someone
- * trying to look up this dbuf before it's added to the
- * dn_dbufs list.
- */
- mutex_enter(&dn->dn_dbufs_mtx);
- /*
- * NOTE(review): the dbuf is marked DB_EVICTING for the duration of the
- * insert, presumably so that lookups ignore it until it is also on
- * dn_dbufs -- confirm against dbuf_find().
- */
- db->db_state = DB_EVICTING;
- if ((odb = dbuf_hash_insert(db)) != NULL) {
- /* someone else inserted it first */
- kmem_cache_free(dbuf_cache, db);
- mutex_exit(&dn->dn_dbufs_mtx);
- return (odb);
- }
- list_insert_head(&dn->dn_dbufs, db);
- db->db_state = DB_UNCACHED;
- mutex_exit(&dn->dn_dbufs_mtx);
-
- /* the dnode's own dbuf is not referenced by its children */
- if (parent && parent != dn->dn_dbuf)
- dbuf_add_ref(parent, db);
-
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
- refcount_count(&dn->dn_holds) > 0);
- (void) refcount_add(&dn->dn_holds, db);
-
- dprintf_dbuf(db, "db=%p\n", db);
-
- return (db);
-}
-
-/*
- * Eviction callback (registered through arc_set_callback() in
- * dbuf_write_done()).  `private' is treated as the arc_buf_t whose
- * b_private field points at the dbuf.  Always returns 0.
- */
-static int
-dbuf_do_evict(void *private)
-{
-	arc_buf_t *buf = private;
-	dmu_buf_impl_t *db = buf->b_private;
-
-	/* Take db_mtx unless this thread already owns it. */
-	if (!MUTEX_HELD(&db->db_mtx))
-		mutex_enter(&db->db_mtx);
-
-	ASSERT(refcount_is_zero(&db->db_holds));
-
-	if (db->db_state == DB_EVICTING) {
-		/* Teardown already in progress: drop the lock and free it. */
-		mutex_exit(&db->db_mtx);
-		dbuf_destroy(db);
-		return (0);
-	}
-
-	/* Otherwise detach the ARC buffer and evict the cached dbuf. */
-	ASSERT(db->db_state == DB_CACHED);
-	DBUF_VERIFY(db);
-	db->db_buf = NULL;
-	dbuf_evict(db);
-	return (0);
-}
-
-/*
- * Unhook a dbuf that has no remaining holds and return it to dbuf_cache.
- * Non-bonus dbufs are removed from dn_dbufs (dropping the dnode hold taken
- * on their behalf) and from the dbuf hash table first.
- */
-static void
-dbuf_destroy(dmu_buf_impl_t *db)
-{
-	ASSERT(refcount_is_zero(&db->db_holds));
-
-	if (db->db_blkid != DB_BONUS_BLKID) {
-		/* Only dbufs still linked on dn_dbufs need list removal. */
-		if (list_link_active(&db->db_link)) {
-			dnode_t *dn = db->db_dnode;
-
-			mutex_enter(&dn->dn_dbufs_mtx);
-			list_remove(&dn->dn_dbufs, db);
-			mutex_exit(&dn->dn_dbufs_mtx);
-
-			dnode_rele(dn, db);
-		}
-		dbuf_hash_remove(db);
-	}
-
-	db->db_parent = NULL;
-	db->db_dnode = NULL;
-	db->db_buf = NULL;
-
-	/* Everything else must already have been torn down by the caller. */
-	ASSERT(db->db.db_data == NULL);
-	ASSERT(db->db_hash_next == NULL);
-	ASSERT(db->db_blkptr == NULL);
-	ASSERT(db->db_data_pending == NULL);
-
-	kmem_cache_free(dbuf_cache, db);
-}
-
-/*
- * Start a speculative, asynchronous read of level-0 block blkid of dn.
- * Nothing is done if the block has been freed, if a dbuf for it exists and
- * is currently held, or if no non-hole block pointer can be found.
- * The caller must hold dn_struct_rwlock.
- */
-void
-dbuf_prefetch(dnode_t *dn, uint64_t blkid)
-{
-	dmu_buf_impl_t *db = NULL;
-	blkptr_t *bp = NULL;
-
-	ASSERT(blkid != DB_BONUS_BLKID);
-	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
-
-	if (dnode_block_freed(dn, blkid))
-		return;
-
-	/* dbuf_find() returns with db_mtx held */
-	db = dbuf_find(dn, 0, blkid);
-	if (db != NULL) {
-		int active = (refcount_count(&db->db_holds) > 0);
-
-		mutex_exit(&db->db_mtx);
-		/*
-		 * An active dbuf is assumed to be CACHED already, or
-		 * about to be read or filled, so there is nothing to do.
-		 */
-		if (active)
-			return;
-		db = NULL;
-	}
-
-	if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) != 0)
-		return;
-
-	if (bp != NULL && !BP_IS_HOLE(bp)) {
-		uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
-		zbookmark_t zb;
-
-		zb.zb_objset = dn->dn_objset->os_dsl_dataset ?
-		    dn->dn_objset->os_dsl_dataset->ds_object : 0;
-		zb.zb_object = dn->dn_object;
-		zb.zb_level = 0;
-		zb.zb_blkid = blkid;
-
-		(void) arc_read(NULL, dn->dn_objset->os_spa, bp,
-		    dmu_ot[dn->dn_type].ot_byteswap,
-		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
-		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
-		    &aflags, &zb);
-	}
-
-	/* Drop the parent hold handed back by dbuf_findbp(), if any. */
-	if (db != NULL)
-		dbuf_rele(db, NULL);
-}
-
-/*
- * Find or create the dbuf for <dn, level, blkid> and add a hold on it
- * for `tag'.
- *
- * Returns with db_holds incremented, and db_mtx not held.
- * Note: dn_struct_rwlock must be held.
- *
- * On success 0 is returned and *dbp points at the held dbuf.  On failure
- * an errno is returned and *dbp is left NULL.  When fail_sparse is set, a
- * block whose block pointer is a hole yields ENOENT instead of a newly
- * created dbuf.
- */
-int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
- void *tag, dmu_buf_impl_t **dbp)
-{
- dmu_buf_impl_t *db, *parent = NULL;
-
- ASSERT(blkid != DB_BONUS_BLKID);
- ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
- ASSERT3U(dn->dn_nlevels, >, level);
-
- *dbp = NULL;
-top:
- /* dbuf_find() returns with db_mtx held */
- db = dbuf_find(dn, level, blkid);
-
- if (db == NULL) {
- blkptr_t *bp = NULL;
- int err;
-
- ASSERT3P(parent, ==, NULL);
- err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
- if (fail_sparse) {
- if (err == 0 && bp && BP_IS_HOLE(bp))
- err = ENOENT;
- if (err) {
- if (parent)
- dbuf_rele(parent, NULL);
- return (err);
- }
- }
- /* without fail_sparse, ENOENT is tolerated and a dbuf is still created */
- if (err && err != ENOENT)
- return (err);
- db = dbuf_create(dn, level, blkid, parent, bp);
- }
-
- /* first hold on a dbuf that has an ARC buffer: re-reference the buffer */
- if (db->db_buf && refcount_is_zero(&db->db_holds)) {
- arc_buf_add_ref(db->db_buf, db);
- if (db->db_buf->b_data == NULL) {
- /*
- * NOTE(review): presumably the ARC buffer lost its data
- * before we could reference it; the dbuf is cleared and
- * the lookup restarted -- confirm.
- */
- dbuf_clear(db);
- if (parent) {
- dbuf_rele(parent, NULL);
- parent = NULL;
- }
- goto top;
- }
- ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
- }
-
- ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
-
- /*
- * If this buffer is currently syncing out, and we are
- * still referencing it from db_data, we need to make a copy
- * of it in case we decide we want to dirty it again in this txg.
- */
- if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
- dn->dn_object != DMU_META_DNODE_OBJECT &&
- db->db_state == DB_CACHED && db->db_data_pending) {
- dbuf_dirty_record_t *dr = db->db_data_pending;
-
- if (dr->dt.dl.dr_data == db->db_buf) {
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
- dbuf_set_data(db,
- arc_buf_alloc(db->db_dnode->dn_objset->os_spa,
- db->db.db_size, db, type));
- bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
- db->db.db_size);
- }
- }
-
- (void) refcount_add(&db->db_holds, tag);
- dbuf_update_data(db);
- DBUF_VERIFY(db);
- mutex_exit(&db->db_mtx);
-
- /* NOTE: we can't rele the parent until after we drop the db_mtx */
- if (parent)
- dbuf_rele(parent, NULL);
-
- ASSERT3P(db->db_dnode, ==, dn);
- ASSERT3U(db->db_blkid, ==, blkid);
- ASSERT3U(db->db_level, ==, level);
- *dbp = db;
-
- return (0);
-}
-
-/*
- * Hold the level-0 dbuf for blkid of dn on behalf of `tag'.
- * Returns the held dbuf, or NULL if dbuf_hold_impl() reports any error.
- */
-dmu_buf_impl_t *
-dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
-{
-	dmu_buf_impl_t *db;
-
-	if (dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db) != 0)
-		return (NULL);
-	return (db);
-}
-
-/*
- * Same as dbuf_hold(), but for a block at an arbitrary indirection level.
- * Returns the held dbuf, or NULL if dbuf_hold_impl() reports any error.
- */
-dmu_buf_impl_t *
-dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
-{
-	dmu_buf_impl_t *db;
-
-	if (dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db) != 0)
-		return (NULL);
-	return (db);
-}
-
-/*
- * Create the bonus dbuf for dn.  The caller must hold dn_struct_rwlock as
- * writer and the dnode must not have a bonus dbuf yet; the caller is
- * responsible for storing the result in dn->dn_bonus.
- */
-dmu_buf_impl_t *
-dbuf_create_bonus(dnode_t *dn)
-{
-	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
-	ASSERT(dn->dn_bonus == NULL);
-
-	/* The bonus dbuf hangs directly off the dnode's own dbuf. */
-	return (dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL));
-}
-
-/*
- * Add another hold, tagged with `tag', to a dbuf that is already held
- * (the resulting hold count is asserted to be greater than one).
- * Also exported as dmu_buf_add_ref via the weak alias below.
- */
-#pragma weak dmu_buf_add_ref = dbuf_add_ref
-void
-dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
-{
-	int64_t holds;
-
-	holds = refcount_add(&db->db_holds, tag);
-	ASSERT(holds > 1);
-}
-
-/*
- * Drop the hold on db that was taken with `tag'.
- *
- * When the last hold goes away the dbuf is either handed back to the dnode
- * (bonus dbuf), evicted (no ARC buffer, or an anonymous/released ARC
- * buffer), or left cached with its ARC reference dropped.  db_mtx is taken
- * here; on the paths that call dbuf_evict() this function does not release
- * it itself.
- * Also exported as dmu_buf_rele via the weak alias below.
- */
-#pragma weak dmu_buf_rele = dbuf_rele
-void
-dbuf_rele(dmu_buf_impl_t *db, void *tag)
-{
- int64_t holds;
-
- mutex_enter(&db->db_mtx);
- DBUF_VERIFY(db);
-
- holds = refcount_remove(&db->db_holds, tag);
- ASSERT(holds >= 0);
-
- /*
- * We can't freeze indirects if there is a possibility that they
- * may be modified in the current syncing context.
- */
- if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
- arc_buf_freeze(db->db_buf);
-
- /* only dirty-record holds remain: run the user eviction now if requested */
- if (holds == db->db_dirtycnt &&
- db->db_level == 0 && db->db_immediate_evict)
- dbuf_evict_user(db);
-
- if (holds == 0) {
- if (db->db_blkid == DB_BONUS_BLKID) {
- /* the bonus dbuf's hold on the dnode is dropped with its last hold */
- mutex_exit(&db->db_mtx);
- dnode_rele(db->db_dnode, db);
- } else if (db->db_buf == NULL) {
- /*
- * This is a special case: we never associated this
- * dbuf with any data allocated from the ARC.
- */
- ASSERT3U(db->db_state, ==, DB_UNCACHED);
- dbuf_evict(db);
- } else if (arc_released(db->db_buf)) {
- arc_buf_t *buf = db->db_buf;
- /*
- * This dbuf has anonymous data associated with it.
- */
- dbuf_set_data(db, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
- dbuf_evict(db);
- } else {
- /* still cached in the ARC: just drop our reference on the buffer */
- VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
- mutex_exit(&db->db_mtx);
- }
- } else {
- mutex_exit(&db->db_mtx);
- }
-}
-
-/*
- * Report the current number of holds on db.
- * Also exported as dmu_buf_refcount via the weak alias below.
- */
-#pragma weak dmu_buf_refcount = dbuf_refcount
-uint64_t
-dbuf_refcount(dmu_buf_impl_t *db)
-{
-	uint64_t nholds = refcount_count(&db->db_holds);
-
-	return (nholds);
-}
-
-/*
- * Attach user data to a dbuf that currently has none.  This is
- * dmu_buf_update_user() with an expected old user pointer of NULL; the
- * return value is whatever dmu_buf_update_user() returns.
- */
-void *
-dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr,
-    dmu_buf_evict_func_t *evict_func)
-{
-	void *prev;
-
-	prev = dmu_buf_update_user(db_fake, NULL, user_ptr,
-	    user_data_ptr_ptr, evict_func);
-	return (prev);
-}
-
-/*
- * Like dmu_buf_set_user(), but first flags the dbuf for immediate
- * eviction of its user data (db_immediate_evict), which dbuf_rele()
- * checks when holds are dropped.
- */
-void *
-dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr,
-    dmu_buf_evict_func_t *evict_func)
-{
-	((dmu_buf_impl_t *)db_fake)->db_immediate_evict = TRUE;
-
-	return (dmu_buf_update_user(db_fake, NULL, user_ptr,
-	    user_data_ptr_ptr, evict_func));
-}
-
-/*
- * Compare-and-swap the user data attached to a level-0 dbuf.
- *
- * If the dbuf's current user pointer equals old_user_ptr, it is replaced
- * by user_ptr (together with user_data_ptr_ptr and evict_func) and
- * old_user_ptr is returned.  Otherwise nothing is changed and the user
- * pointer currently installed is returned.  user_ptr and evict_func must
- * be either both NULL or both non-NULL.
- */
-void *
-dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
-    void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func)
-{
-	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	void *result = old_user_ptr;
-
-	ASSERT(db->db_level == 0);
-	ASSERT((user_ptr == NULL) == (evict_func == NULL));
-
-	mutex_enter(&db->db_mtx);
-	if (db->db_user_ptr != old_user_ptr) {
-		/* Somebody else's data is installed; report it instead. */
-		result = db->db_user_ptr;
-	} else {
-		db->db_user_ptr = user_ptr;
-		db->db_user_data_ptr_ptr = user_data_ptr_ptr;
-		db->db_evict_func = evict_func;
-
-		dbuf_update_data(db);
-	}
-	mutex_exit(&db->db_mtx);
-
-	return (result);
-}
-
-/*
- * Return the user pointer attached to a dbuf.  The dbuf must be held
- * by the caller (asserted); db_mtx is not taken.
- */
-void *
-dmu_buf_get_user(dmu_buf_t *db_fake)
-{
-	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	void *user;
-
-	ASSERT(!refcount_is_zero(&db->db_holds));
-
-	user = db->db_user_ptr;
-	return (user);
-}
-
-/*
- * Make sure db->db_blkptr is set before the dbuf is written.
- *
- * If it is still NULL it is pointed either at the dnode's own block pointer
- * array (for a dbuf at the top level of the tree) or at the matching slot
- * in the parent indirect block's data.  Must be called with db_mtx held;
- * db_mtx is dropped and re-taken if the parent has to be held here.
- */
-static void
-dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
-{
- /* ASSERT(dmu_tx_is_syncing(tx) */
- ASSERT(MUTEX_HELD(&db->db_mtx));
-
- if (db->db_blkptr != NULL)
- return;
-
- if (db->db_level == dn->dn_phys->dn_nlevels-1) {
- /*
- * This buffer was allocated at a time when there was
- * no available blkptrs from the dnode, or it was
- * inappropriate to hook it in (i.e., nlevels mis-match).
- */
- ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr);
- ASSERT(db->db_parent == NULL);
- db->db_parent = dn->dn_dbuf;
- db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid];
- DBUF_VERIFY(db);
- } else {
- dmu_buf_impl_t *parent = db->db_parent;
- /* epbs: log2 of the number of block pointers per indirect block */
- int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
-
- ASSERT(dn->dn_phys->dn_nlevels > 1);
- if (parent == NULL) {
- /*
- * db_mtx is released around the hold of the parent.
- * NOTE(review): the return value of dbuf_hold_impl() is
- * discarded, so `parent' is NULL on failure and is
- * dereferenced below -- confirm this cannot fail here.
- */
- mutex_exit(&db->db_mtx);
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- (void) dbuf_hold_impl(dn, db->db_level+1,
- db->db_blkid >> epbs, FALSE, db, &parent);
- rw_exit(&dn->dn_struct_rwlock);
- mutex_enter(&db->db_mtx);
- db->db_parent = parent;
- }
- /* index of this block's pointer within the parent indirect block */
- db->db_blkptr = (blkptr_t *)parent->db.db_data +
- (db->db_blkid & ((1ULL << epbs) - 1));
- DBUF_VERIFY(db);
- }
-}
-
-/*
- * Sync one dirty indirect block (db_level > 0) in syncing context.
- *
- * The block is read in if it is not cached, its block pointer is hooked
- * up, and its write zio is created with dbuf_write().  The dirty children
- * recorded in dr are then synced via dbuf_sync_list() before this block's
- * own zio is issued with zio_nowait().
- */
-static void
-dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db = dr->dr_dbuf;
- dnode_t *dn = db->db_dnode;
- zio_t *zio;
-
- ASSERT(dmu_tx_is_syncing(tx));
-
- dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr);
-
- mutex_enter(&db->db_mtx);
-
- ASSERT(db->db_level > 0);
- DBUF_VERIFY(db);
-
- if (db->db_buf == NULL) {
- /* db_mtx is dropped around the read */
- mutex_exit(&db->db_mtx);
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
- mutex_enter(&db->db_mtx);
- }
- ASSERT3U(db->db_state, ==, DB_CACHED);
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
- ASSERT(db->db_buf != NULL);
-
- dbuf_check_blkptr(dn, db);
-
- db->db_data_pending = dr;
-
- arc_release(db->db_buf, db);
- mutex_exit(&db->db_mtx);
-
- /*
- * XXX -- we should design a compression algorithm
- * that specializes in arrays of bps.
- */
- dbuf_write(dr, db->db_buf, ZIO_CHECKSUM_FLETCHER_4,
- zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY : ZIO_COMPRESS_LZJB, tx);
-
- /* dbuf_write() stored this block's zio in dr->dr_zio */
- zio = dr->dr_zio;
- mutex_enter(&dr->dt.di.dr_mtx);
- dbuf_sync_list(&dr->dt.di.dr_children, tx);
- ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- mutex_exit(&dr->dt.di.dr_mtx);
- zio_nowait(zio);
-}
-
-/*
- * Sync one dirty level-0 dbuf (or bonus buffer) in syncing context.
- *
- * Three cases are handled:
- *  - bonus buffer: the data is copied into the dnode's bonus area and the
- *    dirty record is retired right here;
- *  - block already written by an immediate write (DR_OVERRIDDEN): the
- *    recorded block pointer is installed and the write-completion
- *    callbacks are run by hand on a stack-allocated zio;
- *  - ordinary block: a write zio is created with dbuf_write() and either
- *    issued immediately or, for the meta-dnode, put back on the dnode's
- *    dirty list.
- */
-static void
-dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
-{
- arc_buf_t **datap = &dr->dt.dl.dr_data;
- dmu_buf_impl_t *db = dr->dr_dbuf;
- dnode_t *dn = db->db_dnode;
- objset_impl_t *os = dn->dn_objset;
- uint64_t txg = tx->tx_txg;
- int checksum, compress;
- int blksz;
-
- ASSERT(dmu_tx_is_syncing(tx));
-
- dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr);
-
- mutex_enter(&db->db_mtx);
- /*
- * To be synced, we must be dirtied. But we
- * might have been freed after the dirty.
- */
- if (db->db_state == DB_UNCACHED) {
- /* This buffer has been freed since it was dirtied */
- ASSERT(db->db.db_data == NULL);
- } else if (db->db_state == DB_FILL) {
- /* This buffer was freed and is now being re-filled */
- ASSERT(db->db.db_data != dr->dt.dl.dr_data);
- } else {
- ASSERT3U(db->db_state, ==, DB_CACHED);
- }
- DBUF_VERIFY(db);
-
- /*
- * If this is a bonus buffer, simply copy the bonus data into the
- * dnode. It will be written out when the dnode is synced (and it
- * will be synced, since it must have been dirty for dbuf_sync to
- * be called).
- */
- if (db->db_blkid == DB_BONUS_BLKID) {
- dbuf_dirty_record_t **drp;
- /*
- * Use dn_phys->dn_bonuslen since db.db_size is the length
- * of the bonus buffer in the open transaction rather than
- * the syncing transaction.
- */
- ASSERT(*datap != NULL);
- ASSERT3U(db->db_level, ==, 0);
- ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
- bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
- /* a private copy of the bonus data is freed once it has been copied out */
- if (*datap != db->db.db_data)
- zio_buf_free(*datap, DN_MAX_BONUSLEN);
- db->db_data_pending = NULL;
- /* unlink dr, which must be the last record on the db_last_dirty chain */
- drp = &db->db_last_dirty;
- while (*drp != dr)
- drp = &(*drp)->dr_next;
- ASSERT((*drp)->dr_next == NULL);
- *drp = NULL;
- /*
- * NOTE(review): db_level was asserted to be 0 above, so this
- * branch looks unreachable -- confirm.
- */
- if (dr->dr_dbuf->db_level != 0) {
- list_destroy(&dr->dt.di.dr_children);
- mutex_destroy(&dr->dt.di.dr_mtx);
- }
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
- ASSERT(db->db_dirtycnt > 0);
- db->db_dirtycnt -= 1;
- mutex_exit(&db->db_mtx);
- /* drop the hold tagged with this txg */
- dbuf_rele(db, (void *)(uintptr_t)txg);
- return;
- }
-
- /*
- * If this buffer is in the middle of an immediate write,
- * wait for the synchronous IO to complete.
- */
- while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) {
- ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
- cv_wait(&db->db_changed, &db->db_mtx);
- ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
- }
-
- dbuf_check_blkptr(dn, db);
-
- /*
- * If this dbuf has already been written out via an immediate write,
- * just complete the write by copying over the new block pointer and
- * updating the accounting via the write-completion functions.
- */
- if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
- /* stand-in zio carrying only the fields the callbacks below read */
- zio_t zio_fake;
-
- zio_fake.io_private = &db;
- zio_fake.io_error = 0;
- zio_fake.io_bp = db->db_blkptr;
- zio_fake.io_bp_orig = *db->db_blkptr;
- zio_fake.io_txg = txg;
-
- *db->db_blkptr = dr->dt.dl.dr_overridden_by;
- dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
- db->db_data_pending = dr;
- dr->dr_zio = &zio_fake;
- mutex_exit(&db->db_mtx);
-
- if (BP_IS_OLDER(&zio_fake.io_bp_orig, txg))
- dsl_dataset_block_kill(os->os_dsl_dataset,
- &zio_fake.io_bp_orig, dn->dn_zio, tx);
-
- dbuf_write_ready(&zio_fake, db->db_buf, db);
- dbuf_write_done(&zio_fake, db->db_buf, db);
-
- return;
- }
-
- blksz = arc_buf_size(*datap);
-
- if (dn->dn_object != DMU_META_DNODE_OBJECT) {
- /*
- * If this buffer is currently "in use" (i.e., there are
- * active holds and db_data still references it), then make
- * a copy before we start the write so that any modifications
- * from the open txg will not leak into this write.
- *
- * NOTE: this copy does not need to be made for objects only
- * modified in the syncing context (e.g. DNONE_DNODE blocks).
- */
- if (refcount_count(&db->db_holds) > 1 && *datap == db->db_buf) {
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- *datap = arc_buf_alloc(os->os_spa, blksz, db, type);
- bcopy(db->db.db_data, (*datap)->b_data, blksz);
- }
- } else {
- /*
- * Private object buffers are released here rather
- * than in dbuf_dirty() since they are only modified
- * in the syncing context and we don't want the
- * overhead of making multiple copies of the data.
- */
- arc_release(db->db_buf, db);
- }
-
- ASSERT(*datap != NULL);
- db->db_data_pending = dr;
-
- mutex_exit(&db->db_mtx);
-
- /*
- * Allow dnode settings to override objset settings,
- * except for metadata checksums.
- */
- if (dmu_ot[dn->dn_type].ot_metadata) {
- checksum = os->os_md_checksum;
- compress = zio_compress_select(dn->dn_compress,
- os->os_md_compress);
- } else {
- checksum = zio_checksum_select(dn->dn_checksum,
- os->os_checksum);
- compress = zio_compress_select(dn->dn_compress,
- os->os_compress);
- }
-
- dbuf_write(dr, *datap, checksum, compress, tx);
-
- ASSERT(!list_link_active(&dr->dr_dirty_node));
- /*
- * Meta-dnode records go back on the dirty list with their zio set
- * (dbuf_sync_list() stops when it meets one); everything else is
- * issued right away.
- */
- if (dn->dn_object == DMU_META_DNODE_OBJECT)
- list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr);
- else
- zio_nowait(dr->dr_zio);
-}
-
-/*
- * Sync every dirty record on `list', removing each record from the list
- * before handing it to dbuf_sync_indirect() or dbuf_sync_leaf().
- * Processing stops early at the first record that already has a zio.
- */
-void
-dbuf_sync_list(list_t *list, dmu_tx_t *tx)
-{
-	dbuf_dirty_record_t *dr;
-
-	for (dr = list_head(list); dr != NULL; dr = list_head(list)) {
-		if (dr->dr_zio != NULL) {
-			/*
-			 * If we find an already initialized zio then we
-			 * are processing the meta-dnode, and we have finished.
-			 * The dbufs for all dnodes are put back on the list
-			 * during processing, so that we can zio_wait()
-			 * these IOs after initiating all child IOs.
-			 */
-			ASSERT3U(dr->dr_dbuf->db.db_object, ==,
-			    DMU_META_DNODE_OBJECT);
-			return;
-		}
-
-		list_remove(list, dr);
-		if (dr->dr_dbuf->db_level == 0)
-			dbuf_sync_leaf(dr, tx);
-		else
-			dbuf_sync_indirect(dr, tx);
-	}
-}
-
-/*
- * Create (but do not issue) the write zio for dirty record dr.
- *
- * `data' is the ARC buffer to write; checksum and compress select the
- * algorithms.  The new zio is created as a child of the parent dbuf's
- * pending write zio, or of dn->dn_zio when the block hangs directly off
- * the dnode, and is stored in dr->dr_zio for the caller to issue.
- */
-static void
-dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, int checksum,
- int compress, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db = dr->dr_dbuf;
- dnode_t *dn = db->db_dnode;
- objset_impl_t *os = dn->dn_objset;
- dmu_buf_impl_t *parent = db->db_parent;
- uint64_t txg = tx->tx_txg;
- zbookmark_t zb;
- zio_t *zio;
- int zio_flags;
-
- /* pick the parent zio this write will hang off */
- if (parent != dn->dn_dbuf) {
- ASSERT(parent && parent->db_data_pending);
- ASSERT(db->db_level == parent->db_level-1);
- ASSERT(arc_released(parent->db_buf));
- zio = parent->db_data_pending->dr_zio;
- } else {
- ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1);
- ASSERT3P(db->db_blkptr, ==,
- &dn->dn_phys->dn_blkptr[db->db_blkid]);
- zio = dn->dn_zio;
- }
-
- ASSERT(db->db_level == 0 || data == db->db_buf);
- ASSERT3U(db->db_blkptr->blk_birth, <=, txg);
- ASSERT(zio);
-
- /* bookmark identifying this block: objset, object, level, blkid */
- zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
- zb.zb_object = db->db.db_object;
- zb.zb_level = db->db_level;
- zb.zb_blkid = db->db_blkid;
-
- zio_flags = ZIO_FLAG_MUSTSUCCEED;
- /* indirect blocks and metadata object types are flagged as metadata */
- if (dmu_ot[dn->dn_type].ot_metadata || zb.zb_level != 0)
- zio_flags |= ZIO_FLAG_METADATA;
- /* the block being replaced was born in an earlier txg */
- if (BP_IS_OLDER(db->db_blkptr, txg))
- dsl_dataset_block_kill(
- os->os_dsl_dataset, db->db_blkptr, zio, tx);
-
- dr->dr_zio = arc_write(zio, os->os_spa, checksum, compress,
- dmu_get_replication_level(os, &zb, dn->dn_type), txg,
- db->db_blkptr, data, dbuf_write_ready, dbuf_write_done, db,
- ZIO_PRIORITY_ASYNC_WRITE, zio_flags, &zb);
-}
-
-/*
- * "Ready" callback handed to arc_write() by dbuf_write(); vdb is the dbuf.
- *
- * Charges the change in allocated size to the dnode, computes the fill
- * count for the new block pointer, stamps the object type and level into
- * it, and tells the dataset about the block that was born (and the one
- * that was killed, if it was born in this same txg).
- */
-/* ARGSUSED */
-static void
-dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
-{
- dmu_buf_impl_t *db = vdb;
- dnode_t *dn = db->db_dnode;
- objset_impl_t *os = dn->dn_objset;
- blkptr_t *bp_orig = &zio->io_bp_orig;
- uint64_t fill = 0;
- int old_size, new_size, i;
-
- dprintf_dbuf_bp(db, bp_orig, "bp_orig: %s", "");
-
- old_size = bp_get_dasize(os->os_spa, bp_orig);
- new_size = bp_get_dasize(os->os_spa, zio->io_bp);
-
- dnode_diduse_space(dn, new_size-old_size);
-
- /* the new block is a hole: nothing to fill in, only the old block to kill */
- if (BP_IS_HOLE(zio->io_bp)) {
- dsl_dataset_t *ds = os->os_dsl_dataset;
- dmu_tx_t *tx = os->os_synctx;
-
- if (bp_orig->blk_birth == tx->tx_txg)
- dsl_dataset_block_kill(ds, bp_orig, NULL, tx);
- ASSERT3U(db->db_blkptr->blk_fill, ==, 0);
- return;
- }
-
- mutex_enter(&db->db_mtx);
-
- if (db->db_level == 0) {
- /* extend the on-disk maximum block id if this block lies beyond it */
- mutex_enter(&dn->dn_mtx);
- if (db->db_blkid > dn->dn_phys->dn_maxblkid)
- dn->dn_phys->dn_maxblkid = db->db_blkid;
- mutex_exit(&dn->dn_mtx);
-
- if (dn->dn_type == DMU_OT_DNODE) {
- /* a block of dnodes: fill is the number of allocated dnodes in it */
- dnode_phys_t *dnp = db->db.db_data;
- for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
- i--, dnp++) {
- if (dnp->dn_type != DMU_OT_NONE)
- fill++;
- }
- } else {
- fill = 1;
- }
- } else {
- /* an indirect block: fill is the sum of its non-hole children's fill */
- blkptr_t *bp = db->db.db_data;
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
- for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
- ASSERT3U(BP_GET_LSIZE(bp), ==,
- db->db_level == 1 ? dn->dn_datablksz :
- (1<<dn->dn_phys->dn_indblkshift));
- fill += bp->blk_fill;
- }
- }
-
- db->db_blkptr->blk_fill = fill;
- BP_SET_TYPE(db->db_blkptr, dn->dn_type);
- BP_SET_LEVEL(db->db_blkptr, db->db_level);
-
- mutex_exit(&db->db_mtx);
-
- /* We must do this after we've set the bp's type and level */
- if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp), BP_IDENTITY(bp_orig))) {
- dsl_dataset_t *ds = os->os_dsl_dataset;
- dmu_tx_t *tx = os->os_synctx;
-
- if (bp_orig->blk_birth == tx->tx_txg)
- dsl_dataset_block_kill(ds, bp_orig, NULL, tx);
- dsl_dataset_block_born(ds, zio->io_bp, tx);
- }
-}
-
-/*
- * "Done" callback handed to arc_write() by dbuf_write(); vdb is the dbuf.
- *
- * Retires the dirty record that was pending for this write: it is
- * unlinked from the db_last_dirty chain and freed, db_dirtycnt is
- * decremented, waiters on db_changed are woken, and the hold tagged with
- * the txg is dropped.  The write is asserted to have succeeded.
- */
-/* ARGSUSED */
-static void
-dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
-{
- dmu_buf_impl_t *db = vdb;
- uint64_t txg = zio->io_txg;
- dbuf_dirty_record_t **drp, *dr;
-
- ASSERT3U(zio->io_error, ==, 0);
-
- mutex_enter(&db->db_mtx);
-
- /* find the pending record; it must be the last one on the chain */
- drp = &db->db_last_dirty;
- while (*drp != db->db_data_pending)
- drp = &(*drp)->dr_next;
- ASSERT(!list_link_active(&(*drp)->dr_dirty_node));
- ASSERT((*drp)->dr_txg == txg);
- ASSERT((*drp)->dr_next == NULL);
- dr = *drp;
- *drp = NULL;
-
- if (db->db_level == 0) {
- ASSERT(db->db_blkid != DB_BONUS_BLKID);
- ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
-
- /*
- * A private copy that was written is released; otherwise the
- * dbuf's own buffer gets the eviction callback installed.
- */
- if (dr->dt.dl.dr_data != db->db_buf)
- VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db) == 1);
- else if (!BP_IS_HOLE(db->db_blkptr))
- arc_set_callback(db->db_buf, dbuf_do_evict, db);
- else
- ASSERT(arc_released(db->db_buf));
- } else {
- dnode_t *dn = db->db_dnode;
-
- ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
- if (!BP_IS_HOLE(db->db_blkptr)) {
- int epbs =
- dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
- ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
- db->db.db_size);
- ASSERT3U(dn->dn_phys->dn_maxblkid
- >> (db->db_level * epbs), >=, db->db_blkid);
- arc_set_callback(db->db_buf, dbuf_do_evict, db);
- }
- /* indirect dirty records own a child list and mutex */
- list_destroy(&dr->dt.di.dr_children);
- mutex_destroy(&dr->dt.di.dr_mtx);
- }
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
-
- cv_broadcast(&db->db_changed);
- ASSERT(db->db_dirtycnt > 0);
- db->db_dirtycnt -= 1;
- db->db_data_pending = NULL;
- mutex_exit(&db->db_mtx);
-
- dprintf_dbuf_bp(db, zio->io_bp, "bp: %s", "");
-
- dbuf_rele(db, (void *)(uintptr_t)txg);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu.c
deleted file mode 100644
index d3be6b4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ /dev/null
@@ -1,1029 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_impl.h>
-#include <sys/dmu_tx.h>
-#include <sys/dbuf.h>
-#include <sys/dnode.h>
-#include <sys/zfs_context.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_synctask.h>
-#include <sys/dsl_prop.h>
-#include <sys/dmu_zfetch.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zap.h>
-#include <sys/zio_checksum.h>
-
-/*
- * Per-object-type table, indexed by DMU object type number.
- *
- * Each entry gives, in order: the byteswap routine for the type's data, a
- * TRUE/FALSE flag, and a human-readable name.
- * NOTE(review): other code in this file reads the first two fields as
- * ot_byteswap and ot_metadata; the field order is assumed to match the
- * dmu_object_type_info_t declaration -- confirm against the header.  The
- * entry order must match the DMU object type enumeration.
- */
-const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
- { byteswap_uint8_array, TRUE, "unallocated" },
- { zap_byteswap, TRUE, "object directory" },
- { byteswap_uint64_array, TRUE, "object array" },
- { byteswap_uint8_array, TRUE, "packed nvlist" },
- { byteswap_uint64_array, TRUE, "packed nvlist size" },
- { byteswap_uint64_array, TRUE, "bplist" },
- { byteswap_uint64_array, TRUE, "bplist header" },
- { byteswap_uint64_array, TRUE, "SPA space map header" },
- { byteswap_uint64_array, TRUE, "SPA space map" },
- { byteswap_uint64_array, TRUE, "ZIL intent log" },
- { dnode_buf_byteswap, TRUE, "DMU dnode" },
- { dmu_objset_byteswap, TRUE, "DMU objset" },
- { byteswap_uint64_array, TRUE, "DSL directory" },
- { zap_byteswap, TRUE, "DSL directory child map"},
- { zap_byteswap, TRUE, "DSL dataset snap map" },
- { zap_byteswap, TRUE, "DSL props" },
- { byteswap_uint64_array, TRUE, "DSL dataset" },
- { zfs_znode_byteswap, TRUE, "ZFS znode" },
- { zfs_acl_byteswap, TRUE, "ZFS ACL" },
- { byteswap_uint8_array, FALSE, "ZFS plain file" },
- { zap_byteswap, TRUE, "ZFS directory" },
- { zap_byteswap, TRUE, "ZFS master node" },
- { zap_byteswap, TRUE, "ZFS delete queue" },
- { byteswap_uint8_array, FALSE, "zvol object" },
- { zap_byteswap, TRUE, "zvol prop" },
- { byteswap_uint8_array, FALSE, "other uint8[]" },
- { byteswap_uint64_array, FALSE, "other uint64[]" },
- { zap_byteswap, TRUE, "other ZAP" },
- { zap_byteswap, TRUE, "persistent error log" },
- { byteswap_uint8_array, TRUE, "SPA history" },
- { byteswap_uint64_array, TRUE, "SPA history offsets" },
- { zap_byteswap, TRUE, "Pool properties" },
-};
-
-/*
- * Hold and read the data block of `object' that contains `offset'.
- *
- * Returns 0 with *dbp set to the held buffer, or an errno: whatever
- * dnode_hold() or dbuf_read() reports, or EIO if the dbuf cannot be held.
- * When the failure occurs after the dnode was held, *dbp is set from a
- * NULL dbuf and no hold is left behind.
- */
-int
-dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
-    void *tag, dmu_buf_t **dbp)
-{
-	dnode_t *dn;
-	dmu_buf_impl_t *db;
-	uint64_t blkid;
-	int err;
-
-	if ((err = dnode_hold(os->os, object, FTAG, &dn)) != 0)
-		return (err);
-
-	blkid = dbuf_whichblock(dn, offset);
-	rw_enter(&dn->dn_struct_rwlock, RW_READER);
-	db = dbuf_hold(dn, blkid, tag);
-	rw_exit(&dn->dn_struct_rwlock);
-
-	if (db != NULL) {
-		err = dbuf_read(db, NULL, DB_RF_CANFAIL);
-		if (err != 0) {
-			/* The read failed: give the hold back. */
-			dbuf_rele(db, tag);
-			db = NULL;
-		}
-	} else {
-		err = EIO;
-	}
-
-	dnode_rele(dn, FTAG);
-	*dbp = &db->db;
-	return (err);
-}
-
-/*
- * Return the largest bonus buffer size, DN_MAX_BONUSLEN.
- */
-int
-dmu_bonus_max(void)
-{
-	int max_bonuslen = DN_MAX_BONUSLEN;
-
-	return (max_bonuslen);
-}
-
-/*
- * Hold the bonus buffer of `object' on behalf of `tag', creating the
- * bonus dbuf on first use, and read it in.  On success *dbp points at the
- * held buffer.
- *
- * returns ENOENT, EIO, or 0.
- */
-int
-dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
-{
- dnode_t *dn;
- int err, count;
- dmu_buf_impl_t *db;
-
- err = dnode_hold(os->os, object, FTAG, &dn);
- if (err)
- return (err);
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_bonus == NULL) {
- /*
- * The lock is dropped and re-taken as writer, so dn_bonus
- * is checked again before the bonus dbuf is created.
- */
- rw_exit(&dn->dn_struct_rwlock);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (dn->dn_bonus == NULL)
- dn->dn_bonus = dbuf_create_bonus(dn);
- }
- db = dn->dn_bonus;
- rw_exit(&dn->dn_struct_rwlock);
- mutex_enter(&db->db_mtx);
- count = refcount_add(&db->db_holds, tag);
- mutex_exit(&db->db_mtx);
- /* the first hold on the bonus dbuf also takes a reference on the dnode */
- if (count == 1)
- dnode_add_ref(dn, db);
- dnode_rele(dn, FTAG);
-
- VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED));
-
- *dbp = &db->db;
- return (0);
-}
-
-/*
- * Hold (and optionally read) every data block of dn that overlaps
- * [offset, offset + length).
- *
- * On success 0 is returned, *dbpp points at a newly allocated array of
- * held buffers and *numbufsp is its length; the caller releases both with
- * dmu_buf_rele_array().  If `read' is nonzero the blocks are read in
- * parallel and this function waits for them.  On failure an errno is
- * returned, all holds taken here are dropped and the array is freed.
- * dn_struct_rwlock is never held on return.
- *
- * Note: longer-term, we should modify all of the dmu_buf_*() interfaces
- * to take a held dnode rather than <os, object> -- the lookup is wasteful,
- * and can induce severe lock contention when writing to several files
- * whose dnodes are in the same block.
- */
-static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
-    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
-{
-	dmu_buf_t **dbp;
-	uint64_t blkid, nblks, i;
-	uint32_t flags;
-	int err;
-	zio_t *zio;
-
-	ASSERT(length <= DMU_MAX_ACCESS);
-
-	flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
-	if (length > zfetch_array_rd_sz)
-		flags |= DB_RF_NOPREFETCH;
-
-	rw_enter(&dn->dn_struct_rwlock, RW_READER);
-	if (dn->dn_datablkshift) {
-		/* power-of-two block size: count the blocks the range touches */
-		int blkshift = dn->dn_datablkshift;
-		nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
-		    P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
-	} else {
-		/* odd block size: the object consists of a single block */
-		if (offset + length > dn->dn_datablksz) {
-			zfs_panic_recover("zfs: accessing past end of object "
-			    "%llx/%llx (size=%u access=%llu+%llu)",
-			    (longlong_t)dn->dn_objset->
-			    os_dsl_dataset->ds_object,
-			    (longlong_t)dn->dn_object, dn->dn_datablksz,
-			    (longlong_t)offset, (longlong_t)length);
-			/* don't leak the reader lock on the error path */
-			rw_exit(&dn->dn_struct_rwlock);
-			return (EIO);
-		}
-		nblks = 1;
-	}
-	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
-
-	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE);
-	blkid = dbuf_whichblock(dn, offset);
-	for (i = 0; i < nblks; i++) {
-		dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
-		if (db == NULL) {
-			rw_exit(&dn->dn_struct_rwlock);
-			/* entries not yet filled in are NULL and are skipped */
-			dmu_buf_rele_array(dbp, nblks, tag);
-			zio_nowait(zio);
-			return (EIO);
-		}
-		/* initiate async i/o */
-		if (read) {
-			/* the struct lock is dropped around the read */
-			rw_exit(&dn->dn_struct_rwlock);
-			(void) dbuf_read(db, zio, flags);
-			rw_enter(&dn->dn_struct_rwlock, RW_READER);
-		}
-		dbp[i] = &db->db;
-	}
-	rw_exit(&dn->dn_struct_rwlock);
-
-	/* wait for async i/o */
-	err = zio_wait(zio);
-	if (err) {
-		dmu_buf_rele_array(dbp, nblks, tag);
-		return (err);
-	}
-
-	/* wait for other io to complete */
-	if (read) {
-		for (i = 0; i < nblks; i++) {
-			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
-			mutex_enter(&db->db_mtx);
-			while (db->db_state == DB_READ ||
-			    db->db_state == DB_FILL)
-				cv_wait(&db->db_changed, &db->db_mtx);
-			/* a buffer that ended up uncached was not read in */
-			if (db->db_state == DB_UNCACHED)
-				err = EIO;
-			mutex_exit(&db->db_mtx);
-			if (err) {
-				dmu_buf_rele_array(dbp, nblks, tag);
-				return (err);
-			}
-		}
-	}
-
-	*numbufsp = nblks;
-	*dbpp = dbp;
-	return (0);
-}
-
-/*
- * <objset, object> front end for dmu_buf_hold_array_by_dnode(): the dnode
- * is held only for the duration of the call.  Returns the dnode_hold()
- * error, or whatever dmu_buf_hold_array_by_dnode() returns.
- */
-static int
-dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
-    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
-{
-	dnode_t *dn;
-	int err;
-
-	if ((err = dnode_hold(os->os, object, FTAG, &dn)) != 0)
-		return (err);
-
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp);
-	dnode_rele(dn, FTAG);
-
-	return (err);
-}
-
-/*
- * Variant of dmu_buf_hold_array() for callers that already have a dbuf of
- * the object in hand: the dnode is taken from that dbuf instead of being
- * looked up.  Returns whatever dmu_buf_hold_array_by_dnode() returns.
- */
-int
-dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
-    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
-{
-	dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
-
-	return (dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp));
-}
-
-/*
- * Release every non-NULL buffer in an array built by one of the
- * dmu_buf_hold_array*() functions, then free the array itself.
- * A zero-length array is ignored entirely (nothing is freed).
- */
-void
-dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
-{
-	dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake;
-	int i = 0;
-
-	if (numbufs == 0)
-		return;
-
-	while (i < numbufs) {
-		if (dbp[i] != NULL)
-			dbuf_rele(dbp[i], tag);
-		i++;
-	}
-
-	kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs);
-}
-
-/*
- * Start prefetching part of an object; does nothing when
- * zfs_prefetch_disable is set.
- *
- * With len == 0 the caller wants the object's bonus buffer, so the block
- * of the meta-dnode that contains the object's dnode is prefetched.
- * Otherwise every level-0 block overlapping [offset, offset + len) is
- * prefetched.  Errors are not reported.
- */
-void
-dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
-{
- dnode_t *dn;
- uint64_t blkid;
- int nblks, i, err;
-
- if (zfs_prefetch_disable)
- return;
-
- if (len == 0) { /* they're interested in the bonus buffer */
- dn = os->os->os_meta_dnode;
-
- if (object == 0 || object >= DN_MAX_OBJECT)
- return;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- /* byte offset of the object's dnode within the meta-dnode */
- blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
- dbuf_prefetch(dn, blkid);
- rw_exit(&dn->dn_struct_rwlock);
- return;
- }
-
- /*
- * XXX - Note, if the dnode for the requested object is not
- * already cached, we will do a *synchronous* read in the
- * dnode_hold() call. The same is true for any indirects.
- */
- err = dnode_hold(os->os, object, FTAG, &dn);
- if (err != 0)
- return;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_datablkshift) {
- int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset+len, 1<<blkshift) -
- P2ALIGN(offset, 1<<blkshift)) >> blkshift;
- } else {
- /* odd block size: one block, and only if offset falls inside it */
- nblks = (offset < dn->dn_datablksz);
- }
-
- if (nblks != 0) {
- blkid = dbuf_whichblock(dn, offset);
- for (i = 0; i < nblks; i++)
- dbuf_prefetch(dn, blkid+i);
- }
-
- rw_exit(&dn->dn_struct_rwlock);
-
- dnode_rele(dn, FTAG);
-}
-
-/*
- * Free `size' bytes of `object' starting at `offset' in transaction tx
- * by handing the range to dnode_free_range().  A size of -1ULL is
- * accepted by the range assertion.  Returns the dnode_hold() error, or 0.
- */
-int
-dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
-    uint64_t size, dmu_tx_t *tx)
-{
-	dnode_t *dn;
-	int err;
-
-	err = dnode_hold(os->os, object, FTAG, &dn);
-	if (err != 0)
-		return (err);
-
-	ASSERT(offset < UINT64_MAX);
-	ASSERT(size == -1ULL || size <= UINT64_MAX - offset);
-
-	dnode_free_range(dn, offset, size, tx);
-	dnode_rele(dn, FTAG);
-
-	return (0);
-}
-
-/*
- * Copy `size' bytes of `object', starting at `offset', into buf.
- *
- * For objects with an odd (non power-of-two) block size, any part of the
- * request that lies beyond the single data block is zero-filled.  The
- * data is read in chunks of at most DMU_MAX_ACCESS / 2 bytes.
- *
- * Returns 0 on success, or the error from dnode_hold() or
- * dmu_buf_hold_array_by_dnode().  The dnode hold taken here is dropped on
- * every return path once it has been acquired.
- */
-int
-dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
-    void *buf)
-{
-	dnode_t *dn;
-	dmu_buf_t **dbp;
-	int numbufs, i, err;
-
-	err = dnode_hold(os->os, object, FTAG, &dn);
-	if (err)
-		return (err);
-
-	/*
-	 * Deal with odd block sizes, where there can't be data past the first
-	 * block. If we ever do the tail block optimization, we will need to
-	 * handle that here as well.
-	 */
-	if (dn->dn_datablkshift == 0) {
-		int newsz = offset > dn->dn_datablksz ? 0 :
-		    MIN(size, dn->dn_datablksz - offset);
-		bzero((char *)buf + newsz, size - newsz);
-		size = newsz;
-	}
-
-	while (size > 0) {
-		uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
-
-		/*
-		 * NB: we could do this block-at-a-time, but it's nice
-		 * to be reading in parallel.
-		 */
-		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
-		    TRUE, FTAG, &numbufs, &dbp);
-		/* leave the loop so that the dnode hold is released below */
-		if (err)
-			break;
-
-		for (i = 0; i < numbufs; i++) {
-			int tocpy;
-			int bufoff;
-			dmu_buf_t *db = dbp[i];
-
-			ASSERT(size > 0);
-
-			/* offset of the wanted data within this buffer */
-			bufoff = offset - db->db_offset;
-			tocpy = (int)MIN(db->db_size - bufoff, size);
-
-			bcopy((char *)db->db_data + bufoff, buf, tocpy);
-
-			offset += tocpy;
-			size -= tocpy;
-			buf = (char *)buf + tocpy;
-		}
-		dmu_buf_rele_array(dbp, numbufs, FTAG);
-	}
-	dnode_rele(dn, FTAG);
-	return (err);
-}
-
-/*
- * Copy `size` bytes from buf into `object` starting at `offset`, under
- * transaction tx.  A zero-length write returns immediately.  Failure to
- * hold the covering buffers is fatal (VERIFY); nothing is returned.
- */
-void
-dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx)
-{
- dmu_buf_t **dbp;
- int numbufs, i;
-
- if (size == 0)
- return;
-
- VERIFY(0 == dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp));
-
- for (i = 0; i < numbufs; i++) {
- int tocpy;
- int bufoff;
- dmu_buf_t *db = dbp[i];
-
- ASSERT(size > 0);
-
- /* Portion of this buffer covered by the request. */
- bufoff = offset - db->db_offset;
- tocpy = (int)MIN(db->db_size - bufoff, size);
-
- /* Only the first and last buffers may be partially written. */
- ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
-
- /*
- * A buffer that is overwritten in full is announced with
- * will_fill (and completed with fill_done below); a partial
- * overwrite uses will_dirty instead.
- */
- if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
- else
- dmu_buf_will_dirty(db, tx);
-
- bcopy(buf, (char *)db->db_data + bufoff, tocpy);
-
- if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
-
- offset += tocpy;
- size -= tocpy;
- buf = (char *)buf + tocpy;
- }
- dmu_buf_rele_array(dbp, numbufs, FTAG);
-}
-
-#ifdef _KERNEL
-/*
- * Move up to `size` bytes of `object`, starting at uio->uio_loffset,
- * out through uio with uiomove().
- *
- * Returns 0 on success, or the first error from dmu_buf_hold_array() or
- * uiomove().  The held buffers are released on every path after a
- * successful hold.
- */
-int
-dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
-{
- dmu_buf_t **dbp;
- int numbufs, i, err;
-
- /*
- * NB: we could do this block-at-a-time, but it's nice
- * to be reading in parallel.
- */
- err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
- &numbufs, &dbp);
- if (err)
- return (err);
-
- for (i = 0; i < numbufs; i++) {
- int tocpy;
- int bufoff;
- dmu_buf_t *db = dbp[i];
-
- ASSERT(size > 0);
-
- /*
- * The offset is re-read from the uio on every pass;
- * presumably uiomove() advances uio_loffset -- confirm.
- */
- bufoff = uio->uio_loffset - db->db_offset;
- tocpy = (int)MIN(db->db_size - bufoff, size);
-
- err = uiomove((char *)db->db_data + bufoff, tocpy,
- UIO_READ, uio);
- if (err)
- break;
-
- size -= tocpy;
- }
- dmu_buf_rele_array(dbp, numbufs, FTAG);
-
- return (err);
-}
-
-/*
- * Move `size` bytes from uio into `object`, starting at
- * uio->uio_loffset, under transaction tx.
- *
- * Returns 0 for a zero-length request or on success, otherwise the
- * first error from dmu_buf_hold_array() or uiomove().
- */
-int
-dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
- dmu_tx_t *tx)
-{
- dmu_buf_t **dbp;
- int numbufs, i;
- int err = 0;
-
- if (size == 0)
- return (0);
-
- err = dmu_buf_hold_array(os, object, uio->uio_loffset, size,
- FALSE, FTAG, &numbufs, &dbp);
- if (err)
- return (err);
-
- for (i = 0; i < numbufs; i++) {
- int tocpy;
- int bufoff;
- dmu_buf_t *db = dbp[i];
-
- ASSERT(size > 0);
-
- /* Portion of this buffer covered by the request. */
- bufoff = uio->uio_loffset - db->db_offset;
- tocpy = (int)MIN(db->db_size - bufoff, size);
-
- /* Only the first and last buffers may be partially written. */
- ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
-
- if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
- else
- dmu_buf_will_dirty(db, tx);
-
- /*
- * XXX uiomove could block forever (eg. nfs-backed
- * pages). There needs to be a uiolockdown() function
- * to lock the pages in memory, so that uiomove won't
- * block.
- */
- err = uiomove((char *)db->db_data + bufoff, tocpy,
- UIO_WRITE, uio);
-
- /*
- * fill_done is issued even when uiomove() failed, so that
- * the will_fill above is always paired before bailing out.
- */
- if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
-
- if (err)
- break;
-
- size -= tocpy;
- }
- dmu_buf_rele_array(dbp, numbufs, FTAG);
- return (err);
-}
-
-#ifndef __FreeBSD__
-/*
- * Copy `size` bytes into `object` at `offset` from the page list that
- * starts at pp, mapping one page at a time with ppmapin()/ppmapout().
- * Compiled only when __FreeBSD__ is not defined.
- *
- * Returns 0 for a zero-length request, otherwise the result of
- * dmu_buf_hold_array().
- */
-int
-dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- page_t *pp, dmu_tx_t *tx)
-{
- dmu_buf_t **dbp;
- int numbufs, i;
- int err;
-
- if (size == 0)
- return (0);
-
- err = dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp);
- if (err)
- return (err);
-
- for (i = 0; i < numbufs; i++) {
- int tocpy, copied, thiscpy;
- int bufoff;
- dmu_buf_t *db = dbp[i];
- caddr_t va;
-
- ASSERT(size > 0);
- ASSERT3U(db->db_size, >=, PAGESIZE);
-
- bufoff = offset - db->db_offset;
- tocpy = (int)MIN(db->db_size - bufoff, size);
-
- ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
-
- if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
- else
- dmu_buf_will_dirty(db, tx);
-
- /* Walk the page list, copying at most PAGESIZE bytes per page. */
- for (copied = 0; copied < tocpy; copied += PAGESIZE) {
- ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff);
- thiscpy = MIN(PAGESIZE, tocpy - copied);
- va = ppmapin(pp, PROT_READ, (caddr_t)-1);
- bcopy(va, (char *)db->db_data + bufoff, thiscpy);
- ppmapout(va);
- pp = pp->p_next;
- bufoff += PAGESIZE;
- }
-
- if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
-
- /*
- * NOTE(review): err is not assigned anywhere inside this
- * loop, so it is still 0 here and this break cannot fire.
- */
- if (err)
- break;
-
- offset += tocpy;
- size -= tocpy;
- }
- dmu_buf_rele_array(dbp, numbufs, FTAG);
- return (err);
-}
-#endif /* !__FreeBSD__ */
-#endif /* _KERNEL */
-
-/*
- * Context allocated by dmu_sync() and handed to dmu_sync_done(), which
- * frees it.
- */
-typedef struct {
- dbuf_dirty_record_t *dr; /* dirty record being written */
- dmu_sync_cb_t *done; /* caller's completion callback; may be NULL */
- void *arg; /* opaque argument passed to done */
-} dmu_sync_arg_t;
-
-/*
- * Write-completion callback installed by dmu_sync().  Records the block
- * pointer that was written in the dirty record, moves the record from
- * DR_IN_DMU_SYNC to DR_OVERRIDDEN, wakes waiters on db_changed, invokes
- * the caller's callback (if any) and frees the dmu_sync_arg_t.
- */
-/* ARGSUSED */
-static void
-dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
-{
- dmu_sync_arg_t *in = varg;
- dbuf_dirty_record_t *dr = in->dr;
- dmu_buf_impl_t *db = dr->dr_dbuf;
- dmu_sync_cb_t *done = in->done;
-
- /* Stamp fill count, type and level on any non-hole block pointer. */
- if (!BP_IS_HOLE(zio->io_bp)) {
- zio->io_bp->blk_fill = 1;
- BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type);
- BP_SET_LEVEL(zio->io_bp, 0);
- }
-
- mutex_enter(&db->db_mtx);
- ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
- dr->dt.dl.dr_overridden_by = *zio->io_bp; /* structure assignment */
- dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
- cv_broadcast(&db->db_changed);
- mutex_exit(&db->db_mtx);
-
- if (done)
- done(&(db->db), in->arg);
-
- kmem_free(in, sizeof (dmu_sync_arg_t));
-}
-
-/*
- * Intent log support: sync the block associated with db to disk.
- * N.B. and XXX: the caller is responsible for making sure that the
- * data isn't changing while dmu_sync() is writing it.
- *
- * Return values:
- *
- * EEXIST: this txg has already been synced, so there's nothing to do.
- * The caller should not log the write.
- *
- * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do.
- * The caller should not log the write.
- *
- * EALREADY: this block is already in the process of being synced.
- * The caller should track its progress (somehow).
- *
- * EINPROGRESS: the IO has been initiated.
- * The caller should log this blkptr in the callback.
- *
- * 0: completed. Sets *bp to the blkptr just written.
- * The caller should log this blkptr immediately.
- */
-int
-dmu_sync(zio_t *pio, dmu_buf_t *db_fake,
- blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg)
-{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- objset_impl_t *os = db->db_objset;
- dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
- tx_state_t *tx = &dp->dp_tx;
- dbuf_dirty_record_t *dr;
- dmu_sync_arg_t *in;
- zbookmark_t zb;
- zio_t *zio;
- int zio_flags;
- int err;
-
- ASSERT(BP_IS_HOLE(bp));
- ASSERT(txg != 0);
-
-
- dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n",
- txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg);
-
- /*
- * XXX - would be nice if we could do this without suspending...
- */
- txg_suspend(dp);
-
- /*
- * If this txg already synced, there's nothing to do.
- */
- if (txg <= tx->tx_synced_txg) {
- txg_resume(dp);
- /*
- * If we're running ziltest, we need the blkptr regardless.
- */
- if (txg > spa_freeze_txg(dp->dp_spa)) {
- /* if db_blkptr == NULL, this was an empty write */
- if (db->db_blkptr)
- *bp = *db->db_blkptr; /* structure assignment */
- return (0);
- }
- return (EEXIST);
- }
-
- /* From here on every return path must drop db_mtx and resume the txg. */
- mutex_enter(&db->db_mtx);
-
- if (txg == tx->tx_syncing_txg) {
- while (db->db_data_pending) {
- /*
- * IO is in-progress. Wait for it to finish.
- * XXX - would be nice to be able to somehow "attach"
- * this zio to the parent zio passed in.
- */
- cv_wait(&db->db_changed, &db->db_mtx);
- if (!db->db_data_pending &&
- db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) {
- /*
- * IO was compressed away
- */
- *bp = *db->db_blkptr; /* structure assignment */
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
- return (0);
- }
- ASSERT(db->db_data_pending ||
- (db->db_blkptr && db->db_blkptr->blk_birth == txg));
- }
-
- if (db->db_blkptr && db->db_blkptr->blk_birth == txg) {
- /*
- * IO is already completed.
- */
- *bp = *db->db_blkptr; /* structure assignment */
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
- return (0);
- }
- }
-
- /* Find the dirty record for exactly this txg, skipping newer ones. */
- dr = db->db_last_dirty;
- while (dr && dr->dr_txg > txg)
- dr = dr->dr_next;
- if (dr == NULL || dr->dr_txg < txg) {
- /*
- * This dbuf isn't dirty, must have been free_range'd.
- * There's no need to log writes to freed blocks, so we're done.
- */
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
- return (ENOENT);
- }
-
- ASSERT(dr->dr_txg == txg);
- if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) {
- /*
- * We have already issued a sync write for this buffer.
- */
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
- return (EALREADY);
- } else if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
- /*
- * This buffer has already been synced. It could not
- * have been dirtied since, or we would have cleared the state.
- */
- *bp = dr->dt.dl.dr_overridden_by; /* structure assignment */
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
- return (0);
- }
-
- /*
- * Claim the record for this sync write and package the callback
- * context; dmu_sync_done() frees `in`.
- */
- dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
- in = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP);
- in->dr = dr;
- in->done = done;
- in->arg = arg;
- mutex_exit(&db->db_mtx);
- txg_resume(dp);
-
- zb.zb_objset = os->os_dsl_dataset->ds_object;
- zb.zb_object = db->db.db_object;
- zb.zb_level = db->db_level;
- zb.zb_blkid = db->db_blkid;
- zio_flags = ZIO_FLAG_MUSTSUCCEED;
- if (dmu_ot[db->db_dnode->dn_type].ot_metadata || zb.zb_level != 0)
- zio_flags |= ZIO_FLAG_METADATA;
- zio = arc_write(pio, os->os_spa,
- zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum),
- zio_compress_select(db->db_dnode->dn_compress, os->os_compress),
- dmu_get_replication_level(os, &zb, db->db_dnode->dn_type),
- txg, bp, dr->dt.dl.dr_data, NULL, dmu_sync_done, in,
- ZIO_PRIORITY_SYNC_WRITE, zio_flags, &zb);
-
- /*
- * With a parent zio the write is issued asynchronously and the
- * caller hears back through `done`; without one we wait here.
- */
- if (pio) {
- zio_nowait(zio);
- err = EINPROGRESS;
- } else {
- err = zio_wait(zio);
- ASSERT(err == 0);
- }
- return (err);
-}
-
-/*
- * Pass size and ibs for `object` through to dnode_set_blksz() on the
- * object's dnode, under transaction tx.
- *
- * Returns the dnode_hold() error if the dnode cannot be held, otherwise
- * whatever dnode_set_blksz() returns.
- */
-int
-dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
-    dmu_tx_t *tx)
-{
-	dnode_t *dnp;
-	int rc = dnode_hold(os->os, object, FTAG, &dnp);
-
-	if (rc == 0) {
-		rc = dnode_set_blksz(dnp, size, ibs, tx);
-		dnode_rele(dnp, FTAG);
-	}
-	return (rc);
-}
-
-/*
- * Record `checksum` as the checksum setting of `object` and mark its
- * dnode dirty in tx.
- *
- * This function has no way to report failure, so an error from
- * dnode_hold() is treated as fatal (VERIFY) instead of continuing with
- * an uninitialized dnode pointer.
- */
-void
-dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
-    dmu_tx_t *tx)
-{
-	dnode_t *dn;
-
-	/* XXX dnode_hold is still not expected to get an i/o error here */
-	VERIFY(0 == dnode_hold(os->os, object, FTAG, &dn));
-	ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS);
-	dn->dn_checksum = checksum;
-	dnode_setdirty(dn, tx);
-	dnode_rele(dn, FTAG);
-}
-
-/*
- * Record `compress` as the compression setting of `object` and mark its
- * dnode dirty in tx.
- *
- * This function has no way to report failure, so an error from
- * dnode_hold() is treated as fatal (VERIFY) instead of continuing with
- * an uninitialized dnode pointer.
- */
-void
-dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
-    dmu_tx_t *tx)
-{
-	dnode_t *dn;
-
-	/* XXX dnode_hold is still not expected to get an i/o error here */
-	VERIFY(0 == dnode_hold(os->os, object, FTAG, &dn));
-	ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
-	dn->dn_compress = compress;
-	dnode_setdirty(dn, tx);
-	dnode_rele(dn, FTAG);
-}
-
-/*
- * Compute how many copies to write for the block identified by zb:
- * the objset's os_copies, plus one when the object type is flagged as
- * metadata in dmu_ot[] or the block is above level 0, capped at
- * spa_max_replication().
- */
-int
-dmu_get_replication_level(objset_impl_t *os,
-    zbookmark_t *zb, dmu_object_type_t ot)
-{
-	int copies = os->os_copies;
-
-	/* If it's the mos, it should have max copies set. */
-	ASSERT(zb->zb_objset != 0 ||
-	    copies == spa_max_replication(os->os_spa));
-
-	if (dmu_ot[ot].ot_metadata || zb->zb_level != 0)
-		copies += 1;
-
-	return (MIN(copies, spa_max_replication(os->os_spa)));
-}
-
-/*
- * Advance *off within `object` using dnode_next_offset(); `hole` is
- * passed through unchanged to select what is searched for.
- *
- * If the dnode is on any per-txg dirty list, the hold is dropped, the
- * pool is synced with txg_wait_synced(), and the dnode is held again
- * before searching.
- *
- * Returns a dnode_hold() error, or the result of dnode_next_offset().
- */
-int
-dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
-{
- dnode_t *dn;
- int i, err;
-
- err = dnode_hold(os->os, object, FTAG, &dn);
- if (err)
- return (err);
- /*
- * Sync any current changes before
- * we go trundling through the block pointers.
- */
- for (i = 0; i < TXG_SIZE; i++) {
- if (list_link_active(&dn->dn_dirty_link[i]))
- break;
- }
- /* i != TXG_SIZE means the loop above found an active dirty link. */
- if (i != TXG_SIZE) {
- dnode_rele(dn, FTAG);
- txg_wait_synced(dmu_objset_pool(os), 0);
- err = dnode_hold(os->os, object, FTAG, &dn);
- if (err)
- return (err);
- }
-
- err = dnode_next_offset(dn, hole, off, 1, 1, 0);
- dnode_rele(dn, FTAG);
-
- return (err);
-}
-
-/*
- * Fill *doi from the fields of an already-held dnode.  The copy is made
- * with dn_struct_rwlock held as reader and dn_mtx held.
- */
-void
-dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
-{
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- mutex_enter(&dn->dn_mtx);
-
- doi->doi_data_block_size = dn->dn_datablksz;
- /* An indirect block shift of 0 is reported as size 0. */
- doi->doi_metadata_block_size = dn->dn_indblkshift ?
- 1ULL << dn->dn_indblkshift : 0;
- doi->doi_indirection = dn->dn_nlevels;
- doi->doi_checksum = dn->dn_checksum;
- doi->doi_compress = dn->dn_compress;
- /* Used bytes rounded to the nearest SPA_MINBLOCKSIZE unit. */
- doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) +
- SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT;
- doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid;
- doi->doi_type = dn->dn_type;
- doi->doi_bonus_size = dn->dn_bonuslen;
- doi->doi_bonus_type = dn->dn_bonustype;
-
- mutex_exit(&dn->dn_mtx);
- rw_exit(&dn->dn_struct_rwlock);
-}
-
-/*
- * Look up a DMU object and, when doi is non-NULL, describe it in *doi.
- * With a NULL doi the return value alone tells the caller whether the
- * object could be held.
- *
- * Returns 0 on success or the dnode_hold() error.
- */
-int
-dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi)
-{
-	dnode_t *dnp;
-	int error;
-
-	error = dnode_hold(os->os, object, FTAG, &dnp);
-	if (error != 0)
-		return (error);
-
-	if (doi != NULL)
-		dmu_object_info_from_dnode(dnp, doi);
-	dnode_rele(dnp, FTAG);
-
-	return (0);
-}
-
-/*
- * Variant of dmu_object_info() for callers that already hold a dbuf:
- * the dnode is taken straight from the dbuf, so no lookup is needed.
- */
-void
-dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi)
-{
-	dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-
-	dmu_object_info_from_dnode(dbi->db_dnode, doi);
-}
-
-/*
- * Cheapest variant: report only the data block size and the space used,
- * in SPA_MINBLOCKSIZE units.  Specifically optimized for zfs_getattr().
- */
-void
-dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512)
-{
-	dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
-	dnode_t *dnp = dbi->db_dnode;
-
-	*blksize = dnp->dn_datablksz;
-
-	/* the leading 1 accounts for the dnode's own space */
-	*nblk512 = 1 + ((DN_USED_BYTES(dnp->dn_phys) + SPA_MINBLOCKSIZE / 2) >>
-	    SPA_MINBLOCKSHIFT);
-}
-
-/*
- * Byte-swap, in place, every 64-bit word of the `size`-byte buffer at
- * vbuf.  `size` must be a multiple of 8.
- */
-void
-byteswap_uint64_array(void *vbuf, size_t size)
-{
-	uint64_t *buf = vbuf;
-	size_t count = size >> 3;
-	size_t i;	/* same type as count: no signed/unsigned compare */
-
-	ASSERT((size & 7) == 0);
-
-	for (i = 0; i < count; i++)
-		buf[i] = BSWAP_64(buf[i]);
-}
-
-/*
- * Byte-swap, in place, every 32-bit word of the `size`-byte buffer at
- * vbuf.  `size` must be a multiple of 4.
- */
-void
-byteswap_uint32_array(void *vbuf, size_t size)
-{
-	uint32_t *buf = vbuf;
-	size_t count = size >> 2;
-	size_t i;	/* same type as count: no signed/unsigned compare */
-
-	ASSERT((size & 3) == 0);
-
-	for (i = 0; i < count; i++)
-		buf[i] = BSWAP_32(buf[i]);
-}
-
-/*
- * Byte-swap, in place, every 16-bit word of the `size`-byte buffer at
- * vbuf.  `size` must be a multiple of 2.
- */
-void
-byteswap_uint16_array(void *vbuf, size_t size)
-{
-	uint16_t *buf = vbuf;
-	size_t count = size >> 1;
-	size_t i;	/* same type as count: no signed/unsigned compare */
-
-	ASSERT((size & 1) == 0);
-
-	for (i = 0; i < count; i++)
-		buf[i] = BSWAP_16(buf[i]);
-}
-
-/*
- * Single bytes have no byte order, so this is intentionally a no-op; it
- * has the same signature as the wider byteswap_*_array() routines.
- */
-/* ARGSUSED */
-void
-byteswap_uint8_array(void *vbuf, size_t size)
-{
-}
-
-/*
- * Initialize the dbuf, dnode and ARC layers, in that order.
- * dmu_fini() tears them down in the reverse order.
- */
-void
-dmu_init(void)
-{
- dbuf_init();
- dnode_init();
- arc_init();
-}
-
-/*
- * Tear down the ARC, dnode and dbuf layers: the reverse of the order in
- * which dmu_init() sets them up.
- */
-void
-dmu_fini(void)
-{
- arc_fini();
- dnode_fini();
- dbuf_fini();
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
deleted file mode 100644
index 93168cc..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_tx.h>
-#include <sys/dnode.h>
-
-/*
- * Allocate a new object in `os` and return its object number.
- *
- * Candidate numbers are taken from os_obj_next under os_obj_lock until
- * dnode_hold_impl(..., DNODE_MUST_BE_FREE, ...) yields a dnode; that
- * dnode is then set up with dnode_allocate() and the new object is
- * registered with the transaction.  The loop has no failure exit.
- */
-uint64_t
-dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
- objset_impl_t *osi = os->os;
- uint64_t object;
- uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
- (osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
- dnode_t *dn = NULL;
- int restarted = B_FALSE;
-
- mutex_enter(&osi->os_obj_lock);
- for (;;) {
- object = osi->os_obj_next;
- /*
- * Each time we polish off an L2 bp worth of dnodes
- * (2^13 objects), move to another L2 bp that's still
- * reasonably sparse (at most 1/4 full). Look from the
- * beginning once, but after that keep looking from here.
- * If we can't find one, just keep going from here.
- */
- if (P2PHASE(object, L2_dnode_count) == 0) {
- uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
- int error = dnode_next_offset(osi->os_meta_dnode,
- B_TRUE, &offset, 2, DNODES_PER_BLOCK >> 2, 0);
- restarted = B_TRUE;
- if (error == 0)
- object = offset >> DNODE_SHIFT;
- }
- osi->os_obj_next = ++object;
-
- /*
- * XXX We should check for an i/o error here and return
- * up to our caller. Actually we should pre-read it in
- * dmu_tx_assign(), but there is currently no mechanism
- * to do so.
- */
- (void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
- FTAG, &dn);
- if (dn)
- break;
-
- /*
- * That slot was not usable: ask dmu_object_next() for the
- * next candidate and resume just before it (the loop
- * pre-increments).
- */
- if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
- osi->os_obj_next = object - 1;
- }
-
- dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
- dnode_rele(dn, FTAG);
-
- mutex_exit(&osi->os_obj_lock);
-
- dmu_tx_add_new_object(tx, os, object);
- return (object);
-}
-
-/*
- * Allocate the specific, currently free object number `object`.
- *
- * Returns EBADF when the meta-dnode object is requested from a
- * transaction for which dmu_tx_private_ok() is false, the
- * dnode_hold_impl() error if the dnode cannot be held as free, and 0
- * on success.
- */
-int
-dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
-    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	dnode_t *dnp;
-	int error;
-
-	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
-		return (EBADF);
-
-	error = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG,
-	    &dnp);
-	if (error != 0)
-		return (error);
-
-	dnode_allocate(dnp, ot, blocksize, 0, bonustype, bonuslen, tx);
-	dnode_rele(dnp, FTAG);
-	dmu_tx_add_new_object(tx, os, object);
-
-	return (0);
-}
-
-/*
- * Re-purpose the already allocated object `object` by calling
- * dnode_reallocate() with the new type, block size and bonus settings.
- *
- * Returns EBADF when the meta-dnode object is requested from a
- * transaction for which dmu_tx_private_ok() is false, the
- * dnode_hold_impl() error if the dnode cannot be held as allocated,
- * and 0 on success.
- */
-int
-dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
-    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	dnode_t *dnp;
-	int error;
-
-	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
-		return (EBADF);
-
-	error = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, FTAG,
-	    &dnp);
-	if (error != 0)
-		return (error);
-
-	dnode_reallocate(dnp, ot, blocksize, bonustype, bonuslen, tx);
-	dnode_rele(dnp, FTAG);
-
-	return (0);
-}
-
-/*
- * Free the allocated object `object` under transaction tx.
- *
- * Returns the dnode_hold_impl() error if the dnode cannot be held as
- * allocated, otherwise 0.
- */
-int
-dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
-{
-	dnode_t *dnp;
-	int error;
-
-	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
-
-	error = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, FTAG,
-	    &dnp);
-	if (error != 0)
-		return (error);
-
-	ASSERT(dnp->dn_type != DMU_OT_NONE);
-	dnode_free(dnp, tx);
-	dnode_rele(dnp, FTAG);
-
-	return (0);
-}
-
-/*
- * Step *objectp forward from the object after the one passed in, using
- * dnode_next_offset() on the meta-dnode; `hole` and `txg` are passed
- * through unchanged.
- *
- * *objectp is rewritten from the resulting offset whether or not the
- * search succeeded.  Returns the dnode_next_offset() result.
- */
-int
-dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
-{
-	uint64_t off = (*objectp + 1) << DNODE_SHIFT;
-	int rc;
-
-	rc = dnode_next_offset(os->os->os_meta_dnode, hole, &off, 0,
-	    DNODES_PER_BLOCK, txg);
-	*objectp = off >> DNODE_SHIFT;
-
-	return (rc);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
deleted file mode 100644
index 378fe8c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ /dev/null
@@ -1,1037 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_synctask.h>
-#include <sys/dnode.h>
-#include <sys/dbuf.h>
-#include <sys/zvol.h>
-#include <sys/dmu_tx.h>
-#include <sys/zio_checksum.h>
-#include <sys/zap.h>
-#include <sys/zil.h>
-#include <sys/dmu_impl.h>
-
-
-/* Return the spa_t recorded in this objset's implementation struct. */
-spa_t *
-dmu_objset_spa(objset_t *os)
-{
-	objset_impl_t *osi = os->os;
-
-	return (osi->os_spa);
-}
-
-/* Return the zilog_t recorded in this objset's implementation struct. */
-zilog_t *
-dmu_objset_zil(objset_t *os)
-{
-	objset_impl_t *osi = os->os;
-
-	return (osi->os_zil);
-}
-
-/*
- * Return the dsl_pool_t for this objset: taken from the dataset's
- * directory when both exist, otherwise from spa_get_dsl().
- */
-dsl_pool_t *
-dmu_objset_pool(objset_t *os)
-{
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
-
-	if (ds == NULL || !ds->ds_dir)
-		return (spa_get_dsl(os->os->os_spa));
-
-	return (ds->ds_dir->dd_pool);
-}
-
-/* Return the dataset pointer recorded for this objset (may be NULL). */
-dsl_dataset_t *
-dmu_objset_ds(objset_t *os)
-{
-	objset_impl_t *osi = os->os;
-
-	return (osi->os_dsl_dataset);
-}
-
-/* Return the os_type stored in this objset's physical header. */
-dmu_objset_type_t
-dmu_objset_type(objset_t *os)
-{
-	objset_impl_t *osi = os->os;
-
-	return (osi->os_phys->os_type);
-}
-
-/* Have dsl_dataset_name() write this objset's dataset name into buf. */
-void
-dmu_objset_name(objset_t *os, char *buf)
-{
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
-
-	dsl_dataset_name(ds, buf);
-}
-
-/*
- * Return the object number of this objset's dataset, or 0 when the
- * objset has no dataset.
- */
-uint64_t
-dmu_objset_id(objset_t *os)
-{
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
-
-	if (ds == NULL)
-		return (0);
-
-	return (ds->ds_object);
-}
-
-/*
- * Property callback (registered for "checksum" in
- * dmu_objset_open_impl()): cache the resolved checksum setting in the
- * objset.
- */
-static void
-checksum_changed_cb(void *arg, uint64_t newval)
-{
-	objset_impl_t *objset = arg;
-
-	/* By the time we are called, inheritance has been resolved. */
-	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
-
-	objset->os_checksum =
-	    zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
-}
-
-/*
- * Property callback (registered for "compression" in
- * dmu_objset_open_impl()): cache the resolved compression setting in
- * the objset.
- */
-static void
-compression_changed_cb(void *arg, uint64_t newval)
-{
-	objset_impl_t *objset = arg;
-
-	/* Inheritance and range checks are expected to be done already. */
-	ASSERT(newval != ZIO_COMPRESS_INHERIT);
-
-	objset->os_compress =
-	    zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
-}
-
-/*
- * Property callback (registered for "copies" in
- * dmu_objset_open_impl()): cache the new copies count in the objset.
- */
-static void
-copies_changed_cb(void *arg, uint64_t newval)
-{
-	objset_impl_t *objset = arg;
-
-	/* Inheritance and range checks are expected to be done already. */
-	ASSERT(newval > 0);
-	ASSERT(newval <= spa_max_replication(objset->os_spa));
-
-	objset->os_copies = newval;
-}
-
-/*
- * Byte-swap an objset_phys_t in place: its meta-dnode, its ZIL header
- * (as an array of 64-bit words) and its os_type field.
- */
-void
-dmu_objset_byteswap(void *buf, size_t size)
-{
- objset_phys_t *osp = buf;
-
- ASSERT(size == sizeof (objset_phys_t));
- dnode_byteswap(&osp->os_meta_dnode);
- byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
- osp->os_type = BSWAP_64(osp->os_type);
-}
-
-/*
- * Build the in-core objset_impl_t for the objset whose root block
- * pointer is bp, and return it through *osip.
- *
- * ds is NULL for the meta-objset.  For a dataset, if another
- * objset_impl_t is already attached to ds (dsl_dataset_set_user_ptr()
- * returns a winner), the one built here is evicted and the winner is
- * returned instead.
- *
- * Returns 0 on success, or the error from arc_read() or
- * dsl_prop_register(); on error everything allocated here is freed.
- */
-int
-dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
- objset_impl_t **osip)
-{
- objset_impl_t *winner, *osi;
- int i, err, checksum;
-
- osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
- osi->os.os = osi;
- osi->os_dsl_dataset = ds;
- osi->os_spa = spa;
- osi->os_rootbp = bp;
- /* Read the existing objset_phys_t, or start from a zeroed one. */
- if (!BP_IS_HOLE(osi->os_rootbp)) {
- uint32_t aflags = ARC_WAIT;
- zbookmark_t zb;
- zb.zb_objset = ds ? ds->ds_object : 0;
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = 0;
-
- dprintf_bp(osi->os_rootbp, "reading %s", "");
- err = arc_read(NULL, spa, osi->os_rootbp,
- dmu_ot[DMU_OT_OBJSET].ot_byteswap,
- arc_getbuf_func, &osi->os_phys_buf,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
- if (err) {
- kmem_free(osi, sizeof (objset_impl_t));
- return (err);
- }
- osi->os_phys = osi->os_phys_buf->b_data;
- arc_release(osi->os_phys_buf, &osi->os_phys_buf);
- } else {
- osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
- &osi->os_phys_buf, ARC_BUFC_METADATA);
- osi->os_phys = osi->os_phys_buf->b_data;
- bzero(osi->os_phys, sizeof (objset_phys_t));
- }
-
- /*
- * Note: the changed_cb will be called once before the register
- * func returns, thus changing the checksum/compression from the
- * default (fletcher2/off). Snapshots don't need to know, and
- * registering would complicate clone promotion.
- */
- if (ds && ds->ds_phys->ds_num_children == 0) {
- err = dsl_prop_register(ds, "checksum",
- checksum_changed_cb, osi);
- if (err == 0)
- err = dsl_prop_register(ds, "compression",
- compression_changed_cb, osi);
- if (err == 0)
- err = dsl_prop_register(ds, "copies",
- copies_changed_cb, osi);
- if (err) {
- VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
- &osi->os_phys_buf) == 1);
- kmem_free(osi, sizeof (objset_impl_t));
- return (err);
- }
- } else if (ds == NULL) {
- /* It's the meta-objset. */
- osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
- osi->os_compress = ZIO_COMPRESS_LZJB;
- osi->os_copies = spa_max_replication(spa);
- }
-
- osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header);
-
- /*
- * Metadata always gets compressed and checksummed.
- * If the data checksum is multi-bit correctable, and it's not
- * a ZBT-style checksum, then it's suitable for metadata as well.
- * Otherwise, the metadata checksum defaults to fletcher4.
- */
- checksum = osi->os_checksum;
-
- if (zio_checksum_table[checksum].ci_correctable &&
- !zio_checksum_table[checksum].ci_zbt)
- osi->os_md_checksum = checksum;
- else
- osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4;
- osi->os_md_compress = ZIO_COMPRESS_LZJB;
-
- /* Per-txg dirty/free dnode lists share the dn_dirty_link[] linkage. */
- for (i = 0; i < TXG_SIZE; i++) {
- list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
- offsetof(dnode_t, dn_dirty_link[i]));
- list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
- offsetof(dnode_t, dn_dirty_link[i]));
- }
- list_create(&osi->os_dnodes, sizeof (dnode_t),
- offsetof(dnode_t, dn_link));
- list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
- offsetof(dmu_buf_impl_t, db_link));
-
- mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
-
- osi->os_meta_dnode = dnode_special_open(osi,
- &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
-
- /*
- * Attach to the dataset.  If someone else attached first, discard
- * ours and hand back theirs.
- */
- if (ds != NULL) {
- winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict);
- if (winner) {
- dmu_objset_evict(ds, osi);
- osi = winner;
- }
- }
-
- *osip = osi;
- return (0);
-}
-
-/*
- * Open the objset named `name` with the given mode and return a newly
- * allocated handle through *osp.  The dataset's attached
- * objset_impl_t is reused when present, otherwise one is created with
- * dmu_objset_open_impl().
- *
- * Returns 0 on success; an error from dsl_dataset_open() or
- * dmu_objset_open_impl(); or EINVAL when `type` is not DMU_OST_ANY and
- * differs from the objset's on-disk type.  On every error path the
- * handle allocated here is freed.
- */
-/* called from zpl */
-int
-dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
- objset_t **osp)
-{
- dsl_dataset_t *ds;
- int err;
- objset_t *os;
- objset_impl_t *osi;
-
- /* The handle itself is passed as the tag for the dataset open. */
- os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
- err = dsl_dataset_open(name, mode, os, &ds);
- if (err) {
- kmem_free(os, sizeof (objset_t));
- return (err);
- }
-
- osi = dsl_dataset_get_user_ptr(ds);
- if (osi == NULL) {
- err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
- ds, &ds->ds_phys->ds_bp, &osi);
- if (err) {
- dsl_dataset_close(ds, mode, os);
- kmem_free(os, sizeof (objset_t));
- return (err);
- }
- }
-
- os->os = osi;
- os->os_mode = mode;
-
- if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) {
- /* dmu_objset_close() both closes the dataset and frees os. */
- dmu_objset_close(os);
- return (EINVAL);
- }
- *osp = os;
- return (0);
-}
-
-/*
- * Close the dataset behind an objset handle, using the mode the handle
- * was opened with and the handle itself as the tag, then free the
- * handle.
- */
-void
-dmu_objset_close(objset_t *os)
-{
- dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os);
- kmem_free(os, sizeof (objset_t));
-}
-
-/*
- * Call dnode_evict_dbufs() on every dnode of the objset that has holds,
- * visiting the meta-dnode last.  `try` is passed through unchanged.
- *
- * Returns 1 as soon as dnode_evict_dbufs() returns nonzero for a dnode,
- * otherwise 0 once the whole list has been walked.  os_lock is dropped
- * around each dnode_evict_dbufs() call.
- */
-int
-dmu_objset_evict_dbufs(objset_t *os, int try)
-{
- objset_impl_t *osi = os->os;
- dnode_t *dn;
-
- mutex_enter(&osi->os_lock);
-
- /* process the mdn last, since the other dnodes have holds on it */
- list_remove(&osi->os_dnodes, osi->os_meta_dnode);
- list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode);
-
- /*
- * Find the first dnode with holds. We have to do this dance
- * because dnode_add_ref() only works if you already have a
- * hold. If there are no holds then it has no dbufs so OK to
- * skip.
- */
- for (dn = list_head(&osi->os_dnodes);
- dn && refcount_is_zero(&dn->dn_holds);
- dn = list_next(&osi->os_dnodes, dn))
- continue;
- if (dn)
- dnode_add_ref(dn, FTAG);
-
- while (dn) {
- dnode_t *next_dn = dn;
-
- /*
- * Pick and reference the next held dnode before dropping
- * os_lock for the eviction below.
- */
- do {
- next_dn = list_next(&osi->os_dnodes, next_dn);
- } while (next_dn && refcount_is_zero(&next_dn->dn_holds));
- if (next_dn)
- dnode_add_ref(next_dn, FTAG);
-
- mutex_exit(&osi->os_lock);
- if (dnode_evict_dbufs(dn, try)) {
- dnode_rele(dn, FTAG);
- if (next_dn)
- dnode_rele(next_dn, FTAG);
- return (1);
- }
- dnode_rele(dn, FTAG);
- mutex_enter(&osi->os_lock);
- dn = next_dn;
- }
- mutex_exit(&osi->os_lock);
- return (0);
-}
-
-/*
- * Tear down an objset_impl_t: unregister the property callbacks that
- * dmu_objset_open_impl() registers, evict its dbufs, close the
- * meta-dnode, free the ZIL, drop the ARC buffer holding the
- * objset_phys_t, and free the structure.  Passed to
- * dsl_dataset_set_user_ptr() as the eviction function and also called
- * directly by dmu_objset_open_impl().
- */
-void
-dmu_objset_evict(dsl_dataset_t *ds, void *arg)
-{
- objset_impl_t *osi = arg;
- objset_t os;
- int i;
-
- for (i = 0; i < TXG_SIZE; i++) {
- ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
- ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
- }
-
- /* Same condition under which the callbacks were registered. */
- if (ds && ds->ds_phys->ds_num_children == 0) {
- VERIFY(0 == dsl_prop_unregister(ds, "checksum",
- checksum_changed_cb, osi));
- VERIFY(0 == dsl_prop_unregister(ds, "compression",
- compression_changed_cb, osi));
- VERIFY(0 == dsl_prop_unregister(ds, "copies",
- copies_changed_cb, osi));
- }
-
- /*
- * We should need only a single pass over the dnode list, since
- * nothing can be added to the list at this point.
- */
- os.os = osi;
- (void) dmu_objset_evict_dbufs(&os, 0);
-
- /* Only the meta-dnode, with no dbufs, should remain. */
- ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode);
- ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL);
-
- dnode_special_close(osi->os_meta_dnode);
- zil_free(osi->os_zil);
-
- VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
- mutex_destroy(&osi->os_lock);
- mutex_destroy(&osi->os_obj_lock);
- kmem_free(osi, sizeof (objset_impl_t));
-}
-
-/*
- * Create a new objset of the given type at bp, in syncing context:
- * open it with dmu_objset_open_impl(), allocate its meta-dnode, record
- * the type in the physical header and mark the dataset dirty.  Returns
- * the new objset_impl_t; a failure to open it is fatal (VERIFY).
- */
-/* called from dsl for meta-objset */
-objset_impl_t *
-dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
- dmu_objset_type_t type, dmu_tx_t *tx)
-{
- objset_impl_t *osi;
- dnode_t *mdn;
-
- ASSERT(dmu_tx_is_syncing(tx));
- VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi));
- mdn = osi->os_meta_dnode;
-
- dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
- DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
-
- /*
- * We don't want to have to increase the meta-dnode's nlevels
- * later, because then we could do it in quiescing context while
- * we are also accessing it in open context.
- *
- * This precaution is not necessary for the MOS (ds == NULL),
- * because the MOS is only updated in syncing context.
- * This is most fortunate: the MOS is the only objset that
- * needs to be synced multiple times as spa_sync() iterates
- * to convergence, so minimizing its dn_nlevels matters.
- */
- if (ds != NULL) {
- int levels = 1;
-
- /*
- * Determine the number of levels necessary for the meta-dnode
- * to contain DN_MAX_OBJECT dnodes.
- */
- while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
- (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
- DN_MAX_OBJECT * sizeof (dnode_phys_t))
- levels++;
-
- mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
- mdn->dn_nlevels = levels;
- }
-
- ASSERT(type != DMU_OST_NONE);
- ASSERT(type != DMU_OST_ANY);
- ASSERT(type < DMU_OST_NUMTYPES);
- osi->os_phys->os_type = type;
-
- /* NOTE(review): ds may be NULL here (MOS); confirm dsl_dataset_dirty() accepts that. */
- dsl_dataset_dirty(ds, tx);
-
- return (osi);
-}
-
-/*
- * Arguments carried from dmu_objset_create() to its check and sync
- * functions.
- */
-struct oscarg {
- /* optional callback run on the newly created (non-clone) objset */
- void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx);
- void *userarg; /* opaque argument for userfunc */
- dsl_dataset_t *clone_parent; /* dataset to clone from, or NULL */
- const char *lastname; /* last component of the new name */
- dmu_objset_type_t type; /* type of the objset to create */
-};
-
-/*
- * Check function for objset creation (arg1 is the parent dsl_dir_t,
- * arg2 the struct oscarg).
- *
- * Returns EEXIST if the name is already present in the parent's child
- * directory ZAP, any other zap_lookup() error except ENOENT, EXDEV if
- * the clone parent lives in a different pool, EINVAL if the clone
- * parent's ds_num_children is 0, and 0 otherwise.
- */
-/* ARGSUSED */
-static int
-dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct oscarg *oa = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- int err;
- uint64_t ddobj;
-
- /* ENOENT is the only acceptable outcome: the name must be unused. */
- err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
- oa->lastname, sizeof (uint64_t), 1, &ddobj);
- if (err != ENOENT)
- return (err ? err : EEXIST);
-
- if (oa->clone_parent != NULL) {
- /*
- * You can't clone across pools.
- */
- if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool)
- return (EXDEV);
-
- /*
- * You can only clone snapshots, not the head datasets.
- */
- if (oa->clone_parent->ds_phys->ds_num_children == 0)
- return (EINVAL);
- }
- return (0);
-}
-
-static void
-dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct oscarg *oa = arg2;
- dsl_dataset_t *ds;
- blkptr_t *bp;
- uint64_t dsobj;
-
- ASSERT(dmu_tx_is_syncing(tx));
-
- dsobj = dsl_dataset_create_sync(dd, oa->lastname,
- oa->clone_parent, tx);
-
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL,
- DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds));
- bp = dsl_dataset_get_blkptr(ds);
- if (BP_IS_HOLE(bp)) {
- objset_impl_t *osi;
-
- /* This is an empty dmu_objset; not a clone. */
- osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
- ds, bp, oa->type, tx);
-
- if (oa->userfunc)
- oa->userfunc(&osi->os, oa->userarg, tx);
- }
- dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG);
-}
-
-int
-dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent,
- void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg)
-{
- dsl_dir_t *pdd;
- const char *tail;
- int err = 0;
- struct oscarg oa = { 0 };
-
- ASSERT(strchr(name, '@') == NULL);
- err = dsl_dir_open(name, FTAG, &pdd, &tail);
- if (err)
- return (err);
- if (tail == NULL) {
- dsl_dir_close(pdd, FTAG);
- return (EEXIST);
- }
-
- dprintf("name=%s\n", name);
-
- oa.userfunc = func;
- oa.userarg = arg;
- oa.lastname = tail;
- oa.type = type;
- if (clone_parent != NULL) {
- /*
- * You can't clone to a different type.
- */
- if (clone_parent->os->os_phys->os_type != type) {
- dsl_dir_close(pdd, FTAG);
- return (EINVAL);
- }
- oa.clone_parent = clone_parent->os->os_dsl_dataset;
- }
- err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
- dmu_objset_create_sync, pdd, &oa, 5);
- dsl_dir_close(pdd, FTAG);
- return (err);
-}
-
-int
-dmu_objset_destroy(const char *name)
-{
- objset_t *os;
- int error;
-
- /*
- * If it looks like we'll be able to destroy it, and there's
- * an unplayed replay log sitting around, destroy the log.
- * It would be nicer to do this in dsl_dataset_destroy_sync(),
- * but the replay log objset is modified in open context.
- */
- error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os);
- if (error == 0) {
- zil_destroy(dmu_objset_zil(os), B_FALSE);
- dmu_objset_close(os);
- }
-
- return (dsl_dataset_destroy(name));
-}
-
-int
-dmu_objset_rollback(const char *name)
-{
- int err;
- objset_t *os;
-
- err = dmu_objset_open(name, DMU_OST_ANY,
- DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
- if (err == 0) {
- err = zil_suspend(dmu_objset_zil(os));
- if (err == 0)
- zil_resume(dmu_objset_zil(os));
- if (err == 0) {
- /* XXX uncache everything? */
- err = dsl_dataset_rollback(os->os->os_dsl_dataset);
- }
- dmu_objset_close(os);
- }
- return (err);
-}
-
-struct snaparg {
- dsl_sync_task_group_t *dstg;
- char *snapname;
- char failed[MAXPATHLEN];
-};
-
-static int
-dmu_objset_snapshot_one(char *name, void *arg)
-{
- struct snaparg *sn = arg;
- objset_t *os;
- dmu_objset_stats_t stat;
- int err;
-
- (void) strcpy(sn->failed, name);
-
- err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os);
- if (err != 0)
- return (err);
-
- /*
- * If the objset is in an inconsistent state, return busy.
- */
- dmu_objset_fast_stat(os, &stat);
- if (stat.dds_inconsistent) {
- dmu_objset_close(os);
- return (EBUSY);
- }
-
- /*
- * NB: we need to wait for all in-flight changes to get to disk,
- * so that we snapshot those changes. zil_suspend does this as
- * a side effect.
- */
- err = zil_suspend(dmu_objset_zil(os));
- if (err == 0) {
- dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check,
- dsl_dataset_snapshot_sync, os, sn->snapname, 3);
- } else {
- dmu_objset_close(os);
- }
-
- return (err);
-}
-
-int
-dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
-{
- dsl_sync_task_t *dst;
- struct snaparg sn = { 0 };
- char *cp;
- spa_t *spa;
- int err;
-
- (void) strcpy(sn.failed, fsname);
-
- cp = strchr(fsname, '/');
- if (cp) {
- *cp = '\0';
- err = spa_open(fsname, &spa, FTAG);
- *cp = '/';
- } else {
- err = spa_open(fsname, &spa, FTAG);
- }
- if (err)
- return (err);
-
- sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- sn.snapname = snapname;
-
- if (recursive) {
- err = dmu_objset_find(fsname,
- dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
- } else {
- err = dmu_objset_snapshot_one(fsname, &sn);
- }
-
- if (err)
- goto out;
-
- err = dsl_sync_task_group_wait(sn.dstg);
-
- for (dst = list_head(&sn.dstg->dstg_tasks); dst;
- dst = list_next(&sn.dstg->dstg_tasks, dst)) {
- objset_t *os = dst->dst_arg1;
- if (dst->dst_err)
- dmu_objset_name(os, sn.failed);
- zil_resume(dmu_objset_zil(os));
- dmu_objset_close(os);
- }
-out:
- if (err)
- (void) strcpy(fsname, sn.failed);
- dsl_sync_task_group_destroy(sn.dstg);
- spa_close(spa, FTAG);
- return (err);
-}
-
-static void
-dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
-{
- dnode_t *dn;
-
- while (dn = list_head(list)) {
- ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
- ASSERT(dn->dn_dbuf->db_data_pending);
- /*
- * Initialize dn_zio outside dnode_sync()
- * to accomodate meta-dnode
- */
- dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
- ASSERT(dn->dn_zio);
-
- ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
- list_remove(list, dn);
- dnode_sync(dn, tx);
- }
-}
-
-/* ARGSUSED */
-static void
-ready(zio_t *zio, arc_buf_t *abuf, void *arg)
-{
- objset_impl_t *os = arg;
- blkptr_t *bp = os->os_rootbp;
- dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
- int i;
-
- /*
- * Update rootbp fill count.
- */
- bp->blk_fill = 1; /* count the meta-dnode */
- for (i = 0; i < dnp->dn_nblkptr; i++)
- bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
-}
-
-/* ARGSUSED */
-static void
-killer(zio_t *zio, arc_buf_t *abuf, void *arg)
-{
- objset_impl_t *os = arg;
-
- ASSERT3U(zio->io_error, ==, 0);
-
- BP_SET_TYPE(zio->io_bp, DMU_OT_OBJSET);
- BP_SET_LEVEL(zio->io_bp, 0);
-
- if (!DVA_EQUAL(BP_IDENTITY(zio->io_bp),
- BP_IDENTITY(&zio->io_bp_orig))) {
- if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg)
- dsl_dataset_block_kill(os->os_dsl_dataset,
- &zio->io_bp_orig, NULL, os->os_synctx);
- dsl_dataset_block_born(os->os_dsl_dataset, zio->io_bp,
- os->os_synctx);
- }
- arc_release(os->os_phys_buf, &os->os_phys_buf);
-}
-
-/* called from dsl */
-void
-dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
-{
- int txgoff;
- zbookmark_t zb;
- zio_t *zio;
- list_t *list;
- dbuf_dirty_record_t *dr;
- int zio_flags;
-
- dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
-
- ASSERT(dmu_tx_is_syncing(tx));
- /* XXX the write_done callback should really give us the tx... */
- os->os_synctx = tx;
-
- if (os->os_dsl_dataset == NULL) {
- /*
- * This is the MOS. If we have upgraded,
- * spa_max_replication() could change, so reset
- * os_copies here.
- */
- os->os_copies = spa_max_replication(os->os_spa);
- }
-
- /*
- * Create the root block IO
- */
- zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = 0;
- zio_flags = ZIO_FLAG_MUSTSUCCEED;
- if (dmu_ot[DMU_OT_OBJSET].ot_metadata || zb.zb_level != 0)
- zio_flags |= ZIO_FLAG_METADATA;
- if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg))
- dsl_dataset_block_kill(os->os_dsl_dataset,
- os->os_rootbp, pio, tx);
- zio = arc_write(pio, os->os_spa, os->os_md_checksum,
- os->os_md_compress,
- dmu_get_replication_level(os, &zb, DMU_OT_OBJSET),
- tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, killer, os,
- ZIO_PRIORITY_ASYNC_WRITE, zio_flags, &zb);
-
- /*
- * Sync meta-dnode - the parent IO for the sync is the root block
- */
- os->os_meta_dnode->dn_zio = zio;
- dnode_sync(os->os_meta_dnode, tx);
-
- txgoff = tx->tx_txg & TXG_MASK;
-
- dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
- dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);
-
- list = &os->os_meta_dnode->dn_dirty_records[txgoff];
- while (dr = list_head(list)) {
- ASSERT(dr->dr_dbuf->db_level == 0);
- list_remove(list, dr);
- if (dr->dr_zio)
- zio_nowait(dr->dr_zio);
- }
- /*
- * Free intent log blocks up to this tx.
- */
- zil_sync(os->os_zil, tx);
- zio_nowait(zio);
-}
-
-void
-dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
- uint64_t *usedobjsp, uint64_t *availobjsp)
-{
- dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp,
- usedobjsp, availobjsp);
-}
-
-uint64_t
-dmu_objset_fsid_guid(objset_t *os)
-{
- return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset));
-}
-
-void
-dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
-{
- stat->dds_type = os->os->os_phys->os_type;
- if (os->os->os_dsl_dataset)
- dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat);
-}
-
-void
-dmu_objset_stats(objset_t *os, nvlist_t *nv)
-{
- ASSERT(os->os->os_dsl_dataset ||
- os->os->os_phys->os_type == DMU_OST_META);
-
- if (os->os->os_dsl_dataset != NULL)
- dsl_dataset_stats(os->os->os_dsl_dataset, nv);
-
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
- os->os->os_phys->os_type);
-}
-
-int
-dmu_objset_is_snapshot(objset_t *os)
-{
- if (os->os->os_dsl_dataset != NULL)
- return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset));
- else
- return (B_FALSE);
-}
-
-int
-dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
- uint64_t *idp, uint64_t *offp)
-{
- dsl_dataset_t *ds = os->os->os_dsl_dataset;
- zap_cursor_t cursor;
- zap_attribute_t attr;
-
- if (ds->ds_phys->ds_snapnames_zapobj == 0)
- return (ENOENT);
-
- zap_cursor_init_serialized(&cursor,
- ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_phys->ds_snapnames_zapobj, *offp);
-
- if (zap_cursor_retrieve(&cursor, &attr) != 0) {
- zap_cursor_fini(&cursor);
- return (ENOENT);
- }
-
- if (strlen(attr.za_name) + 1 > namelen) {
- zap_cursor_fini(&cursor);
- return (ENAMETOOLONG);
- }
-
- (void) strcpy(name, attr.za_name);
- if (idp)
- *idp = attr.za_first_integer;
- zap_cursor_advance(&cursor);
- *offp = zap_cursor_serialize(&cursor);
- zap_cursor_fini(&cursor);
-
- return (0);
-}
-
-int
-dmu_dir_list_next(objset_t *os, int namelen, char *name,
- uint64_t *idp, uint64_t *offp)
-{
- dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir;
- zap_cursor_t cursor;
- zap_attribute_t attr;
-
- /* there is no next dir on a snapshot! */
- if (os->os->os_dsl_dataset->ds_object !=
- dd->dd_phys->dd_head_dataset_obj)
- return (ENOENT);
-
- zap_cursor_init_serialized(&cursor,
- dd->dd_pool->dp_meta_objset,
- dd->dd_phys->dd_child_dir_zapobj, *offp);
-
- if (zap_cursor_retrieve(&cursor, &attr) != 0) {
- zap_cursor_fini(&cursor);
- return (ENOENT);
- }
-
- if (strlen(attr.za_name) + 1 > namelen) {
- zap_cursor_fini(&cursor);
- return (ENAMETOOLONG);
- }
-
- (void) strcpy(name, attr.za_name);
- if (idp)
- *idp = attr.za_first_integer;
- zap_cursor_advance(&cursor);
- *offp = zap_cursor_serialize(&cursor);
- zap_cursor_fini(&cursor);
-
- return (0);
-}
-
-/*
- * Find all objsets under name, and for each, call 'func(child_name, arg)'.
- */
-int
-dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags)
-{
- dsl_dir_t *dd;
- objset_t *os;
- uint64_t snapobj;
- zap_cursor_t zc;
- zap_attribute_t *attr;
- char *child;
- int do_self, err;
-
- err = dsl_dir_open(name, FTAG, &dd, NULL);
- if (err)
- return (err);
-
- /* NB: the $MOS dir doesn't have a head dataset */
- do_self = (dd->dd_phys->dd_head_dataset_obj != 0);
- attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
-
- /*
- * Iterate over all children.
- */
- if (flags & DS_FIND_CHILDREN) {
- for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset,
- dd->dd_phys->dd_child_dir_zapobj);
- zap_cursor_retrieve(&zc, attr) == 0;
- (void) zap_cursor_advance(&zc)) {
- ASSERT(attr->za_integer_length == sizeof (uint64_t));
- ASSERT(attr->za_num_integers == 1);
-
- /*
- * No separating '/' because parent's name ends in /.
- */
- child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- /* XXX could probably just use name here */
- dsl_dir_name(dd, child);
- (void) strcat(child, "/");
- (void) strcat(child, attr->za_name);
- err = dmu_objset_find(child, func, arg, flags);
- kmem_free(child, MAXPATHLEN);
- if (err)
- break;
- }
- zap_cursor_fini(&zc);
-
- if (err) {
- dsl_dir_close(dd, FTAG);
- kmem_free(attr, sizeof (zap_attribute_t));
- return (err);
- }
- }
-
- /*
- * Iterate over all snapshots.
- */
- if ((flags & DS_FIND_SNAPSHOTS) &&
- dmu_objset_open(name, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os) == 0) {
-
- snapobj = os->os->os_dsl_dataset->ds_phys->ds_snapnames_zapobj;
- dmu_objset_close(os);
-
- for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, snapobj);
- zap_cursor_retrieve(&zc, attr) == 0;
- (void) zap_cursor_advance(&zc)) {
- ASSERT(attr->za_integer_length == sizeof (uint64_t));
- ASSERT(attr->za_num_integers == 1);
-
- child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- /* XXX could probably just use name here */
- dsl_dir_name(dd, child);
- (void) strcat(child, "@");
- (void) strcat(child, attr->za_name);
- err = func(child, arg);
- kmem_free(child, MAXPATHLEN);
- if (err)
- break;
- }
- zap_cursor_fini(&zc);
- }
-
- dsl_dir_close(dd, FTAG);
- kmem_free(attr, sizeof (zap_attribute_t));
-
- if (err)
- return (err);
-
- /*
- * Apply to self if appropriate.
- */
- if (do_self)
- err = func(name, arg);
- return (err);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
deleted file mode 100644
index 3e55dc3..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
+++ /dev/null
@@ -1,1009 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_impl.h>
-#include <sys/dmu_tx.h>
-#include <sys/dbuf.h>
-#include <sys/dnode.h>
-#include <sys/zfs_context.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_synctask.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zap.h>
-#include <sys/zio_checksum.h>
-
-struct backuparg {
- dmu_replay_record_t *drr;
- kthread_t *td;
- struct file *fp;
- objset_t *os;
- zio_cksum_t zc;
- int err;
-};
-
-static int
-dump_bytes(struct backuparg *ba, void *buf, int len)
-{
- struct uio auio;
- struct iovec aiov;
-
- ASSERT3U(len % 8, ==, 0);
-
- fletcher_4_incremental_native(buf, len, &ba->zc);
-
- aiov.iov_base = buf;
- aiov.iov_len = len;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_resid = len;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_offset = (off_t)-1;
- auio.uio_td = ba->td;
-#ifdef _KERNEL
- if (ba->fp->f_type == DTYPE_VNODE)
- bwillwrite();
- ba->err = fo_write(ba->fp, &auio, ba->td->td_ucred, 0, ba->td);
-#else
- fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
- ba->err = EOPNOTSUPP;
-#endif
-
- return (ba->err);
-}
-
-static int
-dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
- uint64_t length)
-{
- /* write a FREE record */
- bzero(ba->drr, sizeof (dmu_replay_record_t));
- ba->drr->drr_type = DRR_FREE;
- ba->drr->drr_u.drr_free.drr_object = object;
- ba->drr->drr_u.drr_free.drr_offset = offset;
- ba->drr->drr_u.drr_free.drr_length = length;
-
- if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
- return (EINTR);
- return (0);
-}
-
-static int
-dump_data(struct backuparg *ba, dmu_object_type_t type,
- uint64_t object, uint64_t offset, int blksz, void *data)
-{
- /* write a DATA record */
- bzero(ba->drr, sizeof (dmu_replay_record_t));
- ba->drr->drr_type = DRR_WRITE;
- ba->drr->drr_u.drr_write.drr_object = object;
- ba->drr->drr_u.drr_write.drr_type = type;
- ba->drr->drr_u.drr_write.drr_offset = offset;
- ba->drr->drr_u.drr_write.drr_length = blksz;
-
- if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
- return (EINTR);
- if (dump_bytes(ba, data, blksz))
- return (EINTR);
- return (0);
-}
-
-static int
-dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
-{
- /* write a FREEOBJECTS record */
- bzero(ba->drr, sizeof (dmu_replay_record_t));
- ba->drr->drr_type = DRR_FREEOBJECTS;
- ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj;
- ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs;
-
- if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
- return (EINTR);
- return (0);
-}
-
-static int
-dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
-{
- if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
- return (dump_freeobjects(ba, object, 1));
-
- /* write an OBJECT record */
- bzero(ba->drr, sizeof (dmu_replay_record_t));
- ba->drr->drr_type = DRR_OBJECT;
- ba->drr->drr_u.drr_object.drr_object = object;
- ba->drr->drr_u.drr_object.drr_type = dnp->dn_type;
- ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype;
- ba->drr->drr_u.drr_object.drr_blksz =
- dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen;
- ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum;
- ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress;
-
- if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
- return (EINTR);
-
- if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)))
- return (EINTR);
-
- /* free anything past the end of the file */
- if (dump_free(ba, object, (dnp->dn_maxblkid + 1) *
- (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
- return (EINTR);
- if (ba->err)
- return (EINTR);
- return (0);
-}
-
-#define BP_SPAN(dnp, level) \
- (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
- (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
-
-static int
-backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
- struct backuparg *ba = arg;
- uint64_t object = bc->bc_bookmark.zb_object;
- int level = bc->bc_bookmark.zb_level;
- uint64_t blkid = bc->bc_bookmark.zb_blkid;
- blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL;
- dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
- void *data = bc->bc_data;
- int err = 0;
-
- if (SIGPENDING(curthread))
- return (EINTR);
-
- ASSERT(data || bp == NULL);
-
- if (bp == NULL && object == 0) {
- uint64_t span = BP_SPAN(bc->bc_dnode, level);
- uint64_t dnobj = (blkid * span) >> DNODE_SHIFT;
- err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
- } else if (bp == NULL) {
- uint64_t span = BP_SPAN(bc->bc_dnode, level);
- err = dump_free(ba, object, blkid * span, span);
- } else if (data && level == 0 && type == DMU_OT_DNODE) {
- dnode_phys_t *blk = data;
- int i;
- int blksz = BP_GET_LSIZE(bp);
-
- for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
- uint64_t dnobj =
- (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
- err = dump_dnode(ba, dnobj, blk+i);
- if (err)
- break;
- }
- } else if (level == 0 &&
- type != DMU_OT_DNODE && type != DMU_OT_OBJSET) {
- int blksz = BP_GET_LSIZE(bp);
- if (data == NULL) {
- uint32_t aflags = ARC_WAIT;
- arc_buf_t *abuf;
- zbookmark_t zb;
-
- zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object;
- zb.zb_object = object;
- zb.zb_level = level;
- zb.zb_blkid = blkid;
- (void) arc_read(NULL, spa, bp,
- dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED,
- &aflags, &zb);
-
- if (abuf) {
- err = dump_data(ba, type, object, blkid * blksz,
- blksz, abuf->b_data);
- (void) arc_buf_remove_ref(abuf, &abuf);
- }
- } else {
- err = dump_data(ba, type, object, blkid * blksz,
- blksz, data);
- }
- }
-
- ASSERT(err == 0 || err == EINTR);
- return (err);
-}
-
-int
-dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct file *fp)
-{
- dsl_dataset_t *ds = tosnap->os->os_dsl_dataset;
- dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL;
- dmu_replay_record_t *drr;
- struct backuparg ba;
- int err;
-
- /* tosnap must be a snapshot */
- if (ds->ds_phys->ds_next_snap_obj == 0)
- return (EINVAL);
-
- /* fromsnap must be an earlier snapshot from the same fs as tosnap */
- if (fromds && (ds->ds_dir != fromds->ds_dir ||
- fromds->ds_phys->ds_creation_txg >=
- ds->ds_phys->ds_creation_txg))
- return (EXDEV);
-
- drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
- drr->drr_type = DRR_BEGIN;
- drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
- drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION;
- drr->drr_u.drr_begin.drr_creation_time =
- ds->ds_phys->ds_creation_time;
- drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type;
- drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
- if (fromds)
- drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
- dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
-
- ba.drr = drr;
- ba.td = curthread;
- ba.fp = fp;
- ba.os = tosnap;
- ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);
-
- if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) {
- kmem_free(drr, sizeof (dmu_replay_record_t));
- return (ba.err);
- }
-
- err = traverse_dsl_dataset(ds,
- fromds ? fromds->ds_phys->ds_creation_txg : 0,
- ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK,
- backup_cb, &ba);
-
- if (err) {
- if (err == EINTR && ba.err)
- err = ba.err;
- kmem_free(drr, sizeof (dmu_replay_record_t));
- return (err);
- }
-
- bzero(drr, sizeof (dmu_replay_record_t));
- drr->drr_type = DRR_END;
- drr->drr_u.drr_end.drr_checksum = ba.zc;
-
- if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) {
- kmem_free(drr, sizeof (dmu_replay_record_t));
- return (ba.err);
- }
-
- kmem_free(drr, sizeof (dmu_replay_record_t));
-
- return (0);
-}
-
-struct restorearg {
- int err;
- int byteswap;
- kthread_t *td;
- struct file *fp;
- char *buf;
- uint64_t voff;
- int buflen; /* number of valid bytes in buf */
- int bufoff; /* next offset to read */
- int bufsize; /* amount of memory allocated for buf */
- zio_cksum_t zc;
-};
-
-/* ARGSUSED */
-static int
-replay_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- struct drr_begin *drrb = arg2;
- const char *snapname;
- int err;
- uint64_t val;
-
- /* must already be a snapshot of this fs */
- if (ds->ds_phys->ds_prev_snap_obj == 0)
- return (ENODEV);
-
- /* most recent snapshot must match fromguid */
- if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid)
- return (ENODEV);
- /* must not have any changes since most recent snapshot */
- if (ds->ds_phys->ds_bp.blk_birth >
- ds->ds_prev->ds_phys->ds_creation_txg)
- return (ETXTBSY);
-
- /* new snapshot name must not exist */
- snapname = strrchr(drrb->drr_toname, '@');
- if (snapname == NULL)
- return (EEXIST);
-
- snapname++;
- err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val);
- if (err == 0)
- return (EEXIST);
- if (err != ENOENT)
- return (err);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-replay_incremental_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
-}
-
-/* ARGSUSED */
-static int
-replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct drr_begin *drrb = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- char *cp;
- uint64_t val;
- int err;
-
- cp = strchr(drrb->drr_toname, '@');
- *cp = '\0';
- err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
- strrchr(drrb->drr_toname, '/') + 1,
- sizeof (uint64_t), 1, &val);
- *cp = '@';
-
- if (err != ENOENT)
- return (err ? err : EEXIST);
-
- return (0);
-}
-
-static void
-replay_full_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct drr_begin *drrb = arg2;
- char *cp;
- dsl_dataset_t *ds;
- uint64_t dsobj;
-
- cp = strchr(drrb->drr_toname, '@');
- *cp = '\0';
- dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1,
- NULL, tx);
- *cp = '@';
-
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL,
- DS_MODE_EXCLUSIVE, FTAG, &ds));
-
- (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds),
- ds, &ds->ds_phys->ds_bp, drrb->drr_type, tx);
-
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
-
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-}
-
-static int
-replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- struct drr_begin *drrb = arg2;
- char *snapname;
-
- /* XXX verify that drr_toname is in dd */
-
- snapname = strchr(drrb->drr_toname, '@');
- if (snapname == NULL)
- return (EINVAL);
- snapname++;
-
- return (dsl_dataset_snapshot_check(os, snapname, tx));
-}
-
-static void
-replay_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- struct drr_begin *drrb = arg2;
- char *snapname;
- dsl_dataset_t *ds, *hds;
-
- snapname = strchr(drrb->drr_toname, '@') + 1;
-
- dsl_dataset_snapshot_sync(os, snapname, tx);
-
- /* set snapshot's creation time and guid */
- hds = os->os->os_dsl_dataset;
- VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool,
- hds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
- FTAG, &ds));
-
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_creation_time = drrb->drr_creation_time;
- ds->ds_phys->ds_guid = drrb->drr_toguid;
- ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
-
- dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG);
-
- dmu_buf_will_dirty(hds->ds_dbuf, tx);
- hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
-}
-
-static int
-restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, int *resid)
-{
- struct uio auio;
- struct iovec aiov;
- int error;
-
- aiov.iov_base = buf;
- aiov.iov_len = len;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_resid = len;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_offset = off;
- auio.uio_td = ra->td;
-#ifdef _KERNEL
- error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td);
-#else
- fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
- error = EOPNOTSUPP;
-#endif
- *resid = auio.uio_resid;
- return (error);
-}
-
-static void *
-restore_read(struct restorearg *ra, int len)
-{
- void *rv;
-
- /* some things will require 8-byte alignment, so everything must */
- ASSERT3U(len % 8, ==, 0);
-
- while (ra->buflen - ra->bufoff < len) {
- int resid;
- int leftover = ra->buflen - ra->bufoff;
-
- (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover);
-
- ra->err = restore_bytes(ra, (caddr_t)ra->buf + leftover,
- ra->bufsize - leftover, ra->voff, &resid);
-
- ra->voff += ra->bufsize - leftover - resid;
- ra->buflen = ra->bufsize - resid;
- ra->bufoff = 0;
- if (resid == ra->bufsize - leftover)
- ra->err = EINVAL;
- if (ra->err)
- return (NULL);
- /* Could compute checksum here? */
- }
-
- ASSERT3U(ra->bufoff % 8, ==, 0);
- ASSERT3U(ra->buflen - ra->bufoff, >=, len);
- rv = ra->buf + ra->bufoff;
- ra->bufoff += len;
- if (ra->byteswap)
- fletcher_4_incremental_byteswap(rv, len, &ra->zc);
- else
- fletcher_4_incremental_native(rv, len, &ra->zc);
- return (rv);
-}
-
-static void
-backup_byteswap(dmu_replay_record_t *drr)
-{
-#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
-#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
- drr->drr_type = BSWAP_32(drr->drr_type);
- switch (drr->drr_type) {
- case DRR_BEGIN:
- DO64(drr_begin.drr_magic);
- DO64(drr_begin.drr_version);
- DO64(drr_begin.drr_creation_time);
- DO32(drr_begin.drr_type);
- DO64(drr_begin.drr_toguid);
- DO64(drr_begin.drr_fromguid);
- break;
- case DRR_OBJECT:
- DO64(drr_object.drr_object);
- /* DO64(drr_object.drr_allocation_txg); */
- DO32(drr_object.drr_type);
- DO32(drr_object.drr_bonustype);
- DO32(drr_object.drr_blksz);
- DO32(drr_object.drr_bonuslen);
- break;
- case DRR_FREEOBJECTS:
- DO64(drr_freeobjects.drr_firstobj);
- DO64(drr_freeobjects.drr_numobjs);
- break;
- case DRR_WRITE:
- DO64(drr_write.drr_object);
- DO32(drr_write.drr_type);
- DO64(drr_write.drr_offset);
- DO64(drr_write.drr_length);
- break;
- case DRR_FREE:
- DO64(drr_free.drr_object);
- DO64(drr_free.drr_offset);
- DO64(drr_free.drr_length);
- break;
- case DRR_END:
- DO64(drr_end.drr_checksum.zc_word[0]);
- DO64(drr_end.drr_checksum.zc_word[1]);
- DO64(drr_end.drr_checksum.zc_word[2]);
- DO64(drr_end.drr_checksum.zc_word[3]);
- break;
- }
-#undef DO64
-#undef DO32
-}
-
-static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
-{
- int err;
- dmu_tx_t *tx;
-
- err = dmu_object_info(os, drro->drr_object, NULL);
-
- if (err != 0 && err != ENOENT)
- return (EINVAL);
-
- if (drro->drr_type == DMU_OT_NONE ||
- drro->drr_type >= DMU_OT_NUMTYPES ||
- drro->drr_bonustype >= DMU_OT_NUMTYPES ||
- drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS ||
- drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
- P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
- drro->drr_blksz < SPA_MINBLOCKSIZE ||
- drro->drr_blksz > SPA_MAXBLOCKSIZE ||
- drro->drr_bonuslen > DN_MAX_BONUSLEN) {
- return (EINVAL);
- }
-
- tx = dmu_tx_create(os);
-
- if (err == ENOENT) {
- /* currently free, want to be allocated */
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
- err = dmu_object_claim(os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen, tx);
- } else {
- /* currently allocated, want to be allocated */
- dmu_tx_hold_bonus(tx, drro->drr_object);
- /*
- * We may change blocksize, so need to
- * hold_write
- */
- dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
-
- err = dmu_object_reclaim(os, drro->drr_object,
- drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen, tx);
- }
- if (err) {
- dmu_tx_commit(tx);
- return (EINVAL);
- }
-
- dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
- dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
-
- if (drro->drr_bonuslen) {
- dmu_buf_t *db;
- void *data;
- VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
-
- ASSERT3U(db->db_size, ==, drro->drr_bonuslen);
- data = restore_read(ra, P2ROUNDUP(db->db_size, 8));
- if (data == NULL) {
- dmu_tx_commit(tx);
- return (ra->err);
- }
- bcopy(data, db->db_data, db->db_size);
- if (ra->byteswap) {
- dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
- drro->drr_bonuslen);
- }
- dmu_buf_rele(db, FTAG);
- }
- dmu_tx_commit(tx);
- return (0);
-}
-
-/* ARGSUSED */
-static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
- struct drr_freeobjects *drrfo)
-{
- uint64_t obj;
-
- if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
- return (EINVAL);
-
- for (obj = drrfo->drr_firstobj;
- obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
- (void) dmu_object_next(os, &obj, FALSE, 0)) {
- dmu_tx_t *tx;
- int err;
-
- if (dmu_object_info(os, obj, NULL) != 0)
- continue;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, obj);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
- err = dmu_object_free(os, obj, tx);
- dmu_tx_commit(tx);
- if (err && err != ENOENT)
- return (EINVAL);
- }
- return (0);
-}
-
-static int
-restore_write(struct restorearg *ra, objset_t *os,
- struct drr_write *drrw)
-{
- dmu_tx_t *tx;
- void *data;
- int err;
-
- if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
- drrw->drr_type >= DMU_OT_NUMTYPES)
- return (EINVAL);
-
- data = restore_read(ra, drrw->drr_length);
- if (data == NULL)
- return (ra->err);
-
- if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
- return (EINVAL);
-
- tx = dmu_tx_create(os);
-
- dmu_tx_hold_write(tx, drrw->drr_object,
- drrw->drr_offset, drrw->drr_length);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
- if (ra->byteswap)
- dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
- dmu_write(os, drrw->drr_object,
- drrw->drr_offset, drrw->drr_length, data, tx);
- dmu_tx_commit(tx);
- return (0);
-}
-
-/* ARGSUSED */
-static int
-restore_free(struct restorearg *ra, objset_t *os,
- struct drr_free *drrf)
-{
- dmu_tx_t *tx;
- int err;
-
- if (drrf->drr_length != -1ULL &&
- drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
- return (EINVAL);
-
- if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
- return (EINVAL);
-
- tx = dmu_tx_create(os);
-
- dmu_tx_hold_free(tx, drrf->drr_object,
- drrf->drr_offset, drrf->drr_length);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- dmu_tx_abort(tx);
- return (err);
- }
- err = dmu_free_range(os, drrf->drr_object,
- drrf->drr_offset, drrf->drr_length, tx);
- dmu_tx_commit(tx);
- return (err);
-}
-
-int
-dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
- boolean_t force, struct file *fp, uint64_t voffset)
-{
- kthread_t *td = curthread;
- struct restorearg ra;
- dmu_replay_record_t *drr;
- char *cp;
- objset_t *os = NULL;
- zio_cksum_t pzc;
-
- bzero(&ra, sizeof (ra));
- ra.td = td;
- ra.fp = fp;
- ra.voff = voffset;
- ra.bufsize = 1<<20;
- ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
-
- if (drrb->drr_magic == DMU_BACKUP_MAGIC) {
- ra.byteswap = FALSE;
- } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
- ra.byteswap = TRUE;
- } else {
- ra.err = EINVAL;
- goto out;
- }
-
- /*
- * NB: this assumes that struct drr_begin will be the largest in
- * dmu_replay_record_t's drr_u, and thus we don't need to pad it
- * with zeros to make it the same length as we wrote out.
- */
- ((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN;
- ((dmu_replay_record_t *)ra.buf)->drr_pad = 0;
- ((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb;
- if (ra.byteswap) {
- fletcher_4_incremental_byteswap(ra.buf,
- sizeof (dmu_replay_record_t), &ra.zc);
- } else {
- fletcher_4_incremental_native(ra.buf,
- sizeof (dmu_replay_record_t), &ra.zc);
- }
- (void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */
-
- if (ra.byteswap) {
- drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_version = BSWAP_64(drrb->drr_version);
- drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
- drrb->drr_type = BSWAP_32(drrb->drr_type);
- drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
- drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
- }
-
- ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
-
- if (drrb->drr_version != DMU_BACKUP_VERSION ||
- drrb->drr_type >= DMU_OST_NUMTYPES ||
- strchr(drrb->drr_toname, '@') == NULL) {
- ra.err = EINVAL;
- goto out;
- }
-
- /*
- * Process the begin in syncing context.
- */
- if (drrb->drr_fromguid) {
- /* incremental backup */
- dsl_dataset_t *ds = NULL;
-
- cp = strchr(tosnap, '@');
- *cp = '\0';
- ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds);
- *cp = '@';
- if (ra.err)
- goto out;
-
- /*
- * Only do the rollback if the most recent snapshot
- * matches the incremental source
- */
- if (force) {
- if (ds->ds_prev == NULL ||
- ds->ds_prev->ds_phys->ds_guid !=
- drrb->drr_fromguid) {
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- kmem_free(ra.buf, ra.bufsize);
- return (ENODEV);
- }
- (void) dsl_dataset_rollback(ds);
- }
- ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- replay_incremental_check, replay_incremental_sync,
- ds, drrb, 1);
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- } else {
- /* full backup */
- dsl_dir_t *dd = NULL;
- const char *tail;
-
- /* can't restore full backup into topmost fs, for now */
- if (strrchr(drrb->drr_toname, '/') == NULL) {
- ra.err = EINVAL;
- goto out;
- }
-
- cp = strchr(tosnap, '@');
- *cp = '\0';
- ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail);
- *cp = '@';
- if (ra.err)
- goto out;
- if (tail == NULL) {
- ra.err = EEXIST;
- goto out;
- }
-
- ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check,
- replay_full_sync, dd, drrb, 5);
- dsl_dir_close(dd, FTAG);
- }
- if (ra.err)
- goto out;
-
- /*
- * Open the objset we are modifying.
- */
-
- cp = strchr(tosnap, '@');
- *cp = '\0';
- ra.err = dmu_objset_open(tosnap, DMU_OST_ANY,
- DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os);
- *cp = '@';
- ASSERT3U(ra.err, ==, 0);
-
- /*
- * Read records and process them.
- */
- pzc = ra.zc;
- while (ra.err == 0 &&
- NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
- if (SIGPENDING(td)) {
- ra.err = EINTR;
- goto out;
- }
-
- if (ra.byteswap)
- backup_byteswap(drr);
-
- switch (drr->drr_type) {
- case DRR_OBJECT:
- {
- /*
- * We need to make a copy of the record header,
- * because restore_{object,write} may need to
- * restore_read(), which will invalidate drr.
- */
- struct drr_object drro = drr->drr_u.drr_object;
- ra.err = restore_object(&ra, os, &drro);
- break;
- }
- case DRR_FREEOBJECTS:
- {
- struct drr_freeobjects drrfo =
- drr->drr_u.drr_freeobjects;
- ra.err = restore_freeobjects(&ra, os, &drrfo);
- break;
- }
- case DRR_WRITE:
- {
- struct drr_write drrw = drr->drr_u.drr_write;
- ra.err = restore_write(&ra, os, &drrw);
- break;
- }
- case DRR_FREE:
- {
- struct drr_free drrf = drr->drr_u.drr_free;
- ra.err = restore_free(&ra, os, &drrf);
- break;
- }
- case DRR_END:
- {
- struct drr_end drre = drr->drr_u.drr_end;
- /*
- * We compare against the *previous* checksum
- * value, because the stored checksum is of
- * everything before the DRR_END record.
- */
- if (drre.drr_checksum.zc_word[0] != 0 &&
- !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pzc)) {
- ra.err = ECKSUM;
- goto out;
- }
-
- ra.err = dsl_sync_task_do(dmu_objset_ds(os)->
- ds_dir->dd_pool, replay_end_check, replay_end_sync,
- os, drrb, 3);
- goto out;
- }
- default:
- ra.err = EINVAL;
- goto out;
- }
- pzc = ra.zc;
- }
-
-out:
- if (os)
- dmu_objset_close(os);
-
- /*
- * Make sure we don't rollback/destroy unless we actually
- * processed the begin properly. 'os' will only be set if this
- * is the case.
- */
- if (ra.err && os && tosnap && strchr(tosnap, '@')) {
- /*
- * rollback or destroy what we created, so we don't
- * leave it in the restoring state.
- */
- dsl_dataset_t *ds;
- int err;
-
- cp = strchr(tosnap, '@');
- *cp = '\0';
- err = dsl_dataset_open(tosnap,
- DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT,
- FTAG, &ds);
- if (err == 0) {
- txg_wait_synced(ds->ds_dir->dd_pool, 0);
- if (drrb->drr_fromguid) {
- /* incremental: rollback to most recent snap */
- (void) dsl_dataset_rollback(ds);
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- } else {
- /* full: destroy whole fs */
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- (void) dsl_dataset_destroy(tosnap);
- }
- }
- *cp = '@';
- }
-
- kmem_free(ra.buf, ra.bufsize);
- if (sizep)
- *sizep = ra.voff;
- return (ra.err);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
deleted file mode 100644
index 3d2bc3e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ /dev/null
@@ -1,888 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_pool.h>
-#include <sys/dnode.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu_impl.h>
-
-#define BP_SPAN_SHIFT(level, width) ((level) * (width))
-
-#define BP_EQUAL(b1, b2) \
- (DVA_EQUAL(BP_IDENTITY(b1), BP_IDENTITY(b2)) && \
- (b1)->blk_birth == (b2)->blk_birth)
-
-/*
- * Compare two bookmarks.
- *
- * For ADVANCE_PRE, the visitation order is:
- *
- * objset 0, 1, 2, ..., ZB_MAXOBJSET.
- * object 0, 1, 2, ..., ZB_MAXOBJECT.
- * blkoff 0, 1, 2, ...
- * level ZB_MAXLEVEL, ..., 2, 1, 0.
- *
- * where blkoff = blkid << BP_SPAN_SHIFT(level, width), and thus a valid
- * ordering vector is:
- *
- * < objset, object, blkoff, -level >
- *
- * For ADVANCE_POST, the starting offsets aren't sequential but ending
- * offsets [blkoff = (blkid + 1) << BP_SPAN_SHIFT(level, width)] are.
- * The visitation order is:
- *
- * objset 1, 2, ..., ZB_MAXOBJSET, 0.
- * object 1, 2, ..., ZB_MAXOBJECT, 0.
- * blkoff 1, 2, ...
- * level 0, 1, 2, ..., ZB_MAXLEVEL.
- *
- * and thus a valid ordering vector is:
- *
- * < objset - 1, object - 1, blkoff, level >
- *
- * Both orderings can be expressed as:
- *
- * < objset + bias, object + bias, blkoff, level ^ bias >
- *
- * where 'bias' is either 0 or -1 (for ADVANCE_PRE or ADVANCE_POST)
- * and 'blkoff' is (blkid - bias) << BP_SPAN_SHIFT(level, wshift).
- *
- * Special case: an objset's osphys is represented as level -1 of object 0.
- * It is always either the very first or very last block we visit in an objset.
- * Therefore, if either bookmark's level is -1, level alone determines order.
- */
-static int
-compare_bookmark(zbookmark_t *szb, zbookmark_t *ezb, dnode_phys_t *dnp,
- int advance)
-{
- int bias = (advance & ADVANCE_PRE) ? 0 : -1;
- uint64_t sblkoff, eblkoff;
- int slevel, elevel, wshift;
-
- if (szb->zb_objset + bias < ezb->zb_objset + bias)
- return (-1);
-
- if (szb->zb_objset + bias > ezb->zb_objset + bias)
- return (1);
-
- slevel = szb->zb_level;
- elevel = ezb->zb_level;
-
- if ((slevel | elevel) < 0)
- return ((slevel ^ bias) - (elevel ^ bias));
-
- if (szb->zb_object + bias < ezb->zb_object + bias)
- return (-1);
-
- if (szb->zb_object + bias > ezb->zb_object + bias)
- return (1);
-
- if (dnp == NULL)
- return (0);
-
- wshift = dnp->dn_indblkshift - SPA_BLKPTRSHIFT;
-
- sblkoff = (szb->zb_blkid - bias) << BP_SPAN_SHIFT(slevel, wshift);
- eblkoff = (ezb->zb_blkid - bias) << BP_SPAN_SHIFT(elevel, wshift);
-
- if (sblkoff < eblkoff)
- return (-1);
-
- if (sblkoff > eblkoff)
- return (1);
-
- return ((elevel ^ bias) - (slevel ^ bias));
-}
-
-#define SET_BOOKMARK(zb, objset, object, level, blkid) \
-{ \
- (zb)->zb_objset = objset; \
- (zb)->zb_object = object; \
- (zb)->zb_level = level; \
- (zb)->zb_blkid = blkid; \
-}
-
-#define SET_BOOKMARK_LB(zb, level, blkid) \
-{ \
- (zb)->zb_level = level; \
- (zb)->zb_blkid = blkid; \
-}
-
-static int
-advance_objset(zseg_t *zseg, uint64_t objset, int advance)
-{
- zbookmark_t *zb = &zseg->seg_start;
-
- if (advance & ADVANCE_PRE) {
- if (objset >= ZB_MAXOBJSET)
- return (ERANGE);
- SET_BOOKMARK(zb, objset, 0, -1, 0);
- } else {
- if (objset >= ZB_MAXOBJSET)
- objset = 0;
- SET_BOOKMARK(zb, objset, 1, 0, 0);
- }
-
- if (compare_bookmark(zb, &zseg->seg_end, NULL, advance) > 0)
- return (ERANGE);
-
- return (EAGAIN);
-}
-
-static int
-advance_object(zseg_t *zseg, uint64_t object, int advance)
-{
- zbookmark_t *zb = &zseg->seg_start;
-
- if (advance & ADVANCE_PRE) {
- if (object >= ZB_MAXOBJECT) {
- SET_BOOKMARK(zb, zb->zb_objset + 1, 0, -1, 0);
- } else {
- SET_BOOKMARK(zb, zb->zb_objset, object, ZB_MAXLEVEL, 0);
- }
- } else {
- if (zb->zb_object == 0) {
- SET_BOOKMARK(zb, zb->zb_objset, 0, -1, 0);
- } else {
- if (object >= ZB_MAXOBJECT)
- object = 0;
- SET_BOOKMARK(zb, zb->zb_objset, object, 0, 0);
- }
- }
-
- if (compare_bookmark(zb, &zseg->seg_end, NULL, advance) > 0)
- return (ERANGE);
-
- return (EAGAIN);
-}
-
-static int
-advance_from_osphys(zseg_t *zseg, int advance)
-{
- zbookmark_t *zb = &zseg->seg_start;
-
- ASSERT(zb->zb_object == 0);
- ASSERT(zb->zb_level == -1);
- ASSERT(zb->zb_blkid == 0);
-
- if (advance & ADVANCE_PRE) {
- SET_BOOKMARK_LB(zb, ZB_MAXLEVEL, 0);
- } else {
- if (zb->zb_objset == 0)
- return (ERANGE);
- SET_BOOKMARK(zb, zb->zb_objset + 1, 1, 0, 0);
- }
-
- if (compare_bookmark(zb, &zseg->seg_end, NULL, advance) > 0)
- return (ERANGE);
-
- return (EAGAIN);
-}
-
-static int
-advance_block(zseg_t *zseg, dnode_phys_t *dnp, int rc, int advance)
-{
- zbookmark_t *zb = &zseg->seg_start;
- int wshift = dnp->dn_indblkshift - SPA_BLKPTRSHIFT;
- int maxlevel = dnp->dn_nlevels - 1;
- int level = zb->zb_level;
- uint64_t blkid = zb->zb_blkid;
-
- if (advance & ADVANCE_PRE) {
- if (level > 0 && rc == 0) {
- level--;
- blkid <<= wshift;
- } else {
- blkid++;
-
- if ((blkid << BP_SPAN_SHIFT(level, wshift)) >
- dnp->dn_maxblkid)
- return (ERANGE);
-
- while (level < maxlevel) {
- if (P2PHASE(blkid, 1ULL << wshift))
- break;
- blkid >>= wshift;
- level++;
- }
- }
- } else {
- if (level >= maxlevel || P2PHASE(blkid + 1, 1ULL << wshift)) {
- blkid = (blkid + 1) << BP_SPAN_SHIFT(level, wshift);
- level = 0;
- } else {
- blkid >>= wshift;
- level++;
- }
-
- while ((blkid << BP_SPAN_SHIFT(level, wshift)) >
- dnp->dn_maxblkid) {
- if (level == maxlevel)
- return (ERANGE);
- blkid >>= wshift;
- level++;
- }
- }
- SET_BOOKMARK_LB(zb, level, blkid);
-
- if (compare_bookmark(zb, &zseg->seg_end, dnp, advance) > 0)
- return (ERANGE);
-
- return (EAGAIN);
-}
-
-static int
-traverse_callback(traverse_handle_t *th, zseg_t *zseg, traverse_blk_cache_t *bc)
-{
- /*
- * Before we issue the callback, prune against maxtxg.
- *
- * We prune against mintxg before we get here because it's a big win.
- * If a given block was born in txg 37, then we know that the entire
- * subtree below that block must have been born in txg 37 or earlier.
- * We can therefore lop off huge branches of the tree as we go.
- *
- * There's no corresponding optimization for maxtxg because knowing
- * that bp->blk_birth >= maxtxg doesn't imply anything about the bp's
- * children. In fact, the copy-on-write design of ZFS ensures that
- * top-level blocks will pretty much always be new.
- *
- * Therefore, in the name of simplicity we don't prune against
- * maxtxg until the last possible moment -- that being right now.
- */
- if (bc->bc_errno == 0 && bc->bc_blkptr.blk_birth >= zseg->seg_maxtxg)
- return (0);
-
- /*
- * Debugging: verify that the order we visit things agrees with the
- * order defined by compare_bookmark(). We don't check this for
- * log blocks because there's no defined ordering for them; they're
- * always visited (or not) as part of visiting the objset_phys_t.
- */
- if (bc->bc_errno == 0 && bc != &th->th_zil_cache) {
- zbookmark_t *zb = &bc->bc_bookmark;
- zbookmark_t *szb = &zseg->seg_start;
- zbookmark_t *ezb = &zseg->seg_end;
- zbookmark_t *lzb = &th->th_lastcb;
- dnode_phys_t *dnp = bc->bc_dnode;
-
- ASSERT(compare_bookmark(zb, ezb, dnp, th->th_advance) <= 0);
- ASSERT(compare_bookmark(zb, szb, dnp, th->th_advance) == 0);
- ASSERT(compare_bookmark(lzb, zb, dnp, th->th_advance) < 0 ||
- lzb->zb_level == ZB_NO_LEVEL);
- *lzb = *zb;
- }
-
- th->th_callbacks++;
- return (th->th_func(bc, th->th_spa, th->th_arg));
-}
-
-static int
-traverse_read(traverse_handle_t *th, traverse_blk_cache_t *bc, blkptr_t *bp,
- dnode_phys_t *dnp)
-{
- zbookmark_t *zb = &bc->bc_bookmark;
- int error;
-
- th->th_hits++;
-
- bc->bc_dnode = dnp;
- bc->bc_errno = 0;
-
- if (BP_EQUAL(&bc->bc_blkptr, bp))
- return (0);
-
- bc->bc_blkptr = *bp;
-
- if (bc->bc_data == NULL)
- return (0);
-
- if (BP_IS_HOLE(bp)) {
- ASSERT(th->th_advance & ADVANCE_HOLES);
- return (0);
- }
-
- if (compare_bookmark(zb, &th->th_noread, dnp, 0) == 0) {
- error = EIO;
- } else if (arc_tryread(th->th_spa, bp, bc->bc_data) == 0) {
- error = 0;
- th->th_arc_hits++;
- } else {
- error = zio_wait(zio_read(NULL, th->th_spa, bp, bc->bc_data,
- BP_GET_LSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_READ,
- th->th_zio_flags | ZIO_FLAG_DONT_CACHE, zb));
-
- if (BP_SHOULD_BYTESWAP(bp) && error == 0)
- (zb->zb_level > 0 ? byteswap_uint64_array :
- dmu_ot[BP_GET_TYPE(bp)].ot_byteswap)(bc->bc_data,
- BP_GET_LSIZE(bp));
- th->th_reads++;
- }
-
- if (error) {
- bc->bc_errno = error;
- error = traverse_callback(th, NULL, bc);
- ASSERT(error == EAGAIN || error == EINTR || error == ERESTART);
- bc->bc_blkptr.blk_birth = -1ULL;
- }
-
- dprintf("cache %02x error %d <%llu, %llu, %d, %llx>\n",
- bc - &th->th_cache[0][0], error,
- zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid);
-
- return (error);
-}
-
-static int
-find_block(traverse_handle_t *th, zseg_t *zseg, dnode_phys_t *dnp, int depth)
-{
- zbookmark_t *zb = &zseg->seg_start;
- traverse_blk_cache_t *bc;
- blkptr_t *bp = dnp->dn_blkptr;
- int i, first, level;
- int nbp = dnp->dn_nblkptr;
- int minlevel = zb->zb_level;
- int maxlevel = dnp->dn_nlevels - 1;
- int wshift = dnp->dn_indblkshift - SPA_BLKPTRSHIFT;
- int bp_shift = BP_SPAN_SHIFT(maxlevel - minlevel, wshift);
- uint64_t blkid = zb->zb_blkid >> bp_shift;
- int do_holes = (th->th_advance & ADVANCE_HOLES) && depth == ZB_DN_CACHE;
- int rc;
-
- if (minlevel > maxlevel || blkid >= nbp)
- return (ERANGE);
-
- for (level = maxlevel; level >= minlevel; level--) {
- first = P2PHASE(blkid, 1ULL << wshift);
-
- for (i = first; i < nbp; i++)
- if (bp[i].blk_birth > zseg->seg_mintxg ||
- BP_IS_HOLE(&bp[i]) && do_holes)
- break;
-
- if (i != first) {
- i--;
- SET_BOOKMARK_LB(zb, level, blkid + (i - first));
- return (ENOTBLK);
- }
-
- bc = &th->th_cache[depth][level];
-
- SET_BOOKMARK(&bc->bc_bookmark, zb->zb_objset, zb->zb_object,
- level, blkid);
-
- if (rc = traverse_read(th, bc, bp + i, dnp)) {
- if (rc != EAGAIN) {
- SET_BOOKMARK_LB(zb, level, blkid);
- }
- return (rc);
- }
-
- if (BP_IS_HOLE(&bp[i])) {
- SET_BOOKMARK_LB(zb, level, blkid);
- th->th_lastcb.zb_level = ZB_NO_LEVEL;
- return (0);
- }
-
- nbp = 1 << wshift;
- bp = bc->bc_data;
- bp_shift -= wshift;
- blkid = zb->zb_blkid >> bp_shift;
- }
-
- return (0);
-}
-
-static int
-get_dnode(traverse_handle_t *th, uint64_t objset, dnode_phys_t *mdn,
- uint64_t *objectp, dnode_phys_t **dnpp, uint64_t txg, int type, int depth)
-{
- zseg_t zseg;
- zbookmark_t *zb = &zseg.seg_start;
- uint64_t object = *objectp;
- int i, rc;
-
- SET_BOOKMARK(zb, objset, 0, 0, object / DNODES_PER_BLOCK);
- SET_BOOKMARK(&zseg.seg_end, objset, 0, 0, ZB_MAXBLKID);
-
- zseg.seg_mintxg = txg;
- zseg.seg_maxtxg = -1ULL;
-
- for (;;) {
- rc = find_block(th, &zseg, mdn, depth);
-
- if (rc == EAGAIN || rc == EINTR || rc == ERANGE)
- break;
-
- if (rc == 0 && zb->zb_level == 0) {
- dnode_phys_t *dnp = th->th_cache[depth][0].bc_data;
- for (i = 0; i < DNODES_PER_BLOCK; i++) {
- object = (zb->zb_blkid * DNODES_PER_BLOCK) + i;
- if (object >= *objectp &&
- dnp[i].dn_type != DMU_OT_NONE &&
- (type == -1 || dnp[i].dn_type == type)) {
- *objectp = object;
- *dnpp = &dnp[i];
- return (0);
- }
- }
- }
-
- rc = advance_block(&zseg, mdn, rc, ADVANCE_PRE);
-
- if (rc == ERANGE)
- break;
- }
-
- if (rc == ERANGE)
- *objectp = ZB_MAXOBJECT;
-
- return (rc);
-}
-
-/* ARGSUSED */
-static void
-traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
-{
- traverse_handle_t *th = arg;
- traverse_blk_cache_t *bc = &th->th_zil_cache;
- zbookmark_t *zb = &bc->bc_bookmark;
- zseg_t *zseg = list_head(&th->th_seglist);
-
- if (bp->blk_birth <= zseg->seg_mintxg)
- return;
-
- if (claim_txg != 0 || bp->blk_birth < spa_first_txg(th->th_spa)) {
- zb->zb_object = 0;
- zb->zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
- bc->bc_blkptr = *bp;
- (void) traverse_callback(th, zseg, bc);
- }
-}
-
-/* ARGSUSED */
-static void
-traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
-{
- traverse_handle_t *th = arg;
- traverse_blk_cache_t *bc = &th->th_zil_cache;
- zbookmark_t *zb = &bc->bc_bookmark;
- zseg_t *zseg = list_head(&th->th_seglist);
-
- if (lrc->lrc_txtype == TX_WRITE) {
- lr_write_t *lr = (lr_write_t *)lrc;
- blkptr_t *bp = &lr->lr_blkptr;
-
- if (bp->blk_birth <= zseg->seg_mintxg)
- return;
-
- if (claim_txg != 0 && bp->blk_birth >= claim_txg) {
- zb->zb_object = lr->lr_foid;
- zb->zb_blkid = lr->lr_offset / BP_GET_LSIZE(bp);
- bc->bc_blkptr = *bp;
- (void) traverse_callback(th, zseg, bc);
- }
- }
-}
-
-static void
-traverse_zil(traverse_handle_t *th, traverse_blk_cache_t *bc)
-{
- spa_t *spa = th->th_spa;
- dsl_pool_t *dp = spa_get_dsl(spa);
- objset_phys_t *osphys = bc->bc_data;
- zil_header_t *zh = &osphys->os_zil_header;
- uint64_t claim_txg = zh->zh_claim_txg;
- zilog_t *zilog;
-
- ASSERT(bc == &th->th_cache[ZB_MDN_CACHE][ZB_MAXLEVEL - 1]);
- ASSERT(bc->bc_bookmark.zb_level == -1);
-
- /*
- * We only want to visit blocks that have been claimed but not yet
- * replayed (or, in read-only mode, blocks that *would* be claimed).
- */
- if (claim_txg == 0 && (spa_mode & FWRITE))
- return;
-
- th->th_zil_cache.bc_bookmark = bc->bc_bookmark;
-
- zilog = zil_alloc(dp->dp_meta_objset, zh);
-
- (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, th,
- claim_txg);
-
- zil_free(zilog);
-}
-
-static int
-traverse_segment(traverse_handle_t *th, zseg_t *zseg, blkptr_t *mosbp)
-{
- zbookmark_t *zb = &zseg->seg_start;
- traverse_blk_cache_t *bc;
- dnode_phys_t *dn, *dn_tmp;
- int worklimit = 100;
- int rc;
-
- dprintf("<%llu, %llu, %d, %llx>\n",
- zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid);
-
- bc = &th->th_cache[ZB_MOS_CACHE][ZB_MAXLEVEL - 1];
- dn = &((objset_phys_t *)bc->bc_data)->os_meta_dnode;
-
- SET_BOOKMARK(&bc->bc_bookmark, 0, 0, -1, 0);
-
- rc = traverse_read(th, bc, mosbp, dn);
-
- if (rc) /* If we get ERESTART, we've got nowhere left to go */
- return (rc == ERESTART ? EINTR : rc);
-
- ASSERT(dn->dn_nlevels < ZB_MAXLEVEL);
-
- if (zb->zb_objset != 0) {
- uint64_t objset = zb->zb_objset;
- dsl_dataset_phys_t *dsp;
-
- rc = get_dnode(th, 0, dn, &objset, &dn_tmp, 0,
- DMU_OT_DSL_DATASET, ZB_MOS_CACHE);
-
- if (objset != zb->zb_objset)
- rc = advance_objset(zseg, objset, th->th_advance);
-
- if (rc != 0)
- return (rc);
-
- dsp = DN_BONUS(dn_tmp);
-
- bc = &th->th_cache[ZB_MDN_CACHE][ZB_MAXLEVEL - 1];
- dn = &((objset_phys_t *)bc->bc_data)->os_meta_dnode;
-
- SET_BOOKMARK(&bc->bc_bookmark, objset, 0, -1, 0);
-
- /*
- * If we're traversing an open snapshot, we know that it
- * can't be deleted (because it's open) and it can't change
- * (because it's a snapshot). Therefore, once we've gotten
- * from the uberblock down to the snapshot's objset_phys_t,
- * we no longer need to synchronize with spa_sync(); we're
- * traversing a completely static block tree from here on.
- */
- if (th->th_advance & ADVANCE_NOLOCK) {
- ASSERT(th->th_locked);
- rw_exit(spa_traverse_rwlock(th->th_spa));
- th->th_locked = 0;
- }
-
- rc = traverse_read(th, bc, &dsp->ds_bp, dn);
-
- if (rc != 0) {
- if (rc == ERESTART)
- rc = advance_objset(zseg, zb->zb_objset + 1,
- th->th_advance);
- return (rc);
- }
-
- if (th->th_advance & ADVANCE_PRUNE)
- zseg->seg_mintxg =
- MAX(zseg->seg_mintxg, dsp->ds_prev_snap_txg);
- }
-
- if (zb->zb_level == -1) {
- ASSERT(zb->zb_object == 0);
- ASSERT(zb->zb_blkid == 0);
- ASSERT(BP_GET_TYPE(&bc->bc_blkptr) == DMU_OT_OBJSET);
-
- if (bc->bc_blkptr.blk_birth > zseg->seg_mintxg) {
- rc = traverse_callback(th, zseg, bc);
- if (rc) {
- ASSERT(rc == EINTR);
- return (rc);
- }
- if ((th->th_advance & ADVANCE_ZIL) &&
- zb->zb_objset != 0)
- traverse_zil(th, bc);
- }
-
- return (advance_from_osphys(zseg, th->th_advance));
- }
-
- if (zb->zb_object != 0) {
- uint64_t object = zb->zb_object;
-
- rc = get_dnode(th, zb->zb_objset, dn, &object, &dn_tmp,
- zseg->seg_mintxg, -1, ZB_MDN_CACHE);
-
- if (object != zb->zb_object)
- rc = advance_object(zseg, object, th->th_advance);
-
- if (rc != 0)
- return (rc);
-
- dn = dn_tmp;
- }
-
- if (zb->zb_level == ZB_MAXLEVEL)
- zb->zb_level = dn->dn_nlevels - 1;
-
- for (;;) {
- rc = find_block(th, zseg, dn, ZB_DN_CACHE);
-
- if (rc == EAGAIN || rc == EINTR || rc == ERANGE)
- break;
-
- if (rc == 0) {
- bc = &th->th_cache[ZB_DN_CACHE][zb->zb_level];
- ASSERT(bc->bc_dnode == dn);
- ASSERT(bc->bc_blkptr.blk_birth <= mosbp->blk_birth);
- rc = traverse_callback(th, zseg, bc);
- if (rc) {
- ASSERT(rc == EINTR);
- return (rc);
- }
- if (BP_IS_HOLE(&bc->bc_blkptr)) {
- ASSERT(th->th_advance & ADVANCE_HOLES);
- rc = ENOTBLK;
- }
- }
-
- rc = advance_block(zseg, dn, rc, th->th_advance);
-
- if (rc == ERANGE)
- break;
-
- /*
- * Give spa_sync() a chance to run.
- */
- if (th->th_locked && spa_traverse_wanted(th->th_spa)) {
- th->th_syncs++;
- return (EAGAIN);
- }
-
- if (--worklimit == 0)
- return (EAGAIN);
- }
-
- if (rc == ERANGE)
- rc = advance_object(zseg, zb->zb_object + 1, th->th_advance);
-
- return (rc);
-}
-
-/*
- * It is the caller's responsibility to ensure that the dsl_dataset_t
- * doesn't go away during traversal.
- */
-int
-traverse_dsl_dataset(dsl_dataset_t *ds, uint64_t txg_start, int advance,
- blkptr_cb_t func, void *arg)
-{
- spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
- traverse_handle_t *th;
- int err;
-
- th = traverse_init(spa, func, arg, advance, ZIO_FLAG_MUSTSUCCEED);
-
- traverse_add_objset(th, txg_start, -1ULL, ds->ds_object);
-
- while ((err = traverse_more(th)) == EAGAIN)
- continue;
-
- traverse_fini(th);
- return (err);
-}
-
-int
-traverse_more(traverse_handle_t *th)
-{
- zseg_t *zseg = list_head(&th->th_seglist);
- uint64_t save_txg; /* XXX won't be necessary with real itinerary */
- krwlock_t *rw = spa_traverse_rwlock(th->th_spa);
- blkptr_t *mosbp = spa_get_rootblkptr(th->th_spa);
- int rc;
-
- if (zseg == NULL)
- return (0);
-
- th->th_restarts++;
-
- save_txg = zseg->seg_mintxg;
-
- rw_enter(rw, RW_READER);
- th->th_locked = 1;
-
- rc = traverse_segment(th, zseg, mosbp);
- ASSERT(rc == ERANGE || rc == EAGAIN || rc == EINTR);
-
- if (th->th_locked)
- rw_exit(rw);
- th->th_locked = 0;
-
- zseg->seg_mintxg = save_txg;
-
- if (rc == ERANGE) {
- list_remove(&th->th_seglist, zseg);
- kmem_free(zseg, sizeof (*zseg));
- return (EAGAIN);
- }
-
- return (rc);
-}
-
-/*
- * Note: (mintxg, maxtxg) is an open interval; mintxg and maxtxg themselves
- * are not included. The blocks covered by this segment will all have
- * mintxg < birth < maxtxg.
- */
-static void
-traverse_add_segment(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg,
- uint64_t sobjset, uint64_t sobject, int slevel, uint64_t sblkid,
- uint64_t eobjset, uint64_t eobject, int elevel, uint64_t eblkid)
-{
- zseg_t *zseg;
-
- zseg = kmem_alloc(sizeof (zseg_t), KM_SLEEP);
-
- zseg->seg_mintxg = mintxg;
- zseg->seg_maxtxg = maxtxg;
-
- zseg->seg_start.zb_objset = sobjset;
- zseg->seg_start.zb_object = sobject;
- zseg->seg_start.zb_level = slevel;
- zseg->seg_start.zb_blkid = sblkid;
-
- zseg->seg_end.zb_objset = eobjset;
- zseg->seg_end.zb_object = eobject;
- zseg->seg_end.zb_level = elevel;
- zseg->seg_end.zb_blkid = eblkid;
-
- list_insert_tail(&th->th_seglist, zseg);
-}
-
-void
-traverse_add_dnode(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg,
- uint64_t objset, uint64_t object)
-{
- if (th->th_advance & ADVANCE_PRE)
- traverse_add_segment(th, mintxg, maxtxg,
- objset, object, ZB_MAXLEVEL, 0,
- objset, object, 0, ZB_MAXBLKID);
- else
- traverse_add_segment(th, mintxg, maxtxg,
- objset, object, 0, 0,
- objset, object, 0, ZB_MAXBLKID);
-}
-
-void
-traverse_add_objset(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg,
- uint64_t objset)
-{
- if (th->th_advance & ADVANCE_PRE)
- traverse_add_segment(th, mintxg, maxtxg,
- objset, 0, -1, 0,
- objset, ZB_MAXOBJECT, 0, ZB_MAXBLKID);
- else
- traverse_add_segment(th, mintxg, maxtxg,
- objset, 1, 0, 0,
- objset, 0, -1, 0);
-}
-
-void
-traverse_add_pool(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg)
-{
- if (th->th_advance & ADVANCE_PRE)
- traverse_add_segment(th, mintxg, maxtxg,
- 0, 0, -1, 0,
- ZB_MAXOBJSET, ZB_MAXOBJECT, 0, ZB_MAXBLKID);
- else
- traverse_add_segment(th, mintxg, maxtxg,
- 1, 1, 0, 0,
- 0, 0, -1, 0);
-}
-
-traverse_handle_t *
-traverse_init(spa_t *spa, blkptr_cb_t func, void *arg, int advance,
- int zio_flags)
-{
- traverse_handle_t *th;
- int d, l;
-
- th = kmem_zalloc(sizeof (*th), KM_SLEEP);
-
- th->th_spa = spa;
- th->th_func = func;
- th->th_arg = arg;
- th->th_advance = advance;
- th->th_lastcb.zb_level = ZB_NO_LEVEL;
- th->th_noread.zb_level = ZB_NO_LEVEL;
- th->th_zio_flags = zio_flags;
-
- list_create(&th->th_seglist, sizeof (zseg_t),
- offsetof(zseg_t, seg_node));
-
- for (d = 0; d < ZB_DEPTH; d++) {
- for (l = 0; l < ZB_MAXLEVEL; l++) {
- if ((advance & ADVANCE_DATA) ||
- l != 0 || d != ZB_DN_CACHE)
- th->th_cache[d][l].bc_data =
- zio_buf_alloc(SPA_MAXBLOCKSIZE);
- }
- }
-
- return (th);
-}
-
-void
-traverse_fini(traverse_handle_t *th)
-{
- int d, l;
- zseg_t *zseg;
-
- for (d = 0; d < ZB_DEPTH; d++)
- for (l = 0; l < ZB_MAXLEVEL; l++)
- if (th->th_cache[d][l].bc_data != NULL)
- zio_buf_free(th->th_cache[d][l].bc_data,
- SPA_MAXBLOCKSIZE);
-
- while ((zseg = list_head(&th->th_seglist)) != NULL) {
- list_remove(&th->th_seglist, zseg);
- kmem_free(zseg, sizeof (*zseg));
- }
-
- list_destroy(&th->th_seglist);
-
- dprintf("%llu hit, %llu ARC, %llu IO, %llu cb, %llu sync, %llu again\n",
- th->th_hits, th->th_arc_hits, th->th_reads, th->th_callbacks,
- th->th_syncs, th->th_restarts);
-
- kmem_free(th, sizeof (*th));
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
deleted file mode 100644
index 13fd8d4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
+++ /dev/null
@@ -1,992 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_impl.h>
-#include <sys/dbuf.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
-#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
-#include <sys/dsl_pool.h>
-#include <sys/zap_impl.h> /* for fzap_default_block_shift */
-#include <sys/spa.h>
-#include <sys/zfs_context.h>
-
-typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
- uint64_t arg1, uint64_t arg2);
-
-
-dmu_tx_t *
-dmu_tx_create_dd(dsl_dir_t *dd)
-{
- dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
- tx->tx_dir = dd;
- if (dd)
- tx->tx_pool = dd->dd_pool;
- list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
- offsetof(dmu_tx_hold_t, txh_node));
-#ifdef ZFS_DEBUG
- refcount_create(&tx->tx_space_written);
- refcount_create(&tx->tx_space_freed);
-#endif
- return (tx);
-}
-
-dmu_tx_t *
-dmu_tx_create(objset_t *os)
-{
- dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
- tx->tx_objset = os;
- tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
- return (tx);
-}
-
-dmu_tx_t *
-dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
-{
- dmu_tx_t *tx = dmu_tx_create_dd(NULL);
-
- ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
- tx->tx_pool = dp;
- tx->tx_txg = txg;
- tx->tx_anyobj = TRUE;
-
- return (tx);
-}
-
-int
-dmu_tx_is_syncing(dmu_tx_t *tx)
-{
- return (tx->tx_anyobj);
-}
-
-int
-dmu_tx_private_ok(dmu_tx_t *tx)
-{
- return (tx->tx_anyobj);
-}
-
-static dmu_tx_hold_t *
-dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
- enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
-{
- dmu_tx_hold_t *txh;
- dnode_t *dn = NULL;
- int err;
-
- if (object != DMU_NEW_OBJECT) {
- err = dnode_hold(os->os, object, tx, &dn);
- if (err) {
- tx->tx_err = err;
- return (NULL);
- }
-
- if (err == 0 && tx->tx_txg != 0) {
- mutex_enter(&dn->dn_mtx);
- /*
- * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
- * problem, but there's no way for it to happen (for
- * now, at least).
- */
- ASSERT(dn->dn_assigned_txg == 0);
- dn->dn_assigned_txg = tx->tx_txg;
- (void) refcount_add(&dn->dn_tx_holds, tx);
- mutex_exit(&dn->dn_mtx);
- }
- }
-
- txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
- txh->txh_tx = tx;
- txh->txh_dnode = dn;
-#ifdef ZFS_DEBUG
- txh->txh_type = type;
- txh->txh_arg1 = arg1;
- txh->txh_arg2 = arg2;
-#endif
- list_insert_tail(&tx->tx_holds, txh);
-
- return (txh);
-}
-
-void
-dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
-{
- /*
- * If we're syncing, they can manipulate any object anyhow, and
- * the hold on the dnode_t can cause problems.
- */
- if (!dmu_tx_is_syncing(tx)) {
- (void) dmu_tx_hold_object_impl(tx, os,
- object, THT_NEWOBJECT, 0, 0);
- }
-}
-
-static int
-dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
-{
- int err;
- dmu_buf_impl_t *db;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- db = dbuf_hold_level(dn, level, blkid, FTAG);
- rw_exit(&dn->dn_struct_rwlock);
- if (db == NULL)
- return (EIO);
- err = dbuf_read(db, zio, DB_RF_CANFAIL);
- dbuf_rele(db, FTAG);
- return (err);
-}
-
-/* ARGSUSED */
-static void
-dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
-{
- dnode_t *dn = txh->txh_dnode;
- uint64_t start, end, i;
- int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
- int err = 0;
-
- if (len == 0)
- return;
-
- min_bs = SPA_MINBLOCKSHIFT;
- max_bs = SPA_MAXBLOCKSHIFT;
- min_ibs = DN_MIN_INDBLKSHIFT;
- max_ibs = DN_MAX_INDBLKSHIFT;
-
-
- /*
- * For i/o error checking, read the first and last level-0
- * blocks (if they are not aligned), and all the level-1 blocks.
- */
-
- if (dn) {
- if (dn->dn_maxblkid == 0) {
- err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err)
- goto out;
- } else {
- zio_t *zio = zio_root(dn->dn_objset->os_spa,
- NULL, NULL, ZIO_FLAG_CANFAIL);
-
- /* first level-0 block */
- start = off >> dn->dn_datablkshift;
- if (P2PHASE(off, dn->dn_datablksz) ||
- len < dn->dn_datablksz) {
- err = dmu_tx_check_ioerr(zio, dn, 0, start);
- if (err)
- goto out;
- }
-
- /* last level-0 block */
- end = (off+len-1) >> dn->dn_datablkshift;
- if (end != start &&
- P2PHASE(off+len, dn->dn_datablksz)) {
- err = dmu_tx_check_ioerr(zio, dn, 0, end);
- if (err)
- goto out;
- }
-
- /* level-1 blocks */
- if (dn->dn_nlevels > 1) {
- start >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (i = start+1; i < end; i++) {
- err = dmu_tx_check_ioerr(zio, dn, 1, i);
- if (err)
- goto out;
- }
- }
-
- err = zio_wait(zio);
- if (err)
- goto out;
- }
- }
-
- /*
- * If there's more than one block, the blocksize can't change,
- * so we can make a more precise estimate. Alternatively,
- * if the dnode's ibs is larger than max_ibs, always use that.
- * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
- * the code will still work correctly on existing pools.
- */
- if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
- min_ibs = max_ibs = dn->dn_indblkshift;
- if (dn->dn_datablkshift != 0)
- min_bs = max_bs = dn->dn_datablkshift;
- }
-
- /*
- * 'end' is the last thing we will access, not one past.
- * This way we won't overflow when accessing the last byte.
- */
- start = P2ALIGN(off, 1ULL << max_bs);
- end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
- txh->txh_space_towrite += end - start + 1;
-
- start >>= min_bs;
- end >>= min_bs;
-
- epbs = min_ibs - SPA_BLKPTRSHIFT;
-
- /*
- * The object contains at most 2^(64 - min_bs) blocks,
- * and each indirect level maps 2^epbs.
- */
- for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
- start >>= epbs;
- end >>= epbs;
- /*
- * If we increase the number of levels of indirection,
- * we'll need new blkid=0 indirect blocks. If start == 0,
- * we're already accounting for that blocks; and if end == 0,
- * we can't increase the number of levels beyond that.
- */
- if (start != 0 && end != 0)
- txh->txh_space_towrite += 1ULL << max_ibs;
- txh->txh_space_towrite += (end - start + 1) << max_ibs;
- }
-
- ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
-
-out:
- if (err)
- txh->txh_tx->tx_err = err;
-}
-
-static void
-dmu_tx_count_dnode(dmu_tx_hold_t *txh)
-{
- dnode_t *dn = txh->txh_dnode;
- dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
- uint64_t space = mdn->dn_datablksz +
- ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
-
- if (dn && dn->dn_dbuf->db_blkptr &&
- dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- dn->dn_dbuf->db_blkptr->blk_birth)) {
- txh->txh_space_tooverwrite += space;
- } else {
- txh->txh_space_towrite += space;
- }
-}
-
-void
-dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
-{
- dmu_tx_hold_t *txh;
-
- ASSERT(tx->tx_txg == 0);
- ASSERT(len < DMU_MAX_ACCESS);
- ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
-
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_WRITE, off, len);
- if (txh == NULL)
- return;
-
- dmu_tx_count_write(txh, off, len);
- dmu_tx_count_dnode(txh);
-}
-
-static void
-dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
-{
- uint64_t blkid, nblks;
- uint64_t space = 0;
- dnode_t *dn = txh->txh_dnode;
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
- int dirty;
-
- /*
- * We don't need to use any locking to check for dirtyness
- * because it's OK if we get stale data -- the dnode may become
- * dirty immediately after our check anyway. This is just a
- * means to avoid the expensive count when we aren't sure we
- * need it. We need to be able to deal with a dirty dnode.
- */
- dirty = list_link_active(&dn->dn_dirty_link[0]) |
- list_link_active(&dn->dn_dirty_link[1]) |
- list_link_active(&dn->dn_dirty_link[2]) |
- list_link_active(&dn->dn_dirty_link[3]);
- if (dirty || dn->dn_assigned_txg || dn->dn_phys->dn_nlevels == 0)
- return;
-
- /*
- * the struct_rwlock protects us against dn_phys->dn_nlevels
- * changing, in case (against all odds) we manage to dirty &
- * sync out the changes after we check for being dirty.
- * also, dbuf_hold_impl() wants us to have the struct_rwlock.
- *
- * It's fine to use dn_datablkshift rather than the dn_phys
- * equivalent because if it is changing, maxblkid==0 and we will
- * bail.
- */
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_phys->dn_maxblkid == 0) {
- if (off == 0 && len >= dn->dn_datablksz) {
- blkid = 0;
- nblks = 1;
- } else {
- rw_exit(&dn->dn_struct_rwlock);
- return;
- }
- } else {
- blkid = off >> dn->dn_datablkshift;
- nblks = (off + len) >> dn->dn_datablkshift;
-
- if (blkid >= dn->dn_phys->dn_maxblkid) {
- rw_exit(&dn->dn_struct_rwlock);
- return;
- }
- if (blkid + nblks > dn->dn_phys->dn_maxblkid)
- nblks = dn->dn_phys->dn_maxblkid - blkid;
-
- /* don't bother after 128,000 blocks */
- nblks = MIN(nblks, 128*1024);
- }
-
- if (dn->dn_phys->dn_nlevels == 1) {
- int i;
- for (i = 0; i < nblks; i++) {
- blkptr_t *bp = dn->dn_phys->dn_blkptr;
- ASSERT3U(blkid + i, <, dn->dn_phys->dn_nblkptr);
- bp += blkid + i;
- if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
- dprintf_bp(bp, "can free old%s", "");
- space += bp_get_dasize(spa, bp);
- }
- }
- nblks = 0;
- }
-
- while (nblks) {
- dmu_buf_impl_t *dbuf;
- int err, epbs, blkoff, tochk;
-
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- blkoff = P2PHASE(blkid, 1<<epbs);
- tochk = MIN((1<<epbs) - blkoff, nblks);
-
- err = dbuf_hold_impl(dn, 1, blkid >> epbs, TRUE, FTAG, &dbuf);
- if (err == 0) {
- int i;
- blkptr_t *bp;
-
- err = dbuf_read(dbuf, NULL,
- DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
- if (err != 0) {
- txh->txh_tx->tx_err = err;
- dbuf_rele(dbuf, FTAG);
- break;
- }
-
- bp = dbuf->db.db_data;
- bp += blkoff;
-
- for (i = 0; i < tochk; i++) {
- if (dsl_dataset_block_freeable(ds,
- bp[i].blk_birth)) {
- dprintf_bp(&bp[i],
- "can free old%s", "");
- space += bp_get_dasize(spa, &bp[i]);
- }
- }
- dbuf_rele(dbuf, FTAG);
- }
- if (err && err != ENOENT) {
- txh->txh_tx->tx_err = err;
- break;
- }
-
- blkid += tochk;
- nblks -= tochk;
- }
- rw_exit(&dn->dn_struct_rwlock);
-
- txh->txh_space_tofree += space;
-}
-
-void
-dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
-{
- dmu_tx_hold_t *txh;
- dnode_t *dn;
- uint64_t start, end, i;
- int err, shift;
- zio_t *zio;
-
- ASSERT(tx->tx_txg == 0);
-
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_FREE, off, len);
- if (txh == NULL)
- return;
- dn = txh->txh_dnode;
-
- /* first block */
- if (off != 0)
- dmu_tx_count_write(txh, off, 1);
- /* last block */
- if (len != DMU_OBJECT_END)
- dmu_tx_count_write(txh, off+len, 1);
-
- if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
- return;
- if (len == DMU_OBJECT_END)
- len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;
-
- /*
- * For i/o error checking, read the first and last level-0
- * blocks, and all the level-1 blocks. The above count_write's
- * will take care of the level-0 blocks.
- */
- if (dn->dn_nlevels > 1) {
- shift = dn->dn_datablkshift + dn->dn_indblkshift -
- SPA_BLKPTRSHIFT;
- start = off >> shift;
- end = dn->dn_datablkshift ? ((off+len) >> shift) : 0;
-
- zio = zio_root(tx->tx_pool->dp_spa,
- NULL, NULL, ZIO_FLAG_CANFAIL);
- for (i = start; i <= end; i++) {
- uint64_t ibyte = i << shift;
- err = dnode_next_offset(dn, FALSE, &ibyte, 2, 1, 0);
- i = ibyte >> shift;
- if (err == ESRCH)
- break;
- if (err) {
- tx->tx_err = err;
- return;
- }
-
- err = dmu_tx_check_ioerr(zio, dn, 1, i);
- if (err) {
- tx->tx_err = err;
- return;
- }
- }
- err = zio_wait(zio);
- if (err) {
- tx->tx_err = err;
- return;
- }
- }
-
- dmu_tx_count_dnode(txh);
- dmu_tx_count_free(txh, off, len);
-}
-
-void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
-{
- dmu_tx_hold_t *txh;
- dnode_t *dn;
- uint64_t nblocks;
- int epbs, err;
-
- ASSERT(tx->tx_txg == 0);
-
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_ZAP, add, (uintptr_t)name);
- if (txh == NULL)
- return;
- dn = txh->txh_dnode;
-
- dmu_tx_count_dnode(txh);
-
- if (dn == NULL) {
- /*
- * We will be able to fit a new object's entries into one leaf
- * block. So there will be at most 2 blocks total,
- * including the header block.
- */
- dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
- return;
- }
-
- ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
-
- if (dn->dn_maxblkid == 0 && !add) {
- /*
- * If there is only one block (i.e. this is a micro-zap)
- * and we are not adding anything, the accounting is simple.
- */
- err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err) {
- tx->tx_err = err;
- return;
- }
-
- /*
- * Use max block size here, since we don't know how much
- * the size will change between now and the dbuf dirty call.
- */
- if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- dn->dn_phys->dn_blkptr[0].blk_birth))
- txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
- else
- txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
- return;
- }
-
- if (dn->dn_maxblkid > 0 && name) {
- /*
- * access the name in this fat-zap so that we'll check
- * for i/o errors to the leaf blocks, etc.
- */
- err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
- 8, 0, NULL);
- if (err == EIO) {
- tx->tx_err = err;
- return;
- }
- }
-
- /*
- * 3 blocks overwritten: target leaf, ptrtbl block, header block
- * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
- */
- dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
- (3 + add ? 3 : 0) << dn->dn_datablkshift);
-
- /*
- * If the modified blocks are scattered to the four winds,
- * we'll have to modify an indirect twig for each.
- */
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
- txh->txh_space_towrite += 3 << dn->dn_indblkshift;
-}
-
-void
-dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
-{
- dmu_tx_hold_t *txh;
-
- ASSERT(tx->tx_txg == 0);
-
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_BONUS, 0, 0);
- if (txh)
- dmu_tx_count_dnode(txh);
-}
-
-void
-dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
-{
- dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
-
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- DMU_NEW_OBJECT, THT_SPACE, space, 0);
-
- txh->txh_space_towrite += space;
-}
-
-int
-dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
-{
- dmu_tx_hold_t *txh;
- int holds = 0;
-
- /*
- * By asserting that the tx is assigned, we're counting the
- * number of dn_tx_holds, which is the same as the number of
- * dn_holds. Otherwise, we'd be counting dn_holds, but
- * dn_tx_holds could be 0.
- */
- ASSERT(tx->tx_txg != 0);
-
- /* if (tx->tx_anyobj == TRUE) */
- /* return (0); */
-
- for (txh = list_head(&tx->tx_holds); txh;
- txh = list_next(&tx->tx_holds, txh)) {
- if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
- holds++;
- }
-
- return (holds);
-}
-
-#ifdef ZFS_DEBUG
-void
-dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
-{
- dmu_tx_hold_t *txh;
- int match_object = FALSE, match_offset = FALSE;
- dnode_t *dn = db->db_dnode;
-
- ASSERT(tx->tx_txg != 0);
- ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
- ASSERT3U(dn->dn_object, ==, db->db.db_object);
-
- if (tx->tx_anyobj)
- return;
-
- /* XXX No checking on the meta dnode for now */
- if (db->db.db_object == DMU_META_DNODE_OBJECT)
- return;
-
- for (txh = list_head(&tx->tx_holds); txh;
- txh = list_next(&tx->tx_holds, txh)) {
- ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
- if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
- match_object = TRUE;
- if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
- int datablkshift = dn->dn_datablkshift ?
- dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- int shift = datablkshift + epbs * db->db_level;
- uint64_t beginblk = shift >= 64 ? 0 :
- (txh->txh_arg1 >> shift);
- uint64_t endblk = shift >= 64 ? 0 :
- ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
- uint64_t blkid = db->db_blkid;
-
- /* XXX txh_arg2 better not be zero... */
-
- dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
- txh->txh_type, beginblk, endblk);
-
- switch (txh->txh_type) {
- case THT_WRITE:
- if (blkid >= beginblk && blkid <= endblk)
- match_offset = TRUE;
- /*
- * We will let this hold work for the bonus
- * buffer so that we don't need to hold it
- * when creating a new object.
- */
- if (blkid == DB_BONUS_BLKID)
- match_offset = TRUE;
- /*
- * They might have to increase nlevels,
- * thus dirtying the new TLIBs. Or the
- * might have to change the block size,
- * thus dirying the new lvl=0 blk=0.
- */
- if (blkid == 0)
- match_offset = TRUE;
- break;
- case THT_FREE:
- if (blkid == beginblk &&
- (txh->txh_arg1 != 0 ||
- dn->dn_maxblkid == 0))
- match_offset = TRUE;
- if (blkid == endblk &&
- txh->txh_arg2 != DMU_OBJECT_END)
- match_offset = TRUE;
- break;
- case THT_BONUS:
- if (blkid == DB_BONUS_BLKID)
- match_offset = TRUE;
- break;
- case THT_ZAP:
- match_offset = TRUE;
- break;
- case THT_NEWOBJECT:
- match_object = TRUE;
- break;
- default:
- ASSERT(!"bad txh_type");
- }
- }
- if (match_object && match_offset)
- return;
- }
- panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
- (u_longlong_t)db->db.db_object, db->db_level,
- (u_longlong_t)db->db_blkid);
-}
-#endif
-
-static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
-{
- dmu_tx_hold_t *txh;
- uint64_t lsize, asize, fsize, towrite, tofree, tooverwrite;
-
- ASSERT3U(tx->tx_txg, ==, 0);
- if (tx->tx_err)
- return (tx->tx_err);
-
- tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
- tx->tx_needassign_txh = NULL;
-
- /*
- * NB: No error returns are allowed after txg_hold_open, but
- * before processing the dnode holds, due to the
- * dmu_tx_unassign() logic.
- */
-
- towrite = tofree = tooverwrite = 0;
- for (txh = list_head(&tx->tx_holds); txh;
- txh = list_next(&tx->tx_holds, txh)) {
- dnode_t *dn = txh->txh_dnode;
- if (dn != NULL) {
- mutex_enter(&dn->dn_mtx);
- if (dn->dn_assigned_txg == tx->tx_txg - 1) {
- mutex_exit(&dn->dn_mtx);
- tx->tx_needassign_txh = txh;
- return (ERESTART);
- }
- if (dn->dn_assigned_txg == 0)
- dn->dn_assigned_txg = tx->tx_txg;
- ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
- (void) refcount_add(&dn->dn_tx_holds, tx);
- mutex_exit(&dn->dn_mtx);
- }
- towrite += txh->txh_space_towrite;
- tofree += txh->txh_space_tofree;
- tooverwrite += txh->txh_space_tooverwrite;
- }
-
- /*
- * NB: This check must be after we've held the dnodes, so that
- * the dmu_tx_unassign() logic will work properly
- */
- if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
- return (ERESTART);
-
- /*
- * If a snapshot has been taken since we made our estimates,
- * assume that we won't be able to free or overwrite anything.
- */
- if (tx->tx_objset &&
- dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
- tx->tx_lastsnap_txg) {
- towrite += tooverwrite;
- tooverwrite = tofree = 0;
- }
-
- /*
- * Convert logical size to worst-case allocated size.
- */
- fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
- lsize = towrite + tooverwrite;
- asize = spa_get_asize(tx->tx_pool->dp_spa, lsize);
-
-#ifdef ZFS_DEBUG
- tx->tx_space_towrite = asize;
- tx->tx_space_tofree = tofree;
- tx->tx_space_tooverwrite = tooverwrite;
-#endif
-
- if (tx->tx_dir && asize != 0) {
- int err = dsl_dir_tempreserve_space(tx->tx_dir,
- lsize, asize, fsize, &tx->tx_tempreserve_cookie, tx);
- if (err)
- return (err);
- }
-
- return (0);
-}
-
-static void
-dmu_tx_unassign(dmu_tx_t *tx)
-{
- dmu_tx_hold_t *txh;
-
- if (tx->tx_txg == 0)
- return;
-
- txg_rele_to_quiesce(&tx->tx_txgh);
-
- for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
- txh = list_next(&tx->tx_holds, txh)) {
- dnode_t *dn = txh->txh_dnode;
-
- if (dn == NULL)
- continue;
- mutex_enter(&dn->dn_mtx);
- ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
-
- if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
- dn->dn_assigned_txg = 0;
- cv_broadcast(&dn->dn_notxholds);
- }
- mutex_exit(&dn->dn_mtx);
- }
-
- txg_rele_to_sync(&tx->tx_txgh);
-
- tx->tx_lasttried_txg = tx->tx_txg;
- tx->tx_txg = 0;
-}
-
-/*
- * Assign tx to a transaction group. txg_how can be one of:
- *
- * (1) TXG_WAIT. If the current open txg is full, waits until there's
- * a new one. This should be used when you're not holding locks.
- * If will only fail if we're truly out of space (or over quota).
- *
- * (2) TXG_NOWAIT. If we can't assign into the current open txg without
- * blocking, returns immediately with ERESTART. This should be used
- * whenever you're holding locks. On an ERESTART error, the caller
- * should drop locks, do a dmu_tx_wait(tx), and try again.
- *
- * (3) A specific txg. Use this if you need to ensure that multiple
- * transactions all sync in the same txg. Like TXG_NOWAIT, it
- * returns ERESTART if it can't assign you into the requested txg.
- */
-int
-dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
-{
- int err;
-
- ASSERT(tx->tx_txg == 0);
- ASSERT(txg_how != 0);
- ASSERT(!dsl_pool_sync_context(tx->tx_pool));
-
- while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
- dmu_tx_unassign(tx);
-
- if (err != ERESTART || txg_how != TXG_WAIT)
- return (err);
-
- dmu_tx_wait(tx);
- }
-
- txg_rele_to_quiesce(&tx->tx_txgh);
-
- return (0);
-}
-
-void
-dmu_tx_wait(dmu_tx_t *tx)
-{
- ASSERT(tx->tx_txg == 0);
- ASSERT(tx->tx_lasttried_txg != 0);
-
- if (tx->tx_needassign_txh) {
- dnode_t *dn = tx->tx_needassign_txh->txh_dnode;
-
- mutex_enter(&dn->dn_mtx);
- while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
- cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
- mutex_exit(&dn->dn_mtx);
- tx->tx_needassign_txh = NULL;
- } else {
- txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
- }
-}
-
-void
-dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
-{
-#ifdef ZFS_DEBUG
- if (tx->tx_dir == NULL || delta == 0)
- return;
-
- if (delta > 0) {
- ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
- tx->tx_space_towrite);
- (void) refcount_add_many(&tx->tx_space_written, delta, NULL);
- } else {
- (void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
- }
-#endif
-}
-
-void
-dmu_tx_commit(dmu_tx_t *tx)
-{
- dmu_tx_hold_t *txh;
-
- ASSERT(tx->tx_txg != 0);
-
- while (txh = list_head(&tx->tx_holds)) {
- dnode_t *dn = txh->txh_dnode;
-
- list_remove(&tx->tx_holds, txh);
- kmem_free(txh, sizeof (dmu_tx_hold_t));
- if (dn == NULL)
- continue;
- mutex_enter(&dn->dn_mtx);
- ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
-
- if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
- dn->dn_assigned_txg = 0;
- cv_broadcast(&dn->dn_notxholds);
- }
- mutex_exit(&dn->dn_mtx);
- dnode_rele(dn, tx);
- }
-
- if (tx->tx_tempreserve_cookie)
- dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
-
- if (tx->tx_anyobj == FALSE)
- txg_rele_to_sync(&tx->tx_txgh);
-#ifdef ZFS_DEBUG
- dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
- tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
- tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
- refcount_destroy_many(&tx->tx_space_written,
- refcount_count(&tx->tx_space_written));
- refcount_destroy_many(&tx->tx_space_freed,
- refcount_count(&tx->tx_space_freed));
-#endif
- kmem_free(tx, sizeof (dmu_tx_t));
-}
-
-void
-dmu_tx_abort(dmu_tx_t *tx)
-{
- dmu_tx_hold_t *txh;
-
- ASSERT(tx->tx_txg == 0);
-
- while (txh = list_head(&tx->tx_holds)) {
- dnode_t *dn = txh->txh_dnode;
-
- list_remove(&tx->tx_holds, txh);
- kmem_free(txh, sizeof (dmu_tx_hold_t));
- if (dn != NULL)
- dnode_rele(dn, tx);
- }
-#ifdef ZFS_DEBUG
- refcount_destroy_many(&tx->tx_space_written,
- refcount_count(&tx->tx_space_written));
- refcount_destroy_many(&tx->tx_space_freed,
- refcount_count(&tx->tx_space_freed));
-#endif
- kmem_free(tx, sizeof (dmu_tx_t));
-}
-
-uint64_t
-dmu_tx_get_txg(dmu_tx_t *tx)
-{
- ASSERT(tx->tx_txg != 0);
- return (tx->tx_txg);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
deleted file mode 100644
index 78d625c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
+++ /dev/null
@@ -1,655 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dnode.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_zfetch.h>
-#include <sys/dmu.h>
-#include <sys/dbuf.h>
-
-/*
- * I'm against tune-ables, but these should probably exist as tweakable globals
- * until we can get this working the way we want it to.
- */
-
-int zfs_prefetch_disable = 0;
-SYSCTL_DECL(_vfs_zfs);
-TUNABLE_INT("vfs.zfs.prefetch_disable", &zfs_prefetch_disable);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, prefetch_disable, CTLFLAG_RDTUN,
- &zfs_prefetch_disable, 0, "Disable prefetch");
-
-/* max # of streams per zfetch */
-uint32_t zfetch_max_streams = 8;
-/* min time before stream reclaim */
-uint32_t zfetch_min_sec_reap = 2;
-/* max number of blocks to fetch at a time */
-uint32_t zfetch_block_cap = 256;
-/* number of bytes in a array_read at which we stop prefetching (1Mb) */
-uint64_t zfetch_array_rd_sz = 1024 * 1024;
-
-/* forward decls for static routines */
-static int dmu_zfetch_colinear(zfetch_t *, zstream_t *);
-static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
-static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
-static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
-static int dmu_zfetch_find(zfetch_t *, zstream_t *, int);
-static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
-static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
-static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
-static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
-
-/*
- * Given a zfetch structure and a zstream structure, determine whether the
- * blocks to be read are part of a co-linear pair of existing prefetch
- * streams. If a set is found, coalesce the streams, removing one, and
- * configure the prefetch so it looks for a strided access pattern.
- *
- * In other words: if we find two sequential access streams that are
- * the same length and distance N appart, and this read is N from the
- * last stream, then we are probably in a strided access pattern. So
- * combine the two sequential streams into a single strided stream.
- *
- * If no co-linear streams are found, return NULL.
- */
-static int
-dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
-{
- zstream_t *z_walk;
- zstream_t *z_comp;
-
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- if (zh == NULL) {
- rw_exit(&zf->zf_rwlock);
- return (0);
- }
-
- for (z_walk = list_head(&zf->zf_stream); z_walk;
- z_walk = list_next(&zf->zf_stream, z_walk)) {
- for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
- z_comp = list_next(&zf->zf_stream, z_comp)) {
- int64_t diff;
-
- if (z_walk->zst_len != z_walk->zst_stride ||
- z_comp->zst_len != z_comp->zst_stride) {
- continue;
- }
-
- diff = z_comp->zst_offset - z_walk->zst_offset;
- if (z_comp->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ? -1 : 1;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
-
- diff = z_walk->zst_offset - z_comp->zst_offset;
- if (z_walk->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ? -1 : 1;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
- }
- }
-
- rw_exit(&zf->zf_rwlock);
- return (0);
-}
-
-/*
- * Given a zstream_t, determine the bounds of the prefetch. Then call the
- * routine that actually prefetches the individual blocks.
- */
-static void
-dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
-{
- uint64_t prefetch_tail;
- uint64_t prefetch_limit;
- uint64_t prefetch_ofst;
- uint64_t prefetch_len;
- uint64_t blocks_fetched;
-
- zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
- zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
-
- prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
- (int64_t)(zs->zst_offset + zs->zst_stride));
- /*
- * XXX: use a faster division method?
- */
- prefetch_limit = zs->zst_offset + zs->zst_len +
- (zs->zst_cap * zs->zst_stride) / zs->zst_len;
-
- while (prefetch_tail < prefetch_limit) {
- prefetch_ofst = zs->zst_offset + zs->zst_direction *
- (prefetch_tail - zs->zst_offset);
-
- prefetch_len = zs->zst_len;
-
- /*
- * Don't prefetch beyond the end of the file, if working
- * backwards.
- */
- if ((zs->zst_direction == ZFETCH_BACKWARD) &&
- (prefetch_ofst > prefetch_tail)) {
- prefetch_len += prefetch_ofst;
- prefetch_ofst = 0;
- }
-
- /* don't prefetch more than we're supposed to */
- if (prefetch_len > zs->zst_len)
- break;
-
- blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
- prefetch_ofst, zs->zst_len);
-
- prefetch_tail += zs->zst_stride;
- /* stop if we've run out of stuff to prefetch */
- if (blocks_fetched < zs->zst_len)
- break;
- }
- zs->zst_ph_offset = prefetch_tail;
- zs->zst_last = lbolt;
-}
-
-/*
- * This takes a pointer to a zfetch structure and a dnode. It performs the
- * necessary setup for the zfetch structure, grokking data from the
- * associated dnode.
- */
-void
-dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
-{
- if (zf == NULL) {
- return;
- }
-
- zf->zf_dnode = dno;
- zf->zf_stream_cnt = 0;
- zf->zf_alloc_fail = 0;
-
- list_create(&zf->zf_stream, sizeof (zstream_t),
- offsetof(zstream_t, zst_node));
-
- rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
-}
-
-/*
- * This function computes the actual size, in blocks, that can be prefetched,
- * and fetches it.
- */
-static uint64_t
-dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
- uint64_t i;
-
- fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
-
- for (i = 0; i < fetchsz; i++) {
- dbuf_prefetch(dn, blkid + i);
- }
-
- return (fetchsz);
-}
-
-/*
- * this function returns the number of blocks that would be prefetched, based
- * upon the supplied dnode, blockid, and nblks. This is used so that we can
- * update streams in place, and then prefetch with their old value after the
- * fact. This way, we can delay the prefetch, but subsequent accesses to the
- * stream won't result in the same data being prefetched multiple times.
- */
-static uint64_t
-dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
-
- if (blkid > dn->dn_maxblkid) {
- return (0);
- }
-
- /* compute fetch size */
- if (blkid + nblks + 1 > dn->dn_maxblkid) {
- fetchsz = (dn->dn_maxblkid - blkid) + 1;
- ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
- } else {
- fetchsz = nblks;
- }
-
-
- return (fetchsz);
-}
-
-/*
- * given a zfetch and a zsearch structure, see if there is an associated zstream
- * for this block read. If so, it starts a prefetch for the stream it
- * located and returns true, otherwise it returns false
- */
-static int
-dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
-{
- zstream_t *zs;
- int64_t diff;
- int reset = !prefetched;
- int rc = 0;
-
- if (zh == NULL)
- return (0);
-
- /*
- * XXX: This locking strategy is a bit coarse; however, it's impact has
- * yet to be tested. If this turns out to be an issue, it can be
- * modified in a number of different ways.
- */
-
- rw_enter(&zf->zf_rwlock, RW_READER);
-top:
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- /*
- * XXX - should this be an assert?
- */
- if (zs->zst_len == 0) {
- /* bogus stream */
- continue;
- }
-
- /*
- * We hit this case when we are in a strided prefetch stream:
- * we will read "len" blocks before "striding".
- */
- if (zh->zst_offset >= zs->zst_offset &&
- zh->zst_offset < zs->zst_offset + zs->zst_len) {
- /* already fetched */
- rc = 1;
- goto out;
- }
-
- /*
- * This is the forward sequential read case: we increment
- * len by one each time we hit here, so we will enter this
- * case on every read.
- */
- if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
-
- reset = !prefetched && zs->zst_len > 1;
-
- mutex_enter(&zs->zst_lock);
-
- if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
- zs->zst_len += zh->zst_len;
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_offset += diff;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : 0;
- }
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- /*
- * Same as above, but reading backwards through the file.
- */
- } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
- /* backwards sequential access */
-
- reset = !prefetched && zs->zst_len > 1;
-
- mutex_enter(&zs->zst_lock);
-
- if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zh->zst_len ?
- zs->zst_offset - zh->zst_len : 0;
- zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
- zs->zst_ph_offset - zh->zst_len : 0;
- zs->zst_len += zh->zst_len;
-
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_ph_offset = zs->zst_ph_offset > diff ?
- zs->zst_ph_offset - diff : 0;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : zs->zst_len;
- }
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided forward access */
-
- mutex_enter(&zs->zst_lock);
-
- if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset += zs->zst_stride;
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided reverse access */
-
- mutex_enter(&zs->zst_lock);
-
- if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zs->zst_stride ?
- zs->zst_offset - zs->zst_stride : 0;
- zs->zst_ph_offset = (zs->zst_ph_offset >
- (2 * zs->zst_stride)) ?
- (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
- }
- }
-
- if (zs) {
- if (reset) {
- zstream_t *remove = zs;
-
- rc = 0;
- mutex_exit(&zs->zst_lock);
- rw_exit(&zf->zf_rwlock);
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- /*
- * Relocate the stream, in case someone removes
- * it while we were acquiring the WRITER lock.
- */
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
- if (zs == remove) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- break;
- }
- }
- } else {
- rc = 1;
- dmu_zfetch_dofetch(zf, zs);
- mutex_exit(&zs->zst_lock);
- }
- }
-out:
- rw_exit(&zf->zf_rwlock);
- return (rc);
-}
-
-/*
- * Clean-up state associated with a zfetch structure. This frees allocated
- * structure members, empties the zf_stream tree, and generally makes things
- * nice. This doesn't free the zfetch_t itself, that's left to the caller.
- */
-void
-dmu_zfetch_rele(zfetch_t *zf)
-{
- zstream_t *zs;
- zstream_t *zs_next;
-
- ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
-
- for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs);
-
- list_remove(&zf->zf_stream, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- }
- list_destroy(&zf->zf_stream);
- rw_destroy(&zf->zf_rwlock);
-
- zf->zf_dnode = NULL;
-}
-
-/*
- * Given a zfetch and zstream structure, insert the zstream structure into the
- * AVL tree contained within the zfetch structure. Peform the appropriate
- * book-keeping. It is possible that another thread has inserted a stream which
- * matches one that we are about to insert, so we must be sure to check for this
- * case. If one is found, return failure, and let the caller cleanup the
- * duplicates.
- */
-static int
-dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
-{
- zstream_t *zs_walk;
- zstream_t *zs_next;
-
- ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
-
- for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs_walk);
-
- if (dmu_zfetch_streams_equal(zs_walk, zs)) {
- return (0);
- }
- }
-
- list_insert_head(&zf->zf_stream, zs);
- zf->zf_stream_cnt++;
-
- return (1);
-}
-
-
-/*
- * Walk the list of zstreams in the given zfetch, find an old one (by time), and
- * reclaim it for use by the caller.
- */
-static zstream_t *
-dmu_zfetch_stream_reclaim(zfetch_t *zf)
-{
- zstream_t *zs;
-
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- if (((lbolt - zs->zst_last) / hz) > zfetch_min_sec_reap)
- break;
- }
-
- if (zs) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- bzero(zs, sizeof (zstream_t));
- } else {
- zf->zf_alloc_fail++;
- }
- rw_exit(&zf->zf_rwlock);
-
- return (zs);
-}
-
-/*
- * Given a zfetch and zstream structure, remove the zstream structure from its
- * container in the zfetch structure. Perform the appropriate book-keeping.
- */
-static void
-dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
-{
- ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
-
- list_remove(&zf->zf_stream, zs);
- zf->zf_stream_cnt--;
-}
-
-static int
-dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
-{
- if (zs1->zst_offset != zs2->zst_offset)
- return (0);
-
- if (zs1->zst_len != zs2->zst_len)
- return (0);
-
- if (zs1->zst_stride != zs2->zst_stride)
- return (0);
-
- if (zs1->zst_ph_offset != zs2->zst_ph_offset)
- return (0);
-
- if (zs1->zst_cap != zs2->zst_cap)
- return (0);
-
- if (zs1->zst_direction != zs2->zst_direction)
- return (0);
-
- return (1);
-}
-
-/*
- * This is the prefetch entry point. It calls all of the other dmu_zfetch
- * routines to create, delete, find, or operate upon prefetch streams.
- */
-void
-dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
-{
- zstream_t zst;
- zstream_t *newstream;
- int fetched;
- int inserted;
- unsigned int blkshft;
- uint64_t blksz;
-
- if (zfs_prefetch_disable)
- return;
-
- /* files that aren't ln2 blocksz are only one block -- nothing to do */
- if (!zf->zf_dnode->dn_datablkshift)
- return;
-
- /* convert offset and size, into blockid and nblocks */
- blkshft = zf->zf_dnode->dn_datablkshift;
- blksz = (1 << blkshft);
-
- bzero(&zst, sizeof (zstream_t));
- zst.zst_offset = offset >> blkshft;
- zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
- P2ALIGN(offset, blksz)) >> blkshft;
-
- fetched = dmu_zfetch_find(zf, &zst, prefetched);
- if (!fetched) {
- fetched = dmu_zfetch_colinear(zf, &zst);
- }
-
- if (!fetched) {
- newstream = dmu_zfetch_stream_reclaim(zf);
-
- /*
- * we still couldn't find a stream, drop the lock, and allocate
- * one if possible. Otherwise, give up and go home.
- */
- if (newstream == NULL) {
- uint64_t maxblocks;
- uint32_t max_streams;
- uint32_t cur_streams;
-
- cur_streams = zf->zf_stream_cnt;
- maxblocks = zf->zf_dnode->dn_maxblkid;
-
- max_streams = MIN(zfetch_max_streams,
- (maxblocks / zfetch_block_cap));
- if (max_streams == 0) {
- max_streams++;
- }
-
- if (cur_streams >= max_streams) {
- return;
- }
-
- newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
- }
-
- newstream->zst_offset = zst.zst_offset;
- newstream->zst_len = zst.zst_len;
- newstream->zst_stride = zst.zst_len;
- newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
- newstream->zst_cap = zst.zst_len;
- newstream->zst_direction = ZFETCH_FORWARD;
- newstream->zst_last = lbolt;
-
- mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
-
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- inserted = dmu_zfetch_stream_insert(zf, newstream);
- rw_exit(&zf->zf_rwlock);
-
- if (!inserted) {
- mutex_destroy(&newstream->zst_lock);
- kmem_free(newstream, sizeof (zstream_t));
- }
- }
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dnode.c
deleted file mode 100644
index ca50285..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ /dev/null
@@ -1,1369 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dbuf.h>
-#include <sys/dnode.h>
-#include <sys/dmu.h>
-#include <sys/dmu_impl.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_dataset.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu_zfetch.h>
-
-static int free_range_compar(const void *node1, const void *node2);
-
-static kmem_cache_t *dnode_cache;
-
-static dnode_phys_t dnode_phys_zero;
-
-int zfs_default_bs = SPA_MINBLOCKSHIFT;
-int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
-
-/* ARGSUSED */
-static int
-dnode_cons(void *arg, void *unused, int kmflag)
-{
- int i;
- dnode_t *dn = arg;
- bzero(dn, sizeof (dnode_t));
-
- cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
- rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
- mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
- refcount_create(&dn->dn_holds);
- refcount_create(&dn->dn_tx_holds);
-
- for (i = 0; i < TXG_SIZE; i++) {
- avl_create(&dn->dn_ranges[i], free_range_compar,
- sizeof (free_range_t),
- offsetof(struct free_range, fr_node));
- list_create(&dn->dn_dirty_records[i],
- sizeof (dbuf_dirty_record_t),
- offsetof(dbuf_dirty_record_t, dr_dirty_node));
- }
-
- list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t),
- offsetof(dmu_buf_impl_t, db_link));
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-dnode_dest(void *arg, void *unused)
-{
- int i;
- dnode_t *dn = arg;
-
- cv_destroy(&dn->dn_notxholds);
- rw_destroy(&dn->dn_struct_rwlock);
- mutex_destroy(&dn->dn_mtx);
- mutex_destroy(&dn->dn_dbufs_mtx);
- refcount_destroy(&dn->dn_holds);
- refcount_destroy(&dn->dn_tx_holds);
-
- for (i = 0; i < TXG_SIZE; i++) {
- avl_destroy(&dn->dn_ranges[i]);
- list_destroy(&dn->dn_dirty_records[i]);
- }
-
- list_destroy(&dn->dn_dbufs);
-}
-
-void
-dnode_init(void)
-{
- dnode_cache = kmem_cache_create("dnode_t",
- sizeof (dnode_t),
- 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
-}
-
-void
-dnode_fini(void)
-{
- kmem_cache_destroy(dnode_cache);
-}
-
-
-#ifdef ZFS_DEBUG
-void
-dnode_verify(dnode_t *dn)
-{
- int drop_struct_lock = FALSE;
-
- ASSERT(dn->dn_phys);
- ASSERT(dn->dn_objset);
-
- ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
-
- if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
- return;
-
- if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- drop_struct_lock = TRUE;
- }
- if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
- int i;
- ASSERT3U(dn->dn_indblkshift, >=, 0);
- ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT);
- if (dn->dn_datablkshift) {
- ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
- ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT);
- ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
- }
- ASSERT3U(dn->dn_nlevels, <=, 30);
- ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
- ASSERT3U(dn->dn_nblkptr, >=, 1);
- ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
- ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
- ASSERT3U(dn->dn_datablksz, ==,
- dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
- ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) +
- dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
- for (i = 0; i < TXG_SIZE; i++) {
- ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
- }
- }
- if (dn->dn_phys->dn_type != DMU_OT_NONE)
- ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
- ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || dn->dn_dbuf != NULL);
- if (dn->dn_dbuf != NULL) {
- ASSERT3P(dn->dn_phys, ==,
- (dnode_phys_t *)dn->dn_dbuf->db.db_data +
- (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT)));
- }
- if (drop_struct_lock)
- rw_exit(&dn->dn_struct_rwlock);
-}
-#endif
-
-void
-dnode_byteswap(dnode_phys_t *dnp)
-{
- uint64_t *buf64 = (void*)&dnp->dn_blkptr;
- int i;
-
- if (dnp->dn_type == DMU_OT_NONE) {
- bzero(dnp, sizeof (dnode_phys_t));
- return;
- }
-
- dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
- dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
- dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
- dnp->dn_used = BSWAP_64(dnp->dn_used);
-
- /*
- * dn_nblkptr is only one byte, so it's OK to read it in either
- * byte order. We can't read dn_bouslen.
- */
- ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT);
- ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR);
- for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++)
- buf64[i] = BSWAP_64(buf64[i]);
-
- /*
- * OK to check dn_bonuslen for zero, because it won't matter if
- * we have the wrong byte order. This is necessary because the
- * dnode dnode is smaller than a regular dnode.
- */
- if (dnp->dn_bonuslen != 0) {
- /*
- * Note that the bonus length calculated here may be
- * longer than the actual bonus buffer. This is because
- * we always put the bonus buffer after the last block
- * pointer (instead of packing it against the end of the
- * dnode buffer).
- */
- int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
- size_t len = DN_MAX_BONUSLEN - off;
- ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
- dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
- }
-}
-
-void
-dnode_buf_byteswap(void *vbuf, size_t size)
-{
- dnode_phys_t *buf = vbuf;
- int i;
-
- ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
- ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
-
- size >>= DNODE_SHIFT;
- for (i = 0; i < size; i++) {
- dnode_byteswap(buf);
- buf++;
- }
-}
-
-static int
-free_range_compar(const void *node1, const void *node2)
-{
- const free_range_t *rp1 = node1;
- const free_range_t *rp2 = node2;
-
- if (rp1->fr_blkid < rp2->fr_blkid)
- return (-1);
- else if (rp1->fr_blkid > rp2->fr_blkid)
- return (1);
- else return (0);
-}
-
-static void
-dnode_setdblksz(dnode_t *dn, int size)
-{
- ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0);
- ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
- ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
- ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
- 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
- dn->dn_datablksz = size;
- dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
- dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0;
-}
-
-static dnode_t *
-dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
- uint64_t object)
-{
- dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
-
- dn->dn_objset = os;
- dn->dn_object = object;
- dn->dn_dbuf = db;
- dn->dn_phys = dnp;
-
- if (dnp->dn_datablkszsec)
- dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- dn->dn_indblkshift = dnp->dn_indblkshift;
- dn->dn_nlevels = dnp->dn_nlevels;
- dn->dn_type = dnp->dn_type;
- dn->dn_nblkptr = dnp->dn_nblkptr;
- dn->dn_checksum = dnp->dn_checksum;
- dn->dn_compress = dnp->dn_compress;
- dn->dn_bonustype = dnp->dn_bonustype;
- dn->dn_bonuslen = dnp->dn_bonuslen;
- dn->dn_maxblkid = dnp->dn_maxblkid;
-
- dmu_zfetch_init(&dn->dn_zfetch, dn);
-
- ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
- mutex_enter(&os->os_lock);
- list_insert_head(&os->os_dnodes, dn);
- mutex_exit(&os->os_lock);
-
- return (dn);
-}
-
-static void
-dnode_destroy(dnode_t *dn)
-{
- objset_impl_t *os = dn->dn_objset;
-
-#ifdef ZFS_DEBUG
- int i;
-
- for (i = 0; i < TXG_SIZE; i++) {
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
- ASSERT(NULL == list_head(&dn->dn_dirty_records[i]));
- ASSERT(0 == avl_numnodes(&dn->dn_ranges[i]));
- }
- ASSERT(NULL == list_head(&dn->dn_dbufs));
-#endif
-
- mutex_enter(&os->os_lock);
- list_remove(&os->os_dnodes, dn);
- mutex_exit(&os->os_lock);
-
- if (dn->dn_dirtyctx_firstset) {
- kmem_free(dn->dn_dirtyctx_firstset, 1);
- dn->dn_dirtyctx_firstset = NULL;
- }
- dmu_zfetch_rele(&dn->dn_zfetch);
- if (dn->dn_bonus) {
- mutex_enter(&dn->dn_bonus->db_mtx);
- dbuf_evict(dn->dn_bonus);
- dn->dn_bonus = NULL;
- }
- kmem_cache_free(dnode_cache, dn);
-}
-
-void
-dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
- int i;
-
- if (blocksize == 0)
- blocksize = 1 << zfs_default_bs;
- else if (blocksize > SPA_MAXBLOCKSIZE)
- blocksize = SPA_MAXBLOCKSIZE;
- else
- blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
-
- if (ibs == 0)
- ibs = zfs_default_ibs;
-
- ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
-
- dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset,
- dn->dn_object, tx->tx_txg, blocksize, ibs);
-
- ASSERT(dn->dn_type == DMU_OT_NONE);
- ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
- ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
- ASSERT(ot != DMU_OT_NONE);
- ASSERT3U(ot, <, DMU_OT_NUMTYPES);
- ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
- (bonustype != DMU_OT_NONE && bonuslen != 0));
- ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
- ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
- ASSERT(dn->dn_type == DMU_OT_NONE);
- ASSERT3U(dn->dn_maxblkid, ==, 0);
- ASSERT3U(dn->dn_allocated_txg, ==, 0);
- ASSERT3U(dn->dn_assigned_txg, ==, 0);
- ASSERT(refcount_is_zero(&dn->dn_tx_holds));
- ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
- ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
-
- for (i = 0; i < TXG_SIZE; i++) {
- ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
- ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
- ASSERT3U(dn->dn_next_blksz[i], ==, 0);
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
- ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
- ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0);
- }
-
- dn->dn_type = ot;
- dnode_setdblksz(dn, blocksize);
- dn->dn_indblkshift = ibs;
- dn->dn_nlevels = 1;
- dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
- dn->dn_bonustype = bonustype;
- dn->dn_bonuslen = bonuslen;
- dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
- dn->dn_compress = ZIO_COMPRESS_INHERIT;
- dn->dn_dirtyctx = 0;
-
- dn->dn_free_txg = 0;
- if (dn->dn_dirtyctx_firstset) {
- kmem_free(dn->dn_dirtyctx_firstset, 1);
- dn->dn_dirtyctx_firstset = NULL;
- }
-
- dn->dn_allocated_txg = tx->tx_txg;
-
- dnode_setdirty(dn, tx);
- dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
- dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
-}
-
-void
-dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
- int i;
- dmu_buf_impl_t *db = NULL;
-
- ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
- ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
- ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0);
- ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
- ASSERT(tx->tx_txg != 0);
- ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
- (bonustype != DMU_OT_NONE && bonuslen != 0));
- ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
- ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
-
- for (i = 0; i < TXG_SIZE; i++)
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
-
- /* clean up any unreferenced dbufs */
- (void) dnode_evict_dbufs(dn, 0);
- ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
-
- /*
- * XXX I should really have a generation number to tell if we
- * need to do this...
- */
- if (blocksize != dn->dn_datablksz ||
- dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) {
- /* free all old data */
- dnode_free_range(dn, 0, -1ULL, tx);
- }
-
- /* change blocksize */
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (blocksize != dn->dn_datablksz &&
- (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
- list_head(&dn->dn_dbufs) != NULL)) {
- db = dbuf_hold(dn, 0, FTAG);
- dbuf_new_size(db, blocksize, tx);
- }
- dnode_setdblksz(dn, blocksize);
- dnode_setdirty(dn, tx);
- dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
- rw_exit(&dn->dn_struct_rwlock);
- if (db) {
- dbuf_rele(db, FTAG);
- db = NULL;
- }
-
- /* change type */
- dn->dn_type = ot;
-
- if (dn->dn_bonuslen != bonuslen) {
- /* change bonus size */
- if (bonuslen == 0)
- bonuslen = 1; /* XXX */
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (dn->dn_bonus == NULL)
- dn->dn_bonus = dbuf_create_bonus(dn);
- db = dn->dn_bonus;
- rw_exit(&dn->dn_struct_rwlock);
- if (refcount_add(&db->db_holds, FTAG) == 1)
- dnode_add_ref(dn, db);
- VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED));
- mutex_enter(&db->db_mtx);
- ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen);
- ASSERT(db->db.db_data != NULL);
- db->db.db_size = bonuslen;
- mutex_exit(&db->db_mtx);
- (void) dbuf_dirty(db, tx);
- }
-
- /* change bonus size and type */
- mutex_enter(&dn->dn_mtx);
- dn->dn_bonustype = bonustype;
- dn->dn_bonuslen = bonuslen;
- dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
- dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
- dn->dn_compress = ZIO_COMPRESS_INHERIT;
- ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
-
- /*
- * NB: we have to do the dbuf_rele after we've changed the
- * dn_bonuslen, for the sake of dbuf_verify().
- */
- if (db)
- dbuf_rele(db, FTAG);
-
- dn->dn_allocated_txg = tx->tx_txg;
- mutex_exit(&dn->dn_mtx);
-}
-
-void
-dnode_special_close(dnode_t *dn)
-{
- /*
- * Wait for final references to the dnode to clear. This can
- * only happen if the arc is asyncronously evicting state that
- * has a hold on this dnode while we are trying to evict this
- * dnode.
- */
- while (refcount_count(&dn->dn_holds) > 0)
- delay(1);
- dnode_destroy(dn);
-}
-
-dnode_t *
-dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object)
-{
- dnode_t *dn = dnode_create(os, dnp, NULL, object);
- DNODE_VERIFY(dn);
- return (dn);
-}
-
-static void
-dnode_buf_pageout(dmu_buf_t *db, void *arg)
-{
- dnode_t **children_dnodes = arg;
- int i;
- int epb = db->db_size >> DNODE_SHIFT;
-
- for (i = 0; i < epb; i++) {
- dnode_t *dn = children_dnodes[i];
- int n;
-
- if (dn == NULL)
- continue;
-#ifdef ZFS_DEBUG
- /*
- * If there are holds on this dnode, then there should
- * be holds on the dnode's containing dbuf as well; thus
- * it wouldn't be eligable for eviction and this function
- * would not have been called.
- */
- ASSERT(refcount_is_zero(&dn->dn_holds));
- ASSERT(list_head(&dn->dn_dbufs) == NULL);
- ASSERT(refcount_is_zero(&dn->dn_tx_holds));
-
- for (n = 0; n < TXG_SIZE; n++)
- ASSERT(!list_link_active(&dn->dn_dirty_link[n]));
-#endif
- children_dnodes[i] = NULL;
- dnode_destroy(dn);
- }
- kmem_free(children_dnodes, epb * sizeof (dnode_t *));
-}
-
-/*
- * errors:
- * EINVAL - invalid object number.
- * EIO - i/o error.
- * succeeds even for free dnodes.
- */
-int
-dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
- void *tag, dnode_t **dnp)
-{
- int epb, idx, err;
- int drop_struct_lock = FALSE;
- int type;
- uint64_t blk;
- dnode_t *mdn, *dn;
- dmu_buf_impl_t *db;
- dnode_t **children_dnodes;
-
- if (object == 0 || object >= DN_MAX_OBJECT)
- return (EINVAL);
-
- mdn = os->os_meta_dnode;
-
- DNODE_VERIFY(mdn);
-
- if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) {
- rw_enter(&mdn->dn_struct_rwlock, RW_READER);
- drop_struct_lock = TRUE;
- }
-
- blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t));
-
- db = dbuf_hold(mdn, blk, FTAG);
- if (drop_struct_lock)
- rw_exit(&mdn->dn_struct_rwlock);
- if (db == NULL)
- return (EIO);
- err = dbuf_read(db, NULL, DB_RF_CANFAIL);
- if (err) {
- dbuf_rele(db, FTAG);
- return (err);
- }
-
- ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
- epb = db->db.db_size >> DNODE_SHIFT;
-
- idx = object & (epb-1);
-
- children_dnodes = dmu_buf_get_user(&db->db);
- if (children_dnodes == NULL) {
- dnode_t **winner;
- children_dnodes = kmem_zalloc(epb * sizeof (dnode_t *),
- KM_SLEEP);
- if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL,
- dnode_buf_pageout)) {
- kmem_free(children_dnodes, epb * sizeof (dnode_t *));
- children_dnodes = winner;
- }
- }
-
- if ((dn = children_dnodes[idx]) == NULL) {
- dnode_t *winner;
- dn = dnode_create(os, (dnode_phys_t *)db->db.db_data+idx,
- db, object);
- winner = atomic_cas_ptr(&children_dnodes[idx], NULL, dn);
- if (winner != NULL) {
- dnode_destroy(dn);
- dn = winner;
- }
- }
-
- mutex_enter(&dn->dn_mtx);
- type = dn->dn_type;
- if (dn->dn_free_txg ||
- ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
- ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)) {
- mutex_exit(&dn->dn_mtx);
- dbuf_rele(db, FTAG);
- return (type == DMU_OT_NONE ? ENOENT : EEXIST);
- }
- mutex_exit(&dn->dn_mtx);
-
- if (refcount_add(&dn->dn_holds, tag) == 1)
- dbuf_add_ref(db, dn);
-
- DNODE_VERIFY(dn);
- ASSERT3P(dn->dn_dbuf, ==, db);
- ASSERT3U(dn->dn_object, ==, object);
- dbuf_rele(db, FTAG);
-
- *dnp = dn;
- return (0);
-}
-
-/*
- * Return held dnode if the object is allocated, NULL if not.
- */
-int
-dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp)
-{
- return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
-}
-
-void
-dnode_add_ref(dnode_t *dn, void *tag)
-{
- ASSERT(refcount_count(&dn->dn_holds) > 0);
- (void) refcount_add(&dn->dn_holds, tag);
-}
-
-void
-dnode_rele(dnode_t *dn, void *tag)
-{
- uint64_t refs;
-
- refs = refcount_remove(&dn->dn_holds, tag);
- /* NOTE: the DNODE_DNODE does not have a dn_dbuf */
- if (refs == 0 && dn->dn_dbuf)
- dbuf_rele(dn->dn_dbuf, dn);
-}
-
-void
-dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
-{
- objset_impl_t *os = dn->dn_objset;
- uint64_t txg = tx->tx_txg;
-
- if (dn->dn_object == DMU_META_DNODE_OBJECT)
- return;
-
- DNODE_VERIFY(dn);
-
-#ifdef ZFS_DEBUG
- mutex_enter(&dn->dn_mtx);
- ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg);
- /* ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg); */
- mutex_exit(&dn->dn_mtx);
-#endif
-
- mutex_enter(&os->os_lock);
-
- /*
- * If we are already marked dirty, we're done.
- */
- if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
- mutex_exit(&os->os_lock);
- return;
- }
-
- ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs));
- ASSERT(dn->dn_datablksz != 0);
- ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0);
-
- dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
- dn->dn_object, txg);
-
- if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) {
- list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn);
- } else {
- list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn);
- }
-
- mutex_exit(&os->os_lock);
-
- /*
- * The dnode maintains a hold on its containing dbuf as
- * long as there are holds on it. Each instantiated child
- * dbuf maintaines a hold on the dnode. When the last child
- * drops its hold, the dnode will drop its hold on the
- * containing dbuf. We add a "dirty hold" here so that the
- * dnode will hang around after we finish processing its
- * children.
- */
- dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg);
-
- (void) dbuf_dirty(dn->dn_dbuf, tx);
-
- dsl_dataset_dirty(os->os_dsl_dataset, tx);
-}
-
-void
-dnode_free(dnode_t *dn, dmu_tx_t *tx)
-{
- int txgoff = tx->tx_txg & TXG_MASK;
-
- dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg);
-
- /* we should be the only holder... hopefully */
- /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */
-
- mutex_enter(&dn->dn_mtx);
- if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) {
- mutex_exit(&dn->dn_mtx);
- return;
- }
- dn->dn_free_txg = tx->tx_txg;
- mutex_exit(&dn->dn_mtx);
-
- /*
- * If the dnode is already dirty, it needs to be moved from
- * the dirty list to the free list.
- */
- mutex_enter(&dn->dn_objset->os_lock);
- if (list_link_active(&dn->dn_dirty_link[txgoff])) {
- list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn);
- list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn);
- mutex_exit(&dn->dn_objset->os_lock);
- } else {
- mutex_exit(&dn->dn_objset->os_lock);
- dnode_setdirty(dn, tx);
- }
-}
-
-/*
- * Try to change the block size for the indicated dnode. This can only
- * succeed if there are no blocks allocated or dirty beyond first block
- */
-int
-dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db, *db_next;
- int have_db0 = FALSE;
-
- if (size == 0)
- size = SPA_MINBLOCKSIZE;
- if (size > SPA_MAXBLOCKSIZE)
- size = SPA_MAXBLOCKSIZE;
- else
- size = P2ROUNDUP(size, SPA_MINBLOCKSIZE);
-
- if (ibs == dn->dn_indblkshift)
- ibs = 0;
-
- if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
- return (0);
-
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-
- /* Check for any allocated blocks beyond the first */
- if (dn->dn_phys->dn_maxblkid != 0)
- goto fail;
-
- mutex_enter(&dn->dn_dbufs_mtx);
- for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
- db_next = list_next(&dn->dn_dbufs, db);
-
- if (db->db_blkid == 0) {
- have_db0 = TRUE;
- } else if (db->db_blkid != DB_BONUS_BLKID) {
- mutex_exit(&dn->dn_dbufs_mtx);
- goto fail;
- }
- }
- mutex_exit(&dn->dn_dbufs_mtx);
-
- if (ibs && dn->dn_nlevels != 1)
- goto fail;
-
- db = NULL;
- if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || have_db0) {
- /* obtain the old block */
- db = dbuf_hold(dn, 0, FTAG);
- dbuf_new_size(db, size, tx);
- }
-
- dnode_setdblksz(dn, size);
- dnode_setdirty(dn, tx);
- dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size;
- if (ibs) {
- dn->dn_indblkshift = ibs;
- dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs;
- }
-
- if (db)
- dbuf_rele(db, FTAG);
-
- rw_exit(&dn->dn_struct_rwlock);
- return (0);
-
-fail:
- rw_exit(&dn->dn_struct_rwlock);
- return (ENOTSUP);
-}
-
-void
-dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
-{
- uint64_t txgoff = tx->tx_txg & TXG_MASK;
- int drop_struct_lock = FALSE;
- int epbs, new_nlevels;
- uint64_t sz;
-
- ASSERT(blkid != DB_BONUS_BLKID);
-
- if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- drop_struct_lock = TRUE;
- }
-
- if (blkid <= dn->dn_maxblkid)
- goto out;
-
- dn->dn_maxblkid = blkid;
-
- /*
- * Compute the number of levels necessary to support the new maxblkid.
- */
- new_nlevels = 1;
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (sz = dn->dn_nblkptr;
- sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs)
- new_nlevels++;
-
- if (new_nlevels > dn->dn_nlevels) {
- int old_nlevels = dn->dn_nlevels;
- dmu_buf_impl_t *db;
- list_t *list;
- dbuf_dirty_record_t *new, *dr, *dr_next;
-
- dn->dn_nlevels = new_nlevels;
-
- ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
- dn->dn_next_nlevels[txgoff] = new_nlevels;
-
- /* dirty the left indirects */
- db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
- new = dbuf_dirty(db, tx);
- dbuf_rele(db, FTAG);
-
- /* transfer the dirty records to the new indirect */
- mutex_enter(&dn->dn_mtx);
- mutex_enter(&new->dt.di.dr_mtx);
- list = &dn->dn_dirty_records[txgoff];
- for (dr = list_head(list); dr; dr = dr_next) {
- dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
- if (dr->dr_dbuf->db_level != new_nlevels-1 &&
- dr->dr_dbuf->db_blkid != DB_BONUS_BLKID) {
- ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
- list_remove(&dn->dn_dirty_records[txgoff], dr);
- list_insert_tail(&new->dt.di.dr_children, dr);
- dr->dr_parent = new;
- }
- }
- mutex_exit(&new->dt.di.dr_mtx);
- mutex_exit(&dn->dn_mtx);
- }
-
-out:
- if (drop_struct_lock)
- rw_exit(&dn->dn_struct_rwlock);
-}
-
-void
-dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
-{
- avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
- avl_index_t where;
- free_range_t *rp;
- free_range_t rp_tofind;
- uint64_t endblk = blkid + nblks;
-
- ASSERT(MUTEX_HELD(&dn->dn_mtx));
- ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */
-
- dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
- blkid, nblks, tx->tx_txg);
- rp_tofind.fr_blkid = blkid;
- rp = avl_find(tree, &rp_tofind, &where);
- if (rp == NULL)
- rp = avl_nearest(tree, where, AVL_BEFORE);
- if (rp == NULL)
- rp = avl_nearest(tree, where, AVL_AFTER);
-
- while (rp && (rp->fr_blkid <= blkid + nblks)) {
- uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks;
- free_range_t *nrp = AVL_NEXT(tree, rp);
-
- if (blkid <= rp->fr_blkid && endblk >= fr_endblk) {
- /* clear this entire range */
- avl_remove(tree, rp);
- kmem_free(rp, sizeof (free_range_t));
- } else if (blkid <= rp->fr_blkid &&
- endblk > rp->fr_blkid && endblk < fr_endblk) {
- /* clear the beginning of this range */
- rp->fr_blkid = endblk;
- rp->fr_nblks = fr_endblk - endblk;
- } else if (blkid > rp->fr_blkid && blkid < fr_endblk &&
- endblk >= fr_endblk) {
- /* clear the end of this range */
- rp->fr_nblks = blkid - rp->fr_blkid;
- } else if (blkid > rp->fr_blkid && endblk < fr_endblk) {
- /* clear a chunk out of this range */
- free_range_t *new_rp =
- kmem_alloc(sizeof (free_range_t), KM_SLEEP);
-
- new_rp->fr_blkid = endblk;
- new_rp->fr_nblks = fr_endblk - endblk;
- avl_insert_here(tree, new_rp, rp, AVL_AFTER);
- rp->fr_nblks = blkid - rp->fr_blkid;
- }
- /* there may be no overlap */
- rp = nrp;
- }
-}
-
-void
-dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db;
- uint64_t blkoff, blkid, nblks;
- int blksz, head;
- int trunc = FALSE;
-
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- blksz = dn->dn_datablksz;
-
- /* If the range is past the end of the file, this is a no-op */
- if (off >= blksz * (dn->dn_maxblkid+1))
- goto out;
- if (len == -1ULL) {
- len = UINT64_MAX - off;
- trunc = TRUE;
- }
-
- /*
- * First, block align the region to free:
- */
- if (ISP2(blksz)) {
- head = P2NPHASE(off, blksz);
- blkoff = P2PHASE(off, blksz);
- } else {
- ASSERT(dn->dn_maxblkid == 0);
- if (off == 0 && len >= blksz) {
- /* Freeing the whole block; don't do any head. */
- head = 0;
- } else {
- /* Freeing part of the block. */
- head = blksz - off;
- ASSERT3U(head, >, 0);
- }
- blkoff = off;
- }
- /* zero out any partial block data at the start of the range */
- if (head) {
- ASSERT3U(blkoff + head, ==, blksz);
- if (len < head)
- head = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE,
- FTAG, &db) == 0) {
- caddr_t data;
-
- /* don't dirty if it isn't on disk and isn't dirty */
- if (db->db_last_dirty ||
- (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
- rw_exit(&dn->dn_struct_rwlock);
- dbuf_will_dirty(db, tx);
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- data = db->db.db_data;
- bzero(data + blkoff, head);
- }
- dbuf_rele(db, FTAG);
- }
- off += head;
- len -= head;
- }
-
- /* If the range was less than one block, we're done */
- if (len == 0 || off >= blksz * (dn->dn_maxblkid+1))
- goto out;
-
- if (!ISP2(blksz)) {
- /*
- * They are freeing the whole block of a
- * non-power-of-two blocksize file. Skip all the messy
- * math.
- */
- ASSERT3U(off, ==, 0);
- ASSERT3U(len, >=, blksz);
- blkid = 0;
- nblks = 1;
- } else {
- int tail;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- int blkshift = dn->dn_datablkshift;
-
- /* If the remaining range is past end of file, we're done */
- if (off > dn->dn_maxblkid << blkshift)
- goto out;
-
- if (off + len == UINT64_MAX)
- tail = 0;
- else
- tail = P2PHASE(len, blksz);
-
- ASSERT3U(P2PHASE(off, blksz), ==, 0);
- /* zero out any partial block data at the end of the range */
- if (tail) {
- if (len < tail)
- tail = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len),
- TRUE, FTAG, &db) == 0) {
- /* don't dirty if not on disk and not dirty */
- if (db->db_last_dirty ||
- (db->db_blkptr &&
- !BP_IS_HOLE(db->db_blkptr))) {
- rw_exit(&dn->dn_struct_rwlock);
- dbuf_will_dirty(db, tx);
- rw_enter(&dn->dn_struct_rwlock,
- RW_WRITER);
- bzero(db->db.db_data, tail);
- }
- dbuf_rele(db, FTAG);
- }
- len -= tail;
- }
- /* If the range did not include a full block, we are done */
- if (len == 0)
- goto out;
-
- /* dirty the left indirects */
- if (dn->dn_nlevels > 1 && off != 0) {
- db = dbuf_hold_level(dn, 1,
- (off - head) >> (blkshift + epbs), FTAG);
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- }
-
- /* dirty the right indirects */
- if (dn->dn_nlevels > 1 && !trunc) {
- db = dbuf_hold_level(dn, 1,
- (off + len + tail - 1) >> (blkshift + epbs), FTAG);
- dbuf_will_dirty(db, tx);
- dbuf_rele(db, FTAG);
- }
-
- /*
- * Finally, add this range to the dnode range list, we
- * will finish up this free operation in the syncing phase.
- */
- ASSERT(IS_P2ALIGNED(off, 1<<blkshift));
- ASSERT(off + len == UINT64_MAX ||
- IS_P2ALIGNED(len, 1<<blkshift));
- blkid = off >> blkshift;
- nblks = len >> blkshift;
-
- if (trunc)
- dn->dn_maxblkid = (blkid ? blkid - 1 : 0);
- }
-
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, blkid, nblks, tx);
- {
- free_range_t *rp, *found;
- avl_index_t where;
- avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
-
- /* Add new range to dn_ranges */
- rp = kmem_alloc(sizeof (free_range_t), KM_SLEEP);
- rp->fr_blkid = blkid;
- rp->fr_nblks = nblks;
- found = avl_find(tree, rp, &where);
- ASSERT(found == NULL);
- avl_insert(tree, rp, where);
- dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
- blkid, nblks, tx->tx_txg);
- }
- mutex_exit(&dn->dn_mtx);
-
- dbuf_free_range(dn, blkid, nblks, tx);
- dnode_setdirty(dn, tx);
-out:
- rw_exit(&dn->dn_struct_rwlock);
-}
-
-/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */
-uint64_t
-dnode_block_freed(dnode_t *dn, uint64_t blkid)
-{
- free_range_t range_tofind;
- void *dp = spa_get_dsl(dn->dn_objset->os_spa);
- int i;
-
- if (blkid == DB_BONUS_BLKID)
- return (FALSE);
-
- /*
- * If we're in the process of opening the pool, dp will not be
- * set yet, but there shouldn't be anything dirty.
- */
- if (dp == NULL)
- return (FALSE);
-
- if (dn->dn_free_txg)
- return (TRUE);
-
- /*
- * If dn_datablkshift is not set, then there's only a single
- * block, in which case there will never be a free range so it
- * won't matter.
- */
- range_tofind.fr_blkid = blkid;
- mutex_enter(&dn->dn_mtx);
- for (i = 0; i < TXG_SIZE; i++) {
- free_range_t *range_found;
- avl_index_t idx;
-
- range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx);
- if (range_found) {
- ASSERT(range_found->fr_nblks > 0);
- break;
- }
- range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE);
- if (range_found &&
- range_found->fr_blkid + range_found->fr_nblks > blkid)
- break;
- }
- mutex_exit(&dn->dn_mtx);
- return (i < TXG_SIZE);
-}
-
-/* call from syncing context when we actually write/free space for this dnode */
-void
-dnode_diduse_space(dnode_t *dn, int64_t delta)
-{
- uint64_t space;
- dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n",
- dn, dn->dn_phys,
- (u_longlong_t)dn->dn_phys->dn_used,
- (longlong_t)delta);
-
- mutex_enter(&dn->dn_mtx);
- space = DN_USED_BYTES(dn->dn_phys);
- if (delta > 0) {
- ASSERT3U(space + delta, >=, space); /* no overflow */
- } else {
- ASSERT3U(space, >=, -delta); /* no underflow */
- }
- space += delta;
- if (spa_version(dn->dn_objset->os_spa) < ZFS_VERSION_DNODE_BYTES) {
- ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
- ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0);
- dn->dn_phys->dn_used = space >> DEV_BSHIFT;
- } else {
- dn->dn_phys->dn_used = space;
- dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES;
- }
- mutex_exit(&dn->dn_mtx);
-}
-
-/*
- * Call when we think we're going to write/free space in open context.
- * Be conservative (ie. OK to write less than this or free more than
- * this, but don't write more or free less).
- */
-void
-dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
-{
- objset_impl_t *os = dn->dn_objset;
- dsl_dataset_t *ds = os->os_dsl_dataset;
-
- if (space > 0)
- space = spa_get_asize(os->os_spa, space);
-
- if (ds)
- dsl_dir_willuse_space(ds->ds_dir, space, tx);
-
- dmu_tx_willuse_space(tx, space);
-}
-
-static int
-dnode_next_offset_level(dnode_t *dn, boolean_t hole, uint64_t *offset,
- int lvl, uint64_t blkfill, uint64_t txg)
-{
- dmu_buf_impl_t *db = NULL;
- void *data = NULL;
- uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
- uint64_t epb = 1ULL << epbs;
- uint64_t minfill, maxfill;
- int i, error, span;
-
- dprintf("probing object %llu offset %llx level %d of %u\n",
- dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
-
- if (lvl == dn->dn_phys->dn_nlevels) {
- error = 0;
- epb = dn->dn_phys->dn_nblkptr;
- data = dn->dn_phys->dn_blkptr;
- } else {
- uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl);
- error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db);
- if (error) {
- if (error == ENOENT)
- return (hole ? 0 : ESRCH);
- return (error);
- }
- error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
- if (error) {
- dbuf_rele(db, FTAG);
- return (error);
- }
- data = db->db.db_data;
- }
-
- if (db && txg &&
- (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) {
- error = ESRCH;
- } else if (lvl == 0) {
- dnode_phys_t *dnp = data;
- span = DNODE_SHIFT;
- ASSERT(dn->dn_type == DMU_OT_DNODE);
-
- for (i = (*offset >> span) & (blkfill - 1); i < blkfill; i++) {
- boolean_t newcontents = B_TRUE;
- if (txg) {
- int j;
- newcontents = B_FALSE;
- for (j = 0; j < dnp[i].dn_nblkptr; j++) {
- if (dnp[i].dn_blkptr[j].blk_birth > txg)
- newcontents = B_TRUE;
- }
- }
- if (!dnp[i].dn_type == hole && newcontents)
- break;
- *offset += 1ULL << span;
- }
- if (i == blkfill)
- error = ESRCH;
- } else {
- blkptr_t *bp = data;
- span = (lvl - 1) * epbs + dn->dn_datablkshift;
- minfill = 0;
- maxfill = blkfill << ((lvl - 1) * epbs);
-
- if (hole)
- maxfill--;
- else
- minfill++;
-
- for (i = (*offset >> span) & ((1ULL << epbs) - 1);
- i < epb; i++) {
- if (bp[i].blk_fill >= minfill &&
- bp[i].blk_fill <= maxfill &&
- bp[i].blk_birth > txg)
- break;
- *offset += 1ULL << span;
- }
- if (i >= epb)
- error = ESRCH;
- }
-
- if (db)
- dbuf_rele(db, FTAG);
-
- return (error);
-}
-
-/*
- * Find the next hole, data, or sparse region at or after *offset.
- * The value 'blkfill' tells us how many items we expect to find
- * in an L0 data block; this value is 1 for normal objects,
- * DNODES_PER_BLOCK for the meta dnode, and some fraction of
- * DNODES_PER_BLOCK when searching for sparse regions thereof.
- *
- * Examples:
- *
- * dnode_next_offset(dn, hole, offset, 1, 1, 0);
- * Finds the next hole/data in a file.
- * Used in dmu_offset_next().
- *
- * dnode_next_offset(mdn, hole, offset, 0, DNODES_PER_BLOCK, txg);
- * Finds the next free/allocated dnode an objset's meta-dnode.
- * Only finds objects that have new contents since txg (ie.
- * bonus buffer changes and content removal are ignored).
- * Used in dmu_object_next().
- *
- * dnode_next_offset(mdn, TRUE, offset, 2, DNODES_PER_BLOCK >> 2, 0);
- * Finds the next L2 meta-dnode bp that's at most 1/4 full.
- * Used in dmu_object_alloc().
- */
-int
-dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *offset,
- int minlvl, uint64_t blkfill, uint64_t txg)
-{
- int lvl, maxlvl;
- int error = 0;
- uint64_t initial_offset = *offset;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
-
- if (dn->dn_phys->dn_nlevels == 0) {
- rw_exit(&dn->dn_struct_rwlock);
- return (ESRCH);
- }
-
- if (dn->dn_datablkshift == 0) {
- if (*offset < dn->dn_datablksz) {
- if (hole)
- *offset = dn->dn_datablksz;
- } else {
- error = ESRCH;
- }
- rw_exit(&dn->dn_struct_rwlock);
- return (error);
- }
-
- maxlvl = dn->dn_phys->dn_nlevels;
-
- for (lvl = minlvl; lvl <= maxlvl; lvl++) {
- error = dnode_next_offset_level(dn,
- hole, offset, lvl, blkfill, txg);
- if (error != ESRCH)
- break;
- }
-
- while (--lvl >= minlvl && error == 0) {
- error = dnode_next_offset_level(dn,
- hole, offset, lvl, blkfill, txg);
- }
-
- rw_exit(&dn->dn_struct_rwlock);
-
- if (error == 0 && initial_offset > *offset)
- error = ESRCH;
-
- return (error);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
deleted file mode 100644
index 9e8c7ad..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/dbuf.h>
-#include <sys/dnode.h>
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h>
-#include <sys/spa.h>
-
-static void
-dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db;
- int txgoff = tx->tx_txg & TXG_MASK;
- int nblkptr = dn->dn_phys->dn_nblkptr;
- int old_toplvl = dn->dn_phys->dn_nlevels - 1;
- int new_level = dn->dn_next_nlevels[txgoff];
- int i;
-
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-
- /* this dnode can't be paged out because it's dirty */
- ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
- ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
- ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);
-
- db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
- ASSERT(db != NULL);
-
- dn->dn_phys->dn_nlevels = new_level;
- dprintf("os=%p obj=%llu, increase to %d\n",
- dn->dn_objset, dn->dn_object,
- dn->dn_phys->dn_nlevels);
-
- /* check for existing blkptrs in the dnode */
- for (i = 0; i < nblkptr; i++)
- if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
- break;
- if (i != nblkptr) {
- /* transfer dnode's block pointers to new indirect block */
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
- ASSERT(db->db.db_data);
- ASSERT(arc_released(db->db_buf));
- ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
- bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
- sizeof (blkptr_t) * nblkptr);
- arc_buf_freeze(db->db_buf);
- }
-
- /* set dbuf's parent pointers to new indirect buf */
- for (i = 0; i < nblkptr; i++) {
- dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);
-
- if (child == NULL)
- continue;
- ASSERT3P(child->db_dnode, ==, dn);
- if (child->db_parent && child->db_parent != dn->dn_dbuf) {
- ASSERT(child->db_parent->db_level == db->db_level);
- ASSERT(child->db_blkptr !=
- &dn->dn_phys->dn_blkptr[child->db_blkid]);
- mutex_exit(&child->db_mtx);
- continue;
- }
- ASSERT(child->db_parent == NULL ||
- child->db_parent == dn->dn_dbuf);
-
- child->db_parent = db;
- dbuf_add_ref(db, child);
- if (db->db.db_data)
- child->db_blkptr = (blkptr_t *)db->db.db_data + i;
- else
- child->db_blkptr = NULL;
- dprintf_dbuf_bp(child, child->db_blkptr,
- "changed db_blkptr to new indirect %s", "");
-
- mutex_exit(&child->db_mtx);
- }
-
- bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);
-
- dbuf_rele(db, FTAG);
-
- rw_exit(&dn->dn_struct_rwlock);
-}
-
-static void
-free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
-{
- objset_impl_t *os = dn->dn_objset;
- uint64_t bytesfreed = 0;
- int i;
-
- dprintf("os=%p obj=%llx num=%d\n", os, dn->dn_object, num);
-
- for (i = 0; i < num; i++, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
-
- bytesfreed += bp_get_dasize(os->os_spa, bp);
- ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
- dsl_dataset_block_kill(os->os_dsl_dataset, bp, dn->dn_zio, tx);
- bzero(bp, sizeof (blkptr_t));
- }
- dnode_diduse_space(dn, -bytesfreed);
-}
-
-#ifdef ZFS_DEBUG
-static void
-free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
-{
- int off, num;
- int i, err, epbs;
- uint64_t txg = tx->tx_txg;
-
- epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
- off = start - (db->db_blkid * 1<<epbs);
- num = end - start + 1;
-
- ASSERT3U(off, >=, 0);
- ASSERT3U(num, >=, 0);
- ASSERT3U(db->db_level, >, 0);
- ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift);
- ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
- ASSERT(db->db_blkptr != NULL);
-
- for (i = off; i < off+num; i++) {
- uint64_t *buf;
- dmu_buf_impl_t *child;
- dbuf_dirty_record_t *dr;
- int j;
-
- ASSERT(db->db_level == 1);
-
- rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(db->db_dnode, db->db_level-1,
- (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
- rw_exit(&db->db_dnode->dn_struct_rwlock);
- if (err == ENOENT)
- continue;
- ASSERT(err == 0);
- ASSERT(child->db_level == 0);
- dr = child->db_last_dirty;
- while (dr && dr->dr_txg > txg)
- dr = dr->dr_next;
- ASSERT(dr == NULL || dr->dr_txg == txg);
-
- /* data_old better be zeroed */
- if (dr) {
- buf = dr->dt.dl.dr_data->b_data;
- for (j = 0; j < child->db.db_size >> 3; j++) {
- if (buf[j] != 0) {
- panic("freed data not zero: "
- "child=%p i=%d off=%d num=%d\n",
- child, i, off, num);
- }
- }
- }
-
- /*
- * db_data better be zeroed unless it's dirty in a
- * future txg.
- */
- mutex_enter(&child->db_mtx);
- buf = child->db.db_data;
- if (buf != NULL && child->db_state != DB_FILL &&
- child->db_last_dirty == NULL) {
- for (j = 0; j < child->db.db_size >> 3; j++) {
- if (buf[j] != 0) {
- panic("freed data not zero: "
- "child=%p i=%d off=%d num=%d\n",
- child, i, off, num);
- }
- }
- }
- mutex_exit(&child->db_mtx);
-
- dbuf_rele(child, FTAG);
- }
-}
-#endif
-
-static int
-free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
- dmu_tx_t *tx)
-{
- dnode_t *dn = db->db_dnode;
- blkptr_t *bp;
- dmu_buf_impl_t *subdb;
- uint64_t start, end, dbstart, dbend, i;
- int epbs, shift, err;
- int all = TRUE;
-
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
- arc_release(db->db_buf, db);
- bp = (blkptr_t *)db->db.db_data;
-
- epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
- shift = (db->db_level - 1) * epbs;
- dbstart = db->db_blkid << epbs;
- start = blkid >> shift;
- if (dbstart < start) {
- bp += start - dbstart;
- all = FALSE;
- } else {
- start = dbstart;
- }
- dbend = ((db->db_blkid + 1) << epbs) - 1;
- end = (blkid + nblks - 1) >> shift;
- if (dbend <= end)
- end = dbend;
- else if (all)
- all = trunc;
- ASSERT3U(start, <=, end);
-
- if (db->db_level == 1) {
- FREE_VERIFY(db, start, end, tx);
- free_blocks(dn, bp, end-start+1, tx);
- arc_buf_freeze(db->db_buf);
- ASSERT(all || db->db_last_dirty);
- return (all);
- }
-
- for (i = start; i <= end; i++, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
- ASSERT3U(err, ==, 0);
- rw_exit(&dn->dn_struct_rwlock);
-
- if (free_children(subdb, blkid, nblks, trunc, tx)) {
- ASSERT3P(subdb->db_blkptr, ==, bp);
- free_blocks(dn, bp, 1, tx);
- } else {
- all = FALSE;
- }
- dbuf_rele(subdb, FTAG);
- }
- arc_buf_freeze(db->db_buf);
-#ifdef ZFS_DEBUG
- bp -= (end-start)+1;
- for (i = start; i <= end; i++, bp++) {
- if (i == start && blkid != 0)
- continue;
- else if (i == end && !trunc)
- continue;
- ASSERT3U(bp->blk_birth, ==, 0);
- }
-#endif
- ASSERT(all || db->db_last_dirty);
- return (all);
-}
-
-/*
- * free_range: Traverse the indicated range of the provided file
- * and "free" all the blocks contained there.
- */
-static void
-dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
-{
- blkptr_t *bp = dn->dn_phys->dn_blkptr;
- dmu_buf_impl_t *db;
- int trunc, start, end, shift, i, err;
- int dnlevel = dn->dn_phys->dn_nlevels;
-
- if (blkid > dn->dn_phys->dn_maxblkid)
- return;
-
- ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
- trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
- if (trunc)
- nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
-
- /* There are no indirect blocks in the object */
- if (dnlevel == 1) {
- if (blkid >= dn->dn_phys->dn_nblkptr) {
- /* this range was never made persistent */
- return;
- }
- ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
- free_blocks(dn, bp + blkid, nblks, tx);
- if (trunc) {
- uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
- (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
- ASSERT(off < dn->dn_phys->dn_maxblkid ||
- dn->dn_phys->dn_maxblkid == 0 ||
- dnode_next_offset(dn, FALSE, &off,
- 1, 1, 0) != 0);
- }
- return;
- }
-
- shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
- start = blkid >> shift;
- ASSERT(start < dn->dn_phys->dn_nblkptr);
- end = (blkid + nblks - 1) >> shift;
- bp += start;
- for (i = start; i <= end; i++, bp++) {
- if (BP_IS_HOLE(bp))
- continue;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
- ASSERT3U(err, ==, 0);
- rw_exit(&dn->dn_struct_rwlock);
-
- if (free_children(db, blkid, nblks, trunc, tx)) {
- ASSERT3P(db->db_blkptr, ==, bp);
- free_blocks(dn, bp, 1, tx);
- }
- dbuf_rele(db, FTAG);
- }
- if (trunc) {
- uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
- (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
- dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
- ASSERT(off < dn->dn_phys->dn_maxblkid ||
- dn->dn_phys->dn_maxblkid == 0 ||
- dnode_next_offset(dn, FALSE, &off, 1, 1, 0) != 0);
- }
-}
-
-/*
- * Try to kick all the dnodes dbufs out of the cache...
- */
-int
-dnode_evict_dbufs(dnode_t *dn, int try)
-{
- int progress;
- int pass = 0;
-
- do {
- dmu_buf_impl_t *db, marker;
- int evicting = FALSE;
-
- progress = FALSE;
- mutex_enter(&dn->dn_dbufs_mtx);
- list_insert_tail(&dn->dn_dbufs, &marker);
- db = list_head(&dn->dn_dbufs);
- for (; db != &marker; db = list_head(&dn->dn_dbufs)) {
- list_remove(&dn->dn_dbufs, db);
- list_insert_tail(&dn->dn_dbufs, db);
-
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_EVICTING) {
- progress = TRUE;
- evicting = TRUE;
- mutex_exit(&db->db_mtx);
- } else if (refcount_is_zero(&db->db_holds)) {
- progress = TRUE;
- ASSERT(!arc_released(db->db_buf));
- dbuf_clear(db); /* exits db_mtx for us */
- } else {
- mutex_exit(&db->db_mtx);
- }
-
- }
- list_remove(&dn->dn_dbufs, &marker);
- /*
- * NB: we need to drop dn_dbufs_mtx between passes so
- * that any DB_EVICTING dbufs can make progress.
- * Ideally, we would have some cv we could wait on, but
- * since we don't, just wait a bit to give the other
- * thread a chance to run.
- */
- mutex_exit(&dn->dn_dbufs_mtx);
- if (evicting)
- delay(1);
- pass++;
- ASSERT(pass < 100); /* sanity check */
- } while (progress);
-
- /*
- * This function works fine even if it can't evict everything.
- * If were only asked to try to evict everything then
- * return an error if we can't. Otherwise panic as the caller
- * expects total eviction.
- */
- if (list_head(&dn->dn_dbufs) != NULL) {
- if (try) {
- return (1);
- } else {
- panic("dangling dbufs (dn=%p, dbuf=%p)\n",
- dn, list_head(&dn->dn_dbufs));
- }
- }
-
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
- mutex_enter(&dn->dn_bonus->db_mtx);
- dbuf_evict(dn->dn_bonus);
- dn->dn_bonus = NULL;
- }
- rw_exit(&dn->dn_struct_rwlock);
- return (0);
-}
-
-static void
-dnode_undirty_dbufs(list_t *list)
-{
- dbuf_dirty_record_t *dr;
-
- while (dr = list_head(list)) {
- dmu_buf_impl_t *db = dr->dr_dbuf;
- uint64_t txg = dr->dr_txg;
-
- mutex_enter(&db->db_mtx);
- /* XXX - use dbuf_undirty()? */
- list_remove(list, dr);
- ASSERT(db->db_last_dirty == dr);
- db->db_last_dirty = NULL;
- db->db_dirtycnt -= 1;
- if (db->db_level == 0) {
- ASSERT(db->db_blkid == DB_BONUS_BLKID ||
- dr->dt.dl.dr_data == db->db_buf);
- dbuf_unoverride(dr);
- mutex_exit(&db->db_mtx);
- } else {
- mutex_exit(&db->db_mtx);
- dnode_undirty_dbufs(&dr->dt.di.dr_children);
- list_destroy(&dr->dt.di.dr_children);
- mutex_destroy(&dr->dt.di.dr_mtx);
- }
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
- dbuf_rele(db, (void *)(uintptr_t)txg);
- }
-}
-
-static void
-dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
-{
- int txgoff = tx->tx_txg & TXG_MASK;
-
- ASSERT(dmu_tx_is_syncing(tx));
-
- dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
- (void) dnode_evict_dbufs(dn, 0);
- ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
-
- /*
- * XXX - It would be nice to assert this, but we may still
- * have residual holds from async evictions from the arc...
- *
- * zfs_obj_to_path() also depends on this being
- * commented out.
- *
- * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
- */
-
- /* Undirty next bits */
- dn->dn_next_nlevels[txgoff] = 0;
- dn->dn_next_indblkshift[txgoff] = 0;
- dn->dn_next_blksz[txgoff] = 0;
-
- /* free up all the blocks in the file. */
- dnode_sync_free_range(dn, 0, dn->dn_phys->dn_maxblkid+1, tx);
- ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
-
- /* ASSERT(blkptrs are zero); */
- ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
- ASSERT(dn->dn_type != DMU_OT_NONE);
-
- ASSERT(dn->dn_free_txg > 0);
- if (dn->dn_allocated_txg != dn->dn_free_txg)
- dbuf_will_dirty(dn->dn_dbuf, tx);
- bzero(dn->dn_phys, sizeof (dnode_phys_t));
-
- mutex_enter(&dn->dn_mtx);
- dn->dn_type = DMU_OT_NONE;
- dn->dn_maxblkid = 0;
- dn->dn_allocated_txg = 0;
- mutex_exit(&dn->dn_mtx);
-
- ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
-
- dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
- /*
- * Now that we've released our hold, the dnode may
- * be evicted, so we musn't access it.
- */
-}
-
-/*
- * Write out the dnode's dirty buffers.
- *
- * NOTE: The dnode is kept in memory by being dirty. Once the
- * dirty bit is cleared, it may be evicted. Beware of this!
- */
-void
-dnode_sync(dnode_t *dn, dmu_tx_t *tx)
-{
- free_range_t *rp;
- dnode_phys_t *dnp = dn->dn_phys;
- int txgoff = tx->tx_txg & TXG_MASK;
- list_t *list = &dn->dn_dirty_records[txgoff];
-
- ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
- DNODE_VERIFY(dn);
-
- ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
-
- mutex_enter(&dn->dn_mtx);
- if (dn->dn_allocated_txg == tx->tx_txg) {
- /* The dnode is newly allocated or reallocated */
- if (dnp->dn_type == DMU_OT_NONE) {
- /* this is a first alloc, not a realloc */
- /* XXX shouldn't the phys already be zeroed? */
- bzero(dnp, DNODE_CORE_SIZE);
- dnp->dn_nlevels = 1;
- }
-
- if (dn->dn_nblkptr > dnp->dn_nblkptr) {
- /* zero the new blkptrs we are gaining */
- bzero(dnp->dn_blkptr + dnp->dn_nblkptr,
- sizeof (blkptr_t) *
- (dn->dn_nblkptr - dnp->dn_nblkptr));
- }
- dnp->dn_type = dn->dn_type;
- dnp->dn_bonustype = dn->dn_bonustype;
- dnp->dn_bonuslen = dn->dn_bonuslen;
- dnp->dn_nblkptr = dn->dn_nblkptr;
- }
-
- ASSERT(dnp->dn_nlevels > 1 ||
- BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
- BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
- dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
-
- if (dn->dn_next_blksz[txgoff]) {
- ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
- SPA_MINBLOCKSIZE) == 0);
- ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
- list_head(list) != NULL ||
- dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
- dnp->dn_datablkszsec);
- dnp->dn_datablkszsec =
- dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
- dn->dn_next_blksz[txgoff] = 0;
- }
-
- if (dn->dn_next_indblkshift[txgoff]) {
- ASSERT(dnp->dn_nlevels == 1);
- dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
- dn->dn_next_indblkshift[txgoff] = 0;
- }
-
- /*
- * Just take the live (open-context) values for checksum and compress.
- * Strictly speaking it's a future leak, but nothing bad happens if we
- * start using the new checksum or compress algorithm a little early.
- */
- dnp->dn_checksum = dn->dn_checksum;
- dnp->dn_compress = dn->dn_compress;
-
- mutex_exit(&dn->dn_mtx);
-
- /* process all the "freed" ranges in the file */
- if (dn->dn_free_txg == 0 || dn->dn_free_txg > tx->tx_txg) {
- for (rp = avl_last(&dn->dn_ranges[txgoff]); rp != NULL;
- rp = AVL_PREV(&dn->dn_ranges[txgoff], rp))
- dnode_sync_free_range(dn,
- rp->fr_blkid, rp->fr_nblks, tx);
- }
- mutex_enter(&dn->dn_mtx);
- for (rp = avl_first(&dn->dn_ranges[txgoff]); rp; ) {
- free_range_t *last = rp;
- rp = AVL_NEXT(&dn->dn_ranges[txgoff], rp);
- avl_remove(&dn->dn_ranges[txgoff], last);
- kmem_free(last, sizeof (free_range_t));
- }
- mutex_exit(&dn->dn_mtx);
-
- if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
- dnode_sync_free(dn, tx);
- return;
- }
-
- if (dn->dn_next_nlevels[txgoff]) {
- dnode_increase_indirection(dn, tx);
- dn->dn_next_nlevels[txgoff] = 0;
- }
-
- dbuf_sync_list(list, tx);
-
- if (dn->dn_object != DMU_META_DNODE_OBJECT) {
- ASSERT3P(list_head(list), ==, NULL);
- dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
- }
-
- /*
- * Although we have dropped our reference to the dnode, it
- * can't be evicted until its written, and we haven't yet
- * initiated the IO for the dnode's dbuf.
- */
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
deleted file mode 100644
index 7d4689f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ /dev/null
@@ -1,2035 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_synctask.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dmu_tx.h>
-#include <sys/arc.h>
-#include <sys/zio.h>
-#include <sys/zap.h>
-#include <sys/unique.h>
-#include <sys/zfs_context.h>
-#include <sys/zfs_ioctl.h>
-
-static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
-static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
-static dsl_checkfunc_t dsl_dataset_rollback_check;
-static dsl_syncfunc_t dsl_dataset_rollback_sync;
-static dsl_checkfunc_t dsl_dataset_destroy_check;
-static dsl_syncfunc_t dsl_dataset_destroy_sync;
-
-#define DS_REF_MAX (1ULL << 62)
-
-#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
-
-/*
- * We use weighted reference counts to express the various forms of exclusion
- * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open
- * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
- * This makes the exclusion logic simple: the total refcnt for all opens cannot
- * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their
- * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume
- * just over half of the refcnt space, so there can't be more than one, but it
- * can peacefully coexist with any number of STANDARD opens.
- */
-static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
- 0, /* DS_MODE_NONE - invalid */
- 1, /* DS_MODE_STANDARD - unlimited number */
- (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */
- DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */
-};
-
-
-void
-dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
-{
- int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
- int compressed = BP_GET_PSIZE(bp);
- int uncompressed = BP_GET_UCSIZE(bp);
-
- dprintf_bp(bp, "born, ds=%p\n", ds);
-
- ASSERT(dmu_tx_is_syncing(tx));
- /* It could have been compressed away to nothing */
- if (BP_IS_HOLE(bp))
- return;
- ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
- ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
- if (ds == NULL) {
- /*
- * Account for the meta-objset space in its placeholder
- * dsl_dir.
- */
- ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
- dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
- used, compressed, uncompressed, tx);
- dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
- return;
- }
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- mutex_enter(&ds->ds_lock);
- ds->ds_phys->ds_used_bytes += used;
- ds->ds_phys->ds_compressed_bytes += compressed;
- ds->ds_phys->ds_uncompressed_bytes += uncompressed;
- ds->ds_phys->ds_unique_bytes += used;
- mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir,
- used, compressed, uncompressed, tx);
-}
-
-void
-dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
- dmu_tx_t *tx)
-{
- int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
- int compressed = BP_GET_PSIZE(bp);
- int uncompressed = BP_GET_UCSIZE(bp);
-
- ASSERT(dmu_tx_is_syncing(tx));
- /* No block pointer => nothing to free */
- if (BP_IS_HOLE(bp))
- return;
-
- ASSERT(used > 0);
- if (ds == NULL) {
- int err;
- /*
- * Account for the meta-objset space in its placeholder
- * dataset.
- */
- err = arc_free(pio, tx->tx_pool->dp_spa,
- tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
- ASSERT(err == 0);
-
- dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
- -used, -compressed, -uncompressed, tx);
- dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
- return;
- }
- ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
-
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
-
- if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
- int err;
-
- dprintf_bp(bp, "freeing: %s", "");
- err = arc_free(pio, tx->tx_pool->dp_spa,
- tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
- ASSERT(err == 0);
-
- mutex_enter(&ds->ds_lock);
- /* XXX unique_bytes is not accurate for head datasets */
- /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
- ds->ds_phys->ds_unique_bytes -= used;
- mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir,
- -used, -compressed, -uncompressed, tx);
- } else {
- dprintf_bp(bp, "putting on dead list: %s", "");
- VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
- /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
- if (ds->ds_phys->ds_prev_snap_obj != 0) {
- ASSERT3U(ds->ds_prev->ds_object, ==,
- ds->ds_phys->ds_prev_snap_obj);
- ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
- if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
- ds->ds_object && bp->blk_birth >
- ds->ds_prev->ds_phys->ds_prev_snap_txg) {
- dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
- mutex_enter(&ds->ds_prev->ds_lock);
- ds->ds_prev->ds_phys->ds_unique_bytes +=
- used;
- mutex_exit(&ds->ds_prev->ds_lock);
- }
- }
- }
- mutex_enter(&ds->ds_lock);
- ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
- ds->ds_phys->ds_used_bytes -= used;
- ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
- ds->ds_phys->ds_compressed_bytes -= compressed;
- ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
- ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
- mutex_exit(&ds->ds_lock);
-}
-
-uint64_t
-dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
-{
- uint64_t trysnap = 0;
-
- if (ds == NULL)
- return (0);
- /*
- * The snapshot creation could fail, but that would cause an
- * incorrect FALSE return, which would only result in an
- * overestimation of the amount of space that an operation would
- * consume, which is OK.
- *
- * There's also a small window where we could miss a pending
- * snapshot, because we could set the sync task in the quiescing
- * phase. So this should only be used as a guess.
- */
- if (ds->ds_trysnap_txg >
- spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
- trysnap = ds->ds_trysnap_txg;
- return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
-}
-
-int
-dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
-{
- return (blk_birth > dsl_dataset_prev_snap_txg(ds));
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_evict(dmu_buf_t *db, void *dsv)
-{
- dsl_dataset_t *ds = dsv;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
- /* open_refcount == DS_REF_MAX when deleting */
- ASSERT(ds->ds_open_refcount == 0 ||
- ds->ds_open_refcount == DS_REF_MAX);
-
- dprintf_ds(ds, "evicting %s\n", "");
-
- unique_remove(ds->ds_phys->ds_fsid_guid);
-
- if (ds->ds_user_ptr != NULL)
- ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-
- if (ds->ds_prev) {
- dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
- ds->ds_prev = NULL;
- }
-
- bplist_close(&ds->ds_deadlist);
- dsl_dir_close(ds->ds_dir, ds);
-
- if (list_link_active(&ds->ds_synced_link))
- list_remove(&dp->dp_synced_objsets, ds);
-
- mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_deadlist.bpl_lock);
-
- kmem_free(ds, sizeof (dsl_dataset_t));
-}
-
-static int
-dsl_dataset_get_snapname(dsl_dataset_t *ds)
-{
- dsl_dataset_phys_t *headphys;
- int err;
- dmu_buf_t *headdbuf;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
-
- if (ds->ds_snapname[0])
- return (0);
- if (ds->ds_phys->ds_next_snap_obj == 0)
- return (0);
-
- err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
- FTAG, &headdbuf);
- if (err)
- return (err);
- headphys = headdbuf->db_data;
- err = zap_value_search(dp->dp_meta_objset,
- headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
- dmu_buf_rele(headdbuf, FTAG);
- return (err);
-}
-
-int
-dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
- int mode, void *tag, dsl_dataset_t **dsp)
-{
- uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
- objset_t *mos = dp->dp_meta_objset;
- dmu_buf_t *dbuf;
- dsl_dataset_t *ds;
- int err;
-
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
- dsl_pool_sync_context(dp));
-
- err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
- if (err)
- return (err);
- ds = dmu_buf_get_user(dbuf);
- if (ds == NULL) {
- dsl_dataset_t *winner;
-
- ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
- ds->ds_dbuf = dbuf;
- ds->ds_object = dsobj;
- ds->ds_phys = dbuf->db_data;
-
- mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
- NULL);
-
- err = bplist_open(&ds->ds_deadlist,
- mos, ds->ds_phys->ds_deadlist_obj);
- if (err == 0) {
- err = dsl_dir_open_obj(dp,
- ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
- }
- if (err) {
- /*
- * we don't really need to close the blist if we
- * just opened it.
- */
- mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_deadlist.bpl_lock);
- kmem_free(ds, sizeof (dsl_dataset_t));
- dmu_buf_rele(dbuf, tag);
- return (err);
- }
-
- if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
- ds->ds_snapname[0] = '\0';
- if (ds->ds_phys->ds_prev_snap_obj) {
- err = dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, ds, &ds->ds_prev);
- }
- } else {
- if (snapname) {
-#ifdef ZFS_DEBUG
- dsl_dataset_phys_t *headphys;
- dmu_buf_t *headdbuf;
- err = dmu_bonus_hold(mos,
- ds->ds_dir->dd_phys->dd_head_dataset_obj,
- FTAG, &headdbuf);
- if (err == 0) {
- headphys = headdbuf->db_data;
- uint64_t foundobj;
- err = zap_lookup(dp->dp_meta_objset,
- headphys->ds_snapnames_zapobj,
- snapname, sizeof (foundobj), 1,
- &foundobj);
- ASSERT3U(foundobj, ==, dsobj);
- dmu_buf_rele(headdbuf, FTAG);
- }
-#endif
- (void) strcat(ds->ds_snapname, snapname);
- } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
- err = dsl_dataset_get_snapname(ds);
- }
- }
-
- if (err == 0) {
- winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
- dsl_dataset_evict);
- }
- if (err || winner) {
- bplist_close(&ds->ds_deadlist);
- if (ds->ds_prev) {
- dsl_dataset_close(ds->ds_prev,
- DS_MODE_NONE, ds);
- }
- dsl_dir_close(ds->ds_dir, ds);
- mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_deadlist.bpl_lock);
- kmem_free(ds, sizeof (dsl_dataset_t));
- if (err) {
- dmu_buf_rele(dbuf, tag);
- return (err);
- }
- ds = winner;
- } else {
- uint64_t new =
- unique_insert(ds->ds_phys->ds_fsid_guid);
- if (new != ds->ds_phys->ds_fsid_guid) {
- /* XXX it won't necessarily be synced... */
- ds->ds_phys->ds_fsid_guid = new;
- }
- }
- }
- ASSERT3P(ds->ds_dbuf, ==, dbuf);
- ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
-
- mutex_enter(&ds->ds_lock);
- if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
- (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
- !DS_MODE_IS_INCONSISTENT(mode)) ||
- (ds->ds_open_refcount + weight > DS_REF_MAX)) {
- mutex_exit(&ds->ds_lock);
- dsl_dataset_close(ds, DS_MODE_NONE, tag);
- return (EBUSY);
- }
- ds->ds_open_refcount += weight;
- mutex_exit(&ds->ds_lock);
-
- *dsp = ds;
- return (0);
-}
-
-int
-dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
- void *tag, dsl_dataset_t **dsp)
-{
- dsl_dir_t *dd;
- dsl_pool_t *dp;
- const char *tail;
- uint64_t obj;
- dsl_dataset_t *ds = NULL;
- int err = 0;
-
- err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
- if (err)
- return (err);
-
- dp = dd->dd_pool;
- obj = dd->dd_phys->dd_head_dataset_obj;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if (obj == 0) {
- /* A dataset with no associated objset */
- err = ENOENT;
- goto out;
- }
-
- if (tail != NULL) {
- objset_t *mos = dp->dp_meta_objset;
-
- err = dsl_dataset_open_obj(dp, obj, NULL,
- DS_MODE_NONE, tag, &ds);
- if (err)
- goto out;
- obj = ds->ds_phys->ds_snapnames_zapobj;
- dsl_dataset_close(ds, DS_MODE_NONE, tag);
- ds = NULL;
-
- if (tail[0] != '@') {
- err = ENOENT;
- goto out;
- }
- tail++;
-
- /* Look for a snapshot */
- if (!DS_MODE_IS_READONLY(mode)) {
- err = EROFS;
- goto out;
- }
- dprintf("looking for snapshot '%s'\n", tail);
- err = zap_lookup(mos, obj, tail, 8, 1, &obj);
- if (err)
- goto out;
- }
- err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
-
-out:
- rw_exit(&dp->dp_config_rwlock);
- dsl_dir_close(dd, FTAG);
-
- ASSERT3U((err == 0), ==, (ds != NULL));
- /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
-
- *dsp = ds;
- return (err);
-}
-
-int
-dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
-{
- return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
-}
-
-void
-dsl_dataset_name(dsl_dataset_t *ds, char *name)
-{
- if (ds == NULL) {
- (void) strcpy(name, "mos");
- } else {
- dsl_dir_name(ds->ds_dir, name);
- VERIFY(0 == dsl_dataset_get_snapname(ds));
- if (ds->ds_snapname[0]) {
- (void) strcat(name, "@");
- if (!MUTEX_HELD(&ds->ds_lock)) {
- /*
- * We use a "recursive" mutex so that we
- * can call dprintf_ds() with ds_lock held.
- */
- mutex_enter(&ds->ds_lock);
- (void) strcat(name, ds->ds_snapname);
- mutex_exit(&ds->ds_lock);
- } else {
- (void) strcat(name, ds->ds_snapname);
- }
- }
- }
-}
-
-static int
-dsl_dataset_namelen(dsl_dataset_t *ds)
-{
- int result;
-
- if (ds == NULL) {
- result = 3; /* "mos" */
- } else {
- result = dsl_dir_namelen(ds->ds_dir);
- VERIFY(0 == dsl_dataset_get_snapname(ds));
- if (ds->ds_snapname[0]) {
- ++result; /* adding one for the @-sign */
- if (!MUTEX_HELD(&ds->ds_lock)) {
- /* see dsl_datset_name */
- mutex_enter(&ds->ds_lock);
- result += strlen(ds->ds_snapname);
- mutex_exit(&ds->ds_lock);
- } else {
- result += strlen(ds->ds_snapname);
- }
- }
- }
-
- return (result);
-}
-
-void
-dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
-{
- uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
- mutex_enter(&ds->ds_lock);
- ASSERT3U(ds->ds_open_refcount, >=, weight);
- ds->ds_open_refcount -= weight;
- dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
- mode, ds->ds_open_refcount);
- mutex_exit(&ds->ds_lock);
-
- dmu_buf_rele(ds->ds_dbuf, tag);
-}
-
-void
-dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
-{
- objset_t *mos = dp->dp_meta_objset;
- dmu_buf_t *dbuf;
- dsl_dataset_phys_t *dsphys;
- dsl_dataset_t *ds;
- uint64_t dsobj;
- dsl_dir_t *dd;
-
- dsl_dir_create_root(mos, ddobjp, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
-
- dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
- DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsphys = dbuf->db_data;
- dsphys->ds_dir_obj = dd->dd_object;
- dsphys->ds_fsid_guid = unique_create();
- unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
- (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
- sizeof (dsphys->ds_guid));
- dsphys->ds_snapnames_zapobj =
- zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
- dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
- dsphys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- dmu_buf_rele(dbuf, FTAG);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_head_dataset_obj = dsobj;
- dsl_dir_close(dd, FTAG);
-
- VERIFY(0 ==
- dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
- (void) dmu_objset_create_impl(dp->dp_spa, ds,
- &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
-}
-
-uint64_t
-dsl_dataset_create_sync(dsl_dir_t *pdd,
- const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
-{
- dsl_pool_t *dp = pdd->dd_pool;
- dmu_buf_t *dbuf;
- dsl_dataset_phys_t *dsphys;
- uint64_t dsobj, ddobj;
- objset_t *mos = dp->dp_meta_objset;
- dsl_dir_t *dd;
-
- ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
- ASSERT(clone_parent == NULL ||
- clone_parent->ds_phys->ds_num_children > 0);
- ASSERT(lastname[0] != '@');
- ASSERT(dmu_tx_is_syncing(tx));
-
- ddobj = dsl_dir_create_sync(pdd, lastname, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
-
- dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
- DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsphys = dbuf->db_data;
- dsphys->ds_dir_obj = dd->dd_object;
- dsphys->ds_fsid_guid = unique_create();
- unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
- (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
- sizeof (dsphys->ds_guid));
- dsphys->ds_snapnames_zapobj =
- zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
- dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
- dsphys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- if (clone_parent) {
- dsphys->ds_prev_snap_obj = clone_parent->ds_object;
- dsphys->ds_prev_snap_txg =
- clone_parent->ds_phys->ds_creation_txg;
- dsphys->ds_used_bytes =
- clone_parent->ds_phys->ds_used_bytes;
- dsphys->ds_compressed_bytes =
- clone_parent->ds_phys->ds_compressed_bytes;
- dsphys->ds_uncompressed_bytes =
- clone_parent->ds_phys->ds_uncompressed_bytes;
- dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
-
- dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
- clone_parent->ds_phys->ds_num_children++;
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
- }
- dmu_buf_rele(dbuf, FTAG);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_head_dataset_obj = dsobj;
- dsl_dir_close(dd, FTAG);
-
- return (dsobj);
-}
-
-struct destroyarg {
- dsl_sync_task_group_t *dstg;
- char *snapname;
- char *failed;
-};
-
-static int
-dsl_snapshot_destroy_one(char *name, void *arg)
-{
- struct destroyarg *da = arg;
- dsl_dataset_t *ds;
- char *cp;
- int err;
-
- (void) strcat(name, "@");
- (void) strcat(name, da->snapname);
- err = dsl_dataset_open(name,
- DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
- da->dstg, &ds);
- cp = strchr(name, '@');
- *cp = '\0';
- if (err == ENOENT)
- return (0);
- if (err) {
- (void) strcpy(da->failed, name);
- return (err);
- }
-
- dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, ds, da->dstg, 0);
- return (0);
-}
-
-/*
- * Destroy 'snapname' in all descendants of 'fsname'.
- */
-#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
-int
-dsl_snapshots_destroy(char *fsname, char *snapname)
-{
- int err;
- struct destroyarg da;
- dsl_sync_task_t *dst;
- spa_t *spa;
- char *cp;
-
- cp = strchr(fsname, '/');
- if (cp) {
- *cp = '\0';
- err = spa_open(fsname, &spa, FTAG);
- *cp = '/';
- } else {
- err = spa_open(fsname, &spa, FTAG);
- }
- if (err)
- return (err);
- da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- da.snapname = snapname;
- da.failed = fsname;
-
- err = dmu_objset_find(fsname,
- dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
-
- if (err == 0)
- err = dsl_sync_task_group_wait(da.dstg);
-
- for (dst = list_head(&da.dstg->dstg_tasks); dst;
- dst = list_next(&da.dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
- if (dst->dst_err) {
- dsl_dataset_name(ds, fsname);
- cp = strchr(fsname, '@');
- *cp = '\0';
- }
- /*
- * If it was successful, destroy_sync would have
- * closed the ds
- */
- if (err)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
- }
-
- dsl_sync_task_group_destroy(da.dstg);
- spa_close(spa, FTAG);
- return (err);
-}
-
-int
-dsl_dataset_destroy(const char *name)
-{
- int err;
- dsl_sync_task_group_t *dstg;
- objset_t *os;
- dsl_dataset_t *ds;
- dsl_dir_t *dd;
- uint64_t obj;
-
- if (strchr(name, '@')) {
- /* Destroying a snapshot is simpler */
- err = dsl_dataset_open(name,
- DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
- FTAG, &ds);
- if (err)
- return (err);
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
- ds, FTAG, 0);
- if (err)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- return (err);
- }
-
- err = dmu_objset_open(name, DMU_OST_ANY,
- DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
- if (err)
- return (err);
- ds = os->os->os_dsl_dataset;
- dd = ds->ds_dir;
-
- /*
- * Check for errors and mark this ds as inconsistent, in
- * case we crash while freeing the objects.
- */
- err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
- dsl_dataset_destroy_begin_sync, ds, NULL, 0);
- if (err) {
- dmu_objset_close(os);
- return (err);
- }
-
- /*
- * remove the objects in open context, so that we won't
- * have too much to do in syncing context.
- */
- for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
- ds->ds_phys->ds_prev_snap_txg)) {
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
- dmu_tx_hold_bonus(tx, obj);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
- /*
- * Perhaps there is not enough disk
- * space. Just deal with it from
- * dsl_dataset_destroy_sync().
- */
- dmu_tx_abort(tx);
- continue;
- }
- VERIFY(0 == dmu_object_free(os, obj, tx));
- dmu_tx_commit(tx);
- }
- /* Make sure it's not dirty before we finish destroying it. */
- txg_wait_synced(dd->dd_pool, 0);
-
- dmu_objset_close(os);
- if (err != ESRCH)
- return (err);
-
- err = dsl_dataset_open(name,
- DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
- FTAG, &ds);
- if (err)
- return (err);
-
- err = dsl_dir_open(name, FTAG, &dd, NULL);
- if (err) {
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- return (err);
- }
-
- /*
- * Blow away the dsl_dir + head dataset.
- */
- dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
- dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, ds, FTAG, 0);
- dsl_sync_task_create(dstg, dsl_dir_destroy_check,
- dsl_dir_destroy_sync, dd, FTAG, 0);
- err = dsl_sync_task_group_wait(dstg);
- dsl_sync_task_group_destroy(dstg);
- /* if it is successful, *destroy_sync will close the ds+dd */
- if (err) {
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- dsl_dir_close(dd, FTAG);
- }
- return (err);
-}
-
-int
-dsl_dataset_rollback(dsl_dataset_t *ds)
-{
- ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
- return (dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
- ds, NULL, 0));
-}
-
-void *
-dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
- void *p, dsl_dataset_evict_func_t func)
-{
- void *old;
-
- mutex_enter(&ds->ds_lock);
- old = ds->ds_user_ptr;
- if (old == NULL) {
- ds->ds_user_ptr = p;
- ds->ds_user_evict_func = func;
- }
- mutex_exit(&ds->ds_lock);
- return (old);
-}
-
-void *
-dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
-{
- return (ds->ds_user_ptr);
-}
-
-
-blkptr_t *
-dsl_dataset_get_blkptr(dsl_dataset_t *ds)
-{
- return (&ds->ds_phys->ds_bp);
-}
-
-void
-dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
- /* If it's the meta-objset, set dp_meta_rootbp */
- if (ds == NULL) {
- tx->tx_pool->dp_meta_rootbp = *bp;
- } else {
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_bp = *bp;
- }
-}
-
-spa_t *
-dsl_dataset_get_spa(dsl_dataset_t *ds)
-{
- return (ds->ds_dir->dd_pool->dp_spa);
-}
-
-void
-dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
- dsl_pool_t *dp;
-
- if (ds == NULL) /* this is the meta-objset */
- return;
-
- ASSERT(ds->ds_user_ptr != NULL);
-
- if (ds->ds_phys->ds_next_snap_obj != 0)
- panic("dirtying snapshot!");
-
- dp = ds->ds_dir->dd_pool;
-
- if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
- /* up the hold count until we can be written out */
- dmu_buf_add_ref(ds->ds_dbuf, ds);
- }
-}
-
-struct killarg {
- uint64_t *usedp;
- uint64_t *compressedp;
- uint64_t *uncompressedp;
- zio_t *zio;
- dmu_tx_t *tx;
-};
-
-static int
-kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
-{
- struct killarg *ka = arg;
- blkptr_t *bp = &bc->bc_blkptr;
-
- ASSERT3U(bc->bc_errno, ==, 0);
-
- /*
- * Since this callback is not called concurrently, no lock is
- * needed on the accounting values.
- */
- *ka->usedp += bp_get_dasize(spa, bp);
- *ka->compressedp += BP_GET_PSIZE(bp);
- *ka->uncompressedp += BP_GET_UCSIZE(bp);
- /* XXX check for EIO? */
- (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
- ARC_NOWAIT);
- return (0);
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
-
- /*
- * There must be a previous snapshot. I suppose we could roll
- * it back to being empty (and re-initialize the upper (ZPL)
- * layer). But for now there's no way to do this via the user
- * interface.
- */
- if (ds->ds_phys->ds_prev_snap_txg == 0)
- return (EINVAL);
-
- /*
- * This must not be a snapshot.
- */
- if (ds->ds_phys->ds_next_snap_obj != 0)
- return (EINVAL);
-
- /*
- * If we made changes this txg, traverse_dsl_dataset won't find
- * them. Try again.
- */
- if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
- return (EAGAIN);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
-
- /* Zero out the deadlist. */
- bplist_close(&ds->ds_deadlist);
- bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
- ds->ds_phys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
- ds->ds_phys->ds_deadlist_obj));
-
- {
- /* Free blkptrs that we gave birth to */
- zio_t *zio;
- uint64_t used = 0, compressed = 0, uncompressed = 0;
- struct killarg ka;
-
- zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
- ZIO_FLAG_MUSTSUCCEED);
- ka.usedp = &used;
- ka.compressedp = &compressed;
- ka.uncompressedp = &uncompressed;
- ka.zio = zio;
- ka.tx = tx;
- (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- ADVANCE_POST, kill_blkptr, &ka);
- (void) zio_wait(zio);
-
- dsl_dir_diduse_space(ds->ds_dir,
- -used, -compressed, -uncompressed, tx);
- }
-
- /* Change our contents to that of the prev snapshot */
- ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
- ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
- ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
- ds->ds_phys->ds_compressed_bytes =
- ds->ds_prev->ds_phys->ds_compressed_bytes;
- ds->ds_phys->ds_uncompressed_bytes =
- ds->ds_prev->ds_phys->ds_uncompressed_bytes;
- ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
- ds->ds_phys->ds_unique_bytes = 0;
-
- if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
- dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
- ds->ds_prev->ds_phys->ds_unique_bytes = 0;
- }
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
-
- /*
- * Can't delete a head dataset if there are snapshots of it.
- * (Except if the only snapshots are from the branch we cloned
- * from.)
- */
- if (ds->ds_prev != NULL &&
- ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
- return (EINVAL);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
-
- /* Mark it as inconsistent on-disk, in case we crash */
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
-
- /* Can't delete a branch point. */
- if (ds->ds_phys->ds_num_children > 1)
- return (EEXIST);
-
- /*
- * Can't delete a head dataset if there are snapshots of it.
- * (Except if the only snapshots are from the branch we cloned
- * from.)
- */
- if (ds->ds_prev != NULL &&
- ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
- return (EINVAL);
-
- /*
- * If we made changes this txg, traverse_dsl_dataset won't find
- * them. Try again.
- */
- if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
- return (EAGAIN);
-
- /* XXX we should do some i/o error checking... */
- return (0);
-}
-
-static void
-dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- uint64_t used = 0, compressed = 0, uncompressed = 0;
- zio_t *zio;
- int err;
- int after_branch_point = FALSE;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- dsl_dataset_t *ds_prev = NULL;
- uint64_t obj;
-
- ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
- ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
- ASSERT(ds->ds_prev == NULL ||
- ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
- ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
-
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
-
- obj = ds->ds_object;
-
- if (ds->ds_phys->ds_prev_snap_obj != 0) {
- if (ds->ds_prev) {
- ds_prev = ds->ds_prev;
- } else {
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_prev));
- }
- after_branch_point =
- (ds_prev->ds_phys->ds_next_snap_obj != obj);
-
- dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
- if (after_branch_point &&
- ds->ds_phys->ds_next_snap_obj == 0) {
- /* This clone is toast. */
- ASSERT(ds_prev->ds_phys->ds_num_children > 1);
- ds_prev->ds_phys->ds_num_children--;
- } else if (!after_branch_point) {
- ds_prev->ds_phys->ds_next_snap_obj =
- ds->ds_phys->ds_next_snap_obj;
- }
- }
-
- zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
-
- if (ds->ds_phys->ds_next_snap_obj != 0) {
- blkptr_t bp;
- dsl_dataset_t *ds_next;
- uint64_t itor = 0;
-
- spa_scrub_restart(dp->dp_spa, tx->tx_txg);
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_next_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_next));
- ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
-
- dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
- ds_next->ds_phys->ds_prev_snap_obj =
- ds->ds_phys->ds_prev_snap_obj;
- ds_next->ds_phys->ds_prev_snap_txg =
- ds->ds_phys->ds_prev_snap_txg;
- ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
- ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
-
- /*
- * Transfer to our deadlist (which will become next's
- * new deadlist) any entries from next's current
- * deadlist which were born before prev, and free the
- * other entries.
- *
- * XXX we're doing this long task with the config lock held
- */
- while (bplist_iterate(&ds_next->ds_deadlist, &itor,
- &bp) == 0) {
- if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
- VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
- &bp, tx));
- if (ds_prev && !after_branch_point &&
- bp.blk_birth >
- ds_prev->ds_phys->ds_prev_snap_txg) {
- ds_prev->ds_phys->ds_unique_bytes +=
- bp_get_dasize(dp->dp_spa, &bp);
- }
- } else {
- used += bp_get_dasize(dp->dp_spa, &bp);
- compressed += BP_GET_PSIZE(&bp);
- uncompressed += BP_GET_UCSIZE(&bp);
- /* XXX check return value? */
- (void) arc_free(zio, dp->dp_spa, tx->tx_txg,
- &bp, NULL, NULL, ARC_NOWAIT);
- }
- }
-
- /* free next's deadlist */
- bplist_close(&ds_next->ds_deadlist);
- bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
-
- /* set next's deadlist to our deadlist */
- ds_next->ds_phys->ds_deadlist_obj =
- ds->ds_phys->ds_deadlist_obj;
- VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
- ds_next->ds_phys->ds_deadlist_obj));
- ds->ds_phys->ds_deadlist_obj = 0;
-
- if (ds_next->ds_phys->ds_next_snap_obj != 0) {
- /*
- * Update next's unique to include blocks which
- * were previously shared by only this snapshot
- * and it. Those blocks will be born after the
- * prev snap and before this snap, and will have
- * died after the next snap and before the one
- * after that (ie. be on the snap after next's
- * deadlist).
- *
- * XXX we're doing this long task with the
- * config lock held
- */
- dsl_dataset_t *ds_after_next;
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds_next->ds_phys->ds_next_snap_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_after_next));
- itor = 0;
- while (bplist_iterate(&ds_after_next->ds_deadlist,
- &itor, &bp) == 0) {
- if (bp.blk_birth >
- ds->ds_phys->ds_prev_snap_txg &&
- bp.blk_birth <=
- ds->ds_phys->ds_creation_txg) {
- ds_next->ds_phys->ds_unique_bytes +=
- bp_get_dasize(dp->dp_spa, &bp);
- }
- }
-
- dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
- ASSERT3P(ds_next->ds_prev, ==, NULL);
- } else {
- /*
- * It would be nice to update the head dataset's
- * unique. To do so we would have to traverse
- * it for blocks born after ds_prev, which is
- * pretty expensive just to maintain something
- * for debugging purposes.
- */
- ASSERT3P(ds_next->ds_prev, ==, ds);
- dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
- ds_next);
- if (ds_prev) {
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL,
- DS_MODE_NONE, ds_next, &ds_next->ds_prev));
- } else {
- ds_next->ds_prev = NULL;
- }
- }
- dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
-
- /*
- * NB: unique_bytes is not accurate for head objsets
- * because we don't update it when we delete the most
- * recent snapshot -- see above comment.
- */
- ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
- } else {
- /*
- * There's no next snapshot, so this is a head dataset.
- * Destroy the deadlist. Unless it's a clone, the
- * deadlist should be empty. (If it's a clone, it's
- * safe to ignore the deadlist contents.)
- */
- struct killarg ka;
-
- ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
- bplist_close(&ds->ds_deadlist);
- bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
- ds->ds_phys->ds_deadlist_obj = 0;
-
- /*
- * Free everything that we point to (that's born after
- * the previous snapshot, if we are a clone)
- *
- * XXX we're doing this long task with the config lock held
- */
- ka.usedp = &used;
- ka.compressedp = &compressed;
- ka.uncompressedp = &uncompressed;
- ka.zio = zio;
- ka.tx = tx;
- err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- ADVANCE_POST, kill_blkptr, &ka);
- ASSERT3U(err, ==, 0);
- }
-
- err = zio_wait(zio);
- ASSERT3U(err, ==, 0);
-
- dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
-
- if (ds->ds_phys->ds_snapnames_zapobj) {
- err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
- ASSERT(err == 0);
- }
-
- if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
- /* Erase the link in the dataset */
- dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
- ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
- /*
- * dsl_dir_sync_destroy() called us, they'll destroy
- * the dataset.
- */
- } else {
- /* remove from snapshot namespace */
- dsl_dataset_t *ds_head;
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
- DS_MODE_NONE, FTAG, &ds_head));
- VERIFY(0 == dsl_dataset_get_snapname(ds));
-#ifdef ZFS_DEBUG
- {
- uint64_t val;
- err = zap_lookup(mos,
- ds_head->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, 8, 1, &val);
- ASSERT3U(err, ==, 0);
- ASSERT3U(val, ==, obj);
- }
-#endif
- err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, tx);
- ASSERT(err == 0);
- dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
- }
-
- if (ds_prev && ds->ds_prev != ds_prev)
- dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
-
- spa_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
- VERIFY(0 == dmu_object_free(mos, obj, tx));
-
-}
-
-/* ARGSUSED */
-int
-dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- dsl_dataset_t *ds = os->os->os_dsl_dataset;
- const char *snapname = arg2;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- int err;
- uint64_t value;
-
- /*
- * We don't allow multiple snapshots of the same txg. If there
- * is already one, try again.
- */
- if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
- return (EAGAIN);
-
- /*
- * Check for conflicting name snapshot name.
- */
- err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
- snapname, 8, 1, &value);
- if (err == 0)
- return (EEXIST);
- if (err != ENOENT)
- return (err);
-
- /*
- * Check that the dataset's name is not too long. Name consists
- * of the dataset's length + 1 for the @-sign + snapshot name's length
- */
- if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
- return (ENAMETOOLONG);
-
- ds->ds_trysnap_txg = tx->tx_txg;
- return (0);
-}
-
-void
-dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- dsl_dataset_t *ds = os->os->os_dsl_dataset;
- const char *snapname = arg2;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- dmu_buf_t *dbuf;
- dsl_dataset_phys_t *dsphys;
- uint64_t dsobj;
- objset_t *mos = dp->dp_meta_objset;
- int err;
-
- spa_scrub_restart(dp->dp_spa, tx->tx_txg);
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
-
- dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
- DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsphys = dbuf->db_data;
- dsphys->ds_dir_obj = ds->ds_dir->dd_object;
- dsphys->ds_fsid_guid = unique_create();
- unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
- (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
- sizeof (dsphys->ds_guid));
- dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
- dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
- dsphys->ds_next_snap_obj = ds->ds_object;
- dsphys->ds_num_children = 1;
- dsphys->ds_creation_time = gethrestime_sec();
- dsphys->ds_creation_txg = tx->tx_txg;
- dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
- dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
- dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
- dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
- dsphys->ds_flags = ds->ds_phys->ds_flags;
- dsphys->ds_bp = ds->ds_phys->ds_bp;
- dmu_buf_rele(dbuf, FTAG);
-
- ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
- if (ds->ds_prev) {
- ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
- ds->ds_object ||
- ds->ds_prev->ds_phys->ds_num_children > 1);
- if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
- dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
- ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
- ds->ds_prev->ds_phys->ds_creation_txg);
- ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
- }
- }
-
- bplist_close(&ds->ds_deadlist);
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
- ds->ds_phys->ds_prev_snap_obj = dsobj;
- ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
- ds->ds_phys->ds_unique_bytes = 0;
- ds->ds_phys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
- ds->ds_phys->ds_deadlist_obj));
-
- dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
- err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
- snapname, 8, 1, &dsobj, tx);
- ASSERT(err == 0);
-
- if (ds->ds_prev)
- dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, snapname,
- DS_MODE_NONE, ds, &ds->ds_prev));
-}
-
-void
-dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
- ASSERT(ds->ds_user_ptr != NULL);
- ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
-
- dsl_dir_dirty(ds->ds_dir, tx);
- dmu_objset_sync(ds->ds_user_ptr, zio, tx);
- /* Unneeded? bplist_close(&ds->ds_deadlist); */
-}
-
-void
-dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
-{
- dsl_dir_stats(ds->ds_dir, nv);
-
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
- ds->ds_phys->ds_creation_time);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
- ds->ds_phys->ds_creation_txg);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
- ds->ds_phys->ds_used_bytes);
-
- if (ds->ds_phys->ds_next_snap_obj) {
- /*
- * This is a snapshot; override the dd's space used with
- * our unique space and compression ratio.
- */
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
- ds->ds_phys->ds_unique_bytes);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
- ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
- (ds->ds_phys->ds_uncompressed_bytes * 100 /
- ds->ds_phys->ds_compressed_bytes));
- }
-}
-
-void
-dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
-{
- stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
- stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
- if (ds->ds_phys->ds_next_snap_obj) {
- stat->dds_is_snapshot = B_TRUE;
- stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
- }
-
- /* clone origin is really a dsl_dir thing... */
- if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
- dsl_dataset_t *ods;
-
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
- ds->ds_dir->dd_phys->dd_clone_parent_obj,
- NULL, DS_MODE_NONE, FTAG, &ods));
- dsl_dataset_name(ods, stat->dds_clone_of);
- dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
- }
-}
-
-uint64_t
-dsl_dataset_fsid_guid(dsl_dataset_t *ds)
-{
- return (ds->ds_phys->ds_fsid_guid);
-}
-
-void
-dsl_dataset_space(dsl_dataset_t *ds,
- uint64_t *refdbytesp, uint64_t *availbytesp,
- uint64_t *usedobjsp, uint64_t *availobjsp)
-{
- *refdbytesp = ds->ds_phys->ds_used_bytes;
- *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
- *usedobjsp = ds->ds_phys->ds_bp.blk_fill;
- *availobjsp = DN_MAX_OBJECT - *usedobjsp;
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- char *newsnapname = arg2;
- dsl_dir_t *dd = ds->ds_dir;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- dsl_dataset_t *hds;
- uint64_t val;
- int err;
-
- err = dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
- if (err)
- return (err);
-
- /* new name better not be in use */
- err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
- newsnapname, 8, 1, &val);
- dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
-
- if (err == 0)
- err = EEXIST;
- else if (err == ENOENT)
- err = 0;
-
- /* dataset name + 1 for the "@" + the new snapshot name must fit */
- if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
- err = ENAMETOOLONG;
-
- return (err);
-}
-
-static void
-dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- char *newsnapname = arg2;
- dsl_dir_t *dd = ds->ds_dir;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- dsl_dataset_t *hds;
- int err;
-
- ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
-
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
-
- VERIFY(0 == dsl_dataset_get_snapname(ds));
- err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, tx);
- ASSERT3U(err, ==, 0);
- mutex_enter(&ds->ds_lock);
- (void) strcpy(ds->ds_snapname, newsnapname);
- mutex_exit(&ds->ds_lock);
- err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, 8, 1, &ds->ds_object, tx);
- ASSERT3U(err, ==, 0);
-
- dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
-}
-
-struct renamearg {
- dsl_sync_task_group_t *dstg;
- char failed[MAXPATHLEN];
- char *oldsnap;
- char *newsnap;
-};
-
-static int
-dsl_snapshot_rename_one(char *name, void *arg)
-{
- struct renamearg *ra = arg;
- dsl_dataset_t *ds = NULL;
- char *cp;
- int err;
-
- cp = name + strlen(name);
- *cp = '@';
- (void) strcpy(cp + 1, ra->oldsnap);
- err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
- ra->dstg, &ds);
- if (err == ENOENT) {
- *cp = '\0';
- return (0);
- }
- if (err) {
- (void) strcpy(ra->failed, name);
- *cp = '\0';
- dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
- return (err);
- }
-
-#ifdef _KERNEL
- /* for all filesystems undergoing rename, we'll need to unmount it */
- (void) zfs_unmount_snap(name, NULL);
-#endif
-
- *cp = '\0';
-
- dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
- dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
-
- return (0);
-}
-
-static int
-dsl_recursive_rename(char *oldname, const char *newname)
-{
- int err;
- struct renamearg *ra;
- dsl_sync_task_t *dst;
- spa_t *spa;
- char *cp, *fsname = spa_strdup(oldname);
- int len = strlen(oldname);
-
- /* truncate the snapshot name to get the fsname */
- cp = strchr(fsname, '@');
- *cp = '\0';
-
- cp = strchr(fsname, '/');
- if (cp) {
- *cp = '\0';
- err = spa_open(fsname, &spa, FTAG);
- *cp = '/';
- } else {
- err = spa_open(fsname, &spa, FTAG);
- }
- if (err) {
- kmem_free(fsname, len + 1);
- return (err);
- }
- ra = kmem_alloc(sizeof (struct renamearg), KM_SLEEP);
- ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-
- ra->oldsnap = strchr(oldname, '@') + 1;
- ra->newsnap = strchr(newname, '@') + 1;
- *ra->failed = '\0';
-
- err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
- DS_FIND_CHILDREN);
- kmem_free(fsname, len + 1);
-
- if (err == 0) {
- err = dsl_sync_task_group_wait(ra->dstg);
- }
-
- for (dst = list_head(&ra->dstg->dstg_tasks); dst;
- dst = list_next(&ra->dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
- if (dst->dst_err) {
- dsl_dir_name(ds->ds_dir, ra->failed);
- (void) strcat(ra->failed, "@");
- (void) strcat(ra->failed, ra->newsnap);
- }
- dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
- }
-
- (void) strcpy(oldname, ra->failed);
-
- dsl_sync_task_group_destroy(ra->dstg);
- kmem_free(ra, sizeof (struct renamearg));
- spa_close(spa, FTAG);
- return (err);
-}
-
-#pragma weak dmu_objset_rename = dsl_dataset_rename
-int
-dsl_dataset_rename(char *oldname, const char *newname,
- boolean_t recursive)
-{
- dsl_dir_t *dd;
- dsl_dataset_t *ds;
- const char *tail;
- int err;
-
- err = dsl_dir_open(oldname, FTAG, &dd, &tail);
- if (err)
- return (err);
- if (tail == NULL) {
- err = dsl_dir_rename(dd, newname);
- dsl_dir_close(dd, FTAG);
- return (err);
- }
- if (tail[0] != '@') {
- /* the name ended in a nonexistant component */
- dsl_dir_close(dd, FTAG);
- return (ENOENT);
- }
-
- dsl_dir_close(dd, FTAG);
-
- /* new name must be snapshot in same filesystem */
- tail = strchr(newname, '@');
- if (tail == NULL)
- return (EINVAL);
- tail++;
- if (strncmp(oldname, newname, tail - newname) != 0)
- return (EXDEV);
-
- if (recursive) {
- err = dsl_recursive_rename(oldname, newname);
- } else {
- err = dsl_dataset_open(oldname,
- DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
- if (err)
- return (err);
-
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_snapshot_rename_check,
- dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
-
- dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
- }
-
- return (err);
-}
-
-struct promotearg {
- uint64_t used, comp, uncomp, unique;
- uint64_t newnext_obj, snapnames_obj;
-};
-
-static int
-dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *hds = arg1;
- struct promotearg *pa = arg2;
- dsl_dir_t *dd = hds->ds_dir;
- dsl_pool_t *dp = hds->ds_dir->dd_pool;
- dsl_dir_t *pdd = NULL;
- dsl_dataset_t *ds = NULL;
- dsl_dataset_t *pivot_ds = NULL;
- dsl_dataset_t *newnext_ds = NULL;
- int err;
- char *name = NULL;
- uint64_t itor = 0;
- blkptr_t bp;
-
- bzero(pa, sizeof (*pa));
-
- /* Check that it is a clone */
- if (dd->dd_phys->dd_clone_parent_obj == 0)
- return (EINVAL);
-
- /* Since this is so expensive, don't do the preliminary check */
- if (!dmu_tx_is_syncing(tx))
- return (0);
-
- if (err = dsl_dataset_open_obj(dp,
- dd->dd_phys->dd_clone_parent_obj,
- NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
- goto out;
- pdd = pivot_ds->ds_dir;
-
- {
- dsl_dataset_t *phds;
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- pdd->dd_phys->dd_head_dataset_obj,
- NULL, DS_MODE_NONE, FTAG, &phds))
- goto out;
- pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
- dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
- }
-
- if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
- err = EXDEV;
- goto out;
- }
-
- /* find pivot point's new next ds */
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
- NULL, DS_MODE_NONE, FTAG, &newnext_ds));
- while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
- dsl_dataset_t *prev;
-
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- newnext_ds->ds_phys->ds_prev_snap_obj,
- NULL, DS_MODE_NONE, FTAG, &prev))
- goto out;
- dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
- newnext_ds = prev;
- }
- pa->newnext_obj = newnext_ds->ds_object;
-
- /* compute pivot point's new unique space */
- while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
- &itor, &bp)) == 0) {
- if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
- pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
- }
- if (err != ENOENT)
- goto out;
-
- /* Walk the snapshots that we are moving */
- name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ds = pivot_ds;
- /* CONSTCOND */
- while (TRUE) {
- uint64_t val, dlused, dlcomp, dluncomp;
- dsl_dataset_t *prev;
-
- /* Check that the snapshot name does not conflict */
- dsl_dataset_name(ds, name);
- err = zap_lookup(dd->dd_pool->dp_meta_objset,
- hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
- 8, 1, &val);
- if (err != ENOENT) {
- if (err == 0)
- err = EEXIST;
- goto out;
- }
-
- /*
- * compute space to transfer. Each snapshot gave birth to:
- * (my used) - (prev's used) + (deadlist's used)
- */
- pa->used += ds->ds_phys->ds_used_bytes;
- pa->comp += ds->ds_phys->ds_compressed_bytes;
- pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
-
- /* If we reach the first snapshot, we're done. */
- if (ds->ds_phys->ds_prev_snap_obj == 0)
- break;
-
- if (err = bplist_space(&ds->ds_deadlist,
- &dlused, &dlcomp, &dluncomp))
- goto out;
- if (err = dsl_dataset_open_obj(dd->dd_pool,
- ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
- FTAG, &prev))
- goto out;
- pa->used += dlused - prev->ds_phys->ds_used_bytes;
- pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
- pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
-
- /*
- * We could be a clone of a clone. If we reach our
- * parent's branch point, we're done.
- */
- if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
- dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
- break;
- }
- if (ds != pivot_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- ds = prev;
- }
-
- /* Check that there is enough space here */
- err = dsl_dir_transfer_possible(pdd, dd, pa->used);
-
-out:
- if (ds && ds != pivot_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- if (pivot_ds)
- dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
- if (newnext_ds)
- dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
- if (name)
- kmem_free(name, MAXPATHLEN);
- return (err);
-}
-
-static void
-dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *hds = arg1;
- struct promotearg *pa = arg2;
- dsl_dir_t *dd = hds->ds_dir;
- dsl_pool_t *dp = hds->ds_dir->dd_pool;
- dsl_dir_t *pdd = NULL;
- dsl_dataset_t *ds, *pivot_ds;
- char *name;
-
- ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
- ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- dd->dd_phys->dd_clone_parent_obj,
- NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
- /*
- * We need to explicitly open pdd, since pivot_ds's pdd will be
- * changing.
- */
- VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
- NULL, FTAG, &pdd));
-
- /* move snapshots to this dir */
- name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ds = pivot_ds;
- /* CONSTCOND */
- while (TRUE) {
- dsl_dataset_t *prev;
-
- /* move snap name entry */
- dsl_dataset_name(ds, name);
- VERIFY(0 == zap_remove(dp->dp_meta_objset,
- pa->snapnames_obj, ds->ds_snapname, tx));
- VERIFY(0 == zap_add(dp->dp_meta_objset,
- hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
- 8, 1, &ds->ds_object, tx));
-
- /* change containing dsl_dir */
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
- ds->ds_phys->ds_dir_obj = dd->dd_object;
- ASSERT3P(ds->ds_dir, ==, pdd);
- dsl_dir_close(ds->ds_dir, ds);
- VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
- NULL, ds, &ds->ds_dir));
-
- ASSERT3U(dsl_prop_numcb(ds), ==, 0);
-
- if (ds->ds_phys->ds_prev_snap_obj == 0)
- break;
-
- VERIFY(0 == dsl_dataset_open_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
- FTAG, &prev));
-
- if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
- dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
- break;
- }
- if (ds != pivot_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
- ds = prev;
- }
- if (ds != pivot_ds)
- dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-
- /* change pivot point's next snap */
- dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
- pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
-
- /* change clone_parent-age */
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
- dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
- dmu_buf_will_dirty(pdd->dd_dbuf, tx);
- pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
-
- /* change space accounting */
- dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
- dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
- pivot_ds->ds_phys->ds_unique_bytes = pa->unique;
-
- dsl_dir_close(pdd, FTAG);
- dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
- kmem_free(name, MAXPATHLEN);
-}
-
-int
-dsl_dataset_promote(const char *name)
-{
- dsl_dataset_t *ds;
- int err;
- dmu_object_info_t doi;
- struct promotearg pa;
-
- err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
- if (err)
- return (err);
-
- err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_phys->ds_snapnames_zapobj, &doi);
- if (err) {
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- return (err);
- }
-
- /*
- * Add in 128x the snapnames zapobj size, since we will be moving
- * a bunch of snapnames to the promoted ds, and dirtying their
- * bonus buffers.
- */
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_promote_check,
- dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- return (err);
-}
-
-/*
- * Given a pool name and a dataset object number in that pool,
- * return the name of that dataset.
- */
-int
-dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
-{
- spa_t *spa;
- dsl_pool_t *dp;
- dsl_dataset_t *ds = NULL;
- int error;
-
- if ((error = spa_open(pname, &spa, FTAG)) != 0)
- return (error);
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if ((error = dsl_dataset_open_obj(dp, obj,
- NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- return (error);
- }
- dsl_dataset_name(ds, buf);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
deleted file mode 100644
index 5e563b6..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
+++ /dev/null
@@ -1,1215 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_synctask.h>
-#include <sys/spa.h>
-#include <sys/zap.h>
-#include <sys/zio.h>
-#include <sys/arc.h>
-#include "zfs_namecheck.h"
-
-static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
-
-
-/* ARGSUSED */
-static void
-dsl_dir_evict(dmu_buf_t *db, void *arg)
-{
- dsl_dir_t *dd = arg;
- dsl_pool_t *dp = dd->dd_pool;
- int t;
-
- for (t = 0; t < TXG_SIZE; t++) {
- ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
- ASSERT(dd->dd_tempreserved[t] == 0);
- ASSERT(dd->dd_space_towrite[t] == 0);
- }
-
- ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes);
-
- if (dd->dd_parent)
- dsl_dir_close(dd->dd_parent, dd);
-
- spa_close(dd->dd_pool->dp_spa, dd);
-
- /*
- * The props callback list should be empty since they hold the
- * dir open.
- */
- list_destroy(&dd->dd_prop_cbs);
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
-}
-
-int
-dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
- const char *tail, void *tag, dsl_dir_t **ddp)
-{
- dmu_buf_t *dbuf;
- dsl_dir_t *dd;
- int err;
-
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
- dsl_pool_sync_context(dp));
-
- err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
- if (err)
- return (err);
- dd = dmu_buf_get_user(dbuf);
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(dbuf, &doi);
- ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
- }
-#endif
- /* XXX assert bonus buffer size is correct */
- if (dd == NULL) {
- dsl_dir_t *winner;
- int err;
-
- dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
- dd->dd_object = ddobj;
- dd->dd_dbuf = dbuf;
- dd->dd_pool = dp;
- dd->dd_phys = dbuf->db_data;
- dd->dd_used_bytes = dd->dd_phys->dd_used_bytes;
- mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
-
- list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
- offsetof(dsl_prop_cb_record_t, cbr_node));
-
- if (dd->dd_phys->dd_parent_obj) {
- err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
- NULL, dd, &dd->dd_parent);
- if (err) {
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
- dmu_buf_rele(dbuf, tag);
- return (err);
- }
- if (tail) {
-#ifdef ZFS_DEBUG
- uint64_t foundobj;
-
- err = zap_lookup(dp->dp_meta_objset,
- dd->dd_parent->dd_phys->
- dd_child_dir_zapobj,
- tail, sizeof (foundobj), 1, &foundobj);
- ASSERT(err || foundobj == ddobj);
-#endif
- (void) strcpy(dd->dd_myname, tail);
- } else {
- err = zap_value_search(dp->dp_meta_objset,
- dd->dd_parent->dd_phys->
- dd_child_dir_zapobj,
- ddobj, dd->dd_myname);
- }
- if (err) {
- dsl_dir_close(dd->dd_parent, dd);
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
- dmu_buf_rele(dbuf, tag);
- return (err);
- }
- } else {
- (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
- }
-
- winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
- dsl_dir_evict);
- if (winner) {
- if (dd->dd_parent)
- dsl_dir_close(dd->dd_parent, dd);
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
- dd = winner;
- } else {
- spa_open_ref(dp->dp_spa, dd);
- }
- }
-
- /*
- * The dsl_dir_t has both open-to-close and instantiate-to-evict
- * holds on the spa. We need the open-to-close holds because
- * otherwise the spa_refcnt wouldn't change when we open a
- * dir which the spa also has open, so we could incorrectly
- * think it was OK to unload/export/destroy the pool. We need
- * the instantiate-to-evict hold because the dsl_dir_t has a
- * pointer to the dd_pool, which has a pointer to the spa_t.
- */
- spa_open_ref(dp->dp_spa, tag);
- ASSERT3P(dd->dd_pool, ==, dp);
- ASSERT3U(dd->dd_object, ==, ddobj);
- ASSERT3P(dd->dd_dbuf, ==, dbuf);
- *ddp = dd;
- return (0);
-}
-
-void
-dsl_dir_close(dsl_dir_t *dd, void *tag)
-{
- dprintf_dd(dd, "%s\n", "");
- spa_close(dd->dd_pool->dp_spa, tag);
- dmu_buf_rele(dd->dd_dbuf, tag);
-}
-
-/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
-void
-dsl_dir_name(dsl_dir_t *dd, char *buf)
-{
- if (dd->dd_parent) {
- dsl_dir_name(dd->dd_parent, buf);
- (void) strcat(buf, "/");
- } else {
- buf[0] = '\0';
- }
- if (!MUTEX_HELD(&dd->dd_lock)) {
- /*
- * recursive mutex so that we can use
- * dprintf_dd() with dd_lock held
- */
- mutex_enter(&dd->dd_lock);
- (void) strcat(buf, dd->dd_myname);
- mutex_exit(&dd->dd_lock);
- } else {
- (void) strcat(buf, dd->dd_myname);
- }
-}
-
-/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
-int
-dsl_dir_namelen(dsl_dir_t *dd)
-{
- int result = 0;
-
- if (dd->dd_parent) {
- /* parent's name + 1 for the "/" */
- result = dsl_dir_namelen(dd->dd_parent) + 1;
- }
-
- if (!MUTEX_HELD(&dd->dd_lock)) {
- /* see dsl_dir_name */
- mutex_enter(&dd->dd_lock);
- result += strlen(dd->dd_myname);
- mutex_exit(&dd->dd_lock);
- } else {
- result += strlen(dd->dd_myname);
- }
-
- return (result);
-}
-
-int
-dsl_dir_is_private(dsl_dir_t *dd)
-{
- int rv = FALSE;
-
- if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
- rv = TRUE;
- if (dataset_name_hidden(dd->dd_myname))
- rv = TRUE;
- return (rv);
-}
-
-
-static int
-getcomponent(const char *path, char *component, const char **nextp)
-{
- char *p;
- if (path == NULL)
- return (ENOENT);
- /* This would be a good place to reserve some namespace... */
- p = strpbrk(path, "/@");
- if (p && (p[1] == '/' || p[1] == '@')) {
- /* two separators in a row */
- return (EINVAL);
- }
- if (p == NULL || p == path) {
- /*
- * if the first thing is an @ or /, it had better be an
- * @ and it had better not have any more ats or slashes,
- * and it had better have something after the @.
- */
- if (p != NULL &&
- (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
- return (EINVAL);
- if (strlen(path) >= MAXNAMELEN)
- return (ENAMETOOLONG);
- (void) strcpy(component, path);
- p = NULL;
- } else if (p[0] == '/') {
- if (p-path >= MAXNAMELEN)
- return (ENAMETOOLONG);
- (void) strncpy(component, path, p - path);
- component[p-path] = '\0';
- p++;
- } else if (p[0] == '@') {
- /*
- * if the next separator is an @, there better not be
- * any more slashes.
- */
- if (strchr(path, '/'))
- return (EINVAL);
- if (p-path >= MAXNAMELEN)
- return (ENAMETOOLONG);
- (void) strncpy(component, path, p - path);
- component[p-path] = '\0';
- } else {
- ASSERT(!"invalid p");
- }
- *nextp = p;
- return (0);
-}
-
-/*
- * same as dsl_open_dir, ignore the first component of name and use the
- * spa instead
- */
-int
-dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
- dsl_dir_t **ddp, const char **tailp)
-{
- char buf[MAXNAMELEN];
- const char *next, *nextnext = NULL;
- int err;
- dsl_dir_t *dd;
- dsl_pool_t *dp;
- uint64_t ddobj;
- int openedspa = FALSE;
-
- dprintf("%s\n", name);
-
- err = getcomponent(name, buf, &next);
- if (err)
- return (err);
- if (spa == NULL) {
- err = spa_open(buf, &spa, FTAG);
- if (err) {
- dprintf("spa_open(%s) failed\n", buf);
- return (err);
- }
- openedspa = TRUE;
-
- /* XXX this assertion belongs in spa_open */
- ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
- }
-
- dp = spa_get_dsl(spa);
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
- if (err) {
- rw_exit(&dp->dp_config_rwlock);
- if (openedspa)
- spa_close(spa, FTAG);
- return (err);
- }
-
- while (next != NULL) {
- dsl_dir_t *child_ds;
- err = getcomponent(next, buf, &nextnext);
- if (err)
- break;
- ASSERT(next[0] != '\0');
- if (next[0] == '@')
- break;
- dprintf("looking up %s in obj%lld\n",
- buf, dd->dd_phys->dd_child_dir_zapobj);
-
- err = zap_lookup(dp->dp_meta_objset,
- dd->dd_phys->dd_child_dir_zapobj,
- buf, sizeof (ddobj), 1, &ddobj);
- if (err) {
- if (err == ENOENT)
- err = 0;
- break;
- }
-
- err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
- if (err)
- break;
- dsl_dir_close(dd, tag);
- dd = child_ds;
- next = nextnext;
- }
- rw_exit(&dp->dp_config_rwlock);
-
- if (err) {
- dsl_dir_close(dd, tag);
- if (openedspa)
- spa_close(spa, FTAG);
- return (err);
- }
-
- /*
- * It's an error if there's more than one component left, or
- * tailp==NULL and there's any component left.
- */
- if (next != NULL &&
- (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
- /* bad path name */
- dsl_dir_close(dd, tag);
- dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
- err = ENOENT;
- }
- if (tailp)
- *tailp = next;
- if (openedspa)
- spa_close(spa, FTAG);
- *ddp = dd;
- return (err);
-}
-
-/*
- * Return the dsl_dir_t, and possibly the last component which couldn't
- * be found in *tail. Return NULL if the path is bogus, or if
- * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@'
- * means that the last component is a snapshot.
- */
-int
-dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
-{
- return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
-}
-
-uint64_t
-dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx)
-{
- objset_t *mos = pds->dd_pool->dp_meta_objset;
- uint64_t ddobj;
- dsl_dir_phys_t *dsphys;
- dmu_buf_t *dbuf;
-
- ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
- DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
- VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
- name, sizeof (uint64_t), 1, &ddobj, tx));
- VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsphys = dbuf->db_data;
-
- dsphys->dd_creation_time = gethrestime_sec();
- dsphys->dd_parent_obj = pds->dd_object;
- dsphys->dd_props_zapobj = zap_create(mos,
- DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
- dsphys->dd_child_dir_zapobj = zap_create(mos,
- DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
- dmu_buf_rele(dbuf, FTAG);
-
- return (ddobj);
-}
-
-/* ARGSUSED */
-int
-dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- dsl_pool_t *dp = dd->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- int err;
- uint64_t count;
-
- /*
- * There should be exactly two holds, both from
- * dsl_dataset_destroy: one on the dd directory, and one on its
- * head ds. Otherwise, someone is trying to lookup something
- * inside this dir while we want to destroy it. The
- * config_rwlock ensures that nobody else opens it after we
- * check.
- */
- if (dmu_buf_refcount(dd->dd_dbuf) > 2)
- return (EBUSY);
-
- err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
- if (err)
- return (err);
- if (count != 0)
- return (EEXIST);
-
- return (0);
-}
-
-void
-dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- uint64_t val, obj;
-
- ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
- ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
-
- /* Remove our reservation. */
- val = 0;
- dsl_dir_set_reservation_sync(dd, &val, tx);
- ASSERT3U(dd->dd_used_bytes, ==, 0);
- ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
-
- VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
- VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
- VERIFY(0 == zap_remove(mos,
- dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
-
- obj = dd->dd_object;
- dsl_dir_close(dd, tag);
- VERIFY(0 == dmu_object_free(mos, obj, tx));
-}
-
-void
-dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx)
-{
- dsl_dir_phys_t *dsp;
- dmu_buf_t *dbuf;
- int error;
-
- *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
- DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
-
- error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET,
- sizeof (uint64_t), 1, ddobjp, tx);
- ASSERT3U(error, ==, 0);
-
- VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsp = dbuf->db_data;
-
- dsp->dd_creation_time = gethrestime_sec();
- dsp->dd_props_zapobj = zap_create(mos,
- DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
- dsp->dd_child_dir_zapobj = zap_create(mos,
- DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
-
- dmu_buf_rele(dbuf, FTAG);
-}
-
-void
-dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
-{
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
- dsl_dir_space_available(dd, NULL, 0, TRUE));
-
- mutex_enter(&dd->dd_lock);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
- dd->dd_phys->dd_quota);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
- dd->dd_phys->dd_reserved);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
- dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
- (dd->dd_phys->dd_uncompressed_bytes * 100 /
- dd->dd_phys->dd_compressed_bytes));
- mutex_exit(&dd->dd_lock);
-
- if (dd->dd_phys->dd_clone_parent_obj) {
- dsl_dataset_t *ds;
- char buf[MAXNAMELEN];
-
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_clone_parent_obj,
- NULL, DS_MODE_NONE, FTAG, &ds));
- dsl_dataset_name(ds, buf);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
-
- dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
- }
-}
-
-void
-dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
-{
- dsl_pool_t *dp = dd->dd_pool;
-
- ASSERT(dd->dd_phys);
-
- if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
- /* up the hold count until we can be written out */
- dmu_buf_add_ref(dd->dd_dbuf, dd);
- }
-}
-
-static int64_t
-parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
-{
- uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
- uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
- return (new_accounted - old_accounted);
-}
-
-void
-dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
- mutex_enter(&dd->dd_lock);
- ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
- dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
- dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
- dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
- dd->dd_phys->dd_used_bytes = dd->dd_used_bytes;
- mutex_exit(&dd->dd_lock);
-
- /* release the hold from dsl_dir_dirty */
- dmu_buf_rele(dd->dd_dbuf, dd);
-}
-
-static uint64_t
-dsl_dir_estimated_space(dsl_dir_t *dd)
-{
- int64_t space;
- int i;
-
- ASSERT(MUTEX_HELD(&dd->dd_lock));
-
- space = dd->dd_phys->dd_used_bytes;
- ASSERT(space >= 0);
- for (i = 0; i < TXG_SIZE; i++) {
- space += dd->dd_space_towrite[i&TXG_MASK];
- ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
- }
- return (space);
-}
-
-/*
- * How much space would dd have available if ancestor had delta applied
- * to it? If ondiskonly is set, we're only interested in what's
- * on-disk, not estimated pending changes.
- */
-uint64_t
-dsl_dir_space_available(dsl_dir_t *dd,
- dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
-{
- uint64_t parentspace, myspace, quota, used;
-
- /*
- * If there are no restrictions otherwise, assume we have
- * unlimited space available.
- */
- quota = UINT64_MAX;
- parentspace = UINT64_MAX;
-
- if (dd->dd_parent != NULL) {
- parentspace = dsl_dir_space_available(dd->dd_parent,
- ancestor, delta, ondiskonly);
- }
-
- mutex_enter(&dd->dd_lock);
- if (dd->dd_phys->dd_quota != 0)
- quota = dd->dd_phys->dd_quota;
- if (ondiskonly) {
- used = dd->dd_used_bytes;
- } else {
- used = dsl_dir_estimated_space(dd);
- }
- if (dd == ancestor)
- used += delta;
-
- if (dd->dd_parent == NULL) {
- uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
- quota = MIN(quota, poolsize);
- }
-
- if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
- /*
- * We have some space reserved, in addition to what our
- * parent gave us.
- */
- parentspace += dd->dd_phys->dd_reserved - used;
- }
-
- if (used > quota) {
- /* over quota */
- myspace = 0;
-
- /*
- * While it's OK to be a little over quota, if
- * we think we are using more space than there
- * is in the pool (which is already 1.6% more than
- * dsl_pool_adjustedsize()), something is very
- * wrong.
- */
- ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
- } else {
- /*
- * the lesser of the space provided by our parent and
- * the space left in our quota
- */
- myspace = MIN(parentspace, quota - used);
- }
-
- mutex_exit(&dd->dd_lock);
-
- return (myspace);
-}
-
-struct tempreserve {
- list_node_t tr_node;
- dsl_dir_t *tr_ds;
- uint64_t tr_size;
-};
-
-/*
- * Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
- */
-static int
-dsl_dir_tempreserve_impl(dsl_dir_t *dd,
- uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx)
-{
- uint64_t txg = tx->tx_txg;
- uint64_t est_used, quota, parent_rsrv;
- int edquot = EDQUOT;
- int txgidx = txg & TXG_MASK;
- int i;
- struct tempreserve *tr;
-
- ASSERT3U(txg, !=, 0);
- ASSERT3S(asize, >=, 0);
-
- mutex_enter(&dd->dd_lock);
- /*
- * Check against the dsl_dir's quota. We don't add in the delta
- * when checking for over-quota because they get one free hit.
- */
- est_used = dsl_dir_estimated_space(dd);
- for (i = 0; i < TXG_SIZE; i++)
- est_used += dd->dd_tempreserved[i];
-
- quota = UINT64_MAX;
-
- if (dd->dd_phys->dd_quota)
- quota = dd->dd_phys->dd_quota;
-
- /*
- * If this transaction will result in a net free of space, we want
- * to let it through, but we have to be careful: the space that it
- * frees won't become available until *after* this txg syncs.
- * Therefore, to ensure that it's possible to remove files from
- * a full pool without inducing transient overcommits, we throttle
- * netfree transactions against a quota that is slightly larger,
- * but still within the pool's allocation slop. In cases where
- * we're very close to full, this will allow a steady trickle of
- * removes to get through.
- */
- if (dd->dd_parent == NULL) {
- uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
- if (poolsize < quota) {
- quota = poolsize;
- edquot = ENOSPC;
- }
- } else if (netfree) {
- quota = UINT64_MAX;
- }
-
- /*
- * If they are requesting more space, and our current estimate
- * is over quota. They get to try again unless the actual
- * on-disk is over quota and there are no pending changes (which
- * may free up space for us).
- */
- if (asize > 0 && est_used > quota) {
- if (dd->dd_space_towrite[txg & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 ||
- dd->dd_used_bytes < quota)
- edquot = ERESTART;
- dprintf_dd(dd, "failing: used=%lluK est_used = %lluK "
- "quota=%lluK tr=%lluK err=%d\n",
- dd->dd_used_bytes>>10, est_used>>10,
- quota>>10, asize>>10, edquot);
- mutex_exit(&dd->dd_lock);
- return (edquot);
- }
-
- /* We need to up our estimated delta before dropping dd_lock */
- dd->dd_tempreserved[txgidx] += asize;
-
- parent_rsrv = parent_delta(dd, est_used, asize);
- mutex_exit(&dd->dd_lock);
-
- tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
- tr->tr_ds = dd;
- tr->tr_size = asize;
- list_insert_tail(tr_list, tr);
-
- /* see if it's OK with our parent */
- if (dd->dd_parent && parent_rsrv) {
- return (dsl_dir_tempreserve_impl(dd->dd_parent,
- parent_rsrv, netfree, tr_list, tx));
- } else {
- return (0);
- }
-}
-
-/*
- * Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
- */
-int
-dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx)
-{
- int err = 0;
- list_t *tr_list;
-
- tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
- list_create(tr_list, sizeof (struct tempreserve),
- offsetof(struct tempreserve, tr_node));
- ASSERT3S(asize, >=, 0);
- ASSERT3S(fsize, >=, 0);
-
- err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
- tr_list, tx);
-
- if (err == 0) {
- struct tempreserve *tr;
-
- err = arc_tempreserve_space(lsize);
- if (err == 0) {
- tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
- tr->tr_ds = NULL;
- tr->tr_size = lsize;
- list_insert_tail(tr_list, tr);
- }
- }
-
- if (err)
- dsl_dir_tempreserve_clear(tr_list, tx);
- else
- *tr_cookiep = tr_list;
- return (err);
-}
-
-/*
- * Clear a temporary reservation that we previously made with
- * dsl_dir_tempreserve_space().
- */
-void
-dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
-{
- int txgidx = tx->tx_txg & TXG_MASK;
- list_t *tr_list = tr_cookie;
- struct tempreserve *tr;
-
- ASSERT3U(tx->tx_txg, !=, 0);
-
- while (tr = list_head(tr_list)) {
- if (tr->tr_ds == NULL) {
- arc_tempreserve_clear(tr->tr_size);
- } else {
- mutex_enter(&tr->tr_ds->dd_lock);
- ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
- tr->tr_size);
- tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
- mutex_exit(&tr->tr_ds->dd_lock);
- }
- list_remove(tr_list, tr);
- kmem_free(tr, sizeof (struct tempreserve));
- }
-
- kmem_free(tr_list, sizeof (list_t));
-}
-
-/*
- * Call in open context when we think we're going to write/free space,
- * eg. when dirtying data. Be conservative (ie. OK to write less than
- * this or free more than this, but don't write more or free less).
- */
-void
-dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
-{
- int64_t parent_space;
- uint64_t est_used;
-
- mutex_enter(&dd->dd_lock);
- if (space > 0)
- dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
-
- est_used = dsl_dir_estimated_space(dd);
- parent_space = parent_delta(dd, est_used, space);
- mutex_exit(&dd->dd_lock);
-
- /* Make sure that we clean up dd_space_to* */
- dsl_dir_dirty(dd, tx);
-
- /* XXX this is potentially expensive and unnecessary... */
- if (parent_space && dd->dd_parent)
- dsl_dir_willuse_space(dd->dd_parent, parent_space, tx);
-}
-
-/* call from syncing context when we actually write/free space for this dd */
-void
-dsl_dir_diduse_space(dsl_dir_t *dd,
- int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
-{
- int64_t accounted_delta;
-
- ASSERT(dmu_tx_is_syncing(tx));
-
- dsl_dir_dirty(dd, tx);
-
- mutex_enter(&dd->dd_lock);
- accounted_delta = parent_delta(dd, dd->dd_used_bytes, used);
- ASSERT(used >= 0 || dd->dd_used_bytes >= -used);
- ASSERT(compressed >= 0 ||
- dd->dd_phys->dd_compressed_bytes >= -compressed);
- ASSERT(uncompressed >= 0 ||
- dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
- dd->dd_used_bytes += used;
- dd->dd_phys->dd_uncompressed_bytes += uncompressed;
- dd->dd_phys->dd_compressed_bytes += compressed;
- mutex_exit(&dd->dd_lock);
-
- if (dd->dd_parent != NULL) {
- dsl_dir_diduse_space(dd->dd_parent,
- accounted_delta, compressed, uncompressed, tx);
- }
-}
-
-/* ARGSUSED */
-static int
-dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- uint64_t *quotap = arg2;
- uint64_t new_quota = *quotap;
- int err = 0;
- uint64_t towrite;
-
- if (new_quota == 0)
- return (0);
-
- mutex_enter(&dd->dd_lock);
- /*
- * If we are doing the preliminary check in open context, and
- * there are pending changes, then don't fail it, since the
- * pending changes could under-estimat the amount of space to be
- * freed up.
- */
- towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] +
- dd->dd_space_towrite[2] + dd->dd_space_towrite[3];
- if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
- (new_quota < dd->dd_phys->dd_reserved ||
- new_quota < dsl_dir_estimated_space(dd))) {
- err = ENOSPC;
- }
- mutex_exit(&dd->dd_lock);
- return (err);
-}
-
-static void
-dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- uint64_t *quotap = arg2;
- uint64_t new_quota = *quotap;
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
- mutex_enter(&dd->dd_lock);
- dd->dd_phys->dd_quota = new_quota;
- mutex_exit(&dd->dd_lock);
-}
-
-int
-dsl_dir_set_quota(const char *ddname, uint64_t quota)
-{
- dsl_dir_t *dd;
- int err;
-
- err = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (err)
- return (err);
- /*
- * If someone removes a file, then tries to set the quota, we
- * want to make sure the file freeing takes effect.
- */
- txg_wait_open(dd->dd_pool, 0);
-
- err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
- dsl_dir_set_quota_sync, dd, &quota, 0);
- dsl_dir_close(dd, FTAG);
- return (err);
-}
-
-/* ARGSUSED */
-static int
-dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- uint64_t *reservationp = arg2;
- uint64_t new_reservation = *reservationp;
- uint64_t used, avail;
- int64_t delta;
-
- if (new_reservation > INT64_MAX)
- return (EOVERFLOW);
-
- /*
- * If we are doing the preliminary check in open context, the
- * space estimates may be inaccurate.
- */
- if (!dmu_tx_is_syncing(tx))
- return (0);
-
- mutex_enter(&dd->dd_lock);
- used = dd->dd_used_bytes;
- delta = MAX(used, new_reservation) -
- MAX(used, dd->dd_phys->dd_reserved);
- mutex_exit(&dd->dd_lock);
-
- if (dd->dd_parent) {
- avail = dsl_dir_space_available(dd->dd_parent,
- NULL, 0, FALSE);
- } else {
- avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
- }
-
- if (delta > 0 && delta > avail)
- return (ENOSPC);
- if (delta > 0 && dd->dd_phys->dd_quota > 0 &&
- new_reservation > dd->dd_phys->dd_quota)
- return (ENOSPC);
- return (0);
-}
-
-static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- uint64_t *reservationp = arg2;
- uint64_t new_reservation = *reservationp;
- uint64_t used;
- int64_t delta;
-
- mutex_enter(&dd->dd_lock);
- used = dd->dd_used_bytes;
- delta = MAX(used, new_reservation) -
- MAX(used, dd->dd_phys->dd_reserved);
- mutex_exit(&dd->dd_lock);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_reserved = new_reservation;
-
- if (dd->dd_parent != NULL) {
- /* Roll up this additional usage into our ancestors */
- dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx);
- }
-}
-
-int
-dsl_dir_set_reservation(const char *ddname, uint64_t reservation)
-{
- dsl_dir_t *dd;
- int err;
-
- err = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (err)
- return (err);
- err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
- dsl_dir_set_reservation_sync, dd, &reservation, 0);
- dsl_dir_close(dd, FTAG);
- return (err);
-}
-
-static dsl_dir_t *
-closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
-{
- for (; ds1; ds1 = ds1->dd_parent) {
- dsl_dir_t *dd;
- for (dd = ds2; dd; dd = dd->dd_parent) {
- if (ds1 == dd)
- return (dd);
- }
- }
- return (NULL);
-}
-
-/*
- * If delta is applied to dd, how much of that delta would be applied to
- * ancestor? Syncing context only.
- */
-static int64_t
-would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
-{
- if (dd == ancestor)
- return (delta);
-
- mutex_enter(&dd->dd_lock);
- delta = parent_delta(dd, dd->dd_used_bytes, delta);
- mutex_exit(&dd->dd_lock);
- return (would_change(dd->dd_parent, delta, ancestor));
-}
-
-struct renamearg {
- dsl_dir_t *newparent;
- const char *mynewname;
-};
-
-/* ARGSUSED */
-static int
-dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct renamearg *ra = arg2;
- dsl_pool_t *dp = dd->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- int err;
- uint64_t val;
-
- /* There should be 2 references: the open and the dirty */
- if (dmu_buf_refcount(dd->dd_dbuf) > 2)
- return (EBUSY);
-
- /* check for existing name */
- err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
- ra->mynewname, 8, 1, &val);
- if (err == 0)
- return (EEXIST);
- if (err != ENOENT)
- return (err);
-
- if (ra->newparent != dd->dd_parent) {
- /* is there enough space? */
- uint64_t myspace =
- MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
-
- /* no rename into our descendant */
- if (closest_common_ancestor(dd, ra->newparent) == dd)
- return (EINVAL);
-
- if (err = dsl_dir_transfer_possible(dd->dd_parent,
- ra->newparent, myspace))
- return (err);
- }
-
- return (0);
-}
-
-static void
-dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct renamearg *ra = arg2;
- dsl_pool_t *dp = dd->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- int err;
-
- ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
-
- if (ra->newparent != dd->dd_parent) {
- uint64_t myspace =
- MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
-
- dsl_dir_diduse_space(dd->dd_parent, -myspace,
- -dd->dd_phys->dd_compressed_bytes,
- -dd->dd_phys->dd_uncompressed_bytes, tx);
- dsl_dir_diduse_space(ra->newparent, myspace,
- dd->dd_phys->dd_compressed_bytes,
- dd->dd_phys->dd_uncompressed_bytes, tx);
- }
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
- /* remove from old parent zapobj */
- err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
- dd->dd_myname, tx);
- ASSERT3U(err, ==, 0);
-
- (void) strcpy(dd->dd_myname, ra->mynewname);
- dsl_dir_close(dd->dd_parent, dd);
- dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
- VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
- ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
-
- /* add to new parent zapobj */
- err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
- dd->dd_myname, 8, 1, &dd->dd_object, tx);
- ASSERT3U(err, ==, 0);
-}
-
-int
-dsl_dir_rename(dsl_dir_t *dd, const char *newname)
-{
- struct renamearg ra;
- int err;
-
- /* new parent should exist */
- err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
- if (err)
- return (err);
-
- /* can't rename to different pool */
- if (dd->dd_pool != ra.newparent->dd_pool) {
- err = ENXIO;
- goto out;
- }
-
- /* new name should not already exist */
- if (ra.mynewname == NULL) {
- err = EEXIST;
- goto out;
- }
-
-
- err = dsl_sync_task_do(dd->dd_pool,
- dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
-
-out:
- dsl_dir_close(ra.newparent, FTAG);
- return (err);
-}
-
-int
-dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
-{
- dsl_dir_t *ancestor;
- int64_t adelta;
- uint64_t avail;
-
- ancestor = closest_common_ancestor(sdd, tdd);
- adelta = would_change(sdd, -space, ancestor);
- avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
- if (avail < space)
- return (ENOSPC);
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
deleted file mode 100644
index 00abf7e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dsl_pool.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_synctask.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/arc.h>
-#include <sys/zap.h>
-#include <sys/zio.h>
-#include <sys/zfs_context.h>
-#include <sys/fs/zfs.h>
-
-static int
-dsl_pool_open_mos_dir(dsl_pool_t *dp, dsl_dir_t **ddp)
-{
- uint64_t obj;
- int err;
-
- err = zap_lookup(dp->dp_meta_objset,
- dp->dp_root_dir->dd_phys->dd_child_dir_zapobj,
- MOS_DIR_NAME, sizeof (obj), 1, &obj);
- if (err)
- return (err);
-
- return (dsl_dir_open_obj(dp, obj, MOS_DIR_NAME, dp, ddp));
-}
-
-static dsl_pool_t *
-dsl_pool_open_impl(spa_t *spa, uint64_t txg)
-{
- dsl_pool_t *dp;
- blkptr_t *bp = spa_get_rootblkptr(spa);
-
- dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
- dp->dp_spa = spa;
- dp->dp_meta_rootbp = *bp;
- rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
- txg_init(dp, txg);
-
- txg_list_create(&dp->dp_dirty_datasets,
- offsetof(dsl_dataset_t, ds_dirty_link));
- txg_list_create(&dp->dp_dirty_dirs,
- offsetof(dsl_dir_t, dd_dirty_link));
- txg_list_create(&dp->dp_sync_tasks,
- offsetof(dsl_sync_task_group_t, dstg_node));
- list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t),
- offsetof(dsl_dataset_t, ds_synced_link));
-
- return (dp);
-}
-
-int
-dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
-{
- int err;
- dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
- objset_impl_t *osi;
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
- if (err)
- goto out;
- dp->dp_meta_objset = &osi->os;
-
- err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
- &dp->dp_root_dir_obj);
- if (err)
- goto out;
-
- err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
- NULL, dp, &dp->dp_root_dir);
- if (err)
- goto out;
-
- err = dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir);
- if (err)
- goto out;
-
-out:
- rw_exit(&dp->dp_config_rwlock);
- if (err)
- dsl_pool_close(dp);
- else
- *dpp = dp;
-
- return (err);
-}
-
-void
-dsl_pool_close(dsl_pool_t *dp)
-{
- /* drop our reference from dsl_pool_open() */
- if (dp->dp_mos_dir)
- dsl_dir_close(dp->dp_mos_dir, dp);
- if (dp->dp_root_dir)
- dsl_dir_close(dp->dp_root_dir, dp);
-
- /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
- if (dp->dp_meta_objset)
- dmu_objset_evict(NULL, dp->dp_meta_objset->os);
-
- txg_list_destroy(&dp->dp_dirty_datasets);
- txg_list_destroy(&dp->dp_dirty_dirs);
- txg_list_destroy(&dp->dp_sync_tasks);
- list_destroy(&dp->dp_synced_objsets);
-
- arc_flush();
- txg_fini(dp);
- rw_destroy(&dp->dp_config_rwlock);
- kmem_free(dp, sizeof (dsl_pool_t));
-}
-
-dsl_pool_t *
-dsl_pool_create(spa_t *spa, uint64_t txg)
-{
- int err;
- dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
- dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
- dp->dp_meta_objset = &dmu_objset_create_impl(spa,
- NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os;
-
- /* create the pool directory */
- err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
- ASSERT3U(err, ==, 0);
-
- /* create and open the root dir */
- dsl_dataset_create_root(dp, &dp->dp_root_dir_obj, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
- NULL, dp, &dp->dp_root_dir));
-
- /* create and open the meta-objset dir */
- (void) dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME, tx);
- VERIFY(0 == dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir));
-
- dmu_tx_commit(tx);
-
- return (dp);
-}
-
-void
-dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
-{
- zio_t *zio;
- dmu_tx_t *tx;
- dsl_dir_t *dd;
- dsl_dataset_t *ds;
- dsl_sync_task_group_t *dstg;
- objset_impl_t *mosi = dp->dp_meta_objset->os;
- int err;
-
- tx = dmu_tx_create_assigned(dp, txg);
-
- zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
- while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
- if (!list_link_active(&ds->ds_synced_link))
- list_insert_tail(&dp->dp_synced_objsets, ds);
- else
- dmu_buf_rele(ds->ds_dbuf, ds);
- dsl_dataset_sync(ds, zio, tx);
- }
- err = zio_wait(zio);
- ASSERT(err == 0);
-
- while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
- dsl_sync_task_group_sync(dstg, tx);
- while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg))
- dsl_dir_sync(dd, tx);
-
- if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
- list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
- zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
- dmu_objset_sync(mosi, zio, tx);
- err = zio_wait(zio);
- ASSERT(err == 0);
- dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
- spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
- }
-
- dmu_tx_commit(tx);
-}
-
-void
-dsl_pool_zil_clean(dsl_pool_t *dp)
-{
- dsl_dataset_t *ds;
-
- while (ds = list_head(&dp->dp_synced_objsets)) {
- list_remove(&dp->dp_synced_objsets, ds);
- ASSERT(ds->ds_user_ptr != NULL);
- zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
- dmu_buf_rele(ds->ds_dbuf, ds);
- }
-}
-
-/*
- * TRUE if the current thread is the tx_sync_thread or if we
- * are being called from SPA context during pool initialization.
- */
-int
-dsl_pool_sync_context(dsl_pool_t *dp)
-{
- return (curthread == dp->dp_tx.tx_sync_thread ||
- spa_get_dsl(dp->dp_spa) == NULL);
-}
-
-uint64_t
-dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
-{
- uint64_t space, resv;
-
- /*
- * Reserve about 1.6% (1/64), or at least 32MB, for allocation
- * efficiency.
- * XXX The intent log is not accounted for, so it must fit
- * within this slop.
- *
- * If we're trying to assess whether it's OK to do a free,
- * cut the reservation in half to allow forward progress
- * (e.g. make it possible to rm(1) files from a full pool).
- */
- space = spa_get_dspace(dp->dp_spa);
- resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1);
- if (netfree)
- resv >>= 1;
-
- return (space - resv);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
deleted file mode 100644
index 2fff66d..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_objset.h>
-#include <sys/dmu_tx.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_synctask.h>
-#include <sys/spa.h>
-#include <sys/zio_checksum.h> /* for the default checksum value */
-#include <sys/zap.h>
-#include <sys/fs/zfs.h>
-
-#include "zfs_prop.h"
-
-static int
-dodefault(const char *propname, int intsz, int numint, void *buf)
-{
- zfs_prop_t prop;
-
- if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL ||
- zfs_prop_readonly(prop))
- return (ENOENT);
-
- if (zfs_prop_get_type(prop) == prop_type_string) {
- if (intsz != 1)
- return (EOVERFLOW);
- (void) strncpy(buf, zfs_prop_default_string(prop), numint);
- } else {
- if (intsz != 8 || numint < 1)
- return (EOVERFLOW);
-
- *(uint64_t *)buf = zfs_prop_default_numeric(prop);
- }
-
- return (0);
-}
-
-static int
-dsl_prop_get_impl(dsl_dir_t *dd, const char *propname,
- int intsz, int numint, void *buf, char *setpoint)
-{
- int err = ENOENT;
- zfs_prop_t prop;
-
- if (setpoint)
- setpoint[0] = '\0';
-
- prop = zfs_name_to_prop(propname);
-
- /*
- * Note: dd may be NULL, therefore we shouldn't dereference it
- * ouside this loop.
- */
- for (; dd != NULL; dd = dd->dd_parent) {
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
- err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj,
- propname, intsz, numint, buf);
- if (err != ENOENT) {
- if (setpoint)
- dsl_dir_name(dd, setpoint);
- break;
- }
-
- /*
- * Break out of this loop for non-inheritable properties.
- */
- if (prop != ZFS_PROP_INVAL &&
- !zfs_prop_inheritable(prop))
- break;
- }
- if (err == ENOENT)
- err = dodefault(propname, intsz, numint, buf);
-
- return (err);
-}
-
-/*
- * Register interest in the named property. We'll call the callback
- * once to notify it of the current property value, and again each time
- * the property changes, until this callback is unregistered.
- *
- * Return 0 on success, errno if the prop is not an integer value.
- */
-int
-dsl_prop_register(dsl_dataset_t *ds, const char *propname,
- dsl_prop_changed_cb_t *callback, void *cbarg)
-{
- dsl_dir_t *dd = ds->ds_dir;
- uint64_t value;
- dsl_prop_cb_record_t *cbr;
- int err;
- int need_rwlock;
-
- need_rwlock = !RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock);
- if (need_rwlock)
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
-
- err = dsl_prop_get_impl(dd, propname, 8, 1, &value, NULL);
- if (err != 0) {
- rw_exit(&dd->dd_pool->dp_config_rwlock);
- return (err);
- }
-
- cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
- cbr->cbr_ds = ds;
- cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP);
- (void) strcpy((char *)cbr->cbr_propname, propname);
- cbr->cbr_func = callback;
- cbr->cbr_arg = cbarg;
- mutex_enter(&dd->dd_lock);
- list_insert_head(&dd->dd_prop_cbs, cbr);
- mutex_exit(&dd->dd_lock);
-
- cbr->cbr_func(cbr->cbr_arg, value);
-
- VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object,
- NULL, cbr, &dd));
- if (need_rwlock)
- rw_exit(&dd->dd_pool->dp_config_rwlock);
- /* Leave dataset open until this callback is unregistered */
- return (0);
-}
-
-int
-dsl_prop_get_ds(dsl_dir_t *dd, const char *propname,
- int intsz, int numints, void *buf, char *setpoint)
-{
- int err;
-
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_impl(dd, propname, intsz, numints, buf, setpoint);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
-
- return (err);
-}
-
-int
-dsl_prop_get(const char *ddname, const char *propname,
- int intsz, int numints, void *buf, char *setpoint)
-{
- dsl_dir_t *dd;
- const char *tail;
- int err;
-
- err = dsl_dir_open(ddname, FTAG, &dd, &tail);
- if (err)
- return (err);
- if (tail && tail[0] != '@') {
- dsl_dir_close(dd, FTAG);
- return (ENOENT);
- }
-
- err = dsl_prop_get_ds(dd, propname, intsz, numints, buf, setpoint);
-
- dsl_dir_close(dd, FTAG);
- return (err);
-}
-
-/*
- * Get the current property value. It may have changed by the time this
- * function returns, so it is NOT safe to follow up with
- * dsl_prop_register() and assume that the value has not changed in
- * between.
- *
- * Return 0 on success, ENOENT if ddname is invalid.
- */
-int
-dsl_prop_get_integer(const char *ddname, const char *propname,
- uint64_t *valuep, char *setpoint)
-{
- return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
-}
-
-/*
- * Unregister this callback. Return 0 on success, ENOENT if ddname is
- * invalid, ENOMSG if no matching callback registered.
- */
-int
-dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
- dsl_prop_changed_cb_t *callback, void *cbarg)
-{
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_cb_record_t *cbr;
-
- mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs);
- cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (cbr->cbr_ds == ds &&
- cbr->cbr_func == callback &&
- cbr->cbr_arg == cbarg &&
- strcmp(cbr->cbr_propname, propname) == 0)
- break;
- }
-
- if (cbr == NULL) {
- mutex_exit(&dd->dd_lock);
- return (ENOMSG);
- }
-
- list_remove(&dd->dd_prop_cbs, cbr);
- mutex_exit(&dd->dd_lock);
- kmem_free((void*)cbr->cbr_propname, strlen(cbr->cbr_propname)+1);
- kmem_free(cbr, sizeof (dsl_prop_cb_record_t));
-
- /* Clean up from dsl_prop_register */
- dsl_dir_close(dd, cbr);
- return (0);
-}
-
-/*
- * Return the number of callbacks that are registered for this dataset.
- */
-int
-dsl_prop_numcb(dsl_dataset_t *ds)
-{
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_cb_record_t *cbr;
- int num = 0;
-
- mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs);
- cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (cbr->cbr_ds == ds)
- num++;
- }
- mutex_exit(&dd->dd_lock);
-
- return (num);
-}
-
-static void
-dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
- const char *propname, uint64_t value, int first)
-{
- dsl_dir_t *dd;
- dsl_prop_cb_record_t *cbr;
- objset_t *mos = dp->dp_meta_objset;
- zap_cursor_t zc;
- zap_attribute_t za;
- int err;
-
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
- err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
- if (err)
- return;
-
- if (!first) {
- /*
- * If the prop is set here, then this change is not
- * being inherited here or below; stop the recursion.
- */
- err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, propname,
- 8, 1, &value);
- if (err == 0) {
- dsl_dir_close(dd, FTAG);
- return;
- }
- ASSERT3U(err, ==, ENOENT);
- }
-
- mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs);
- cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (strcmp(cbr->cbr_propname, propname) == 0) {
- cbr->cbr_func(cbr->cbr_arg, value);
- }
- }
- mutex_exit(&dd->dd_lock);
-
- for (zap_cursor_init(&zc, mos,
- dd->dd_phys->dd_child_dir_zapobj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- /* XXX recursion could blow stack; esp. za! */
- dsl_prop_changed_notify(dp, za.za_first_integer,
- propname, value, FALSE);
- }
- zap_cursor_fini(&zc);
- dsl_dir_close(dd, FTAG);
-}
-
-struct prop_set_arg {
- const char *name;
- int intsz;
- int numints;
- const void *buf;
-};
-
-
-static void
-dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct prop_set_arg *psa = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
- uint64_t intval;
- int isint;
-
- isint = (dodefault(psa->name, 8, 1, &intval) == 0);
-
- if (psa->numints == 0) {
- int err = zap_remove(mos, zapobj, psa->name, tx);
- ASSERT(err == 0 || err == ENOENT);
- if (isint) {
- VERIFY(0 == dsl_prop_get_impl(dd->dd_parent,
- psa->name, 8, 1, &intval, NULL));
- }
- } else {
- VERIFY(0 == zap_update(mos, zapobj, psa->name,
- psa->intsz, psa->numints, psa->buf, tx));
- if (isint)
- intval = *(uint64_t *)psa->buf;
- }
-
- if (isint) {
- dsl_prop_changed_notify(dd->dd_pool,
- dd->dd_object, psa->name, intval, TRUE);
- }
-}
-
-int
-dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
- int intsz, int numints, const void *buf)
-{
- struct prop_set_arg psa;
-
- psa.name = propname;
- psa.intsz = intsz;
- psa.numints = numints;
- psa.buf = buf;
-
- return (dsl_sync_task_do(dd->dd_pool,
- NULL, dsl_prop_set_sync, dd, &psa, 2));
-}
-
-int
-dsl_prop_set(const char *ddname, const char *propname,
- int intsz, int numints, const void *buf)
-{
- dsl_dir_t *dd;
- int err;
-
- /*
- * We must do these checks before we get to the syncfunc, since
- * it can't fail.
- */
- if (strlen(propname) >= ZAP_MAXNAMELEN)
- return (ENAMETOOLONG);
- if (intsz * numints >= ZAP_MAXVALUELEN)
- return (E2BIG);
-
- err = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (err)
- return (err);
- err = dsl_prop_set_dd(dd, propname, intsz, numints, buf);
- dsl_dir_close(dd, FTAG);
- return (err);
-}
-
-/*
- * Iterate over all properties for this dataset and return them in an nvlist.
- */
-int
-dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
-{
- dsl_dataset_t *ds = os->os->os_dsl_dataset;
- dsl_dir_t *dd = ds->ds_dir;
- int err = 0;
- dsl_pool_t *dp;
- objset_t *mos;
-
- if (dsl_dataset_is_snapshot(ds)) {
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- return (0);
- }
-
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- dp = dd->dd_pool;
- mos = dp->dp_meta_objset;
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- for (; dd != NULL; dd = dd->dd_parent) {
- char setpoint[MAXNAMELEN];
- zap_cursor_t zc;
- zap_attribute_t za;
-
- dsl_dir_name(dd, setpoint);
-
- for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_props_zapobj);
- (err = zap_cursor_retrieve(&zc, &za)) == 0;
- zap_cursor_advance(&zc)) {
- nvlist_t *propval;
- zfs_prop_t prop;
- /*
- * Skip non-inheritable properties.
- */
- if ((prop = zfs_name_to_prop(za.za_name)) !=
- ZFS_PROP_INVAL && !zfs_prop_inheritable(prop) &&
- dd != ds->ds_dir)
- continue;
-
- if (nvlist_lookup_nvlist(*nvp, za.za_name,
- &propval) == 0)
- continue;
-
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
- if (za.za_integer_length == 1) {
- /*
- * String property
- */
- char *tmp = kmem_alloc(za.za_num_integers,
- KM_SLEEP);
- err = zap_lookup(mos,
- dd->dd_phys->dd_props_zapobj,
- za.za_name, 1, za.za_num_integers,
- tmp);
- if (err != 0) {
- kmem_free(tmp, za.za_num_integers);
- break;
- }
- VERIFY(nvlist_add_string(propval,
- ZFS_PROP_VALUE, tmp) == 0);
- kmem_free(tmp, za.za_num_integers);
- } else {
- /*
- * Integer property
- */
- ASSERT(za.za_integer_length == 8);
- (void) nvlist_add_uint64(propval,
- ZFS_PROP_VALUE, za.za_first_integer);
- }
-
- VERIFY(nvlist_add_string(propval,
- ZFS_PROP_SOURCE, setpoint) == 0);
- VERIFY(nvlist_add_nvlist(*nvp, za.za_name,
- propval) == 0);
- nvlist_free(propval);
- }
- zap_cursor_fini(&zc);
-
- if (err != ENOENT)
- break;
- err = 0;
- }
- rw_exit(&dp->dp_config_rwlock);
-
- return (err);
-}
-
-void
-dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value)
-{
- nvlist_t *propval;
-
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(propval, ZFS_PROP_VALUE, value) == 0);
- VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0);
- nvlist_free(propval);
-}
-
-void
-dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
-{
- nvlist_t *propval;
-
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_string(propval, ZFS_PROP_VALUE, value) == 0);
- VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0);
- nvlist_free(propval);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c b/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
deleted file mode 100644
index 17deb56..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_synctask.h>
-
-#define DST_AVG_BLKSHIFT 14
-
-/* ARGSUSED */
-static int
-dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- return (0);
-}
-
-dsl_sync_task_group_t *
-dsl_sync_task_group_create(dsl_pool_t *dp)
-{
- dsl_sync_task_group_t *dstg;
-
- dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
- list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
- offsetof(dsl_sync_task_t, dst_node));
- dstg->dstg_pool = dp;
-
- return (dstg);
-}
-
-void
-dsl_sync_task_create(dsl_sync_task_group_t *dstg,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified)
-{
- dsl_sync_task_t *dst;
-
- if (checkfunc == NULL)
- checkfunc = dsl_null_checkfunc;
- dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
- dst->dst_checkfunc = checkfunc;
- dst->dst_syncfunc = syncfunc;
- dst->dst_arg1 = arg1;
- dst->dst_arg2 = arg2;
- list_insert_tail(&dstg->dstg_tasks, dst);
-
- dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
-}
-
-int
-dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
-{
- dmu_tx_t *tx;
- uint64_t txg;
- dsl_sync_task_t *dst;
-
-top:
- tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
- VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
-
- txg = dmu_tx_get_txg(tx);
-
- /* Do a preliminary error check. */
- dstg->dstg_err = 0;
- rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
-#ifdef ZFS_DEBUG
- /*
- * Only check half the time, otherwise, the sync-context
- * check will almost never fail.
- */
- if (spa_get_random(2) == 0)
- continue;
-#endif
- dst->dst_err =
- dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
- if (dst->dst_err)
- dstg->dstg_err = dst->dst_err;
- }
- rw_exit(&dstg->dstg_pool->dp_config_rwlock);
-
- if (dstg->dstg_err) {
- dmu_tx_commit(tx);
- return (dstg->dstg_err);
- }
-
- VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg));
-
- dmu_tx_commit(tx);
-
- txg_wait_synced(dstg->dstg_pool, txg);
-
- if (dstg->dstg_err == EAGAIN)
- goto top;
-
- return (dstg->dstg_err);
-}
-
-void
-dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
-{
- dsl_sync_task_t *dst;
-
- while (dst = list_head(&dstg->dstg_tasks)) {
- list_remove(&dstg->dstg_tasks, dst);
- kmem_free(dst, sizeof (dsl_sync_task_t));
- }
- kmem_free(dstg, sizeof (dsl_sync_task_group_t));
-}
-
-void
-dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
-{
- dsl_sync_task_t *dst;
- void *tr_cookie;
-
- ASSERT3U(dstg->dstg_err, ==, 0);
-
- /*
- * Check for sufficient space.
- */
- dstg->dstg_err = dsl_dir_tempreserve_space(dstg->dstg_pool->dp_mos_dir,
- dstg->dstg_space, dstg->dstg_space * 3, 0, &tr_cookie, tx);
- /* don't bother trying again */
- if (dstg->dstg_err == ERESTART)
- dstg->dstg_err = EAGAIN;
- if (dstg->dstg_err)
- return;
-
- /*
- * Check for errors by calling checkfuncs.
- */
- rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_WRITER);
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- dst->dst_err =
- dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
- if (dst->dst_err)
- dstg->dstg_err = dst->dst_err;
- }
-
- if (dstg->dstg_err == 0) {
- /*
- * Execute sync tasks.
- */
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
- }
- }
- rw_exit(&dstg->dstg_pool->dp_config_rwlock);
-
- dsl_dir_tempreserve_clear(tr_cookie, tx);
-}
-
-int
-dsl_sync_task_do(dsl_pool_t *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified)
-{
- dsl_sync_task_group_t *dstg;
- int err;
-
- dstg = dsl_sync_task_group_create(dp);
- dsl_sync_task_create(dstg, checkfunc, syncfunc,
- arg1, arg2, blocks_modified);
- err = dsl_sync_task_group_wait(dstg);
- dsl_sync_task_group_destroy(dstg);
- return (err);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/fletcher.c b/sys/contrib/opensolaris/uts/common/fs/zfs/fletcher.c
deleted file mode 100644
index edda3c9..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/fletcher.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <sys/byteorder.h>
-#include <sys/spa.h>
-
-void
-fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
- const uint64_t *ip = buf;
- const uint64_t *ipend = ip + (size / sizeof (uint64_t));
- uint64_t a0, b0, a1, b1;
-
- for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
- a0 += ip[0];
- a1 += ip[1];
- b0 += a0;
- b1 += a1;
- }
-
- ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
-}
-
-void
-fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
- const uint64_t *ip = buf;
- const uint64_t *ipend = ip + (size / sizeof (uint64_t));
- uint64_t a0, b0, a1, b1;
-
- for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
- a0 += BSWAP_64(ip[0]);
- a1 += BSWAP_64(ip[1]);
- b0 += a0;
- b1 += a1;
- }
-
- ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
-}
-
-void
-fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
-
- for (a = b = c = d = 0; ip < ipend; ip++) {
- a += ip[0];
- b += a;
- c += b;
- d += c;
- }
-
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
-}
-
-void
-fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
-
- for (a = b = c = d = 0; ip < ipend; ip++) {
- a += BSWAP_32(ip[0]);
- b += a;
- c += b;
- d += c;
- }
-
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
-}
-
-void
-fletcher_4_incremental_native(const void *buf, uint64_t size,
- zio_cksum_t *zcp)
-{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
-
- a = zcp->zc_word[0];
- b = zcp->zc_word[1];
- c = zcp->zc_word[2];
- d = zcp->zc_word[3];
-
- for (; ip < ipend; ip++) {
- a += ip[0];
- b += a;
- c += b;
- d += c;
- }
-
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
-}
-
-void
-fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
- zio_cksum_t *zcp)
-{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
-
- a = zcp->zc_word[0];
- b = zcp->zc_word[1];
- c = zcp->zc_word[2];
- d = zcp->zc_word[3];
-
- for (; ip < ipend; ip++) {
- a += BSWAP_32(ip[0]);
- b += a;
- c += b;
- d += c;
- }
-
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/gzip.c b/sys/contrib/opensolaris/uts/common/fs/zfs/gzip.c
deleted file mode 100644
index b257d4a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/gzip.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/debug.h>
-#include <sys/types.h>
-#include <sys/zmod.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#else
-#include <strings.h>
-#endif
-
-size_t
-gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
-{
- size_t dstlen = d_len;
-
- ASSERT(d_len <= s_len);
-
- if (z_compress_level(d_start, &dstlen, s_start, s_len, n) != Z_OK) {
- if (d_len != s_len)
- return (s_len);
-
- bcopy(s_start, d_start, s_len);
- return (s_len);
- }
-
- return (dstlen);
-}
-
-/*ARGSUSED*/
-int
-gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
-{
- size_t dstlen = d_len;
-
- ASSERT(d_len >= s_len);
-
- if (z_uncompress(d_start, &dstlen, s_start, s_len) != Z_OK)
- return (-1);
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/lzjb.c b/sys/contrib/opensolaris/uts/common/fs/zfs/lzjb.c
deleted file mode 100644
index a88b85c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/lzjb.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * We keep our own copy of this algorithm for 2 main reasons:
- * 1. If we didn't, anyone modifying common/os/compress.c would
- * directly break our on disk format
- * 2. Our version of lzjb does not have a number of checks that the
- * common/os version needs and uses
- * In particular, we are adding the "feature" that compress() can
- * take a destination buffer size and return -1 if the data will not
- * compress to d_len or less.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/types.h>
-
-#define MATCH_BITS 6
-#define MATCH_MIN 3
-#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1))
-#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1)
-#define LEMPEL_SIZE 256
-
-/*
- * Compress s_len bytes at s_start into the d_len-byte buffer at d_start.
- *
- * The output is a sequence of groups: one "copymap" byte followed by up
- * to NBBY items.  Each copymap bit says whether the corresponding item is
- * a literal byte (bit clear) or a two-byte match (bit set) that encodes a
- * length of MATCH_MIN..MATCH_MAX and a backwards offset masked by
- * OFFSET_MASK.
- *
- * Returns the number of bytes written to d_start.  If, at the start of a
- * group, the output is within 1 + 2 * NBBY bytes of the end of the
- * destination, compression is abandoned and s_len is returned; in that
- * case the source is copied verbatim into the destination only when
- * d_len == s_len.  The argument n is unused.
- */
-/*ARGSUSED*/
-size_t
-lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
-{
- uchar_t *src = s_start;
- uchar_t *dst = d_start;
- uchar_t *cpy, *copymap;
- int copymask = 1 << (NBBY - 1);
- int mlen, offset;
- uint16_t *hp;
- uint16_t lempel[LEMPEL_SIZE]; /* uninitialized; see above */
-
- while (src < (uchar_t *)s_start + s_len) {
- /* copymask wrapped: begin a new group with a fresh copymap byte */
- if ((copymask <<= 1) == (1 << NBBY)) {
- /* not enough room left for a worst-case group: give up */
- if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) {
- if (d_len != s_len)
- return (s_len);
- mlen = s_len;
- for (src = s_start, dst = d_start; mlen; mlen--)
- *dst++ = *src++;
- return (s_len);
- }
- copymask = 1;
- copymap = dst;
- *dst++ = 0;
- }
- /*
- * Fewer than MATCH_MAX bytes remain, so the match loop below
- * could read past the source; emit a literal instead.
- */
- if (src > (uchar_t *)s_start + s_len - MATCH_MAX) {
- *dst++ = *src++;
- continue;
- }
- /* hash the next three bytes to pick a table slot */
- hp = &lempel[((src[0] + 13) ^ (src[1] - 13) ^ src[2]) &
- (LEMPEL_SIZE - 1)];
- offset = (intptr_t)(src - *hp) & OFFSET_MASK;
- /* the slot keeps only the low 16 bits of the position */
- *hp = (uint16_t)(uintptr_t)src;
- cpy = src - offset;
- /*
- * The slot contents are unverified (possibly stale or never
- * written), so confirm the candidate lies in the source, is
- * not src itself, and really matches MATCH_MIN bytes.
- */
- if (cpy >= (uchar_t *)s_start && cpy != src &&
- src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) {
- *copymap |= copymask;
- for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++)
- if (src[mlen] != cpy[mlen])
- break;
- /* byte 0: length and high offset bits; byte 1: low offset */
- *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) |
- (offset >> NBBY);
- *dst++ = (uchar_t)offset;
- src += mlen;
- } else {
- *dst++ = *src++;
- }
- }
- return (dst - (uchar_t *)d_start);
-}
-
-/*
- * Expand the lzjb stream at s_start until exactly d_len bytes have been
- * produced at d_start.  Returns 0 on success, or -1 if a match refers to
- * data before the start of the destination buffer.  s_len and n are
- * unused.
- *
- * NOTE(review): src is never checked against s_len, so a malformed stream
- * can apparently make this read beyond the source buffer -- confirm that
- * callers only pass verified data.
- */
-/*ARGSUSED*/
-int
-lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
-{
- uchar_t *src = s_start;
- uchar_t *dst = d_start;
- uchar_t *d_end = (uchar_t *)d_start + d_len;
- uchar_t *cpy, copymap;
- int copymask = 1 << (NBBY - 1);
-
- while (dst < d_end) {
- /* copymask wrapped: fetch the copymap byte for the next group */
- if ((copymask <<= 1) == (1 << NBBY)) {
- copymask = 1;
- copymap = *src++;
- }
- if (copymap & copymask) {
- /* two-byte match: length in the top bits, then the offset */
- int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN;
- int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK;
- src += 2;
- if ((cpy = dst - offset) < (uchar_t *)d_start)
- return (-1);
- /* byte-wise copy; source and destination may overlap */
- while (--mlen >= 0 && dst < d_end)
- *dst++ = *cpy++;
- } else {
- *dst++ = *src++;
- }
- }
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
deleted file mode 100644
index 0dba134..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa_impl.h>
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/space_map.h>
-#include <sys/metaslab_impl.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-
-uint64_t metaslab_aliquot = 512ULL << 10;
-
-/*
- * ==========================================================================
- * Metaslab classes
- * ==========================================================================
- */
-/*
- * Allocate a zeroed metaslab class with an empty group rotor.
- */
-metaslab_class_t *
-metaslab_class_create(void)
-{
-	metaslab_class_t *cls;
-
-	cls = kmem_zalloc(sizeof (*cls), KM_SLEEP);
-	cls->mc_rotor = NULL;
-
-	return (cls);
-}
-
-/*
- * Detach and destroy every group still on the class rotor, then free
- * the class itself.
- */
-void
-metaslab_class_destroy(metaslab_class_t *mc)
-{
-	for (;;) {
-		metaslab_group_t *group = mc->mc_rotor;
-
-		if (group == NULL)
-			break;
-
-		metaslab_class_remove(mc, group);
-		metaslab_group_destroy(group);
-	}
-
-	kmem_free(mc, sizeof (metaslab_class_t));
-}
-
-/*
- * Link mg into mc's circular list of groups, immediately after the
- * current rotor, and make it the new rotor.
- */
-void
-metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg)
-{
-	metaslab_group_t *rotor;
-
-	ASSERT(mg->mg_class == NULL);
-
-	rotor = mc->mc_rotor;
-	if (rotor != NULL) {
-		metaslab_group_t *after = rotor->mg_next;
-
-		mg->mg_prev = rotor;
-		mg->mg_next = after;
-		rotor->mg_next = mg;
-		after->mg_prev = mg;
-	} else {
-		/* First group: a one-element ring pointing at itself. */
-		mg->mg_prev = mg;
-		mg->mg_next = mg;
-	}
-
-	mc->mc_rotor = mg;
-	mg->mg_class = mc;
-}
-
-/*
- * Unlink mg from mc's circular list of groups.  The rotor moves to mg's
- * successor, or becomes NULL if mg was the only group.
- */
-void
-metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg)
-{
-	metaslab_group_t *before, *after;
-
-	ASSERT(mg->mg_class == mc);
-
-	before = mg->mg_prev;
-	after = mg->mg_next;
-
-	if (after != mg) {
-		mc->mc_rotor = after;
-		before->mg_next = after;
-		after->mg_prev = before;
-	} else {
-		/* mg was alone on the ring */
-		mc->mc_rotor = NULL;
-	}
-
-	mg->mg_class = NULL;
-	mg->mg_next = NULL;
-	mg->mg_prev = NULL;
-}
-
-/*
- * ==========================================================================
- * Metaslab groups
- * ==========================================================================
- */
-/*
- * AVL comparator for a group's metaslab tree: heavier metaslabs sort
- * first; ties are broken by ascending starting offset.
- */
-static int
-metaslab_compare(const void *x1, const void *x2)
-{
-	const metaslab_t *a = x1;
-	const metaslab_t *b = x2;
-
-	if (a->ms_weight != b->ms_weight)
-		return (a->ms_weight < b->ms_weight ? 1 : -1);
-
-	/*
-	 * Equal weights: the starting offset makes the ordering unique.
-	 */
-	if (a->ms_map.sm_start != b->ms_map.sm_start)
-		return (a->ms_map.sm_start < b->ms_map.sm_start ? -1 : 1);
-
-	ASSERT3P(a, ==, b);
-
-	return (0);
-}
-
-/*
- * Allocate a metaslab group for vdev vd, initialize its lock and metaslab
- * tree, size its aliquot by the number of children of vd (at least one),
- * and add it to class mc.
- */
-metaslab_group_t *
-metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
-{
-	metaslab_group_t *group;
-
-	group = kmem_zalloc(sizeof (*group), KM_SLEEP);
-
-	mutex_init(&group->mg_lock, NULL, MUTEX_DEFAULT, NULL);
-	avl_create(&group->mg_metaslab_tree, metaslab_compare,
-	    sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
-
-	group->mg_aliquot = metaslab_aliquot * MAX(1, vd->vdev_children);
-	group->mg_vd = vd;
-
-	metaslab_class_add(mc, group);
-
-	return (group);
-}
-
-/*
- * Tear down a group's metaslab tree and lock, then free the group.
- */
-void
-metaslab_group_destroy(metaslab_group_t *mg)
-{
-	avl_destroy(&mg->mg_metaslab_tree);
-	mutex_destroy(&mg->mg_lock);
-
-	kmem_free(mg, sizeof (*mg));
-}
-
-/*
- * Insert msp into mg's metaslab tree with an initial weight of zero.
- */
-static void
-metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
-{
-	mutex_enter(&mg->mg_lock);
-
-	ASSERT(msp->ms_group == NULL);
-	msp->ms_weight = 0;
-	msp->ms_group = mg;
-	avl_add(&mg->mg_metaslab_tree, msp);
-
-	mutex_exit(&mg->mg_lock);
-}
-
-/*
- * Take msp out of mg's metaslab tree and clear its group pointer.
- */
-static void
-metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
-{
-	mutex_enter(&mg->mg_lock);
-
-	ASSERT(msp->ms_group == mg);
-	avl_remove(&mg->mg_metaslab_tree, msp);
-	msp->ms_group = NULL;
-
-	mutex_exit(&mg->mg_lock);
-}
-
-/*
- * Give msp a new weight and reposition it in mg's weight-ordered tree.
- * The caller must hold msp->ms_lock.
- */
-static void
-metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
-{
-	/*
-	 * Any value is legal in principle, but in practice weights in
-	 * the range [1, 510] are never used.
-	 */
-	ASSERT(weight == 0 || weight >= SPA_MINBLOCKSIZE-1);
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
-
-	mutex_enter(&mg->mg_lock);
-
-	ASSERT(msp->ms_group == mg);
-
-	/* The weight is part of the sort key, so remove before changing it. */
-	avl_remove(&mg->mg_metaslab_tree, msp);
-	msp->ms_weight = weight;
-	avl_add(&mg->mg_metaslab_tree, msp);
-
-	mutex_exit(&mg->mg_lock);
-}
-
-/*
- * ==========================================================================
- * The first-fit block allocator
- * ==========================================================================
- */
-/*
- * Allocate the first-fit allocator's private state: a zeroed array of
- * 64 cursors, indexed by alignment in metaslab_ff_alloc().
- */
-static void
-metaslab_ff_load(space_map_t *sm)
-{
-	size_t cursor_bytes = 64 * sizeof (uint64_t);
-
-	ASSERT(sm->sm_ppd == NULL);
-
-	sm->sm_ppd = kmem_zalloc(cursor_bytes, KM_SLEEP);
-}
-
-/*
- * Release the cursor array allocated by metaslab_ff_load().
- */
-static void
-metaslab_ff_unload(space_map_t *sm)
-{
-	size_t cursor_bytes = 64 * sizeof (uint64_t);
-
-	kmem_free(sm->sm_ppd, cursor_bytes);
-	sm->sm_ppd = NULL;
-}
-
-/*
- * First-fit allocation of 'size' bytes from sm.  A separate cursor is
- * kept for each alignment (the lowest set bit of size); the search starts
- * at the cursor and, if nothing fits, restarts once from offset zero.
- * Returns the allocated offset, or -1ULL if no segment can hold the
- * request.
- */
-static uint64_t
-metaslab_ff_alloc(space_map_t *sm, uint64_t size)
-{
-	avl_tree_t *tree = &sm->sm_root;
-	uint64_t align = size & -size;
-	uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
-
-	for (;;) {
-		space_seg_t *seg, key;
-		avl_index_t where;
-
-		key.ss_start = *cursor;
-		key.ss_end = *cursor + size;
-
-		seg = avl_find(tree, &key, &where);
-		if (seg == NULL)
-			seg = avl_nearest(tree, where, AVL_AFTER);
-
-		for (; seg != NULL; seg = AVL_NEXT(tree, seg)) {
-			uint64_t offset = P2ROUNDUP(seg->ss_start, align);
-
-			if (offset + size <= seg->ss_end) {
-				*cursor = offset + size;
-				return (offset);
-			}
-		}
-
-		/*
-		 * A search that began at offset zero covered the whole
-		 * map, so give up.  Otherwise rewind the cursor and make
-		 * one more pass from the beginning.
-		 */
-		if (*cursor == 0)
-			return (-1ULL);
-
-		*cursor = 0;
-	}
-}
-
-/*
- * Claim hook for the first-fit allocator: intentionally a no-op.
- */
-/* ARGSUSED */
-static void
-metaslab_ff_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
-	/* The cursors do not need adjusting when space is claimed. */
-}
-
-/*
- * Free hook for the first-fit allocator: intentionally a no-op.
- */
-/* ARGSUSED */
-static void
-metaslab_ff_free(space_map_t *sm, uint64_t start, uint64_t size)
-{
-	/* The cursors do not need adjusting when space is freed. */
-}
-
-/*
- * Operations vector for the first-fit allocator; metaslab_activate()
- * hands it to space_map_load().  The initializer is positional, so the
- * entries must stay in the order space_map_ops_t declares them
- * (presumably load, unload, alloc, claim, free -- verify against the
- * structure definition).
- */
-static space_map_ops_t metaslab_ff_ops = {
- metaslab_ff_load,
- metaslab_ff_unload,
- metaslab_ff_alloc,
- metaslab_ff_claim,
- metaslab_ff_free
-};
-
-/*
- * ==========================================================================
- * Metaslabs
- * ==========================================================================
- */
-/*
- * Create the metaslab covering 'size' bytes starting at 'start' on the
- * vdev behind mg, and add it to mg.  *smo is copied into the metaslab's
- * syncing space map object.  txg tells whether the pool is being opened
- * (0), created (TXG_INITIAL), or grown (anything later); see the comments
- * below for how that affects when the space becomes available.
- * Returns the new metaslab.
- */
-metaslab_t *
-metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
- uint64_t start, uint64_t size, uint64_t txg)
-{
- vdev_t *vd = mg->mg_vd;
- metaslab_t *msp;
-
- msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP);
- mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL);
-
- msp->ms_smo_syncing = *smo;
-
- /*
- * We create the main space map here, but we don't create the
- * allocmaps and freemaps until metaslab_sync_done(). This serves
- * two purposes: it allows metaslab_sync_done() to detect the
- * addition of new space; and for debugging, it ensures that we'd
- * data fault on any attempt to use this metaslab before it's ready.
- */
- space_map_create(&msp->ms_map, start, size,
- vd->vdev_ashift, &msp->ms_lock);
-
- metaslab_group_add(mg, msp);
-
- /*
- * If we're opening an existing pool (txg == 0) or creating
- * a new one (txg == TXG_INITIAL), all space is available now.
- * If we're adding space to an existing pool, the new space
- * does not become available until after this txg has synced.
- */
- if (txg <= TXG_INITIAL)
- metaslab_sync_done(msp, 0);
-
- if (txg != 0) {
- /*
- * The vdev is dirty, but the metaslab isn't -- it just needs
- * to have metaslab_sync_done() invoked from vdev_sync_done().
- * [We could just dirty the metaslab, but that would cause us
- * to allocate a space map object for it, which is wasteful
- * and would mess up the locality logic in metaslab_weight().]
- */
- ASSERT(TXG_CLEAN(txg) == spa_last_synced_txg(vd->vdev_spa));
- vdev_dirty(vd, 0, NULL, txg);
- vdev_dirty(vd, VDD_METASLAB, msp, TXG_CLEAN(txg));
- }
-
- return (msp);
-}
-
-/*
- * Destroy a metaslab: subtract its size and allocated space from the
- * vdev's totals, remove it from its group, tear down its space maps and
- * lock, and free it.
- */
-void
-metaslab_fini(metaslab_t *msp)
-{
-	metaslab_group_t *group = msp->ms_group;
-	int slot;
-
-	vdev_space_update(group->mg_vd, -msp->ms_map.sm_size,
-	    -msp->ms_smo.smo_alloc);
-
-	metaslab_group_remove(group, msp);
-
-	mutex_enter(&msp->ms_lock);
-
-	/* the in-core map first ... */
-	space_map_unload(&msp->ms_map);
-	space_map_destroy(&msp->ms_map);
-
-	/* ... then the per-txg allocation and free maps */
-	for (slot = 0; slot < TXG_SIZE; slot++) {
-		space_map_destroy(&msp->ms_allocmap[slot]);
-		space_map_destroy(&msp->ms_freemap[slot]);
-	}
-
-	mutex_exit(&msp->ms_lock);
-	mutex_destroy(&msp->ms_lock);
-
-	kmem_free(msp, sizeof (*msp));
-}
-
-/*
- * The top two bits of a metaslab's weight mark it as active.
- * metaslab_group_alloc() activates with PRIMARY unless an earlier DVA of
- * the same block already lives on the group's vdev, in which case it
- * uses SECONDARY; metaslab_claim_dva() always uses SECONDARY.
- * METASLAB_SMO_BONUS_MULTIPLIER is the factor metaslab_weight() applies
- * to metaslabs that already have a space map object.
- */
-#define METASLAB_WEIGHT_PRIMARY (1ULL << 63)
-#define METASLAB_WEIGHT_SECONDARY (1ULL << 62)
-#define METASLAB_ACTIVE_MASK \
- (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
-#define METASLAB_SMO_BONUS_MULTIPLIER 2
-
-/*
- * Compute the weight used to order msp within its group.  The caller
- * must hold msp->ms_lock.  Any active bits already set in ms_weight are
- * carried over into the result.
- */
-static uint64_t
-metaslab_weight(metaslab_t *msp)
-{
-	space_map_t *map = &msp->ms_map;
-	space_map_obj_t *obj = &msp->ms_smo;
-	vdev_t *vd = msp->ms_group->mg_vd;
-	uint64_t free_space, w;
-
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
-
-	/*
-	 * Start from the amount of free space in the metaslab.
-	 */
-	free_space = map->sm_size - obj->smo_alloc;
-
-	/*
-	 * Disks with uniform bit density and constant angular velocity
-	 * deliver more bandwidth from their outer zones than from their
-	 * inner ones, typically by about 2:1.  Scale the weight from 2x
-	 * for the lowest metaslab down to 1x for the highest, so that we
-	 * prefer the metaslab with the most free bandwidth rather than
-	 * merely the most free space.
-	 */
-	w = 2 * free_space -
-	    ((map->sm_start >> vd->vdev_ms_shift) * free_space) /
-	    vd->vdev_ms_count;
-	ASSERT(w >= free_space && w <= 2 * free_space);
-
-	/*
-	 * Favor metaslabs that have been used before, for locality.
-	 */
-	if (obj->smo_object != 0)
-		w *= METASLAB_SMO_BONUS_MULTIPLIER;
-	ASSERT(w >= free_space &&
-	    w <= 2 * METASLAB_SMO_BONUS_MULTIPLIER * free_space);
-
-	/*
-	 * An active metaslab keeps its active bits, which rank it ahead
-	 * of every inactive one so that it gets polished off first.
-	 */
-	return (w | (msp->ms_weight & METASLAB_ACTIVE_MASK));
-}
-
-/*
- * Make msp active: if it carries no active bit yet, load its space map
- * and re-sort it with activation_weight OR-ed into its weight.  On a load
- * failure the metaslab is re-sorted with weight 0 and the error is
- * returned.  Returns 0 otherwise.  The caller must hold msp->ms_lock.
- */
-static int
-metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
-{
-	space_map_t *map = &msp->ms_map;
-
-	ASSERT(MUTEX_HELD(&msp->ms_lock));
-
-	if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
-		objset_t *mos =
-		    msp->ms_group->mg_vd->vdev_spa->spa_meta_objset;
-		int error;
-
-		error = space_map_load(map, &metaslab_ff_ops, SM_FREE,
-		    &msp->ms_smo, mos);
-		if (error != 0) {
-			metaslab_group_sort(msp->ms_group, msp, 0);
-			return (error);
-		}
-
-		metaslab_group_sort(msp->ms_group, msp,
-		    msp->ms_weight | activation_weight);
-	}
-
-	ASSERT(map->sm_loaded);
-	ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
-
-	return (0);
-}
-
-/*
- * Deactivate msp by lowering its weight to at most 'size', which must
- * leave both active bits clear.
- */
-static void
-metaslab_passivate(metaslab_t *msp, uint64_t size)
-{
-	uint64_t capped;
-
-	/*
-	 * A size below SPA_MINBLOCKSIZE means this metaslab will never be
-	 * allocated from again, so it had better be empty -- otherwise
-	 * we would be leaving space on the table.
-	 */
-	ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
-
-	capped = MIN(msp->ms_weight, size);
-	metaslab_group_sort(msp->ms_group, msp, capped);
-
-	ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
-}
-
-/*
- * Write a metaslab to disk in the context of the specified transaction group.
- *
- * Allocates the space map object on first use, folds this txg's frees
- * into freed_map, optionally condenses the on-disk map, syncs this txg's
- * allocmap and freemap, and finally copies the updated space map object
- * header into the object's bonus buffer.
- */
-void
-metaslab_sync(metaslab_t *msp, uint64_t txg)
-{
- vdev_t *vd = msp->ms_group->mg_vd;
- spa_t *spa = vd->vdev_spa;
- objset_t *mos = spa->spa_meta_objset;
- space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
- space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- space_map_t *sm = &msp->ms_map;
- space_map_obj_t *smo = &msp->ms_smo_syncing;
- dmu_buf_t *db;
- dmu_tx_t *tx;
- int t;
-
- tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
-
- /*
- * The only state that can actually be changing concurrently with
- * metaslab_sync() is the metaslab's ms_map. No other thread can
- * be modifying this txg's allocmap, freemap, freed_map, or smo.
- * Therefore, we only hold ms_lock to satisfy space_map ASSERTs.
- * We drop it whenever we call into the DMU, because the DMU
- * can call down to us (e.g. via zio_free()) at any time.
- */
- mutex_enter(&msp->ms_lock);
-
- /*
- * First sync of this metaslab: allocate its space map object and
- * record the object number in the vdev's metaslab array.
- */
- if (smo->smo_object == 0) {
- ASSERT(smo->smo_objsize == 0);
- ASSERT(smo->smo_alloc == 0);
- mutex_exit(&msp->ms_lock);
- smo->smo_object = dmu_object_alloc(mos,
- DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
- DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
- ASSERT(smo->smo_object != 0);
- dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
- (sm->sm_start >> vd->vdev_ms_shift),
- sizeof (uint64_t), &smo->smo_object, tx);
- mutex_enter(&msp->ms_lock);
- }
-
- /* accumulate this txg's frees into freed_map */
- space_map_walk(freemap, space_map_add, freed_map);
-
- if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
- 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
- /*
- * The in-core space map representation is twice as compact
- * as the on-disk one, so it's time to condense the latter
- * by generating a pure allocmap from first principles.
- *
- * This metaslab is 100% allocated,
- * minus the content of the in-core map (sm),
- * minus what's been freed this txg (freed_map),
- * minus allocations from txgs in the future
- * (because they haven't been committed yet).
- */
- space_map_vacate(allocmap, NULL, NULL);
- space_map_vacate(freemap, NULL, NULL);
-
- space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);
-
- space_map_walk(sm, space_map_remove, allocmap);
- space_map_walk(freed_map, space_map_remove, allocmap);
-
- for (t = 1; t < TXG_CONCURRENT_STATES; t++)
- space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
- space_map_remove, allocmap);
-
- mutex_exit(&msp->ms_lock);
- space_map_truncate(smo, mos, tx);
- mutex_enter(&msp->ms_lock);
- }
-
- space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
- space_map_sync(freemap, SM_FREE, smo, mos, tx);
-
- mutex_exit(&msp->ms_lock);
-
- /* persist the updated header in the object's bonus buffer */
- VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
- ASSERT3U(db->db_size, ==, sizeof (*smo));
- bcopy(smo, db->db_data, db->db_size);
- dmu_buf_rele(db, FTAG);
-
- dmu_tx_commit(tx);
-}
-
-/*
- * Called after a transaction group has completely synced to mark
- * all of the metaslab's free space as usable.
- *
- * Also creates the per-txg allocmaps and freemaps the first time it runs
- * for a metaslab, publishes the synced space map object (ms_smo_syncing)
- * as ms_smo, updates the vdev's space accounting, and re-sorts the
- * metaslab within its group by its freshly computed weight.
- */
-void
-metaslab_sync_done(metaslab_t *msp, uint64_t txg)
-{
- space_map_obj_t *smo = &msp->ms_smo;
- space_map_obj_t *smosync = &msp->ms_smo_syncing;
- space_map_t *sm = &msp->ms_map;
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- metaslab_group_t *mg = msp->ms_group;
- vdev_t *vd = mg->mg_vd;
- int t;
-
- mutex_enter(&msp->ms_lock);
-
- /*
- * If this metaslab is just becoming available, initialize its
- * allocmaps and freemaps and add its capacity to the vdev.
- */
- if (freed_map->sm_size == 0) {
- for (t = 0; t < TXG_SIZE; t++) {
- space_map_create(&msp->ms_allocmap[t], sm->sm_start,
- sm->sm_size, sm->sm_shift, sm->sm_lock);
- space_map_create(&msp->ms_freemap[t], sm->sm_start,
- sm->sm_size, sm->sm_shift, sm->sm_lock);
- }
- vdev_space_update(vd, sm->sm_size, 0);
- }
-
- /* account for the change in allocated space since the last sync */
- vdev_space_update(vd, 0, smosync->smo_alloc - smo->smo_alloc);
-
- ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
- ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
-
- /*
- * If there's a space_map_load() in progress, wait for it to complete
- * so that we have a consistent view of the in-core space map.
- * Then, add everything we freed in this txg to the map.
- */
- space_map_load_wait(sm);
- space_map_vacate(freed_map, sm->sm_loaded ? space_map_free : NULL, sm);
-
- *smo = *smosync;
-
- /*
- * If the map is loaded but no longer active, evict it as soon as all
- * future allocations have synced. (If we unloaded it now and then
- * loaded a moment later, the map wouldn't reflect those allocations.)
- */
- if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
- int evictable = 1;
-
- for (t = 1; t < TXG_CONCURRENT_STATES; t++)
- if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
- evictable = 0;
-
- if (evictable)
- space_map_unload(sm);
- }
-
- metaslab_group_sort(mg, msp, metaslab_weight(msp));
-
- mutex_exit(&msp->ms_lock);
-}
-
-/*
- * Distance in bytes, rounded to whole metaslabs, between msp and the
- * block described by dva.  A DVA on a different vdev is reported as
- * 1ULL << 63.
- */
-static uint64_t
-metaslab_distance(metaslab_t *msp, dva_t *dva)
-{
-	vdev_t *vd = msp->ms_group->mg_vd;
-	uint64_t shift = vd->vdev_ms_shift;
-	uint64_t dva_ms, own_ms, gap;
-
-	if (vd->vdev_id != DVA_GET_VDEV(dva))
-		return (1ULL << 63);
-
-	/* Compare metaslab indices rather than raw offsets. */
-	dva_ms = DVA_GET_OFFSET(dva) >> shift;
-	own_ms = msp->ms_map.sm_start >> shift;
-
-	gap = (dva_ms > own_ms) ? dva_ms - own_ms : own_ms - dva_ms;
-
-	return (gap << shift);
-}
-
-/*
- * Try to allocate 'size' bytes in txg from one of mg's metaslabs.
- *
- * dva[0..d-1] are the DVAs already chosen for this block.  If any of
- * them is on this group's vdev, the metaslab is activated with
- * METASLAB_WEIGHT_SECONDARY and must be at least min_distance away from
- * each of them (half again as far for a metaslab with nothing allocated).
- * Otherwise the heaviest metaslab is taken and activated with
- * METASLAB_WEIGHT_PRIMARY.
- *
- * Returns the allocated offset, or -1ULL if no metaslab in the group can
- * satisfy the request.
- */
-static uint64_t
-metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
- uint64_t min_distance, dva_t *dva, int d)
-{
- metaslab_t *msp = NULL;
- uint64_t offset = -1ULL;
- avl_tree_t *t = &mg->mg_metaslab_tree;
- uint64_t activation_weight;
- uint64_t target_distance;
- int i;
-
- activation_weight = METASLAB_WEIGHT_PRIMARY;
- for (i = 0; i < d; i++)
- if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id)
- activation_weight = METASLAB_WEIGHT_SECONDARY;
-
- for (;;) {
- mutex_enter(&mg->mg_lock);
- /* the tree is ordered heaviest first (see metaslab_compare()) */
- for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
- if (msp->ms_weight < size) {
- mutex_exit(&mg->mg_lock);
- return (-1ULL);
- }
-
- if (activation_weight == METASLAB_WEIGHT_PRIMARY)
- break;
-
- target_distance = min_distance +
- (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1);
-
- /* accept only if far enough from every earlier DVA */
- for (i = 0; i < d; i++)
- if (metaslab_distance(msp, &dva[i]) <
- target_distance)
- break;
- if (i == d)
- break;
- }
- mutex_exit(&mg->mg_lock);
- if (msp == NULL)
- return (-1ULL);
-
- mutex_enter(&msp->ms_lock);
-
- /*
- * Ensure that the metaslab we have selected is still
- * capable of handling our request. It's possible that
- * another thread may have changed the weight while we
- * were blocked on the metaslab lock.
- */
- if (msp->ms_weight < size) {
- mutex_exit(&msp->ms_lock);
- continue;
- }
-
- /*
- * A metaslab that is currently a secondary is passivated
- * (dropping its active bits) and the search is retried.
- */
- if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) &&
- activation_weight == METASLAB_WEIGHT_PRIMARY) {
- metaslab_passivate(msp,
- msp->ms_weight & ~METASLAB_ACTIVE_MASK);
- mutex_exit(&msp->ms_lock);
- continue;
- }
-
- if (metaslab_activate(msp, activation_weight) != 0) {
- mutex_exit(&msp->ms_lock);
- continue;
- }
-
- /* success: leave the loop with ms_lock still held */
- if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
- break;
-
- /* nothing of this size fits; cap the weight just below it */
- metaslab_passivate(msp, size - 1);
-
- mutex_exit(&msp->ms_lock);
- }
-
- /* first allocation from this metaslab in this txg: dirty it */
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
- vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
-
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
-
- mutex_exit(&msp->ms_lock);
-
- return (offset);
-}
-
-/*
- * Allocate a block for the specified i/o.
- *
- * Fills in dva[d] with a psize-byte allocation (rounded up to the chosen
- * vdev's allocation size) in txg and returns 0.  If no metaslab group can
- * satisfy the request, dva[d] is zeroed and ENOSPC is returned.
- * dva[0..d-1] are the DVAs already allocated for this block; hintdva, if
- * non-NULL, names the vdev to start on (or to start after, when
- * hintdva_avoid is set).
- */
-static int
-metaslab_alloc_dva(spa_t *spa, uint64_t psize, dva_t *dva, int d,
- dva_t *hintdva, uint64_t txg, boolean_t hintdva_avoid)
-{
- metaslab_group_t *mg, *rotor;
- metaslab_class_t *mc;
- vdev_t *vd;
- int dshift = 3;
- int all_zero;
- uint64_t offset = -1ULL;
- uint64_t asize;
- uint64_t distance;
-
- ASSERT(!DVA_IS_VALID(&dva[d]));
-
- mc = spa_metaslab_class_select(spa);
-
- /*
- * Start at the rotor and loop through all mgs until we find something.
- * Note that there's no locking on mc_rotor or mc_allocated because
- * nothing actually breaks if we miss a few updates -- we just won't
- * allocate quite as evenly. It all balances out over time.
- *
- * If we are doing ditto or log blocks, try to spread them across
- * consecutive vdevs. If we're forced to reuse a vdev before we've
- * allocated all of our ditto blocks, then try and spread them out on
- * that vdev as much as possible. If it turns out to not be possible,
- * gradually lower our standards until anything becomes acceptable.
- * Also, allocating on consecutive vdevs (as opposed to random vdevs)
- * gives us hope of containing our fault domains to something we're
- * able to reason about. Otherwise, any two top-level vdev failures
- * will guarantee the loss of data. With consecutive allocation,
- * only two adjacent top-level vdev failures will result in data loss.
- *
- * If we are doing gang blocks (hintdva is non-NULL), try to keep
- * ourselves on the same vdev as our gang block header. That
- * way, we can hope for locality in vdev_cache, plus it makes our
- * fault domains something tractable.
- */
- if (hintdva) {
- vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));
- if (hintdva_avoid)
- mg = vd->vdev_mg->mg_next;
- else
- mg = vd->vdev_mg;
- } else if (d != 0) {
- vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
- mg = vd->vdev_mg->mg_next;
- } else {
- mg = mc->mc_rotor;
- }
- rotor = mg;
-
-top:
- all_zero = B_TRUE;
- do {
- vd = mg->mg_vd;
-
- /*
- * Minimum separation from earlier DVAs on this vdev: 1/2^dshift
- * of the vdev, or none once that is no more than one metaslab.
- */
- distance = vd->vdev_asize >> dshift;
- if (distance <= (1ULL << vd->vdev_ms_shift))
- distance = 0;
- else
- all_zero = B_FALSE;
-
- asize = vdev_psize_to_asize(vd, psize);
- ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
-
- offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
- if (offset != -1ULL) {
- /*
- * If we've just selected this metaslab group,
- * figure out whether the corresponding vdev is
- * over- or under-used relative to the pool,
- * and set an allocation bias to even it out.
- */
- if (mc->mc_allocated == 0) {
- vdev_stat_t *vs = &vd->vdev_stat;
- uint64_t alloc, space;
- int64_t vu, su;
-
- alloc = spa_get_alloc(spa);
- space = spa_get_space(spa);
-
- /*
- * Determine percent used in units of 0..1024.
- * (This is just to avoid floating point.)
- */
- vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
- su = (alloc << 10) / (space + 1);
-
- /*
- * Bias by at most +/- 25% of the aliquot.
- */
- mg->mg_bias = ((su - vu) *
- (int64_t)mg->mg_aliquot) / (1024 * 4);
- }
-
- /* this group has had its share: advance the rotor */
- if (atomic_add_64_nv(&mc->mc_allocated, asize) >=
- mg->mg_aliquot + mg->mg_bias) {
- mc->mc_rotor = mg->mg_next;
- mc->mc_allocated = 0;
- }
-
- DVA_SET_VDEV(&dva[d], vd->vdev_id);
- DVA_SET_OFFSET(&dva[d], offset);
- DVA_SET_GANG(&dva[d], 0);
- DVA_SET_ASIZE(&dva[d], asize);
-
- return (0);
- }
- /* this group could not satisfy the request: move the rotor on */
- mc->mc_rotor = mg->mg_next;
- mc->mc_allocated = 0;
- } while ((mg = mg->mg_next) != rotor);
-
- /*
- * Every group failed.  If any of them was held to a non-zero
- * distance, halve the requirement and go around again.
- */
- if (!all_zero) {
- dshift++;
- ASSERT(dshift < 64);
- goto top;
- }
-
- bzero(&dva[d], sizeof (dva_t));
-
- return (ENOSPC);
-}
-
-/*
- * Free the block represented by DVA in the context of the specified
- * transaction group.
- *
- * If 'now' is set, the range is removed from this txg's allocmap and
- * handed to space_map_free() on the in-core map immediately; otherwise
- * it is added to this txg's freemap.  Frees in a txg later than
- * spa_freeze_txg() are ignored, and a DVA that does not resolve to a
- * metaslab is logged and dropped.
- */
-static void
-metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
-{
-	uint64_t vdev = DVA_GET_VDEV(dva);
-	uint64_t offset = DVA_GET_OFFSET(dva);
-	uint64_t size = DVA_GET_ASIZE(dva);
-	vdev_t *vd;
-	metaslab_t *msp;
-
-	ASSERT(DVA_IS_VALID(dva));
-
-	if (txg > spa_freeze_txg(spa))
-		return;
-
-	if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
-	    (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) {
-		cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu",
-		    (u_longlong_t)vdev, (u_longlong_t)offset);
-		ASSERT(0);
-		return;
-	}
-
-	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
-
-	/* For a gang DVA the size freed is that of a gang block. */
-	if (DVA_GET_GANG(dva))
-		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
-
-	mutex_enter(&msp->ms_lock);
-
-	if (now) {
-		space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
-		    offset, size);
-		space_map_free(&msp->ms_map, offset, size);
-	} else {
-		/* first free from this metaslab in this txg: dirty it */
-		if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
-			vdev_dirty(vd, VDD_METASLAB, msp, txg);
-		space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
-
-		/*
-		 * verify that this region is actually allocated in
-		 * either a ms_allocmap or the ms_map
-		 */
-		if (msp->ms_map.sm_loaded) {
-			boolean_t allocd = B_FALSE;
-			int i;
-
-			if (!space_map_contains(&msp->ms_map, offset, size)) {
-				allocd = B_TRUE;
-			} else {
-				for (i = 0; i < TXG_CONCURRENT_STATES; i++) {
-					space_map_t *sm = &msp->ms_allocmap
-					    [(txg - i) & TXG_MASK];
-					if (space_map_contains(sm,
-					    offset, size)) {
-						allocd = B_TRUE;
-						break;
-					}
-				}
-			}
-
-			/*
-			 * The conversions are unsigned (%llu, %llx), so
-			 * pass unsigned arguments, as the cmn_err() call
-			 * above does.
-			 */
-			if (!allocd) {
-				zfs_panic_recover("freeing free segment "
-				    "(vdev=%llu offset=%llx size=%llx)",
-				    (u_longlong_t)vdev, (u_longlong_t)offset,
-				    (u_longlong_t)size);
-			}
-		}
-	}
-
-	mutex_exit(&msp->ms_lock);
-}
-
-/*
- * Intent log support: when a pool is opened after a crash, tell the SPA
- * about blocks the intent log allocated for immediate write.  The SPA
- * still regards them as free because the last transaction group never
- * committed.
- *
- * Returns 0 on success, ENXIO if the DVA does not resolve to a metaslab,
- * or the error from activating the metaslab.
- */
-static int
-metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
-{
-	uint64_t vdev = DVA_GET_VDEV(dva);
-	uint64_t offset = DVA_GET_OFFSET(dva);
-	uint64_t size = DVA_GET_ASIZE(dva);
-	space_map_t *allocmap;
-	metaslab_t *msp;
-	vdev_t *vd;
-	int error;
-
-	ASSERT(DVA_IS_VALID(dva));
-
-	vd = vdev_lookup_top(spa, vdev);
-	if (vd == NULL)
-		return (ENXIO);
-	if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
-		return (ENXIO);
-
-	msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
-
-	if (DVA_GET_GANG(dva))
-		size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
-
-	mutex_enter(&msp->ms_lock);
-
-	error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
-	if (error != 0) {
-		mutex_exit(&msp->ms_lock);
-		return (error);
-	}
-
-	allocmap = &msp->ms_allocmap[txg & TXG_MASK];
-
-	/* first allocation from this metaslab in this txg: dirty it */
-	if (allocmap->sm_space == 0)
-		vdev_dirty(vd, VDD_METASLAB, msp, txg);
-
-	space_map_claim(&msp->ms_map, offset, size);
-	space_map_add(allocmap, offset, size);
-
-	mutex_exit(&msp->ms_lock);
-
-	return (0);
-}
-
-/*
- * Allocate ndvas DVAs of psize bytes each for bp in transaction group
- * txg.  hintbp may be NULL; when given, its DVAs steer the choice of
- * vdev (see metaslab_alloc_dva()), and hintbp_avoid asks to avoid rather
- * than prefer those vdevs.
- *
- * Returns 0 on success.  On failure, every DVA allocated so far is freed
- * and zeroed again, and the error from metaslab_alloc_dva() is returned.
- */
-int
-metaslab_alloc(spa_t *spa, uint64_t psize, blkptr_t *bp, int ndvas,
-    uint64_t txg, blkptr_t *hintbp, boolean_t hintbp_avoid)
-{
-	dva_t *dva = bp->blk_dva;
-	/* hintbp is optional, so do not reach through it when it is NULL */
-	dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL;
-	int d;
-	int error = 0;
-
-	ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
-	ASSERT(BP_GET_NDVAS(bp) == 0);
-	ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
-
-	for (d = 0; d < ndvas; d++) {
-		error = metaslab_alloc_dva(spa, psize, dva, d, hintdva,
-		    txg, hintbp_avoid);
-		if (error) {
-			/* undo the DVAs that were already allocated */
-			for (d--; d >= 0; d--) {
-				metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
-				bzero(&dva[d], sizeof (dva_t));
-			}
-			return (error);
-		}
-	}
-	ASSERT(error == 0);
-	ASSERT(BP_GET_NDVAS(bp) == ndvas);
-
-	return (0);
-}
-
-/*
- * Free every DVA of bp in transaction group txg; 'now' is passed through
- * to metaslab_free_dva().
- */
-void
-metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
-{
-	const dva_t *first = bp->blk_dva;
-	const dva_t *end = first + BP_GET_NDVAS(bp);
-	const dva_t *cur;
-
-	ASSERT(!BP_IS_HOLE(bp));
-
-	for (cur = first; cur < end; cur++)
-		metaslab_free_dva(spa, cur, txg, now);
-}
-
-/*
- * Claim every DVA of bp in transaction group txg.  All DVAs are
- * attempted even if one fails; returns 0 if every claim succeeded,
- * otherwise the error from the last claim that failed.
- */
-int
-metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
-{
-	const dva_t *dva = bp->blk_dva;
-	int count = BP_GET_NDVAS(bp);
-	int result = 0;
-	int d;
-
-	ASSERT(!BP_IS_HOLE(bp));
-
-	for (d = 0; d < count; d++) {
-		int rc = metaslab_claim_dva(spa, &dva[d], txg);
-
-		if (rc != 0)
-			result = rc;
-	}
-
-	return (result);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/refcount.c b/sys/contrib/opensolaris/uts/common/fs/zfs/refcount.c
deleted file mode 100644
index 411ed46..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/refcount.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/refcount.h>
-
-#if defined(DEBUG) || !defined(_KERNEL)
-
-#ifdef _KERNEL
-int reference_tracking_enable = FALSE; /* runs out of memory too easily */
-#else
-int reference_tracking_enable = TRUE; /* non-kernel builds record a reference_t per hold */
-#endif
-int reference_history = 4; /* tunable; when > 0, released references are kept on rc_removed and trimmed once that list reaches this length */
-
-static kmem_cache_t *reference_cache; /* backs reference_t records */
-static kmem_cache_t *reference_history_cache; /* backs the uint64_t-sized ref_removed buffers */
-
-/*
- * Create the two kmem caches used by reference tracking: one sized for
- * reference_t records and one sized for a uint64_t history entry.
- */
-void
-refcount_init(void)
-{
-	const size_t refsz = sizeof (reference_t);
-	const size_t histsz = sizeof (uint64_t);
-
-	reference_cache = kmem_cache_create("reference_cache", refsz, 0,
-	    NULL, NULL, NULL, NULL, NULL, 0);
-
-	reference_history_cache = kmem_cache_create("reference_history_cache",
-	    histsz, 0, NULL, NULL, NULL, NULL, NULL, 0);
-}
-
-/*
- * Tear down the kmem caches created by refcount_init(), in the same order
- * they were created.
- */
-void
-refcount_fini(void)
-{
-	kmem_cache_t *caches[2];
-	int i;
-
-	caches[0] = reference_cache;
-	caches[1] = reference_history_cache;
-
-	for (i = 0; i < 2; i++)
-		kmem_cache_destroy(caches[i]);
-}
-
-/*
- * Initialize the tracking state of 'rc': the list of live references, the
- * list of removed references, and the mutex that protects them.  Both
- * lists link reference_t records through their ref_link member.
- */
-void
-refcount_create(refcount_t *rc)
-{
-	const size_t nodesz = sizeof (reference_t);
-	const size_t linkoff = offsetof(reference_t, ref_link);
-
-	list_create(&rc->rc_list, nodesz, linkoff);
-	list_create(&rc->rc_removed, nodesz, linkoff);
-	mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL);
-}
-
-/*
- * Destroy 'rc', which is expected to hold exactly 'number' references.
- * Any tracking records still on the live list are freed, as are the
- * removed-reference records together with their history buffers.
- */
-void
-refcount_destroy_many(refcount_t *rc, uint64_t number)
-{
-	reference_t *ref;
-
-	ASSERT(rc->rc_count == number);
-
-	/* Drain the list of live references. */
-	for (;;) {
-		ref = list_head(&rc->rc_list);
-		if (ref == NULL)
-			break;
-		list_remove(&rc->rc_list, ref);
-		kmem_cache_free(reference_cache, ref);
-	}
-	list_destroy(&rc->rc_list);
-
-	/* Drain the history list; each entry also owns a ref_removed. */
-	for (;;) {
-		ref = list_head(&rc->rc_removed);
-		if (ref == NULL)
-			break;
-		list_remove(&rc->rc_removed, ref);
-		kmem_cache_free(reference_history_cache, ref->ref_removed);
-		kmem_cache_free(reference_cache, ref);
-	}
-	list_destroy(&rc->rc_removed);
-
-	mutex_destroy(&rc->rc_mtx);
-}
-
-/*
- * Destroy a refcount that is expected to have no outstanding references.
- */
-void
-refcount_destroy(refcount_t *rc)
-{
-	const uint64_t expected = 0;
-
-	refcount_destroy_many(rc, expected);
-}
-
-/*
- * Return non-zero when 'rc' currently holds no references.  The count is
- * read without taking rc_mtx.
- */
-int
-refcount_is_zero(refcount_t *rc)
-{
-	int empty;
-
-	ASSERT(rc->rc_count >= 0);
-	empty = (rc->rc_count == 0);
-
-	return (empty);
-}
-
-/*
- * Return the current reference count of 'rc'.  The count is read without
- * taking rc_mtx.
- */
-int64_t
-refcount_count(refcount_t *rc)
-{
-	int64_t held;
-
-	ASSERT(rc->rc_count >= 0);
-	held = rc->rc_count;
-
-	return (held);
-}
-
-/*
- * Add 'number' references to 'rc' on behalf of 'holder' and return the
- * resulting count.  When reference tracking is enabled a reference_t
- * recording the holder and the amount is pushed onto rc_list.
- */
-int64_t
-refcount_add_many(refcount_t *rc, uint64_t number, void *holder)
-{
-	reference_t *ref = NULL;
-	int64_t count;
-
-	/*
-	 * The tracking record is allocated before taking the mutex.
-	 * reference_tracking_enable is a global tunable, so it is consulted
-	 * exactly once here; whether a record exists is what decides the
-	 * list insertion below.  Testing the flag a second time could insert
-	 * an uninitialized pointer or leak the record if the flag changed in
-	 * between.
-	 */
-	if (reference_tracking_enable) {
-		ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
-		ref->ref_holder = holder;
-		ref->ref_number = number;
-	}
-	mutex_enter(&rc->rc_mtx);
-	ASSERT(rc->rc_count >= 0);
-	if (ref != NULL)
-		list_insert_head(&rc->rc_list, ref);
-	rc->rc_count += number;
-	count = rc->rc_count;
-	mutex_exit(&rc->rc_mtx);
-
-	return (count);
-}
-
-/*
- * Add a single reference to 'rc' for 'holder'; returns the new count.
- */
-int64_t
-refcount_add(refcount_t *rc, void *holder)
-{
-	int64_t count;
-
-	count = refcount_add_many(rc, 1, holder);
-	return (count);
-}
-
-/*
- * Drop 'number' references held by 'holder' from 'rc' and return the
- * remaining count.
- *
- * With reference tracking enabled, the matching record (same holder and
- * same number) is taken off rc_list.  If reference_history is positive
- * the record is moved to the head of rc_removed and the oldest entry is
- * discarded once the list reaches reference_history entries; otherwise
- * the record is freed right away.  A missing record is fatal (panic).
- */
-int64_t
-refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
-{
-	reference_t *match, *oldest;
-	int64_t remaining;
-
-	mutex_enter(&rc->rc_mtx);
-	ASSERT(rc->rc_count >= number);
-
-	if (reference_tracking_enable) {
-		/* Locate the first record for this (holder, number) pair. */
-		match = list_head(&rc->rc_list);
-		while (match != NULL &&
-		    (match->ref_holder != holder ||
-		    match->ref_number != number))
-			match = list_next(&rc->rc_list, match);
-
-		if (match == NULL) {
-			panic("No such hold %p on refcount %llx", holder,
-			    (u_longlong_t)(uintptr_t)rc);
-			return (-1);
-		}
-
-		list_remove(&rc->rc_list, match);
-		if (reference_history > 0) {
-			match->ref_removed =
-			    kmem_cache_alloc(reference_history_cache,
-			    KM_SLEEP);
-			list_insert_head(&rc->rc_removed, match);
-			rc->rc_removed_count++;
-			if (rc->rc_removed_count >= reference_history) {
-				/* Trim the oldest history entry. */
-				oldest = list_tail(&rc->rc_removed);
-				list_remove(&rc->rc_removed, oldest);
-				kmem_cache_free(reference_history_cache,
-				    oldest->ref_removed);
-				kmem_cache_free(reference_cache, oldest);
-				rc->rc_removed_count--;
-			}
-		} else {
-			kmem_cache_free(reference_cache, match);
-		}
-	}
-
-	rc->rc_count -= number;
-	remaining = rc->rc_count;
-	mutex_exit(&rc->rc_mtx);
-
-	return (remaining);
-}
-
-/*
- * Drop a single reference held by 'holder' from 'rc'; returns the
- * remaining count.
- */
-int64_t
-refcount_remove(refcount_t *rc, void *holder)
-{
-	int64_t remaining;
-
-	remaining = refcount_remove_many(rc, 1, holder);
-	return (remaining);
-}
-
-#endif
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sha256.c b/sys/contrib/opensolaris/uts/common/fs/zfs/sha256.c
deleted file mode 100644
index ce5c261..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sha256.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-
-/*
- * SHA-256 checksum, as specified in FIPS 180-2, available at:
- * http://csrc.nist.gov/cryptval
- *
- * This is a very compact implementation of SHA-256.
- * It is designed to be simple and portable, not to be fast.
- */
-
-/*
- * The literal definitions according to FIPS180-2 would be:
- *
- * Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
- * Maj(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
- *
- * We use logical equivalents which require one less op.
- *
- * Rot32(x, s) rotates the 32-bit value x right by s bits.  Both arguments
- * are fully parenthesized so that a compound shift count such as
- * Rot32(x, a + b) expands correctly.  x is expected to be a uint32_t and s
- * to lie in 1..31; every use in this file passes a literal in that range.
- */
-#define	Ch(x, y, z)	((z) ^ ((x) & ((y) ^ (z))))
-#define	Maj(x, y, z)	(((x) & (y)) ^ ((z) & ((x) ^ (y))))
-#define	Rot32(x, s)	(((x) >> (s)) | ((x) << (32 - (s))))
-#define	SIGMA0(x)	(Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22))
-#define	SIGMA1(x)	(Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25))
-#define	sigma0(x)	(Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3))
-#define	sigma1(x)	(Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10))
-
-static const uint32_t SHA256_K[64] = { /* per-round constants; SHA256Transform() adds SHA256_K[t] in round t */
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-/*
- * Fold one 64-byte message block, read from 'cp', into the eight-word
- * hash state 'H'.  The block is interpreted as sixteen big-endian 32-bit
- * words, expanded to a 64-word schedule, and run through 64 rounds.
- */
-static void
-SHA256Transform(uint32_t *H, const uint8_t *cp)
-{
-	uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];
-
-	/*
-	 * Widen each byte to uint32_t before shifting: a bare uint8_t is
-	 * promoted to int, and shifting a value >= 0x80 left by 24 would
-	 * move a bit into the sign position.
-	 */
-	for (t = 0; t < 16; t++, cp += 4)
-		W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) |
-		    ((uint32_t)cp[2] << 8) | (uint32_t)cp[3];
-
-	/* Message schedule expansion. */
-	for (t = 16; t < 64; t++)
-		W[t] = sigma1(W[t - 2]) + W[t - 7] +
-		    sigma0(W[t - 15]) + W[t - 16];
-
-	a = H[0]; b = H[1]; c = H[2]; d = H[3];
-	e = H[4]; f = H[5]; g = H[6]; h = H[7];
-
-	for (t = 0; t < 64; t++) {
-		T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
-		T2 = SIGMA0(a) + Maj(a, b, c);
-		h = g; g = f; f = e; e = d + T1;
-		d = c; c = b; b = a; a = T1 + T2;
-	}
-
-	H[0] += a; H[1] += b; H[2] += c; H[3] += d;
-	H[4] += e; H[5] += f; H[6] += g; H[7] += h;
-}
-
-/*
- * Compute the SHA-256 digest of the 'size' bytes at 'buf' and store it in
- * 'zcp' as four 64-bit words, each built from two consecutive 32-bit
- * state words (high word first).
- *
- * All whole 64-byte blocks are hashed directly from 'buf'.  The remaining
- * 0..63 tail bytes are copied into a scratch buffer, followed by the 0x80
- * marker, zero fill up to 56 mod 64, and the message length in bits as a
- * big-endian 64-bit value; that buffer (one or two blocks) is hashed last.
- */
-void
-zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
-	uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-	    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
-	const uint8_t *data = buf;
-	uint8_t pad[128];
-	uint64_t body = size & ~(uint64_t)63;	/* bytes in whole blocks */
-	uint64_t off;
-	int padsize = (int)(size & 63);
-	int i;
-
-	/* 64-bit offset so the comparison with 'body' cannot overflow. */
-	for (off = 0; off < body; off += 64)
-		SHA256Transform(H, data + off);
-
-	/* The tail starts where the whole blocks end, not at data[0]. */
-	for (i = 0; i < padsize; i++)
-		pad[i] = data[body + i];
-
-	for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++)
-		pad[padsize] = 0;
-
-	for (i = 0; i < 8; i++)
-		pad[padsize++] = (uint8_t)((size << 3) >> (56 - 8 * i));
-
-	for (i = 0; i < padsize; i += 64)
-		SHA256Transform(H, pad + i);
-
-	ZIO_SET_CHECKSUM(zcp,
-	    (uint64_t)H[0] << 32 | H[1],
-	    (uint64_t)H[2] << 32 | H[3],
-	    (uint64_t)H[4] << 32 | H[5],
-	    (uint64_t)H[6] << 32 | H[7]);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/contrib/opensolaris/uts/common/fs/zfs/spa.c
deleted file mode 100644
index 6a7c525..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ /dev/null
@@ -1,3301 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This file contains all the routines used when modifying on-disk SPA state.
- * This includes opening, importing, destroying, exporting a pool, and syncing a
- * pool.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/fm/fs/zfs.h>
-#include <sys/spa_impl.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/zap.h>
-#include <sys/zil.h>
-#include <sys/vdev_impl.h>
-#include <sys/metaslab.h>
-#include <sys/uberblock_impl.h>
-#include <sys/txg.h>
-#include <sys/avl.h>
-#include <sys/dmu_traverse.h>
-#include <sys/dmu_objset.h>
-#include <sys/unique.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_synctask.h>
-#include <sys/fs/zfs.h>
-#include <sys/callb.h>
-#include <sys/sunddi.h>
-
-int zio_taskq_threads = 0; /* threads per ZIO taskq; 0 makes spa_activate() fall back to max_ncpus */
-SYSCTL_DECL(_vfs_zfs);
-SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
-TUNABLE_INT("vfs.zfs.zio.taskq_threads", &zio_taskq_threads);
-SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, taskq_threads, CTLFLAG_RW,
- &zio_taskq_threads, 0, "Number of ZIO threads per ZIO type");
-
-
-/*
- * ==========================================================================
- * SPA state manipulation (open/create/destroy/import/export)
- * ==========================================================================
- */
-
-/*
- * AVL comparison callback for spa_error_entry_t nodes.  Entries are
- * ordered by the raw bytes of their se_bookmark member; the result is
- * normalized to -1, 0 or 1.
- */
-static int
-spa_error_entry_compare(const void *a, const void *b)
-{
-	const spa_error_entry_t *sa = a;
-	const spa_error_entry_t *sb = b;
-	int ret;
-
-	/*
-	 * bcmp() only reports "equal" versus "different", so its result
-	 * cannot be used to order tree nodes.  memcmp() returns a signed
-	 * result that is consistent in both directions.
-	 */
-	ret = memcmp(&sa->se_bookmark, &sb->se_bookmark,
-	    sizeof (zbookmark_t));
-
-	if (ret < 0)
-		return (-1);
-	else if (ret > 0)
-		return (1);
-	else
-		return (0);
-}
-
-/*
- * Hand the caller byte copies of the pool's "last" and "scrub" error
- * trees and re-create both trees empty inside the spa.  The caller must
- * hold spa_errlist_lock.
- */
-void
-spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
-{
-	const size_t nodesz = sizeof (spa_error_entry_t);
-	const size_t linkoff = offsetof(spa_error_entry_t, se_avl);
-
-	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
-
-	/* Snapshot both trees before either one is reset. */
-	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
-	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
-
-	avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare,
-	    nodesz, linkoff);
-	avl_create(&spa->spa_errlist_last, spa_error_entry_compare,
-	    nodesz, linkoff);
-}
-
-/*
- * Activate an uninitialized pool: mark it active, create its metaslab
- * class, one issue and one interrupt taskq per ZIO type, its locks, the
- * dirty-vdev and per-txg vdev lists, and the two in-core error trees.
- */
-static void
-spa_activate(spa_t *spa)
-{
-	int configured = zio_taskq_threads;
-	int nthreads;
-	int type;
-	char tqname[32];
-
-	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
-
-	spa->spa_state = POOL_STATE_ACTIVE;
-
-	spa->spa_normal_class = metaslab_class_create();
-
-	/* A zero tunable falls back to max_ncpus. */
-	nthreads = (configured == 0) ? max_ncpus : configured;
-
-	for (type = 0; type < ZIO_TYPES; type++) {
-		snprintf(tqname, sizeof(tqname), "spa_zio_issue %d", type);
-		spa->spa_zio_issue_taskq[type] = taskq_create(tqname,
-		    nthreads, maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE);
-
-		snprintf(tqname, sizeof(tqname), "spa_zio_intr %d", type);
-		spa->spa_zio_intr_taskq[type] = taskq_create(tqname,
-		    nthreads, maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE);
-	}
-
-	/* Locks and condition variables. */
-	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);
-
-	mutex_init(&spa->spa_uberblock_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_config_lock.scl_lock, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&spa->spa_config_lock.scl_cv, NULL, CV_DEFAULT, NULL);
-	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
-
-	/* Lists of vdevs. */
-	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
-	    offsetof(vdev_t, vdev_dirty_node));
-
-	txg_list_create(&spa->spa_vdev_txg_list,
-	    offsetof(struct vdev, vdev_txg_node));
-
-	/* In-core error trees. */
-	avl_create(&spa->spa_errlist_scrub,
-	    spa_error_entry_compare, sizeof (spa_error_entry_t),
-	    offsetof(spa_error_entry_t, se_avl));
-	avl_create(&spa->spa_errlist_last,
-	    spa_error_entry_compare, sizeof (spa_error_entry_t),
-	    offsetof(spa_error_entry_t, se_avl));
-}
-
-/*
- * Opposite of spa_activate(): release the lists, taskqs, metaslab class,
- * error trees and locks of a pool that has already been unloaded, and
- * return it to the uninitialized state.
- */
-static void
-spa_deactivate(spa_t *spa)
-{
-	int type;
-
-	/* The pool must be fully unloaded before it can be deactivated. */
-	ASSERT(spa->spa_sync_on == B_FALSE);
-	ASSERT(spa->spa_dsl_pool == NULL);
-	ASSERT(spa->spa_root_vdev == NULL);
-
-	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
-
-	txg_list_destroy(&spa->spa_vdev_txg_list);
-
-	list_destroy(&spa->spa_dirty_list);
-
-	for (type = 0; type < ZIO_TYPES; type++) {
-		taskq_destroy(spa->spa_zio_issue_taskq[type]);
-		taskq_destroy(spa->spa_zio_intr_taskq[type]);
-		spa->spa_zio_issue_taskq[type] = NULL;
-		spa->spa_zio_intr_taskq[type] = NULL;
-	}
-
-	metaslab_class_destroy(spa->spa_normal_class);
-	spa->spa_normal_class = NULL;
-
-	/*
-	 * A failed open or import can leave entries queued in the error
-	 * lists, so drain them before the trees are destroyed.
-	 */
-	spa_errlog_drain(spa);
-
-	avl_destroy(&spa->spa_errlist_scrub);
-	avl_destroy(&spa->spa_errlist_last);
-
-	rw_destroy(&spa->spa_traverse_lock);
-	mutex_destroy(&spa->spa_uberblock_lock);
-	mutex_destroy(&spa->spa_errlog_lock);
-	mutex_destroy(&spa->spa_errlist_lock);
-	mutex_destroy(&spa->spa_config_lock.scl_lock);
-	cv_destroy(&spa->spa_config_lock.scl_cv);
-	mutex_destroy(&spa->spa_sync_bplist.bpl_lock);
-	mutex_destroy(&spa->spa_history_lock);
-	mutex_destroy(&spa->spa_props_lock);
-
-	spa->spa_state = POOL_STATE_UNINITIALIZED;
-}
-
-/*
- * Build the vdev described by 'nv' (and, recursively, all of its
- * children) underneath 'parent', storing the new vdev in '*vdp'.  Each
- * vdev is created through vdev_alloc(), which is where validation takes
- * place.  A non-leaf vdev must carry a ZPOOL_CONFIG_CHILDREN array.
- *
- * Returns 0 on success.  If the children array is missing (EINVAL) or a
- * child fails to parse, the vdev allocated here is freed, '*vdp' is set
- * to NULL and the error is returned.
- */
-static int
-spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
-    uint_t id, int atype)
-{
-	nvlist_t **child;
-	uint_t idx, children;
-	int error;
-
-	error = vdev_alloc(spa, vdp, nv, parent, id, atype);
-	if (error != 0)
-		return (error);
-
-	/* Leaves have no children to descend into. */
-	if ((*vdp)->vdev_ops->vdev_op_leaf)
-		return (0);
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) != 0) {
-		error = EINVAL;
-		goto fail;
-	}
-
-	for (idx = 0; idx < children; idx++) {
-		vdev_t *cvd;
-
-		error = spa_config_parse(spa, &cvd, child[idx], *vdp, idx,
-		    atype);
-		if (error != 0)
-			goto fail;
-	}
-
-	ASSERT(*vdp != NULL);
-
-	return (0);
-
-fail:
-	vdev_free(*vdp);
-	*vdp = NULL;
-	return (error);
-}
-
-/*
- * Opposite of spa_load(): suspend async tasks, stop syncing, close the
- * DSL pool, and free the root vdev tree, the spare vdevs and the cached
- * spare list.
- */
-static void
-spa_unload(spa_t *spa)
-{
-	int n;
-
-	/* Async tasks are suspended first. */
-	spa_async_suspend(spa);
-
-	/* Then the sync thread, if it was ever started. */
-	if (spa->spa_sync_on) {
-		txg_sync_stop(spa->spa_dsl_pool);
-		spa->spa_sync_on = B_FALSE;
-	}
-
-	/*
-	 * Entering and immediately leaving the config lock as writer waits
-	 * for any outstanding prefetch I/O to complete.
-	 */
-	spa_config_enter(spa, RW_WRITER, FTAG);
-	spa_config_exit(spa, FTAG);
-
-	if (spa->spa_dsl_pool != NULL) {
-		dsl_pool_close(spa->spa_dsl_pool);
-		spa->spa_dsl_pool = NULL;
-	}
-
-	/* Freeing the root vdev is expected to clear spa_root_vdev. */
-	if (spa->spa_root_vdev != NULL)
-		vdev_free(spa->spa_root_vdev);
-	ASSERT(spa->spa_root_vdev == NULL);
-
-	/* Spare vdevs, their pointer array, and the cached nvlist. */
-	for (n = 0; n < spa->spa_nspares; n++)
-		vdev_free(spa->spa_spares[n]);
-	if (spa->spa_spares != NULL) {
-		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
-		spa->spa_spares = NULL;
-	}
-	if (spa->spa_sparelist != NULL) {
-		nvlist_free(spa->spa_sparelist);
-		spa->spa_sparelist = NULL;
-	}
-
-	spa->spa_async_suspended = 0;
-}
-
-/*
- * Load (or re-load) the current list of vdevs describing the active spares for
- * this pool. When this is called, we have some form of basic information in
- * 'spa_sparelist'. We parse this into vdevs, try to open them, and then
- * re-generate a more complete list including status information.
- *
- * Any spare vdevs left over from a previous call are closed and freed first.
- * If 'spa_sparelist' is NULL or lists no spares, the function returns with
- * spa_nspares == 0 and spa_spares == NULL. spa_load() calls this with the
- * config lock held as writer.
- */
-static void
-spa_load_spares(spa_t *spa)
-{
- nvlist_t **spares;
- uint_t nspares;
- int i;
- vdev_t *vd, *tvd;
-
- /*
- * First, close and free any existing spare vdevs.
- */
- for (i = 0; i < spa->spa_nspares; i++) {
- vd = spa->spa_spares[i];
-
- /* Undo the call to spa_spare_add() below */
- if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
- tvd->vdev_isspare)
- spa_spare_remove(tvd);
- vdev_close(vd);
- vdev_free(vd);
- }
-
- if (spa->spa_spares)
- kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
-
- if (spa->spa_sparelist == NULL)
- nspares = 0;
- else
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
-
- spa->spa_nspares = (int)nspares;
- spa->spa_spares = NULL;
-
- if (nspares == 0)
- return;
-
- /*
- * Construct the array of vdevs, opening them to get status in the
- * process. For each spare, there are potentially two different vdev_t
- * structures associated with it: one in the list of spares (used only
- * for basic validation purposes) and one in the active vdev
- * configuration (if it's spared in). During this phase we open and
- * validate each vdev on the spare list. If the vdev also exists in the
- * active configuration, then we also mark this vdev as an active spare.
- */
- spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
- for (i = 0; i < spa->spa_nspares; i++) {
- VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
- VDEV_ALLOC_SPARE) == 0);
- ASSERT(vd != NULL);
-
- spa->spa_spares[i] = vd;
-
- if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
- if (!tvd->vdev_isspare)
- spa_spare_add(tvd);
-
- /*
- * We only mark the spare active if we were successfully
- * able to load the vdev. Otherwise, importing a pool
- * with a bad active spare would result in strange
- * behavior, because multiple pools would think the
- * spare is actively in use.
- *
- * There is a vulnerability here to an equally bizarre
- * circumstance, where a dead active spare is later
- * brought back to life (onlined or otherwise). Given
- * the rarity of this scenario, and the extra complexity
- * it adds, we ignore the possibility.
- */
- if (!vdev_is_dead(tvd))
- spa_spare_activate(tvd);
- }
-
- if (vdev_open(vd) != 0)
- continue;
-
- vd->vdev_top = vd;
- (void) vdev_validate_spare(vd);
- }
-
- /*
- * Recompute the stashed list of spares, with status information
- * this time. The temporary nvlists are freed again once
- * nvlist_add_nvlist_array() has been called with them.
- */
- VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
- DATA_TYPE_NVLIST_ARRAY) == 0);
-
- spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
- for (i = 0; i < spa->spa_nspares; i++)
- spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
- B_TRUE, B_TRUE);
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
- spares, spa->spa_nspares) == 0);
- for (i = 0; i < spa->spa_nspares; i++)
- nvlist_free(spares[i]);
- kmem_free(spares, spa->spa_nspares * sizeof (void *));
-}
-
-/*
- * Read the packed nvlist stored in object 'obj' of the pool's meta
- * objset and unpack it into '*value'.  The packed size is taken from the
- * first uint64_t of the object's bonus buffer.
- *
- * Returns 0 on success or the error from dmu_read()/nvlist_unpack();
- * '*value' is set to NULL before anything else is attempted.
- */
-static int
-load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
-{
-	dmu_buf_t *bonus;
-	char *buf = NULL;
-	size_t len = 0;
-	int err;
-
-	*value = NULL;
-
-	/* The bonus buffer is only needed long enough to read the size. */
-	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &bonus));
-	len = *(uint64_t *)bonus->db_data;
-	dmu_buf_rele(bonus, FTAG);
-
-	buf = kmem_alloc(len, KM_SLEEP);
-	err = dmu_read(spa->spa_meta_objset, obj, 0, len, buf);
-	if (err == 0)
-		err = nvlist_unpack(buf, len, value, 0);
-	kmem_free(buf, len);
-
-	return (err);
-}
-
-/*
- * Load an existing storage pool, using the pool's builtin spa_config as a
- * source of configuration information.
- *
- * 'config' supplies the vdev tree and pool guid, 'state' is recorded in
- * spa_load_state for the duration of the load, and 'mosconfig' says whether
- * 'config' is already the copy stored in the pool's meta objset. When
- * 'mosconfig' is false, the stored config is read with load_nvlist(), the
- * pool is unloaded and re-activated, and spa_load() calls itself once more
- * with that config and mosconfig == B_TRUE.
- *
- * Returns 0 on success or an errno value. For every error other than EBADF
- * an FM_EREPORT_ZFS_POOL ereport is posted. spa_load_state is reset to
- * SPA_LOAD_NONE on the way out through 'out'.
- */
-static int
-spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
-{
- int error = 0;
- nvlist_t *nvroot = NULL;
- vdev_t *rvd;
- uberblock_t *ub = &spa->spa_uberblock;
- uint64_t config_cache_txg = spa->spa_config_txg;
- uint64_t pool_guid;
- uint64_t version;
- zio_t *zio;
-
- spa->spa_load_state = state;
-
- if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * Versioning wasn't explicitly added to the label until later, so if
- * it's not present treat it as the initial version.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
- version = ZFS_VERSION_INITIAL;
-
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- &spa->spa_config_txg);
-
- if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
- spa_guid_exists(pool_guid, 0)) {
- error = EEXIST;
- goto out;
- }
-
- spa->spa_load_guid = pool_guid;
-
- /*
- * Parse the configuration into a vdev tree. We explicitly set the
- * value that will be returned by spa_version() since parsing the
- * configuration requires knowing the version number.
- */
- spa_config_enter(spa, RW_WRITER, FTAG);
- spa->spa_ubsync.ub_version = version;
- error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
- spa_config_exit(spa, FTAG);
-
- if (error != 0)
- goto out;
-
- ASSERT(spa->spa_root_vdev == rvd);
- ASSERT(spa_guid(spa) == pool_guid);
-
- /*
- * Try to open all vdevs, loading each label in the process.
- */
- error = vdev_open(rvd);
- if (error != 0)
- goto out;
-
- /*
- * Validate the labels for all leaf vdevs. We need to grab the config
- * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD
- * flag.
- */
- spa_config_enter(spa, RW_READER, FTAG);
- error = vdev_validate(rvd);
- spa_config_exit(spa, FTAG);
-
- if (error != 0)
- goto out;
-
- if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
- error = ENXIO;
- goto out;
- }
-
- /*
- * Find the best uberblock.
- */
- bzero(ub, sizeof (uberblock_t));
-
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
- vdev_uberblock_load(zio, rvd, ub);
- error = zio_wait(zio);
-
- /*
- * If we weren't able to find a single valid uberblock, return failure.
- */
- if (ub->ub_txg == 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = ENXIO;
- goto out;
- }
-
- /*
- * If the pool is newer than the code, we can't open it.
- */
- if (ub->ub_version > ZFS_VERSION) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_VERSION_NEWER);
- error = ENOTSUP;
- goto out;
- }
-
- /*
- * If the vdev guid sum doesn't match the uberblock, we have an
- * incomplete configuration.
- */
- if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_GUID_SUM);
- error = ENXIO;
- goto out;
- }
-
- /*
- * Initialize internal SPA structures.
- */
- spa->spa_state = POOL_STATE_ACTIVE;
- spa->spa_ubsync = spa->spa_uberblock;
- spa->spa_first_txg = spa_last_synced_txg(spa) + 1;
- error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
- if (error) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- goto out;
- }
- spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
-
- if (zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
- sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- if (!mosconfig) {
- nvlist_t *newconfig;
- uint64_t hostid;
-
- if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- /*
- * hostid is set after the root file system is mounted, so
- * ignore the check until it's done.
- */
- if (nvlist_lookup_uint64(newconfig, ZPOOL_CONFIG_HOSTID,
- &hostid) == 0 && root_mounted()) {
- char *hostname;
- unsigned long myhostid = 0;
-
- VERIFY(nvlist_lookup_string(newconfig,
- ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
-
- (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
- if ((unsigned long)hostid != myhostid) {
- cmn_err(CE_WARN, "pool '%s' could not be "
- "loaded as it was last accessed by "
- "another system (host: %s hostid: 0x%lx). "
- "See: http://www.sun.com/msg/ZFS-8000-EY",
- spa->spa_name, hostname,
- (unsigned long)hostid);
- /*
- * newconfig was unpacked by load_nvlist() and has not
- * been handed to anyone yet, so it must be released
- * here. 'hostname' points into it, hence the free
- * comes after the message above.
- */
- nvlist_free(newconfig);
- error = EBADF;
- goto out;
- }
- }
-
- spa_config_set(spa, newconfig);
- spa_unload(spa);
- spa_deactivate(spa);
- spa_activate(spa);
-
- return (spa_load(spa, newconfig, state, B_TRUE));
- }
-
- if (zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
- sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- /*
- * Load the bit that tells us to use the new accounting function
- * (raid-z deflation). If we have an older pool, this will not
- * be present.
- */
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
- sizeof (uint64_t), 1, &spa->spa_deflate);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- /*
- * Load the persistent error log. If we have an older pool, this will
- * not be present.
- */
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
- sizeof (uint64_t), 1, &spa->spa_errlog_last);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
- sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- /*
- * Load the history object. If we have an older pool, this
- * will not be present.
- */
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
- sizeof (uint64_t), 1, &spa->spa_history);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- /*
- * Load any hot spares for this pool.
- */
- error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
- if (error == 0) {
- ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES);
- if (load_nvlist(spa, spa->spa_spares_object,
- &spa->spa_sparelist) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- spa_load_spares(spa);
- spa_config_exit(spa, FTAG);
- }
-
- error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);
-
- if (error && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
-
- if (error == 0) {
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZFS_PROP_BOOTFS),
- sizeof (uint64_t), 1, &spa->spa_bootfs);
- }
-
- /*
- * Load the vdev state for all toplevel vdevs.
- */
- vdev_load(rvd);
-
- /*
- * Propagate the leaf DTLs we just loaded all the way up the tree.
- */
- spa_config_enter(spa, RW_WRITER, FTAG);
- vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
- spa_config_exit(spa, FTAG);
-
- /*
- * Check the state of the root vdev. If it can't be opened, it
- * indicates one or more toplevel vdevs are faulted.
- */
- if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
- error = ENXIO;
- goto out;
- }
-
- if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) {
- dmu_tx_t *tx;
- int need_update = B_FALSE;
- int c;
-
- /*
- * Claim log blocks that haven't been committed yet.
- * This must all happen in a single txg.
- */
- tx = dmu_tx_create_assigned(spa_get_dsl(spa),
- spa_first_txg(spa));
- (void) dmu_objset_find(spa->spa_name,
- zil_claim, tx, DS_FIND_CHILDREN);
- dmu_tx_commit(tx);
-
- spa->spa_sync_on = B_TRUE;
- txg_sync_start(spa->spa_dsl_pool);
-
- /*
- * Wait for all claims to sync.
- */
- txg_wait_synced(spa->spa_dsl_pool, 0);
-
- /*
- * If the config cache is stale, or we have uninitialized
- * metaslabs (see spa_vdev_add()), then update the config.
- */
- if (config_cache_txg != spa->spa_config_txg ||
- state == SPA_LOAD_IMPORT)
- need_update = B_TRUE;
-
- for (c = 0; c < rvd->vdev_children; c++)
- if (rvd->vdev_child[c]->vdev_ms_array == 0)
- need_update = B_TRUE;
-
- /*
- * Update the config cache asynchronously in case we're the
- * root pool, in which case the config cache isn't writable yet.
- */
- if (need_update)
- spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
- }
-
- error = 0;
-out:
- if (error && error != EBADF)
- zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
- spa->spa_load_state = SPA_LOAD_NONE;
- spa->spa_ena = 0;
-
- return (error);
-}
-
-/*
- * Pool Open/Import
- *
- * The import case is identical to an open except that the configuration is sent
- * down from userland, instead of grabbed from the configuration cache. For the
- * case of an open, the pool configuration will exist in the
- * POOL_STATE_UNINITIALIZED state.
- *
- * The stats information (gen/count/ustats) is used to gather vdev statistics at
- * the same time as opening the pool, without having to keep the spa_t around
- * in some ambiguous state.
- */
-static int
-spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
-{
- spa_t *spa;
- int error;
- int loaded = B_FALSE;
- int locked = B_FALSE;
-
- *spapp = NULL;
-
- /*
- * As disgusting as this is, we need to support recursive calls to this
- * function because dsl_dir_open() is called during spa_load(), and ends
- * up calling spa_open() again. The real fix is to figure out how to
- * avoid dsl_dir_open() calling this in the first place.
- */
- if (mutex_owner(&spa_namespace_lock) != curthread) {
- mutex_enter(&spa_namespace_lock);
- locked = B_TRUE;
- }
-
- if ((spa = spa_lookup(pool)) == NULL) {
- if (locked)
- mutex_exit(&spa_namespace_lock);
- return (ENOENT);
- }
- if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
-
- spa_activate(spa);
-
- error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE);
-
- if (error == EBADF) {
- /*
- * If vdev_validate() returns failure (indicated by
- * EBADF), it indicates that one of the vdevs indicates
- * that the pool has been exported or destroyed. If
- * this is the case, the config cache is out of sync and
- * we should remove the pool from the namespace.
- */
- zfs_post_ok(spa, NULL);
- spa_unload(spa);
- spa_deactivate(spa);
- spa_remove(spa);
- spa_config_sync();
- if (locked)
- mutex_exit(&spa_namespace_lock);
- return (ENOENT);
- }
-
- if (error) {
- /*
- * We can't open the pool, but we still have useful
- * information: the state of each vdev after the
- * attempted vdev_open(). Return this to the user.
- */
- if (config != NULL && spa->spa_root_vdev != NULL) {
- spa_config_enter(spa, RW_READER, FTAG);
- *config = spa_config_generate(spa, NULL, -1ULL,
- B_TRUE);
- spa_config_exit(spa, FTAG);
- }
- spa_unload(spa);
- spa_deactivate(spa);
- spa->spa_last_open_failed = B_TRUE;
- if (locked)
- mutex_exit(&spa_namespace_lock);
- *spapp = NULL;
- return (error);
- } else {
- zfs_post_ok(spa, NULL);
- spa->spa_last_open_failed = B_FALSE;
- }
-
- loaded = B_TRUE;
- }
-
- spa_open_ref(spa, tag);
- if (locked)
- mutex_exit(&spa_namespace_lock);
-
- *spapp = spa;
-
- if (config != NULL) {
- spa_config_enter(spa, RW_READER, FTAG);
- *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
- spa_config_exit(spa, FTAG);
- }
-
- /*
- * If we just loaded the pool, resilver anything that's out of date.
- */
- if (loaded && (spa_mode & FWRITE))
- VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
-
- return (0);
-}
-
-int
-spa_open(const char *name, spa_t **spapp, void *tag)
-{
- return (spa_open_common(name, spapp, tag, NULL));
-}
-
-/*
- * Lookup the given spa_t, incrementing the inject count in the process,
- * preventing it from being exported or destroyed.
- */
-spa_t *
-spa_inject_addref(char *name)
-{
- spa_t *spa;
-
- mutex_enter(&spa_namespace_lock);
- if ((spa = spa_lookup(name)) == NULL) {
- mutex_exit(&spa_namespace_lock);
- return (NULL);
- }
- spa->spa_inject_ref++;
- mutex_exit(&spa_namespace_lock);
-
- return (spa);
-}
-
-void
-spa_inject_delref(spa_t *spa)
-{
- mutex_enter(&spa_namespace_lock);
- spa->spa_inject_ref--;
- mutex_exit(&spa_namespace_lock);
-}
-
-static void
-spa_add_spares(spa_t *spa, nvlist_t *config)
-{
- nvlist_t **spares;
- uint_t i, nspares;
- nvlist_t *nvroot;
- uint64_t guid;
- vdev_stat_t *vs;
- uint_t vsc;
- uint64_t pool;
-
- if (spa->spa_nspares == 0)
- return;
-
- VERIFY(nvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
- if (nspares != 0) {
- VERIFY(nvlist_add_nvlist_array(nvroot,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- VERIFY(nvlist_lookup_nvlist_array(nvroot,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
-
- /*
- * Go through and find any spares which have since been
- * repurposed as an active spare. If this is the case, update
- * their status appropriately.
- */
- for (i = 0; i < nspares; i++) {
- VERIFY(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &guid) == 0);
- if (spa_spare_exists(guid, &pool) && pool != 0ULL) {
- VERIFY(nvlist_lookup_uint64_array(
- spares[i], ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &vsc) == 0);
- vs->vs_state = VDEV_STATE_CANT_OPEN;
- vs->vs_aux = VDEV_AUX_SPARED;
- }
- }
- }
-}
-
-int
-spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
-{
- int error;
- spa_t *spa;
-
- *config = NULL;
- error = spa_open_common(name, &spa, FTAG, config);
-
- if (spa && *config != NULL) {
- VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
- spa_get_errlog_size(spa)) == 0);
-
- spa_add_spares(spa, *config);
- }
-
- /*
- * We want to get the alternate root even for faulted pools, so we cheat
- * and call spa_lookup() directly.
- */
- if (altroot) {
- if (spa == NULL) {
- mutex_enter(&spa_namespace_lock);
- spa = spa_lookup(name);
- if (spa)
- spa_altroot(spa, altroot, buflen);
- else
- altroot[0] = '\0';
- spa = NULL;
- mutex_exit(&spa_namespace_lock);
- } else {
- spa_altroot(spa, altroot, buflen);
- }
- }
-
- if (spa != NULL)
- spa_close(spa, FTAG);
-
- return (error);
-}
-
-/*
- * Validate that the 'spares' array is well formed. We must have an array of
- * nvlists, each which describes a valid leaf vdev. If this is an import (mode
- * is VDEV_ALLOC_SPARE), then we allow corrupted spares to be specified, as long
- * as they are well-formed.
- */
-static int
-spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
-{
- nvlist_t **spares;
- uint_t i, nspares;
- vdev_t *vd;
- int error;
-
- /*
- * It's acceptable to have no spares specified.
- */
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) != 0)
- return (0);
-
- if (nspares == 0)
- return (EINVAL);
-
- /*
- * Make sure the pool is formatted with a version that supports hot
- * spares.
- */
- if (spa_version(spa) < ZFS_VERSION_SPARES)
- return (ENOTSUP);
-
- /*
- * Set the pending spare list so we correctly handle device in-use
- * checking.
- */
- spa->spa_pending_spares = spares;
- spa->spa_pending_nspares = nspares;
-
- for (i = 0; i < nspares; i++) {
- if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
- mode)) != 0)
- goto out;
-
- if (!vd->vdev_ops->vdev_op_leaf) {
- vdev_free(vd);
- error = EINVAL;
- goto out;
- }
-
- vd->vdev_top = vd;
-
- if ((error = vdev_open(vd)) == 0 &&
- (error = vdev_label_init(vd, crtxg,
- VDEV_LABEL_SPARE)) == 0) {
- VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
- }
-
- vdev_free(vd);
-
- if (error && mode != VDEV_ALLOC_SPARE)
- goto out;
- else
- error = 0;
- }
-
-out:
- spa->spa_pending_spares = NULL;
- spa->spa_pending_nspares = 0;
- return (error);
-}
-
-/*
- * Pool Creation
- */
-int
-spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
-{
- spa_t *spa;
- vdev_t *rvd;
- dsl_pool_t *dp;
- dmu_tx_t *tx;
- int c, error = 0;
- uint64_t txg = TXG_INITIAL;
- nvlist_t **spares;
- uint_t nspares;
-
- /*
- * If this pool already exists, return failure.
- */
- mutex_enter(&spa_namespace_lock);
- if (spa_lookup(pool) != NULL) {
- mutex_exit(&spa_namespace_lock);
- return (EEXIST);
- }
-
- /*
- * Allocate a new spa_t structure.
- */
- spa = spa_add(pool, altroot);
- spa_activate(spa);
-
- spa->spa_uberblock.ub_txg = txg - 1;
- spa->spa_uberblock.ub_version = ZFS_VERSION;
- spa->spa_ubsync = spa->spa_uberblock;
-
- /*
- * Create the root vdev.
- */
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);
-
- ASSERT(error != 0 || rvd != NULL);
- ASSERT(error != 0 || spa->spa_root_vdev == rvd);
-
- if (error == 0 && rvd->vdev_children == 0)
- error = EINVAL;
-
- if (error == 0 &&
- (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
- (error = spa_validate_spares(spa, nvroot, txg,
- VDEV_ALLOC_ADD)) == 0) {
- for (c = 0; c < rvd->vdev_children; c++)
- vdev_init(rvd->vdev_child[c], txg);
- vdev_config_dirty(rvd);
- }
-
- spa_config_exit(spa, FTAG);
-
- if (error != 0) {
- spa_unload(spa);
- spa_deactivate(spa);
- spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
- return (error);
- }
-
- /*
- * Get the list of spares, if specified.
- */
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- spa_config_enter(spa, RW_WRITER, FTAG);
- spa_load_spares(spa);
- spa_config_exit(spa, FTAG);
- spa->spa_sync_spares = B_TRUE;
- }
-
- spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
- spa->spa_meta_objset = dp->dp_meta_objset;
-
- tx = dmu_tx_create_assigned(dp, txg);
-
- /*
- * Create the pool config object.
- */
- spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
- DMU_OT_PACKED_NVLIST, 1 << 14,
- DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
-
- if (zap_add(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
- sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
- cmn_err(CE_PANIC, "failed to add pool config");
- }
-
- /* Newly created pools are always deflated. */
- spa->spa_deflate = TRUE;
- if (zap_add(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
- sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
- cmn_err(CE_PANIC, "failed to add deflate");
- }
-
- /*
- * Create the deferred-free bplist object. Turn off compression
- * because sync-to-convergence takes longer if the blocksize
- * keeps changing.
- */
- spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset,
- 1 << 14, tx);
- dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj,
- ZIO_COMPRESS_OFF, tx);
-
- if (zap_add(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
- sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) {
- cmn_err(CE_PANIC, "failed to add bplist");
- }
-
- /*
- * Create the pool's history object.
- */
- spa_history_create_obj(spa, tx);
-
- dmu_tx_commit(tx);
-
- spa->spa_bootfs = zfs_prop_default_numeric(ZFS_PROP_BOOTFS);
- spa->spa_sync_on = B_TRUE;
- txg_sync_start(spa->spa_dsl_pool);
-
- /*
- * We explicitly wait for the first transaction to complete so that our
- * bean counters are appropriately updated.
- */
- txg_wait_synced(spa->spa_dsl_pool, txg);
-
- spa_config_sync();
-
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-/*
- * Import the given pool into the system. We set up the necessary spa_t and
- * then call spa_load() to do the dirty work.
- */
-int
-spa_import(const char *pool, nvlist_t *config, const char *altroot)
-{
- spa_t *spa;
- int error;
- nvlist_t *nvroot;
- nvlist_t **spares;
- uint_t nspares;
-
- if (!(spa_mode & FWRITE))
- return (EROFS);
-
- /*
- * If a pool with this name exists, return failure.
- */
- mutex_enter(&spa_namespace_lock);
- if (spa_lookup(pool) != NULL) {
- mutex_exit(&spa_namespace_lock);
- return (EEXIST);
- }
-
- /*
- * Create and initialize the spa structure.
- */
- spa = spa_add(pool, altroot);
- spa_activate(spa);
-
- /*
- * Pass off the heavy lifting to spa_load().
- * Pass TRUE for mosconfig because the user-supplied config
- * is actually the one to trust when doing an import.
- */
- error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- /*
- * Toss any existing sparelist, as it doesn't have any validity anymore,
- * and conflicts with spa_has_spare().
- */
- if (spa->spa_sparelist) {
- nvlist_free(spa->spa_sparelist);
- spa->spa_sparelist = NULL;
- spa_load_spares(spa);
- }
-
- VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (error == 0)
- error = spa_validate_spares(spa, nvroot, -1ULL,
- VDEV_ALLOC_SPARE);
- spa_config_exit(spa, FTAG);
-
- if (error != 0) {
- spa_unload(spa);
- spa_deactivate(spa);
- spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
- return (error);
- }
-
- /*
- * Override any spares as specified by the user, as these may have
- * correct device names/devids, etc.
- */
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- if (spa->spa_sparelist)
- VERIFY(nvlist_remove(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
- else
- VERIFY(nvlist_alloc(&spa->spa_sparelist,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- spa_config_enter(spa, RW_WRITER, FTAG);
- spa_load_spares(spa);
- spa_config_exit(spa, FTAG);
- spa->spa_sync_spares = B_TRUE;
- }
-
- /*
- * Update the config cache to include the newly-imported pool.
- */
- spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
-
- mutex_exit(&spa_namespace_lock);
-
- /*
- * Resilver anything that's out of date.
- */
- if (spa_mode & FWRITE)
- VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
-
- return (0);
-}
-
-/*
- * This (illegal) pool name is used when temporarily importing a spa_t in order
- * to get the vdev stats associated with the imported devices.
- */
-#define TRYIMPORT_NAME "$import"
-
-nvlist_t *
-spa_tryimport(nvlist_t *tryconfig)
-{
- nvlist_t *config = NULL;
- char *poolname;
- spa_t *spa;
- uint64_t state;
-
- if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
- return (NULL);
-
- if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
- return (NULL);
-
- /*
- * Create and initialize the spa structure.
- */
- mutex_enter(&spa_namespace_lock);
- spa = spa_add(TRYIMPORT_NAME, NULL);
- spa_activate(spa);
-
- /*
- * Pass off the heavy lifting to spa_load().
- * Pass TRUE for mosconfig because the user-supplied config
- * is actually the one to trust when doing an import.
- */
- (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE);
-
- /*
- * If 'tryconfig' was at least parsable, return the current config.
- */
- if (spa->spa_root_vdev != NULL) {
- spa_config_enter(spa, RW_READER, FTAG);
- config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
- spa_config_exit(spa, FTAG);
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
- poolname) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- state) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
- spa->spa_uberblock.ub_timestamp) == 0);
-
- /*
- * Add the list of hot spares.
- */
- spa_add_spares(spa, config);
- }
-
- spa_unload(spa);
- spa_deactivate(spa);
- spa_remove(spa);
- mutex_exit(&spa_namespace_lock);
-
- return (config);
-}
-
-/*
- * Pool export/destroy
- *
- * The act of destroying or exporting a pool is very simple. We make sure there
- * is no more pending I/O and any references to the pool are gone. Then, we
- * update the pool state and sync all the labels to disk, removing the
- * configuration from the cache afterwards.
- */
-static int
-spa_export_common(char *pool, int new_state, nvlist_t **oldconfig)
-{
- spa_t *spa;
-
- if (oldconfig)
- *oldconfig = NULL;
-
- if (!(spa_mode & FWRITE))
- return (EROFS);
-
- mutex_enter(&spa_namespace_lock);
- if ((spa = spa_lookup(pool)) == NULL) {
- mutex_exit(&spa_namespace_lock);
- return (ENOENT);
- }
-
- /*
- * Put a hold on the pool, drop the namespace lock, stop async tasks,
- * reacquire the namespace lock, and see if we can export.
- */
- spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
- spa_async_suspend(spa);
- mutex_enter(&spa_namespace_lock);
- spa_close(spa, FTAG);
-
- /*
- * The pool will be in core if it's openable,
- * in which case we can modify its state.
- */
- if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
- /*
- * Objsets may be open only because they're dirty, so we
- * have to force it to sync before checking spa_refcnt.
- */
- spa_scrub_suspend(spa);
- txg_wait_synced(spa->spa_dsl_pool, 0);
-
- /*
- * A pool cannot be exported or destroyed if there are active
- * references. If we are resetting a pool, allow references by
- * fault injection handlers.
- */
- if (!spa_refcount_zero(spa) ||
- (spa->spa_inject_ref != 0 &&
- new_state != POOL_STATE_UNINITIALIZED)) {
- spa_scrub_resume(spa);
- spa_async_resume(spa);
- mutex_exit(&spa_namespace_lock);
- return (EBUSY);
- }
-
- spa_scrub_resume(spa);
- VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0);
-
- /*
- * We want this to be reflected on every label,
- * so mark them all dirty. spa_unload() will do the
- * final sync that pushes these changes out.
- */
- if (new_state != POOL_STATE_UNINITIALIZED) {
- spa_config_enter(spa, RW_WRITER, FTAG);
- spa->spa_state = new_state;
- spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
- vdev_config_dirty(spa->spa_root_vdev);
- spa_config_exit(spa, FTAG);
- }
- }
-
- if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
- spa_unload(spa);
- spa_deactivate(spa);
- }
-
- if (oldconfig && spa->spa_config)
- VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
-
- if (new_state != POOL_STATE_UNINITIALIZED) {
- spa_remove(spa);
- spa_config_sync();
- }
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-/*
- * Destroy a storage pool.
- */
-int
-spa_destroy(char *pool)
-{
- return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL));
-}
-
-/*
- * Export a storage pool.
- */
-int
-spa_export(char *pool, nvlist_t **oldconfig)
-{
- return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig));
-}
-
-/*
- * Similar to spa_export(), this unloads the spa_t without actually removing it
- * from the namespace in any way.
- */
-int
-spa_reset(char *pool)
-{
- return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL));
-}
-
-
-/*
- * ==========================================================================
- * Device manipulation
- * ==========================================================================
- */
-
-/*
- * Add capacity to a storage pool.
- */
-int
-spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
-{
- uint64_t txg;
- int c, error;
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd, *tvd;
- nvlist_t **spares;
- uint_t i, nspares;
-
- txg = spa_vdev_enter(spa);
-
- if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
- VDEV_ALLOC_ADD)) != 0)
- return (spa_vdev_exit(spa, NULL, txg, error));
-
- spa->spa_pending_vdev = vd;
-
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) != 0)
- nspares = 0;
-
- if (vd->vdev_children == 0 && nspares == 0) {
- spa->spa_pending_vdev = NULL;
- return (spa_vdev_exit(spa, vd, txg, EINVAL));
- }
-
- if (vd->vdev_children != 0) {
- if ((error = vdev_create(vd, txg, B_FALSE)) != 0) {
- spa->spa_pending_vdev = NULL;
- return (spa_vdev_exit(spa, vd, txg, error));
- }
- }
-
- /*
- * We must validate the spares after checking the children. Otherwise,
- * vdev_inuse() will blindly overwrite the spare.
- */
- if ((error = spa_validate_spares(spa, nvroot, txg,
- VDEV_ALLOC_ADD)) != 0) {
- spa->spa_pending_vdev = NULL;
- return (spa_vdev_exit(spa, vd, txg, error));
- }
-
- spa->spa_pending_vdev = NULL;
-
- /*
- * Transfer each new top-level vdev from vd to rvd.
- */
- for (c = 0; c < vd->vdev_children; c++) {
- tvd = vd->vdev_child[c];
- vdev_remove_child(vd, tvd);
- tvd->vdev_id = rvd->vdev_children;
- vdev_add_child(rvd, tvd);
- vdev_config_dirty(tvd);
- }
-
- if (nspares != 0) {
- if (spa->spa_sparelist != NULL) {
- nvlist_t **oldspares;
- uint_t oldnspares;
- nvlist_t **newspares;
-
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0);
-
- newspares = kmem_alloc(sizeof (void *) *
- (nspares + oldnspares), KM_SLEEP);
- for (i = 0; i < oldnspares; i++)
- VERIFY(nvlist_dup(oldspares[i],
- &newspares[i], KM_SLEEP) == 0);
- for (i = 0; i < nspares; i++)
- VERIFY(nvlist_dup(spares[i],
- &newspares[i + oldnspares],
- KM_SLEEP) == 0);
-
- VERIFY(nvlist_remove(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
-
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, newspares,
- nspares + oldnspares) == 0);
- for (i = 0; i < oldnspares + nspares; i++)
- nvlist_free(newspares[i]);
- kmem_free(newspares, (oldnspares + nspares) *
- sizeof (void *));
- } else {
- VERIFY(nvlist_alloc(&spa->spa_sparelist,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
- }
-
- spa_load_spares(spa);
- spa->spa_sync_spares = B_TRUE;
- }
-
- /*
- * We have to be careful when adding new vdevs to an existing pool.
- * If other threads start allocating from these vdevs before we
- * sync the config cache, and we lose power, then upon reboot we may
- * fail to open the pool because there are DVAs that the config cache
- * can't translate. Therefore, we first add the vdevs without
- * initializing metaslabs; sync the config cache (via spa_vdev_exit());
- * and then let spa_config_update() initialize the new metaslabs.
- *
- * spa_load() checks for added-but-not-initialized vdevs, so that
- * if we lose power at any point in this sequence, the remaining
- * steps will be completed the next time we load the pool.
- */
- (void) spa_vdev_exit(spa, vd, txg, 0);
-
- mutex_enter(&spa_namespace_lock);
- spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-/*
- * Attach a device to a mirror. The arguments are the path to any device
- * in the mirror, and the nvroot for the new device. If the path specifies
- * a device that is not mirrored, we automatically insert the mirror vdev.
- *
- * If 'replacing' is specified, the new device is intended to replace the
- * existing device; in this case the two devices are made into their own
- * mirror using the 'replacing' vdev, which is functionally idendical to
- * the mirror vdev (it actually reuses all the same ops) but has a few
- * extra rules: you can't attach to it after it's been created, and upon
- * completion of resilvering, the first disk (the one being replaced)
- * is automatically detached.
- */
-int
-spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
-{
- uint64_t txg, open_txg;
- int error;
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
- vdev_ops_t *pvops;
-
- txg = spa_vdev_enter(spa);
-
- oldvd = vdev_lookup_by_guid(rvd, guid);
-
- if (oldvd == NULL)
- return (spa_vdev_exit(spa, NULL, txg, ENODEV));
-
- if (!oldvd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- pvd = oldvd->vdev_parent;
-
- if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
- VDEV_ALLOC_ADD)) != 0 || newrootvd->vdev_children != 1)
- return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
-
- newvd = newrootvd->vdev_child[0];
-
- if (!newvd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
-
- if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
- return (spa_vdev_exit(spa, newrootvd, txg, error));
-
- if (!replacing) {
- /*
- * For attach, the only allowable parent is a mirror or the root
- * vdev.
- */
- if (pvd->vdev_ops != &vdev_mirror_ops &&
- pvd->vdev_ops != &vdev_root_ops)
- return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
-
- pvops = &vdev_mirror_ops;
- } else {
- /*
- * Active hot spares can only be replaced by inactive hot
- * spares.
- */
- if (pvd->vdev_ops == &vdev_spare_ops &&
- pvd->vdev_child[1] == oldvd &&
- !spa_has_spare(spa, newvd->vdev_guid))
- return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
-
- /*
- * If the source is a hot spare, and the parent isn't already a
- * spare, then we want to create a new hot spare. Otherwise, we
- * want to create a replacing vdev. The user is not allowed to
- * attach to a spared vdev child unless the 'isspare' state is
- * the same (spare replaces spare, non-spare replaces
- * non-spare).
- */
- if (pvd->vdev_ops == &vdev_replacing_ops)
- return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
- else if (pvd->vdev_ops == &vdev_spare_ops &&
- newvd->vdev_isspare != oldvd->vdev_isspare)
- return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
- else if (pvd->vdev_ops != &vdev_spare_ops &&
- newvd->vdev_isspare)
- pvops = &vdev_spare_ops;
- else
- pvops = &vdev_replacing_ops;
- }
-
- /*
- * Compare the new device size with the replaceable/attachable
- * device size.
- */
- if (newvd->vdev_psize < vdev_get_rsize(oldvd))
- return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
-
- /*
- * The new device cannot have a higher alignment requirement
- * than the top-level vdev.
- */
- if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
- return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
-
- /*
- * If this is an in-place replacement, update oldvd's path and devid
- * to make it distinguishable from newvd, and unopenable from now on.
- */
- if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
- spa_strfree(oldvd->vdev_path);
- oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
- KM_SLEEP);
- (void) sprintf(oldvd->vdev_path, "%s/%s",
- newvd->vdev_path, "old");
- if (oldvd->vdev_devid != NULL) {
- spa_strfree(oldvd->vdev_devid);
- oldvd->vdev_devid = NULL;
- }
- }
-
- /*
- * If the parent is not a mirror, or if we're replacing, insert the new
- * mirror/replacing/spare vdev above oldvd.
- */
- if (pvd->vdev_ops != pvops)
- pvd = vdev_add_parent(oldvd, pvops);
-
- ASSERT(pvd->vdev_top->vdev_parent == rvd);
- ASSERT(pvd->vdev_ops == pvops);
- ASSERT(oldvd->vdev_parent == pvd);
-
- /*
- * Extract the new device from its root and add it to pvd.
- */
- vdev_remove_child(newrootvd, newvd);
- newvd->vdev_id = pvd->vdev_children;
- vdev_add_child(pvd, newvd);
-
- /*
- * If newvd is smaller than oldvd, but larger than its rsize,
- * the addition of newvd may have decreased our parent's asize.
- */
- pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize);
-
- tvd = newvd->vdev_top;
- ASSERT(pvd->vdev_top == tvd);
- ASSERT(tvd->vdev_parent == rvd);
-
- vdev_config_dirty(tvd);
-
- /*
- * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate
- * upward when spa_vdev_exit() calls vdev_dtl_reassess().
- */
- open_txg = txg + TXG_CONCURRENT_STATES - 1;
-
- mutex_enter(&newvd->vdev_dtl_lock);
- space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL,
- open_txg - TXG_INITIAL + 1);
- mutex_exit(&newvd->vdev_dtl_lock);
-
- if (newvd->vdev_isspare)
- spa_spare_activate(newvd);
-
- /*
- * Mark newvd's DTL dirty in this txg.
- */
- vdev_dirty(tvd, VDD_DTL, newvd, txg);
-
- (void) spa_vdev_exit(spa, newrootvd, open_txg, 0);
-
- /*
- * Kick off a resilver to update newvd.
- */
- VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
-
- return (0);
-}
-
-/*
- * Detach a device from a mirror or replacing vdev.
- * If 'replace_done' is specified, only detach if the parent
- * is a replacing vdev.
- */
-int
-spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
-{
- uint64_t txg;
- int c, t, error;
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd, *pvd, *cvd, *tvd;
- boolean_t unspare = B_FALSE;
- uint64_t unspare_guid;
-
- txg = spa_vdev_enter(spa);
-
- vd = vdev_lookup_by_guid(rvd, guid);
-
- if (vd == NULL)
- return (spa_vdev_exit(spa, NULL, txg, ENODEV));
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- pvd = vd->vdev_parent;
-
- /*
- * If replace_done is specified, only remove this device if it's
- * the first child of a replacing vdev. For the 'spare' vdev, either
- * disk can be removed.
- */
- if (replace_done) {
- if (pvd->vdev_ops == &vdev_replacing_ops) {
- if (vd->vdev_id != 0)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- } else if (pvd->vdev_ops != &vdev_spare_ops) {
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- }
- }
-
- ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
- spa_version(spa) >= ZFS_VERSION_SPARES);
-
- /*
- * Only mirror, replacing, and spare vdevs support detach.
- */
- if (pvd->vdev_ops != &vdev_replacing_ops &&
- pvd->vdev_ops != &vdev_mirror_ops &&
- pvd->vdev_ops != &vdev_spare_ops)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- /*
- * If there's only one replica, you can't detach it.
- */
- if (pvd->vdev_children <= 1)
- return (spa_vdev_exit(spa, NULL, txg, EBUSY));
-
- /*
- * If all siblings have non-empty DTLs, this device may have the only
- * valid copy of the data, which means we cannot safely detach it.
- *
- * XXX -- as in the vdev_offline() case, we really want a more
- * precise DTL check.
- */
- for (c = 0; c < pvd->vdev_children; c++) {
- uint64_t dirty;
-
- cvd = pvd->vdev_child[c];
- if (cvd == vd)
- continue;
- if (vdev_is_dead(cvd))
- continue;
- mutex_enter(&cvd->vdev_dtl_lock);
- dirty = cvd->vdev_dtl_map.sm_space |
- cvd->vdev_dtl_scrub.sm_space;
- mutex_exit(&cvd->vdev_dtl_lock);
- if (!dirty)
- break;
- }
-
- /*
- * If we are a replacing or spare vdev, then we can always detach the
- * latter child, as that is how one cancels the operation.
- */
- if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) &&
- c == pvd->vdev_children)
- return (spa_vdev_exit(spa, NULL, txg, EBUSY));
-
- /*
- * If we are detaching the original disk from a spare, then it implies
- * that the spare should become a real disk, and be removed from the
- * active spare list for the pool.
- */
- if (pvd->vdev_ops == &vdev_spare_ops &&
- vd->vdev_id == 0)
- unspare = B_TRUE;
-
- /*
- * Erase the disk labels so the disk can be used for other things.
- * This must be done after all other error cases are handled,
- * but before we disembowel vd (so we can still do I/O to it).
- * But if we can't do it, don't treat the error as fatal --
- * it may be that the unwritability of the disk is the reason
- * it's being detached!
- */
- error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
-
- /*
- * Remove vd from its parent and compact the parent's children.
- */
- vdev_remove_child(pvd, vd);
- vdev_compact_children(pvd);
-
- /*
- * Remember one of the remaining children so we can get tvd below.
- */
- cvd = pvd->vdev_child[0];
-
- /*
- * If we need to remove the remaining child from the list of hot spares,
- * do it now, marking the vdev as no longer a spare in the process. We
- * must do this before vdev_remove_parent(), because that can change the
- * GUID if it creates a new toplevel GUID.
- */
- if (unspare) {
- ASSERT(cvd->vdev_isspare);
- spa_spare_remove(cvd);
- unspare_guid = cvd->vdev_guid;
- }
-
- /*
- * If the parent mirror/replacing vdev only has one child,
- * the parent is no longer needed. Remove it from the tree.
- */
- if (pvd->vdev_children == 1)
- vdev_remove_parent(cvd);
-
- /*
- * We don't set tvd until now because the parent we just removed
- * may have been the previous top-level vdev.
- */
- tvd = cvd->vdev_top;
- ASSERT(tvd->vdev_parent == rvd);
-
- /*
- * Reevaluate the parent vdev state.
- */
- vdev_propagate_state(cvd->vdev_parent);
-
- /*
- * If the device we just detached was smaller than the others, it may be
- * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init()
- * can't fail because the existing metaslabs are already in core, so
- * there's nothing to read from disk.
- */
- VERIFY(vdev_metaslab_init(tvd, txg) == 0);
-
- vdev_config_dirty(tvd);
-
- /*
- * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that
- * vd->vdev_detached is set and free vd's DTL object in syncing context.
- * But first make sure we're not on any *other* txg's DTL list, to
- * prevent vd from being accessed after it's freed.
- */
- for (t = 0; t < TXG_SIZE; t++)
- (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
- vd->vdev_detached = B_TRUE;
- vdev_dirty(tvd, VDD_DTL, vd, txg);
-
- error = spa_vdev_exit(spa, vd, txg, 0);
-
- /*
- * If this was the removal of the original device in a hot spare vdev,
- * then we want to go through and remove the device from the hot spare
- * list of every other pool.
- */
- if (unspare) {
- spa = NULL;
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (spa->spa_state != POOL_STATE_ACTIVE)
- continue;
-
- (void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
- }
- mutex_exit(&spa_namespace_lock);
- }
-
- return (error);
-}
-
-/*
- * Remove a device from the pool. Currently, this supports removing only hot
- * spares.
- */
-int
-spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
-{
- vdev_t *vd;
- nvlist_t **spares, *nv, **newspares;
- uint_t i, j, nspares;
- int ret = 0;
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- vd = spa_lookup_by_guid(spa, guid);
-
- nv = NULL;
- if (spa->spa_spares != NULL &&
- nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- for (i = 0; i < nspares; i++) {
- uint64_t theguid;
-
- VERIFY(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &theguid) == 0);
- if (theguid == guid) {
- nv = spares[i];
- break;
- }
- }
- }
-
- /*
- * We only support removing a hot spare, and only if it's not currently
- * in use in this pool.
- */
- if (nv == NULL && vd == NULL) {
- ret = ENOENT;
- goto out;
- }
-
- if (nv == NULL && vd != NULL) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (!unspare && nv != NULL && vd != NULL) {
- ret = EBUSY;
- goto out;
- }
-
- if (nspares == 1) {
- newspares = NULL;
- } else {
- newspares = kmem_alloc((nspares - 1) * sizeof (void *),
- KM_SLEEP);
- for (i = 0, j = 0; i < nspares; i++) {
- if (spares[i] != nv)
- VERIFY(nvlist_dup(spares[i],
- &newspares[j++], KM_SLEEP) == 0);
- }
- }
-
- VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
- DATA_TYPE_NVLIST_ARRAY) == 0);
- VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
- newspares, nspares - 1) == 0);
- for (i = 0; i < nspares - 1; i++)
- nvlist_free(newspares[i]);
- kmem_free(newspares, (nspares - 1) * sizeof (void *));
- spa_load_spares(spa);
- spa->spa_sync_spares = B_TRUE;
-
-out:
- spa_config_exit(spa, FTAG);
-
- return (ret);
-}
-
-/*
- * Find any device that's done replacing, so we can detach it.
- */
-static vdev_t *
-spa_vdev_replace_done_hunt(vdev_t *vd)
-{
- vdev_t *newvd, *oldvd;
- int c;
-
- for (c = 0; c < vd->vdev_children; c++) {
- oldvd = spa_vdev_replace_done_hunt(vd->vdev_child[c]);
- if (oldvd != NULL)
- return (oldvd);
- }
-
- if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
- oldvd = vd->vdev_child[0];
- newvd = vd->vdev_child[1];
-
- mutex_enter(&newvd->vdev_dtl_lock);
- if (newvd->vdev_dtl_map.sm_space == 0 &&
- newvd->vdev_dtl_scrub.sm_space == 0) {
- mutex_exit(&newvd->vdev_dtl_lock);
- return (oldvd);
- }
- mutex_exit(&newvd->vdev_dtl_lock);
- }
-
- return (NULL);
-}
-
-static void
-spa_vdev_replace_done(spa_t *spa)
-{
- vdev_t *vd;
- vdev_t *pvd;
- uint64_t guid;
- uint64_t pguid = 0;
-
- spa_config_enter(spa, RW_READER, FTAG);
-
- while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) {
- guid = vd->vdev_guid;
- /*
- * If we have just finished replacing a hot spared device, then
- * we need to detach the parent's first child (the original hot
- * spare) as well.
- */
- pvd = vd->vdev_parent;
- if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
- pvd->vdev_id == 0) {
- ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
- ASSERT(pvd->vdev_parent->vdev_children == 2);
- pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid;
- }
- spa_config_exit(spa, FTAG);
- if (spa_vdev_detach(spa, guid, B_TRUE) != 0)
- return;
- if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0)
- return;
- spa_config_enter(spa, RW_READER, FTAG);
- }
-
- spa_config_exit(spa, FTAG);
-}
-
-/*
- * Update the stored path for this vdev. Dirty the vdev configuration, relying
- * on spa_vdev_enter/exit() to synchronize the labels and cache.
- */
-int
-spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
-{
- vdev_t *rvd, *vd;
- uint64_t txg;
-
- rvd = spa->spa_root_vdev;
-
- txg = spa_vdev_enter(spa);
-
- if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
- /*
- * Determine if this is a reference to a hot spare. In that
- * case, update the path as stored in the spare list.
- */
- nvlist_t **spares;
- uint_t i, nspares;
- if (spa->spa_sparelist != NULL) {
- VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
- ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
- for (i = 0; i < nspares; i++) {
- uint64_t theguid;
- VERIFY(nvlist_lookup_uint64(spares[i],
- ZPOOL_CONFIG_GUID, &theguid) == 0);
- if (theguid == guid)
- break;
- }
-
- if (i == nspares)
- return (spa_vdev_exit(spa, NULL, txg, ENOENT));
-
- VERIFY(nvlist_add_string(spares[i],
- ZPOOL_CONFIG_PATH, newpath) == 0);
- spa_load_spares(spa);
- spa->spa_sync_spares = B_TRUE;
- return (spa_vdev_exit(spa, NULL, txg, 0));
- } else {
- return (spa_vdev_exit(spa, NULL, txg, ENOENT));
- }
- }
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- spa_strfree(vd->vdev_path);
- vd->vdev_path = spa_strdup(newpath);
-
- vdev_config_dirty(vd->vdev_top);
-
- return (spa_vdev_exit(spa, NULL, txg, 0));
-}
-
-/*
- * ==========================================================================
- * SPA Scrubbing
- * ==========================================================================
- */
-
-static void
-spa_scrub_io_done(zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
-
- zio_data_buf_free(zio->io_data, zio->io_size);
-
- mutex_enter(&spa->spa_scrub_lock);
- if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev;
- spa->spa_scrub_errors++;
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
-
- if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight)
- cv_broadcast(&spa->spa_scrub_io_cv);
-
- ASSERT(spa->spa_scrub_inflight >= 0);
-
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-static void
-spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags,
- zbookmark_t *zb)
-{
- size_t size = BP_GET_LSIZE(bp);
- void *data;
-
- mutex_enter(&spa->spa_scrub_lock);
- /*
- * Do not give too much work to vdev(s).
- */
- while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) {
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
- }
- spa->spa_scrub_inflight++;
- mutex_exit(&spa->spa_scrub_lock);
-
- data = zio_data_buf_alloc(size);
-
- if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
- flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */
-
- flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;
-
- zio_nowait(zio_read(NULL, spa, bp, data, size,
- spa_scrub_io_done, NULL, priority, flags, zb));
-}
-
-/* ARGSUSED */
-static int
-spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
-{
- blkptr_t *bp = &bc->bc_blkptr;
- vdev_t *vd = spa->spa_root_vdev;
- dva_t *dva = bp->blk_dva;
- int needs_resilver = B_FALSE;
- int d;
-
- if (bc->bc_errno) {
- /*
- * We can't scrub this block, but we can continue to scrub
- * the rest of the pool. Note the error and move along.
- */
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_errors++;
- mutex_exit(&spa->spa_scrub_lock);
-
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_errors++;
- mutex_exit(&vd->vdev_stat_lock);
-
- return (ERESTART);
- }
-
- ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg);
-
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]));
-
- ASSERT(vd != NULL);
-
- /*
- * Keep track of how much data we've examined so that
- * zpool(1M) status can make useful progress reports.
- */
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]);
- mutex_exit(&vd->vdev_stat_lock);
-
- if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) {
- if (DVA_GET_GANG(&dva[d])) {
- /*
- * Gang members may be spread across multiple
- * vdevs, so the best we can do is look at the
- * pool-wide DTL.
- * XXX -- it would be better to change our
- * allocation policy to ensure that this can't
- * happen.
- */
- vd = spa->spa_root_vdev;
- }
- if (vdev_dtl_contains(&vd->vdev_dtl_map,
- bp->blk_birth, 1))
- needs_resilver = B_TRUE;
- }
- }
-
- if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING)
- spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB,
- ZIO_FLAG_SCRUB, &bc->bc_bookmark);
- else if (needs_resilver)
- spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER,
- ZIO_FLAG_RESILVER, &bc->bc_bookmark);
-
- return (0);
-}
-
-static void
-spa_scrub_thread(void *arg)
-{
- spa_t *spa = arg;
- callb_cpr_t cprinfo;
- traverse_handle_t *th = spa->spa_scrub_th;
- vdev_t *rvd = spa->spa_root_vdev;
- pool_scrub_type_t scrub_type = spa->spa_scrub_type;
- int error = 0;
- boolean_t complete;
-
- CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG);
-
- /*
- * If we're restarting due to a snapshot create/delete,
- * wait for that to complete.
- */
- txg_wait_synced(spa_get_dsl(spa), 0);
-
- dprintf("start %s mintxg=%llu maxtxg=%llu\n",
- scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub",
- spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- vdev_reopen(rvd); /* purge all vdev caches */
- vdev_config_dirty(rvd); /* rewrite all disk labels */
- vdev_scrub_stat_update(rvd, scrub_type, B_FALSE);
- spa_config_exit(spa, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_errors = 0;
- spa->spa_scrub_active = 1;
- ASSERT(spa->spa_scrub_inflight == 0);
-
- while (!spa->spa_scrub_stop) {
- CALLB_CPR_SAFE_BEGIN(&cprinfo);
- while (spa->spa_scrub_suspended) {
- spa->spa_scrub_active = 0;
- cv_broadcast(&spa->spa_scrub_cv);
- cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock);
- spa->spa_scrub_active = 1;
- }
- CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock);
-
- if (spa->spa_scrub_restart_txg != 0)
- break;
-
- mutex_exit(&spa->spa_scrub_lock);
- error = traverse_more(th);
- mutex_enter(&spa->spa_scrub_lock);
- if (error != EAGAIN)
- break;
- }
-
- while (spa->spa_scrub_inflight)
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
-
- spa->spa_scrub_active = 0;
- cv_broadcast(&spa->spa_scrub_cv);
-
- mutex_exit(&spa->spa_scrub_lock);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
-
- /*
- * Note: we check spa_scrub_restart_txg under both spa_scrub_lock
- * AND the spa config lock to synchronize with any config changes
- * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit().
- */
- if (spa->spa_scrub_restart_txg != 0)
- error = ERESTART;
-
- if (spa->spa_scrub_stop)
- error = EINTR;
-
- /*
- * Even if there were uncorrectable errors, we consider the scrub
- * completed. The downside is that if there is a transient error during
- * a resilver, we won't resilver the data properly to the target. But
- * if the damage is permanent (more likely) we will resilver forever,
- * which isn't really acceptable. Since there is enough information for
- * the user to know what has failed and why, this seems like a more
- * tractable approach.
- */
- complete = (error == 0);
-
- dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n",
- scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub",
- spa->spa_scrub_maxtxg, complete ? "done" : "FAILED",
- error, spa->spa_scrub_errors, spa->spa_scrub_stop);
-
- mutex_exit(&spa->spa_scrub_lock);
-
- /*
- * If the scrub/resilver completed, update all DTLs to reflect this.
- * Whether it succeeded or not, vacate all temporary scrub DTLs.
- */
- vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1,
- complete ? spa->spa_scrub_maxtxg : 0, B_TRUE);
- vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete);
- spa_errlog_rotate(spa);
-
- spa_config_exit(spa, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
-
- /*
- * We may have finished replacing a device.
- * Let the async thread assess this and handle the detach.
- */
- spa_async_request(spa, SPA_ASYNC_REPLACE_DONE);
-
- /*
- * If we were told to restart, our final act is to start a new scrub.
- */
- if (error == ERESTART)
- spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ?
- SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB);
-
- spa->spa_scrub_type = POOL_SCRUB_NONE;
- spa->spa_scrub_active = 0;
- spa->spa_scrub_thread = NULL;
- cv_broadcast(&spa->spa_scrub_cv);
- CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */
- thread_exit();
-}
-
-void
-spa_scrub_suspend(spa_t *spa)
-{
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_suspended++;
- while (spa->spa_scrub_active) {
- cv_broadcast(&spa->spa_scrub_cv);
- cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock);
- }
- while (spa->spa_scrub_inflight)
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-void
-spa_scrub_resume(spa_t *spa)
-{
- mutex_enter(&spa->spa_scrub_lock);
- ASSERT(spa->spa_scrub_suspended != 0);
- if (--spa->spa_scrub_suspended == 0)
- cv_broadcast(&spa->spa_scrub_cv);
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-void
-spa_scrub_restart(spa_t *spa, uint64_t txg)
-{
- /*
- * Something happened (e.g. snapshot create/delete) that means
- * we must restart any in-progress scrubs. The itinerary will
- * fix this properly.
- */
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_restart_txg = txg;
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-int
-spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force)
-{
- space_seg_t *ss;
- uint64_t mintxg, maxtxg;
- vdev_t *rvd = spa->spa_root_vdev;
-
- if ((uint_t)type >= POOL_SCRUB_TYPES)
- return (ENOTSUP);
-
- mutex_enter(&spa->spa_scrub_lock);
-
- /*
- * If there's a scrub or resilver already in progress, stop it.
- */
- while (spa->spa_scrub_thread != NULL) {
- /*
- * Don't stop a resilver unless forced.
- */
- if (spa->spa_scrub_type == POOL_SCRUB_RESILVER && !force) {
- mutex_exit(&spa->spa_scrub_lock);
- return (EBUSY);
- }
- spa->spa_scrub_stop = 1;
- cv_broadcast(&spa->spa_scrub_cv);
- cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock);
- }
-
- /*
- * Terminate the previous traverse.
- */
- if (spa->spa_scrub_th != NULL) {
- traverse_fini(spa->spa_scrub_th);
- spa->spa_scrub_th = NULL;
- }
-
- if (rvd == NULL) {
- ASSERT(spa->spa_scrub_stop == 0);
- ASSERT(spa->spa_scrub_type == type);
- ASSERT(spa->spa_scrub_restart_txg == 0);
- mutex_exit(&spa->spa_scrub_lock);
- return (0);
- }
-
- mintxg = TXG_INITIAL - 1;
- maxtxg = spa_last_synced_txg(spa) + 1;
-
- mutex_enter(&rvd->vdev_dtl_lock);
-
- if (rvd->vdev_dtl_map.sm_space == 0) {
- /*
- * The pool-wide DTL is empty.
- * If this is a resilver, there's nothing to do except
- * check whether any in-progress replacements have completed.
- */
- if (type == POOL_SCRUB_RESILVER) {
- type = POOL_SCRUB_NONE;
- spa_async_request(spa, SPA_ASYNC_REPLACE_DONE);
- }
- } else {
- /*
- * The pool-wide DTL is non-empty.
- * If this is a normal scrub, upgrade to a resilver instead.
- */
- if (type == POOL_SCRUB_EVERYTHING)
- type = POOL_SCRUB_RESILVER;
- }
-
- if (type == POOL_SCRUB_RESILVER) {
- /*
- * Determine the resilvering boundaries.
- *
- * Note: (mintxg, maxtxg) is an open interval,
- * i.e. mintxg and maxtxg themselves are not included.
- *
- * Note: for maxtxg, we MIN with spa_last_synced_txg(spa) + 1
- * so we don't claim to resilver a txg that's still changing.
- */
- ss = avl_first(&rvd->vdev_dtl_map.sm_root);
- mintxg = ss->ss_start - 1;
- ss = avl_last(&rvd->vdev_dtl_map.sm_root);
- maxtxg = MIN(ss->ss_end, maxtxg);
- }
-
- mutex_exit(&rvd->vdev_dtl_lock);
-
- spa->spa_scrub_stop = 0;
- spa->spa_scrub_type = type;
- spa->spa_scrub_restart_txg = 0;
-
- if (type != POOL_SCRUB_NONE) {
- spa->spa_scrub_mintxg = mintxg;
- spa->spa_scrub_maxtxg = maxtxg;
- spa->spa_scrub_th = traverse_init(spa, spa_scrub_cb, NULL,
- ADVANCE_PRE | ADVANCE_PRUNE | ADVANCE_ZIL,
- ZIO_FLAG_CANFAIL);
- traverse_add_pool(spa->spa_scrub_th, mintxg, maxtxg);
- spa->spa_scrub_thread = thread_create(NULL, 0,
- spa_scrub_thread, spa, 0, &p0, TS_RUN, minclsyspri);
- }
-
- mutex_exit(&spa->spa_scrub_lock);
-
- return (0);
-}
-
-/*
- * ==========================================================================
- * SPA async task processing
- * ==========================================================================
- */
-
-static void
-spa_async_reopen(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *tvd;
- int c;
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- for (c = 0; c < rvd->vdev_children; c++) {
- tvd = rvd->vdev_child[c];
- if (tvd->vdev_reopen_wanted) {
- tvd->vdev_reopen_wanted = 0;
- vdev_reopen(tvd);
- }
- }
-
- spa_config_exit(spa, FTAG);
-}
-
-static void
-spa_async_thread(void *arg)
-{
- spa_t *spa = arg;
- int tasks;
-
- ASSERT(spa->spa_sync_on);
-
- mutex_enter(&spa->spa_async_lock);
- tasks = spa->spa_async_tasks;
- spa->spa_async_tasks = 0;
- mutex_exit(&spa->spa_async_lock);
-
- /*
- * See if the config needs to be updated.
- */
- if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
- mutex_enter(&spa_namespace_lock);
- spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
- mutex_exit(&spa_namespace_lock);
- }
-
- /*
- * See if any devices need to be reopened.
- */
- if (tasks & SPA_ASYNC_REOPEN)
- spa_async_reopen(spa);
-
- /*
- * If any devices are done replacing, detach them.
- */
- if (tasks & SPA_ASYNC_REPLACE_DONE)
- spa_vdev_replace_done(spa);
-
- /*
- * Kick off a scrub.
- */
- if (tasks & SPA_ASYNC_SCRUB)
- VERIFY(spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_TRUE) == 0);
-
- /*
- * Kick off a resilver.
- */
- if (tasks & SPA_ASYNC_RESILVER)
- VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
-
- /*
- * Let the world know that we're done.
- */
- mutex_enter(&spa->spa_async_lock);
- spa->spa_async_thread = NULL;
- cv_broadcast(&spa->spa_async_cv);
- mutex_exit(&spa->spa_async_lock);
- thread_exit();
-}
-
-void
-spa_async_suspend(spa_t *spa)
-{
- mutex_enter(&spa->spa_async_lock);
- spa->spa_async_suspended++;
- while (spa->spa_async_thread != NULL)
- cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
- mutex_exit(&spa->spa_async_lock);
-}
-
-void
-spa_async_resume(spa_t *spa)
-{
- mutex_enter(&spa->spa_async_lock);
- ASSERT(spa->spa_async_suspended != 0);
- spa->spa_async_suspended--;
- mutex_exit(&spa->spa_async_lock);
-}
-
-static void
-spa_async_dispatch(spa_t *spa)
-{
- mutex_enter(&spa->spa_async_lock);
- if (spa->spa_async_tasks && !spa->spa_async_suspended &&
- spa->spa_async_thread == NULL &&
- rootdir != NULL && !vn_is_readonly(rootdir))
- spa->spa_async_thread = thread_create(NULL, 0,
- spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
- mutex_exit(&spa->spa_async_lock);
-}
-
-void
-spa_async_request(spa_t *spa, int task)
-{
- mutex_enter(&spa->spa_async_lock);
- spa->spa_async_tasks |= task;
- mutex_exit(&spa->spa_async_lock);
-}
-
-/*
- * ==========================================================================
- * SPA syncing routines
- * ==========================================================================
- */
-
-static void
-spa_sync_deferred_frees(spa_t *spa, uint64_t txg)
-{
- bplist_t *bpl = &spa->spa_sync_bplist;
- dmu_tx_t *tx;
- blkptr_t blk;
- uint64_t itor = 0;
- zio_t *zio;
- int error;
- uint8_t c = 1;
-
- zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CONFIG_HELD);
-
- while (bplist_iterate(bpl, &itor, &blk) == 0)
- zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL));
-
- error = zio_wait(zio);
- ASSERT3U(error, ==, 0);
-
- tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
- bplist_vacate(bpl, tx);
-
- /*
- * Pre-dirty the first block so we sync to convergence faster.
- * (Usually only the first block is needed.)
- */
- dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx);
- dmu_tx_commit(tx);
-}
-
-static void
-spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
-{
- char *packed = NULL;
- size_t nvsize = 0;
- dmu_buf_t *db;
-
- VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
-
- packed = kmem_alloc(nvsize, KM_SLEEP);
-
- VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
- KM_SLEEP) == 0);
-
- dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx);
-
- kmem_free(packed, nvsize);
-
- VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
- *(uint64_t *)db->db_data = nvsize;
- dmu_buf_rele(db, FTAG);
-}
-
-static void
-spa_sync_spares(spa_t *spa, dmu_tx_t *tx)
-{
- nvlist_t *nvroot;
- nvlist_t **spares;
- int i;
-
- if (!spa->spa_sync_spares)
- return;
-
- /*
- * Update the MOS nvlist describing the list of available spares.
- * spa_validate_spares() will have already made sure this nvlist is
- * valid and the vdevs are labelled appropriately.
- */
- if (spa->spa_spares_object == 0) {
- spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset,
- DMU_OT_PACKED_NVLIST, 1 << 14,
- DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
- VERIFY(zap_update(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES,
- sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0);
- }
-
- VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- if (spa->spa_nspares == 0) {
- VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- NULL, 0) == 0);
- } else {
- spares = kmem_alloc(spa->spa_nspares * sizeof (void *),
- KM_SLEEP);
- for (i = 0; i < spa->spa_nspares; i++)
- spares[i] = vdev_config_generate(spa,
- spa->spa_spares[i], B_FALSE, B_TRUE);
- VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- spares, spa->spa_nspares) == 0);
- for (i = 0; i < spa->spa_nspares; i++)
- nvlist_free(spares[i]);
- kmem_free(spares, spa->spa_nspares * sizeof (void *));
- }
-
- spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx);
- nvlist_free(nvroot);
-
- spa->spa_sync_spares = B_FALSE;
-}
-
-static void
-spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
-{
- nvlist_t *config;
-
- if (list_is_empty(&spa->spa_dirty_list))
- return;
-
- config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE);
-
- if (spa->spa_config_syncing)
- nvlist_free(spa->spa_config_syncing);
- spa->spa_config_syncing = config;
-
- spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
-}
-
-static void
-spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- spa_t *spa = arg1;
- nvlist_t *nvp = arg2;
- nvpair_t *nvpair;
- objset_t *mos = spa->spa_meta_objset;
- uint64_t zapobj;
-
- mutex_enter(&spa->spa_props_lock);
- if (spa->spa_pool_props_object == 0) {
- zapobj = zap_create(mos, DMU_OT_POOL_PROPS, DMU_OT_NONE, 0, tx);
- VERIFY(zapobj > 0);
-
- spa->spa_pool_props_object = zapobj;
-
- VERIFY(zap_update(mos, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_PROPS, 8, 1,
- &spa->spa_pool_props_object, tx) == 0);
- }
- mutex_exit(&spa->spa_props_lock);
-
- nvpair = NULL;
- while ((nvpair = nvlist_next_nvpair(nvp, nvpair))) {
- switch (zpool_name_to_prop(nvpair_name(nvpair))) {
- case ZFS_PROP_BOOTFS:
- VERIFY(nvlist_lookup_uint64(nvp,
- nvpair_name(nvpair), &spa->spa_bootfs) == 0);
- VERIFY(zap_update(mos,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZFS_PROP_BOOTFS), 8, 1,
- &spa->spa_bootfs, tx) == 0);
- break;
- }
- }
-}
-
-/*
- * Sync the specified transaction group. New blocks may be dirtied as
- * part of the process, so we iterate until it converges.
- */
-void
-spa_sync(spa_t *spa, uint64_t txg)
-{
- dsl_pool_t *dp = spa->spa_dsl_pool;
- objset_t *mos = spa->spa_meta_objset;
- bplist_t *bpl = &spa->spa_sync_bplist;
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- dmu_tx_t *tx;
- int dirty_vdevs;
-
- /*
- * Lock out configuration changes.
- */
- spa_config_enter(spa, RW_READER, FTAG);
-
- spa->spa_syncing_txg = txg;
- spa->spa_sync_pass = 0;
-
- VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));
-
- tx = dmu_tx_create_assigned(dp, txg);
-
- /*
- * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg,
- * set spa_deflate if we have no raid-z vdevs.
- */
- if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE &&
- spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) {
- int i;
-
- for (i = 0; i < rvd->vdev_children; i++) {
- vd = rvd->vdev_child[i];
- if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
- break;
- }
- if (i == rvd->vdev_children) {
- spa->spa_deflate = TRUE;
- VERIFY(0 == zap_add(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
- sizeof (uint64_t), 1, &spa->spa_deflate, tx));
- }
- }
-
- /*
- * If anything has changed in this txg, push the deferred frees
- * from the previous txg. If not, leave them alone so that we
- * don't generate work on an otherwise idle system.
- */
- if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
- !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
- !txg_list_empty(&dp->dp_sync_tasks, txg))
- spa_sync_deferred_frees(spa, txg);
-
- /*
- * Iterate to convergence.
- */
- do {
- spa->spa_sync_pass++;
-
- spa_sync_config_object(spa, tx);
- spa_sync_spares(spa, tx);
- spa_errlog_sync(spa, txg);
- dsl_pool_sync(dp, txg);
-
- dirty_vdevs = 0;
- while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) {
- vdev_sync(vd, txg);
- dirty_vdevs++;
- }
-
- bplist_sync(bpl, tx);
- } while (dirty_vdevs);
-
- bplist_close(bpl);
-
- dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass);
-
- /*
- * Rewrite the vdev configuration (which includes the uberblock)
- * to commit the transaction group.
- *
- * If there are any dirty vdevs, sync the uberblock to all vdevs.
- * Otherwise, pick a random top-level vdev that's known to be
- * visible in the config cache (see spa_vdev_add() for details).
- * If the write fails, try the next vdev until we're tried them all.
- */
- if (!list_is_empty(&spa->spa_dirty_list)) {
- VERIFY(vdev_config_sync(rvd, txg) == 0);
- } else {
- int children = rvd->vdev_children;
- int c0 = spa_get_random(children);
- int c;
-
- for (c = 0; c < children; c++) {
- vd = rvd->vdev_child[(c0 + c) % children];
- if (vd->vdev_ms_array == 0)
- continue;
- if (vdev_config_sync(vd, txg) == 0)
- break;
- }
- if (c == children)
- VERIFY(vdev_config_sync(rvd, txg) == 0);
- }
-
- dmu_tx_commit(tx);
-
- /*
- * Clear the dirty config list.
- */
- while ((vd = list_head(&spa->spa_dirty_list)) != NULL)
- vdev_config_clean(vd);
-
- /*
- * Now that the new config has synced transactionally,
- * let it become visible to the config cache.
- */
- if (spa->spa_config_syncing != NULL) {
- spa_config_set(spa, spa->spa_config_syncing);
- spa->spa_config_txg = txg;
- spa->spa_config_syncing = NULL;
- }
-
- /*
- * Make a stable copy of the fully synced uberblock.
- * We use this as the root for pool traversals.
- */
- spa->spa_traverse_wanted = 1; /* tells traverse_more() to stop */
-
- spa_scrub_suspend(spa); /* stop scrubbing and finish I/Os */
-
- rw_enter(&spa->spa_traverse_lock, RW_WRITER);
- spa->spa_traverse_wanted = 0;
- spa->spa_ubsync = spa->spa_uberblock;
- rw_exit(&spa->spa_traverse_lock);
-
- spa_scrub_resume(spa); /* resume scrub with new ubsync */
-
- /*
- * Clean up the ZIL records for the synced txg.
- */
- dsl_pool_zil_clean(dp);
-
- /*
- * Update usable space statistics.
- */
- while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
- vdev_sync_done(vd, txg);
-
- /*
- * It had better be the case that we didn't dirty anything
- * since vdev_config_sync().
- */
- ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
- ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
- ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
- ASSERT(bpl->bpl_queue == NULL);
-
- spa_config_exit(spa, FTAG);
-
- /*
- * If any async tasks have been requested, kick them off.
- */
- spa_async_dispatch(spa);
-}
-
-/*
- * Sync all pools. We don't want to hold the namespace lock across these
- * operations, so we take a reference on the spa_t and drop the lock during the
- * sync.
- */
-void
-spa_sync_allpools(void)
-{
- spa_t *spa = NULL;
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (spa_state(spa) != POOL_STATE_ACTIVE)
- continue;
- spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
- txg_wait_synced(spa_get_dsl(spa), 0);
- mutex_enter(&spa_namespace_lock);
- spa_close(spa, FTAG);
- }
- mutex_exit(&spa_namespace_lock);
-}
-
-/*
- * ==========================================================================
- * Miscellaneous routines
- * ==========================================================================
- */
-
-/*
- * Remove all pools in the system.
- */
-void
-spa_evict_all(void)
-{
- spa_t *spa;
-
- /*
- * Remove all cached state. All pools should be closed now,
- * so every spa in the AVL tree should be unreferenced.
- */
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(NULL)) != NULL) {
- /*
- * Stop async tasks. The async thread may need to detach
- * a device that's been replaced, which requires grabbing
- * spa_namespace_lock, so we must drop it here.
- */
- spa_open_ref(spa, FTAG);
- mutex_exit(&spa_namespace_lock);
- spa_async_suspend(spa);
- VERIFY(spa_scrub(spa, POOL_SCRUB_NONE, B_TRUE) == 0);
- mutex_enter(&spa_namespace_lock);
- spa_close(spa, FTAG);
-
- if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
- spa_unload(spa);
- spa_deactivate(spa);
- }
- spa_remove(spa);
- }
- mutex_exit(&spa_namespace_lock);
-}
-
-vdev_t *
-spa_lookup_by_guid(spa_t *spa, uint64_t guid)
-{
- return (vdev_lookup_by_guid(spa->spa_root_vdev, guid));
-}
-
-void
-spa_upgrade(spa_t *spa)
-{
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- /*
- * This should only be called for a non-faulted pool, and since a
- * future version would result in an unopenable pool, this shouldn't be
- * possible.
- */
- ASSERT(spa->spa_uberblock.ub_version <= ZFS_VERSION);
-
- spa->spa_uberblock.ub_version = ZFS_VERSION;
- vdev_config_dirty(spa->spa_root_vdev);
-
- spa_config_exit(spa, FTAG);
-
- txg_wait_synced(spa_get_dsl(spa), 0);
-}
-
-boolean_t
-spa_has_spare(spa_t *spa, uint64_t guid)
-{
- int i;
- uint64_t spareguid;
-
- for (i = 0; i < spa->spa_nspares; i++)
- if (spa->spa_spares[i]->vdev_guid == guid)
- return (B_TRUE);
-
- for (i = 0; i < spa->spa_pending_nspares; i++) {
- if (nvlist_lookup_uint64(spa->spa_pending_spares[i],
- ZPOOL_CONFIG_GUID, &spareguid) == 0 &&
- spareguid == guid)
- return (B_TRUE);
- }
-
- return (B_FALSE);
-}
-
-int
-spa_set_props(spa_t *spa, nvlist_t *nvp)
-{
- return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
- spa, nvp, 3));
-}
-
-int
-spa_get_props(spa_t *spa, nvlist_t **nvp)
-{
- zap_cursor_t zc;
- zap_attribute_t za;
- objset_t *mos = spa->spa_meta_objset;
- zfs_source_t src;
- zfs_prop_t prop;
- nvlist_t *propval;
- uint64_t value;
- int err;
-
- VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- mutex_enter(&spa->spa_props_lock);
- /* If no props object, then just return empty nvlist */
- if (spa->spa_pool_props_object == 0) {
- mutex_exit(&spa->spa_props_lock);
- return (0);
- }
-
- for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
- (err = zap_cursor_retrieve(&zc, &za)) == 0;
- zap_cursor_advance(&zc)) {
-
- if ((prop = zpool_name_to_prop(za.za_name)) == ZFS_PROP_INVAL)
- continue;
-
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- switch (za.za_integer_length) {
- case 8:
- if (zfs_prop_default_numeric(prop) ==
- za.za_first_integer)
- src = ZFS_SRC_DEFAULT;
- else
- src = ZFS_SRC_LOCAL;
- value = za.za_first_integer;
-
- if (prop == ZFS_PROP_BOOTFS) {
- dsl_pool_t *dp;
- dsl_dataset_t *ds = NULL;
- char strval[MAXPATHLEN];
-
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if ((err = dsl_dataset_open_obj(dp,
- za.za_first_integer, NULL, DS_MODE_NONE,
- FTAG, &ds)) != 0) {
- rw_exit(&dp->dp_config_rwlock);
- break;
- }
- dsl_dataset_name(ds, strval);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- rw_exit(&dp->dp_config_rwlock);
-
- VERIFY(nvlist_add_uint64(propval,
- ZFS_PROP_SOURCE, src) == 0);
- VERIFY(nvlist_add_string(propval,
- ZFS_PROP_VALUE, strval) == 0);
- } else {
- VERIFY(nvlist_add_uint64(propval,
- ZFS_PROP_SOURCE, src) == 0);
- VERIFY(nvlist_add_uint64(propval,
- ZFS_PROP_VALUE, value) == 0);
- }
- VERIFY(nvlist_add_nvlist(*nvp, za.za_name,
- propval) == 0);
- break;
- }
- nvlist_free(propval);
- }
- zap_cursor_fini(&zc);
- mutex_exit(&spa->spa_props_lock);
- if (err && err != ENOENT) {
- nvlist_free(*nvp);
- return (err);
- }
-
- return (0);
-}
-
-/*
- * If the bootfs property value is dsobj, clear it.
- */
-void
-spa_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
-{
- if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
- VERIFY(zap_remove(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZFS_PROP_BOOTFS), tx) == 0);
- spa->spa_bootfs = 0;
- }
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_config.c b/sys/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
deleted file mode 100644
index 9e8bcf3..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/nvpair.h>
-#include <sys/uio.h>
-#include <sys/fs/zfs.h>
-#include <sys/vdev_impl.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/utsname.h>
-#include <sys/sunddi.h>
-#ifdef _KERNEL
-#include <sys/kobj.h>
-#endif
-
-/*
- * Pool configuration repository.
- *
- * The configuration for all pools, in addition to being stored on disk, is
- * stored in /etc/zfs/zpool.cache as a packed nvlist. The kernel maintains
- * this list as pools are created, destroyed, or modified.
- *
- * We have a single nvlist which holds all the configuration information. When
- * the module loads, we read this information from the cache and populate the
- * SPA namespace. This namespace is maintained independently in spa.c.
- * Whenever the namespace is modified, or the configuration of a pool is
- * changed, we call spa_config_sync(), which walks through all the active pools
- * and writes the configuration to disk.
- */
-
-static uint64_t spa_config_generation = 1;
-
-/*
- * This can be overridden in userland to preserve an alternate namespace for
- * userland pools when doing testing.
- */
-const char *spa_config_dir = ZPOOL_CACHE_DIR;
-
-/*
- * Called when the module is first loaded, this routine loads the configuration
- * file into the SPA namespace. It does not actually open or load the pools; it
- * only populates the namespace.
- */
-void
-spa_config_load(void)
-{
- void *buf = NULL;
- nvlist_t *nvlist, *child;
- nvpair_t *nvpair;
- spa_t *spa;
- char pathname[128];
- struct _buf *file;
- uint64_t fsize;
-
- /*
- * Open the configuration file.
- */
- (void) snprintf(pathname, sizeof (pathname), "%s/%s",
- spa_config_dir, ZPOOL_CACHE_FILE);
-
- file = kobj_open_file(pathname);
- if (file == (struct _buf *)-1) {
- ZFS_LOG(1, "Cannot open %s.", pathname);
- return;
- }
-
- if (kobj_get_filesize(file, &fsize) != 0) {
- ZFS_LOG(1, "Cannot get size of %s.", pathname);
- goto out;
- }
-
- buf = kmem_alloc(fsize, KM_SLEEP);
-
- /*
- * Read the nvlist from the file.
- */
- if (kobj_read_file(file, buf, fsize, 0) < 0) {
- ZFS_LOG(1, "Cannot read %s.", pathname);
- goto out;
- }
-
- /*
- * Unpack the nvlist.
- */
- if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
- goto out;
-
- ZFS_LOG(1, "File %s loaded.", pathname);
-
- /*
- * Iterate over all elements in the nvlist, creating a new spa_t for
- * each one with the specified configuration.
- */
- mutex_enter(&spa_namespace_lock);
- nvpair = NULL;
- while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
-
- if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
- continue;
-
- VERIFY(nvpair_value_nvlist(nvpair, &child) == 0);
-
- if (spa_lookup(nvpair_name(nvpair)) != NULL)
- continue;
- spa = spa_add(nvpair_name(nvpair), NULL);
-
- /*
- * We blindly duplicate the configuration here. If it's
- * invalid, we will catch it when the pool is first opened.
- */
- VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0);
- }
- mutex_exit(&spa_namespace_lock);
-
- nvlist_free(nvlist);
-
-out:
- if (buf != NULL)
- kmem_free(buf, fsize);
-
- kobj_close_file(file);
-}
-
-/*
- * Synchronize all pools to disk. This must be called with the namespace lock
- * held.
- */
-void
-spa_config_sync(void)
-{
- spa_t *spa = NULL;
- nvlist_t *config;
- size_t buflen;
- char *buf;
- vnode_t *vp;
- int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
- char pathname[128];
- char pathname2[128];
-
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
- VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- /*
- * Add all known pools to the configuration list, ignoring those with
- * alternate root paths.
- */
- spa = NULL;
- while ((spa = spa_next(spa)) != NULL) {
- mutex_enter(&spa->spa_config_cache_lock);
- if (spa->spa_config && spa->spa_name && spa->spa_root == NULL)
- VERIFY(nvlist_add_nvlist(config, spa->spa_name,
- spa->spa_config) == 0);
- mutex_exit(&spa->spa_config_cache_lock);
- }
-
- /*
- * Pack the configuration into a buffer.
- */
- VERIFY(nvlist_size(config, &buflen, NV_ENCODE_XDR) == 0);
-
- buf = kmem_alloc(buflen, KM_SLEEP);
-
- VERIFY(nvlist_pack(config, &buf, &buflen, NV_ENCODE_XDR,
- KM_SLEEP) == 0);
-
- /*
- * Write the configuration to disk. We need to do the traditional
- * 'write to temporary file, sync, move over original' to make sure we
- * always have a consistent view of the data.
- */
- (void) snprintf(pathname, sizeof (pathname), "%s/%s", spa_config_dir,
- ZPOOL_CACHE_TMP);
-
- if (vn_open(pathname, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) != 0)
- goto out;
-
- if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
- 0, RLIM64_INFINITY, kcred, NULL) == 0 &&
- VOP_FSYNC(vp, FSYNC, kcred) == 0) {
- (void) snprintf(pathname2, sizeof (pathname2), "%s/%s",
- spa_config_dir, ZPOOL_CACHE_FILE);
- (void) vn_rename(pathname, pathname2, UIO_SYSSPACE);
- }
-
- (void) VOP_CLOSE(vp, oflags, 1, 0, kcred);
- VN_RELE(vp);
-
-out:
- (void) vn_remove(pathname, UIO_SYSSPACE, RMFILE);
- spa_config_generation++;
-
- kmem_free(buf, buflen);
- nvlist_free(config);
-}
-
-/*
- * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache,
- * and we don't want to allow the local zone to see all the pools anyway.
- * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
- * information for all pool visible within the zone.
- */
-nvlist_t *
-spa_all_configs(uint64_t *generation)
-{
- nvlist_t *pools;
- spa_t *spa;
-
- if (*generation == spa_config_generation)
- return (NULL);
-
- VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- spa = NULL;
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (INGLOBALZONE(curproc) ||
- zone_dataset_visible(spa_name(spa), NULL)) {
- mutex_enter(&spa->spa_config_cache_lock);
- VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
- spa->spa_config) == 0);
- mutex_exit(&spa->spa_config_cache_lock);
- }
- }
- mutex_exit(&spa_namespace_lock);
-
- *generation = spa_config_generation;
-
- return (pools);
-}
-
-void
-spa_config_set(spa_t *spa, nvlist_t *config)
-{
- mutex_enter(&spa->spa_config_cache_lock);
- if (spa->spa_config != NULL)
- nvlist_free(spa->spa_config);
- spa->spa_config = config;
- mutex_exit(&spa->spa_config_cache_lock);
-}
-
-/*
- * Generate the pool's configuration based on the current in-core state.
- * We infer whether to generate a complete config or just one top-level config
- * based on whether vd is the root vdev.
- */
-nvlist_t *
-spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
-{
- nvlist_t *config, *nvroot;
- vdev_t *rvd = spa->spa_root_vdev;
- unsigned long hostid = 0;
-
- ASSERT(spa_config_held(spa, RW_READER));
-
- if (vd == NULL)
- vd = rvd;
-
- /*
- * If txg is -1, report the current value of spa->spa_config_txg.
- */
- if (txg == -1ULL)
- txg = spa->spa_config_txg;
-
- VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
- spa_name(spa)) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- spa_state(spa)) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- txg) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- spa_guid(spa)) == 0);
- (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
- hostid) == 0);
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
- utsname.nodename) == 0);
-
- if (vd != rvd) {
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
- vd->vdev_top->vdev_guid) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
- if (vd->vdev_isspare)
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
- 1ULL) == 0);
- vd = vd->vdev_top; /* label contains top config */
- }
-
- nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
- nvlist_free(nvroot);
-
- return (config);
-}
-
-/*
- * Update all disk labels, generate a fresh config based on the current
- * in-core state, and sync the global config cache.
- */
-void
-spa_config_update(spa_t *spa, int what)
-{
- vdev_t *rvd = spa->spa_root_vdev;
- uint64_t txg;
- int c;
-
- ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- txg = spa_last_synced_txg(spa) + 1;
- if (what == SPA_CONFIG_UPDATE_POOL) {
- vdev_config_dirty(rvd);
- } else {
- /*
- * If we have top-level vdevs that were added but have
- * not yet been prepared for allocation, do that now.
- * (It's safe now because the config cache is up to date,
- * so it will be able to translate the new DVAs.)
- * See comments in spa_vdev_add() for full details.
- */
- for (c = 0; c < rvd->vdev_children; c++) {
- vdev_t *tvd = rvd->vdev_child[c];
- if (tvd->vdev_ms_array == 0) {
- vdev_init(tvd, txg);
- vdev_config_dirty(tvd);
- }
- }
- }
- spa_config_exit(spa, FTAG);
-
- /*
- * Wait for the mosconfig to be regenerated and synced.
- */
- txg_wait_synced(spa->spa_dsl_pool, txg);
-
- /*
- * Update the global config cache to reflect the new mosconfig.
- */
- spa_config_sync();
-
- if (what == SPA_CONFIG_UPDATE_POOL)
- spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c b/sys/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
deleted file mode 100644
index c52acaf..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Routines to manage the on-disk persistent error log.
- *
- * Each pool stores a log of all logical data errors seen during normal
- * operation. This is actually the union of two distinct logs: the last log,
- * and the current log. All errors seen are logged to the current log. When a
- * scrub completes, the current log becomes the last log, the last log is thrown
- * out, and the current log is reinitialized. This way, if an error is somehow
- * corrected, a new scrub will show that that it no longer exists, and will be
- * deleted from the log when the scrub completes.
- *
- * The log is stored using a ZAP object whose key is a string form of the
- * zbookmark tuple (objset, object, level, blkid), and whose contents is an
- * optional 'objset:object' human-readable string describing the data. When an
- * error is first logged, this string will be empty, indicating that no name is
- * known. This prevents us from having to issue a potentially large amount of
- * I/O to discover the object name during an error path. Instead, we do the
- * calculation when the data is requested, storing the result so future queries
- * will be faster.
- *
- * This log is then shipped into an nvlist where the key is the dataset name and
- * the value is the object name. Userland is then responsible for uniquifying
- * this list and displaying it to the user.
- */
-
-#include <sys/dmu_tx.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/zap.h>
-#include <sys/zio.h>
-
-/*
- * This is a stripped-down version of strtoull, suitable only for converting
- * lowercase hexidecimal numbers that don't overflow.
- */
-#ifdef _KERNEL
-static uint64_t
-_strtonum(char *str, char **nptr)
-{
- uint64_t val = 0;
- char c;
- int digit;
-
- while ((c = *str) != '\0') {
- if (c >= '0' && c <= '9')
- digit = c - '0';
- else if (c >= 'a' && c <= 'f')
- digit = 10 + c - 'a';
- else
- break;
-
- val *= 16;
- val += digit;
-
- str++;
- }
-
- *nptr = str;
-
- return (val);
-}
-#endif
-
-/*
- * Convert a bookmark to a string.
- */
-static void
-bookmark_to_name(zbookmark_t *zb, char *buf, size_t len)
-{
- (void) snprintf(buf, len, "%llx:%llx:%llx:%llx",
- (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid);
-}
-
-/*
- * Convert a string to a bookmark
- */
-#ifdef _KERNEL
-static void
-name_to_bookmark(char *buf, zbookmark_t *zb)
-{
- zb->zb_objset = _strtonum(buf, &buf);
- ASSERT(*buf == ':');
- zb->zb_object = _strtonum(buf + 1, &buf);
- ASSERT(*buf == ':');
- zb->zb_level = (int)_strtonum(buf + 1, &buf);
- ASSERT(*buf == ':');
- zb->zb_blkid = _strtonum(buf + 1, &buf);
- ASSERT(*buf == '\0');
-}
-#endif
-
-/*
- * Log an uncorrectable error to the persistent error log. We add it to the
- * spa's list of pending errors. The changes are actually synced out to disk
- * during spa_errlog_sync().
- */
-void
-spa_log_error(spa_t *spa, zio_t *zio)
-{
- zbookmark_t *zb = &zio->io_logical->io_bookmark;
- spa_error_entry_t search;
- spa_error_entry_t *new;
- avl_tree_t *tree;
- avl_index_t where;
-
- /*
- * If we are trying to import a pool, ignore any errors, as we won't be
- * writing to the pool any time soon.
- */
- if (spa->spa_load_state == SPA_LOAD_TRYIMPORT)
- return;
-
- mutex_enter(&spa->spa_errlist_lock);
-
- /*
- * If we have had a request to rotate the log, log it to the next list
- * instead of the current one.
- */
- if (spa->spa_scrub_active || spa->spa_scrub_finished)
- tree = &spa->spa_errlist_scrub;
- else
- tree = &spa->spa_errlist_last;
-
- search.se_bookmark = *zb;
- if (avl_find(tree, &search, &where) != NULL) {
- mutex_exit(&spa->spa_errlist_lock);
- return;
- }
-
- new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
- new->se_bookmark = *zb;
- avl_insert(tree, new, where);
-
- mutex_exit(&spa->spa_errlist_lock);
-}
-
-/*
- * Return the number of errors currently in the error log. This is actually the
- * sum of both the last log and the current log, since we don't know the union
- * of these logs until we reach userland.
- */
-uint64_t
-spa_get_errlog_size(spa_t *spa)
-{
- uint64_t total = 0, count;
-
- mutex_enter(&spa->spa_errlog_lock);
- if (spa->spa_errlog_scrub != 0 &&
- zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub,
- &count) == 0)
- total += count;
-
- if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished &&
- zap_count(spa->spa_meta_objset, spa->spa_errlog_last,
- &count) == 0)
- total += count;
- mutex_exit(&spa->spa_errlog_lock);
-
- mutex_enter(&spa->spa_errlist_lock);
- total += avl_numnodes(&spa->spa_errlist_last);
- total += avl_numnodes(&spa->spa_errlist_scrub);
- mutex_exit(&spa->spa_errlist_lock);
-
- return (total);
-}
-
-#ifdef _KERNEL
-static int
-process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count)
-{
- zap_cursor_t zc;
- zap_attribute_t za;
- zbookmark_t zb;
-
- if (obj == 0)
- return (0);
-
- for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
-
- if (*count == 0) {
- zap_cursor_fini(&zc);
- return (ENOMEM);
- }
-
- name_to_bookmark(za.za_name, &zb);
-
- if (copyout(&zb, (char *)addr +
- (*count - 1) * sizeof (zbookmark_t),
- sizeof (zbookmark_t)) != 0)
- return (EFAULT);
-
- *count -= 1;
- }
-
- zap_cursor_fini(&zc);
-
- return (0);
-}
-
-static int
-process_error_list(avl_tree_t *list, void *addr, size_t *count)
-{
- spa_error_entry_t *se;
-
- for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {
-
- if (*count == 0)
- return (ENOMEM);
-
- if (copyout(&se->se_bookmark, (char *)addr +
- (*count - 1) * sizeof (zbookmark_t),
- sizeof (zbookmark_t)) != 0)
- return (EFAULT);
-
- *count -= 1;
- }
-
- return (0);
-}
-#endif
-
-/*
- * Copy all known errors to userland as an array of bookmarks. This is
- * actually a union of the on-disk last log and current log, as well as any
- * pending error requests.
- *
- * Because the act of reading the on-disk log could cause errors to be
- * generated, we have two separate locks: one for the error log and one for the
- * in-core error lists. We only need the error list lock to log and error, so
- * we grab the error log lock while we read the on-disk logs, and only pick up
- * the error list lock when we are finished.
- */
-int
-spa_get_errlog(spa_t *spa, void *uaddr, size_t *count)
-{
- int ret = 0;
-
-#ifdef _KERNEL
- mutex_enter(&spa->spa_errlog_lock);
-
- ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count);
-
- if (!ret && !spa->spa_scrub_finished)
- ret = process_error_log(spa, spa->spa_errlog_last, uaddr,
- count);
-
- mutex_enter(&spa->spa_errlist_lock);
- if (!ret)
- ret = process_error_list(&spa->spa_errlist_scrub, uaddr,
- count);
- if (!ret)
- ret = process_error_list(&spa->spa_errlist_last, uaddr,
- count);
- mutex_exit(&spa->spa_errlist_lock);
-
- mutex_exit(&spa->spa_errlog_lock);
-#endif
-
- return (ret);
-}
-
-/*
- * Called when a scrub completes. This simply set a bit which tells which AVL
- * tree to add new errors. spa_errlog_sync() is responsible for actually
- * syncing the changes to the underlying objects.
- */
-void
-spa_errlog_rotate(spa_t *spa)
-{
- mutex_enter(&spa->spa_errlist_lock);
-
- ASSERT(!spa->spa_scrub_finished);
- spa->spa_scrub_finished = B_TRUE;
-
- mutex_exit(&spa->spa_errlist_lock);
-}
-
-/*
- * Discard any pending errors from the spa_t. Called when unloading a faulted
- * pool, as the errors encountered during the open cannot be synced to disk.
- */
-void
-spa_errlog_drain(spa_t *spa)
-{
- spa_error_entry_t *se;
- void *cookie;
-
- mutex_enter(&spa->spa_errlist_lock);
-
- cookie = NULL;
- while ((se = avl_destroy_nodes(&spa->spa_errlist_last,
- &cookie)) != NULL)
- kmem_free(se, sizeof (spa_error_entry_t));
- cookie = NULL;
- while ((se = avl_destroy_nodes(&spa->spa_errlist_scrub,
- &cookie)) != NULL)
- kmem_free(se, sizeof (spa_error_entry_t));
-
- mutex_exit(&spa->spa_errlist_lock);
-}
-
-/*
- * Process a list of errors into the current on-disk log.
- */
-static void
-sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
-{
- spa_error_entry_t *se;
- char buf[64];
- void *cookie;
-
- if (avl_numnodes(t) != 0) {
- /* create log if necessary */
- if (*obj == 0)
- *obj = zap_create(spa->spa_meta_objset,
- DMU_OT_ERROR_LOG, DMU_OT_NONE,
- 0, tx);
-
- /* add errors to the current log */
- for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) {
- char *name = se->se_name ? se->se_name : "";
-
- bookmark_to_name(&se->se_bookmark, buf, sizeof (buf));
-
- (void) zap_update(spa->spa_meta_objset,
- *obj, buf, 1, strlen(name) + 1, name, tx);
- }
-
- /* purge the error list */
- cookie = NULL;
- while ((se = avl_destroy_nodes(t, &cookie)) != NULL)
- kmem_free(se, sizeof (spa_error_entry_t));
- }
-}
-
-/*
- * Sync the error log out to disk. This is a little tricky because the act of
- * writing the error log requires the spa_errlist_lock. So, we need to lock the
- * error lists, take a copy of the lists, and then reinitialize them. Then, we
- * drop the error list lock and take the error log lock, at which point we
- * do the errlog processing. Then, if we encounter an I/O error during this
- * process, we can successfully add the error to the list. Note that this will
- * result in the perpetual recycling of errors, but it is an unlikely situation
- * and not a performance critical operation.
- */
-void
-spa_errlog_sync(spa_t *spa, uint64_t txg)
-{
- dmu_tx_t *tx;
- avl_tree_t scrub, last;
- int scrub_finished;
-
- mutex_enter(&spa->spa_errlist_lock);
-
- /*
- * Bail out early under normal circumstances.
- */
- if (avl_numnodes(&spa->spa_errlist_scrub) == 0 &&
- avl_numnodes(&spa->spa_errlist_last) == 0 &&
- !spa->spa_scrub_finished) {
- mutex_exit(&spa->spa_errlist_lock);
- return;
- }
-
- spa_get_errlists(spa, &last, &scrub);
- scrub_finished = spa->spa_scrub_finished;
- spa->spa_scrub_finished = B_FALSE;
-
- mutex_exit(&spa->spa_errlist_lock);
- mutex_enter(&spa->spa_errlog_lock);
-
- tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
-
- /*
- * Sync out the current list of errors.
- */
- sync_error_list(spa, &last, &spa->spa_errlog_last, tx);
-
- /*
- * Rotate the log if necessary.
- */
- if (scrub_finished) {
- if (spa->spa_errlog_last != 0)
- VERIFY(dmu_object_free(spa->spa_meta_objset,
- spa->spa_errlog_last, tx) == 0);
- spa->spa_errlog_last = spa->spa_errlog_scrub;
- spa->spa_errlog_scrub = 0;
-
- sync_error_list(spa, &scrub, &spa->spa_errlog_last, tx);
- }
-
- /*
- * Sync out any pending scrub errors.
- */
- sync_error_list(spa, &scrub, &spa->spa_errlog_scrub, tx);
-
- /*
- * Update the MOS to reflect the new values.
- */
- (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_ERRLOG_LAST, sizeof (uint64_t), 1,
- &spa->spa_errlog_last, tx);
- (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_ERRLOG_SCRUB, sizeof (uint64_t), 1,
- &spa->spa_errlog_scrub, tx);
-
- dmu_tx_commit(tx);
-
- mutex_exit(&spa->spa_errlog_lock);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
deleted file mode 100644
index 6642801..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa_impl.h>
-#include <sys/zap.h>
-#include <sys/dsl_synctask.h>
-
-/*
- * Routines to manage the on-disk history log.
- *
- * The history log is stored as a dmu object containing
- * <packed record length, record nvlist> tuples.
- *
- * Where "record nvlist" is a nvlist containing uint64_ts and strings, and
- * "packed record length" is the packed length of the "record nvlist" stored
- * as a little endian uint64_t.
- *
- * The log is implemented as a ring buffer, though the original creation
- * of the pool ('zpool create') is never overwritten.
- *
- * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer
- * of 'spa_history' stores the offsets for logging/retrieving history as
- * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of
- * where the 'zpool create' record is stored. This allows us to never
- * overwrite the original creation of the pool. 'sh_phys_max_off' is the
- * physical ending offset in bytes of the log. This tells you the length of
- * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record
- * is added, 'sh_eof' is incremented by the the size of the record.
- * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes).
- * This is where the consumer should start reading from after reading in
- * the 'zpool create' portion of the log.
- *
- * 'sh_records_lost' keeps track of how many records have been overwritten
- * and permanently lost.
- */
-
-typedef enum history_log_type {
- LOG_CMD_CREATE,
- LOG_CMD_NO_CREATE
-} history_log_type_t;
-
-typedef struct history_arg {
- const char *ha_history_str;
- history_log_type_t ha_log_type;
-} history_arg_t;
-
-/* convert a logical offset to physical */
-static uint64_t
-spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp)
-{
- uint64_t phys_len;
-
- phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len;
- return ((log_off - shpp->sh_pool_create_len) % phys_len
- + shpp->sh_pool_create_len);
-}
-
-void
-spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
-{
- dmu_buf_t *dbp;
- spa_history_phys_t *shpp;
- objset_t *mos = spa->spa_meta_objset;
-
- ASSERT(spa->spa_history == 0);
- spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
- SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
- sizeof (spa_history_phys_t), tx);
-
- VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_HISTORY, sizeof (uint64_t), 1,
- &spa->spa_history, tx) == 0);
-
- VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
- ASSERT(dbp->db_size >= sizeof (spa_history_phys_t));
-
- shpp = dbp->db_data;
- dmu_buf_will_dirty(dbp, tx);
-
- /*
- * Figure out maximum size of history log. We set it at
- * 1% of pool size, with a max of 32MB and min of 128KB.
- */
- shpp->sh_phys_max_off = spa_get_dspace(spa) / 100;
- shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20);
- shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
-
- dmu_buf_rele(dbp, FTAG);
-}
-
-/*
- * Change 'sh_bof' to the beginning of the next record.
- */
-static int
-spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
-{
- objset_t *mos = spa->spa_meta_objset;
- uint64_t firstread, reclen, phys_bof;
- char buf[sizeof (reclen)];
- int err;
-
- phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp);
- firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
-
- if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
- buf)) != 0)
- return (err);
- if (firstread != sizeof (reclen)) {
- if ((err = dmu_read(mos, spa->spa_history,
- shpp->sh_pool_create_len, sizeof (reclen) - firstread,
- buf + firstread)) != 0)
- return (err);
- }
-
- reclen = LE_64(*((uint64_t *)buf));
- shpp->sh_bof += reclen + sizeof (reclen);
- shpp->sh_records_lost++;
- return (0);
-}
-
-static int
-spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
- dmu_tx_t *tx)
-{
- uint64_t firstwrite, phys_eof;
- objset_t *mos = spa->spa_meta_objset;
- int err;
-
- ASSERT(MUTEX_HELD(&spa->spa_history_lock));
-
- /* see if we need to reset logical BOF */
- while (shpp->sh_phys_max_off - shpp->sh_pool_create_len -
- (shpp->sh_eof - shpp->sh_bof) <= len) {
- if ((err = spa_history_advance_bof(spa, shpp)) != 0)
- return (err);
- }
-
- phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
- firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
- shpp->sh_eof += len;
- dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);
-
- len -= firstwrite;
- if (len > 0) {
- /* write out the rest at the beginning of physical file */
- dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
- len, (char *)buf + firstwrite, tx);
- }
-
- return (0);
-}
-
-/*
- * Write out a history event.
- */
-void
-spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- spa_t *spa = arg1;
- history_arg_t *hap = arg2;
- const char *history_str = hap->ha_history_str;
- objset_t *mos = spa->spa_meta_objset;
- dmu_buf_t *dbp;
- spa_history_phys_t *shpp;
- size_t reclen;
- uint64_t le_len;
- nvlist_t *nvrecord;
- char *record_packed = NULL;
- int ret;
-
- if (history_str == NULL)
- return;
-
- /*
- * If we have an older pool that doesn't have a command
- * history object, create it now.
- */
- mutex_enter(&spa->spa_history_lock);
- if (!spa->spa_history)
- spa_history_create_obj(spa, tx);
- mutex_exit(&spa->spa_history_lock);
-
- /*
- * Get the offset of where we need to write via the bonus buffer.
- * Update the offset when the write completes.
- */
- VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
- shpp = dbp->db_data;
-
- dmu_buf_will_dirty(dbp, tx);
-
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(dbp, &doi);
- ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
- }
-#endif
-
- /* construct a nvlist of the current time and cmd string */
- VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
- gethrestime_sec()) == 0);
- VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, history_str) == 0);
- VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
- NV_ENCODE_XDR, KM_SLEEP) == 0);
-
- mutex_enter(&spa->spa_history_lock);
- if (hap->ha_log_type == LOG_CMD_CREATE)
- VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);
-
- /* write out the packed length as little endian */
- le_len = LE_64((uint64_t)reclen);
- ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
- if (!ret)
- ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
-
- if (!ret && hap->ha_log_type == LOG_CMD_CREATE) {
- shpp->sh_pool_create_len += sizeof (le_len) + reclen;
- shpp->sh_bof = shpp->sh_pool_create_len;
- }
-
- mutex_exit(&spa->spa_history_lock);
- nvlist_free(nvrecord);
- kmem_free(record_packed, reclen);
- dmu_buf_rele(dbp, FTAG);
-}
-
-/*
- * Write out a history event.
- */
-int
-spa_history_log(spa_t *spa, const char *history_str, uint64_t pool_create)
-{
- history_arg_t ha;
-
- ha.ha_history_str = history_str;
- ha.ha_log_type = pool_create ? LOG_CMD_CREATE : LOG_CMD_NO_CREATE;
- return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_history_log_sync,
- spa, &ha, 0));
-}
-
-/*
- * Read out the command history.
- */
-int
-spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
-{
- objset_t *mos = spa->spa_meta_objset;
- dmu_buf_t *dbp;
- uint64_t read_len, phys_read_off, phys_eof;
- uint64_t leftover = 0;
- spa_history_phys_t *shpp;
- int err;
-
- /*
- * If the command history doesn't exist (older pool),
- * that's ok, just return ENOENT.
- */
- if (!spa->spa_history)
- return (ENOENT);
-
- if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0)
- return (err);
- shpp = dbp->db_data;
-
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(dbp, &doi);
- ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
- }
-#endif
-
- mutex_enter(&spa->spa_history_lock);
- phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
-
- if (*offp < shpp->sh_pool_create_len) {
- /* read in just the zpool create history */
- phys_read_off = *offp;
- read_len = MIN(*len, shpp->sh_pool_create_len -
- phys_read_off);
- } else {
- /*
- * Need to reset passed in offset to BOF if the passed in
- * offset has since been overwritten.
- */
- *offp = MAX(*offp, shpp->sh_bof);
- phys_read_off = spa_history_log_to_phys(*offp, shpp);
-
- /*
- * Read up to the minimum of what the user passed down or
- * the EOF (physical or logical). If we hit physical EOF,
- * use 'leftover' to read from the physical BOF.
- */
- if (phys_read_off <= phys_eof) {
- read_len = MIN(*len, phys_eof - phys_read_off);
- } else {
- read_len = MIN(*len,
- shpp->sh_phys_max_off - phys_read_off);
- if (phys_read_off + *len > shpp->sh_phys_max_off) {
- leftover = MIN(*len - read_len,
- phys_eof - shpp->sh_pool_create_len);
- }
- }
- }
-
- /* offset for consumer to use next */
- *offp += read_len + leftover;
-
- /* tell the consumer how much you actually read */
- *len = read_len + leftover;
-
- if (read_len == 0) {
- mutex_exit(&spa->spa_history_lock);
- dmu_buf_rele(dbp, FTAG);
- return (0);
- }
-
- err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf);
- if (leftover && err == 0) {
- err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
- leftover, buf + read_len);
- }
- mutex_exit(&spa->spa_history_lock);
-
- dmu_buf_rele(dbp, FTAG);
- return (err);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
deleted file mode 100644
index 5da1f96..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ /dev/null
@@ -1,1130 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa_impl.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/zap.h>
-#include <sys/zil.h>
-#include <sys/vdev_impl.h>
-#include <sys/metaslab.h>
-#include <sys/uberblock_impl.h>
-#include <sys/txg.h>
-#include <sys/avl.h>
-#include <sys/unique.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_prop.h>
-#include <sys/fs/zfs.h>
-
-/*
- * SPA locking
- *
- * There are four basic locks for managing spa_t structures:
- *
- * spa_namespace_lock (global mutex)
- *
- * This lock must be acquired to do any of the following:
- *
- * - Lookup a spa_t by name
- * - Add or remove a spa_t from the namespace
- * - Increase spa_refcount from non-zero
- * - Check if spa_refcount is zero
- * - Rename a spa_t
- * - add/remove/attach/detach devices
- * - Held for the duration of create/destroy/import/export
- *
- * It does not need to handle recursion. A create or destroy may
- * reference objects (files or zvols) in other pools, but by
- * definition they must have an existing reference, and will never need
- * to lookup a spa_t by name.
- *
- * spa_refcount (per-spa refcount_t protected by mutex)
- *
- * This reference count keeps track of any active users of the spa_t. The
- * spa_t cannot be destroyed or freed while this is non-zero. Internally,
- * the refcount is never really 'zero' - opening a pool implicitly keeps
- * some references in the DMU. Internally we check against SPA_MINREF, but
- * present the image of a zero/non-zero value to consumers.
- *
- * spa_config_lock (per-spa crazy rwlock)
- *
- * This SPA special is a recursive rwlock, capable of being acquired from
- * asynchronous threads. It protects the spa_t from config changes,
- * and must be held in the following circumstances:
- *
- * - RW_READER to perform I/O to the spa
- * - RW_WRITER to change the vdev config
- *
- * spa_config_cache_lock (per-spa mutex)
- *
- * This mutex prevents the spa_config nvlist from being updated. No
- * other locks are required to obtain this lock, although implicitly you
- * must have the namespace lock or non-zero refcount to have any kind
- * of spa_t pointer at all.
- *
- * The locking order is fairly straightforward:
- *
- * spa_namespace_lock -> spa_refcount
- *
- * The namespace lock must be acquired to increase the refcount from 0
- * or to check if it is zero.
- *
- * spa_refcount -> spa_config_lock
- *
- * There must be at least one valid reference on the spa_t to acquire
- * the config lock.
- *
- * spa_namespace_lock -> spa_config_lock
- *
- * The namespace lock must always be taken before the config lock.
- *
- *
- * The spa_namespace_lock and spa_config_cache_lock can be acquired directly and
- * are globally visible.
- *
- * The namespace is manipulated using the following functions, all of which require
- * the spa_namespace_lock to be held.
- *
- * spa_lookup() Lookup a spa_t by name.
- *
- * spa_add() Create a new spa_t in the namespace.
- *
- * spa_remove() Remove a spa_t from the namespace. This also
- * frees up any memory associated with the spa_t.
- *
- * spa_next() Returns the next spa_t in the system, or the
- * first if NULL is passed.
- *
- * spa_evict_all() Shutdown and remove all spa_t structures in
- * the system.
- *
- * spa_guid_exists() Determine whether a pool/device guid exists.
- *
- * The spa_refcount is manipulated using the following functions:
- *
- * spa_open_ref() Adds a reference to the given spa_t. Must be
- * called with spa_namespace_lock held if the
- * refcount is currently zero.
- *
- * spa_close() Remove a reference from the spa_t. This will
- * not free the spa_t or remove it from the
- * namespace. No locking is required.
- *
- * spa_refcount_zero() Returns true if the refcount is currently
- * zero. Must be called with spa_namespace_lock
- * held.
- *
- * The spa_config_lock is manipulated using the following functions:
- *
- * spa_config_enter() Acquire the config lock as RW_READER or
- * RW_WRITER. At least one reference on the spa_t
- * must exist.
- *
- * spa_config_exit() Release the config lock.
- *
- * spa_config_held() Returns true if the config lock is currently
- * held in the given state.
- *
- * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
- *
- * spa_vdev_enter() Acquire the namespace lock and the config lock
- * for writing.
- *
- * spa_vdev_exit() Release the config lock, wait for all I/O
- * to complete, sync the updated configs to the
- * cache, and release the namespace lock.
- *
- * The spa_name() function also requires either the spa_namespace_lock
- * or the spa_config_lock, as both are needed to do a rename. spa_rename() is
- * also implemented within this file since it requires manipulation of the
- * namespace.
- */
-
-static avl_tree_t spa_namespace_avl;	/* every spa_t, ordered by spa_name */
-kmutex_t spa_namespace_lock;		/* guards the namespace tree */
-static kcondvar_t spa_namespace_cv;	/* broadcast by spa_remove() */
-static int spa_active_count;		/* pools added with an altroot */
-int spa_max_replication_override = SPA_DVAS_PER_BP; /* cap in spa_max_replication() */
-
-static kmutex_t spa_spare_lock;		/* protects spa_spare_avl */
-static avl_tree_t spa_spare_avl;	/* spa_spare_t's, ordered by spare_guid */
-
-kmem_cache_t *spa_buffer_pool;
-int spa_mode;				/* 'mode' argument given to spa_init() */
-
-#ifdef ZFS_DEBUG
-int zfs_flags = ~0;			/* debug build: every flag bit set */
-#else
-int zfs_flags = 0;			/* non-debug build: no flag bits set */
-#endif
-
-/*
- * zfs_recover can be set to nonzero to attempt to recover from
- * otherwise-fatal errors, typically caused by on-disk corruption. When
- * set, calls to zfs_panic_recover() will turn into warning messages.
- */
-int zfs_recover = 0;
-SYSCTL_DECL(_vfs_zfs);
-TUNABLE_INT("vfs.zfs.recover", &zfs_recover);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0,
- "Try to recover from otherwise-fatal errors.");
-
-#define SPA_MINREF 5 /* spa_refcnt for an open-but-idle pool */
-
-/*
- * ==========================================================================
- * SPA namespace functions
- * ==========================================================================
- */
-
-/*
- * Find the spa_t registered under 'name' in the namespace AVL tree.
- * The caller must hold spa_namespace_lock.  Returns NULL when no pool
- * with that name is present.
- */
-spa_t *
-spa_lookup(const char *name)
-{
-	avl_index_t idx;
-	spa_t key;
-
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
-	/* Only spa_name is set; the rest of the on-stack key stays unset. */
-	key.spa_name = (char *)name;
-
-	return (avl_find(&spa_namespace_avl, &key, &idx));
-}
-
-/*
- * Allocate a zeroed spa_t named 'name' and insert it into the namespace
- * AVL tree.  spa_namespace_lock must be held, and the caller must already
- * have used spa_lookup() to make sure the name is not taken.  The new
- * spa_t starts out POOL_STATE_UNINITIALIZED.  A non-NULL 'altroot' is
- * copied into spa_root and counted in spa_active_count.
- */
-spa_t *
-spa_add(const char *name, const char *altroot)
-{
-	spa_t *spa;
-
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
-	spa = kmem_zalloc(sizeof (*spa), KM_SLEEP);
-
-	/* Identity and initial state. */
-	spa->spa_name = spa_strdup(name);
-	spa->spa_state = POOL_STATE_UNINITIALIZED;
-	spa->spa_freeze_txg = UINT64_MAX;
-	spa->spa_final_txg = UINT64_MAX;
-
-	/* Per-pool mutexes. */
-	mutex_init(&spa->spa_config_cache_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
-
-	/* Per-pool condition variables. */
-	cv_init(&spa->spa_scrub_cv, NULL, CV_DEFAULT, NULL);
-	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
-	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
-
-	/* Reference counts for the spa_t itself and for its config lock. */
-	refcount_create(&spa->spa_refcount);
-	refcount_create(&spa->spa_config_lock.scl_count);
-
-	avl_add(&spa_namespace_avl, spa);
-
-	/* The alternate root is optional; pools that have one are counted. */
-	if (altroot != NULL) {
-		spa->spa_root = spa_strdup(altroot);
-		spa_active_count++;
-	}
-
-	return (spa);
-}
-
-/*
- * Take a spa_t out of the namespace and release everything spa_add() set
- * up for it, then free the spa_t itself.  spa_namespace_lock must be held.
- * Only valid once the pool has been closed and deactivated: the state must
- * be POOL_STATE_UNINITIALIZED and no scrub thread may exist.
- */
-void
-spa_remove(spa_t *spa)
-{
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
-	ASSERT(spa->spa_scrub_thread == NULL);
-
-	/* Unlink first, then signal spa_namespace_cv. */
-	avl_remove(&spa_namespace_avl, spa);
-	cv_broadcast(&spa_namespace_cv);
-
-	/* Undo the altroot accounting done by spa_add(). */
-	if (spa->spa_root != NULL) {
-		spa_strfree(spa->spa_root);
-		spa_active_count--;
-	}
-
-	if (spa->spa_name != NULL)
-		spa_strfree(spa->spa_name);
-
-	spa_config_set(spa, NULL);
-
-	refcount_destroy(&spa->spa_refcount);
-	refcount_destroy(&spa->spa_config_lock.scl_count);
-
-	/* Condition variables and mutexes go in reverse order of creation. */
-	cv_destroy(&spa->spa_async_cv);
-	cv_destroy(&spa->spa_scrub_io_cv);
-	cv_destroy(&spa->spa_scrub_cv);
-
-	mutex_destroy(&spa->spa_scrub_lock);
-	mutex_destroy(&spa->spa_async_lock);
-	mutex_destroy(&spa->spa_config_cache_lock);
-
-	kmem_free(spa, sizeof (*spa));
-}
-
-/*
- * Namespace iterator.  With 'prev' == NULL the first pool in the tree is
- * returned; otherwise the pool following 'prev'.  Returns NULL when there
- * is nothing further.  spa_namespace_lock must be held.
- */
-spa_t *
-spa_next(spa_t *prev)
-{
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
-	return (prev != NULL ? AVL_NEXT(&spa_namespace_avl, prev) :
-	    avl_first(&spa_namespace_avl));
-}
-
-/*
- * ==========================================================================
- * SPA refcount functions
- * ==========================================================================
- */
-
-/*
- * Add a reference, identified by 'tag', to the given spa_t.  The caller must
- * either hold spa_namespace_lock, or the pool must already have more than
- * SPA_MINREF references; that is exactly what the assertion below checks.
- */
-void
-spa_open_ref(spa_t *spa, void *tag)
-{
- ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
- MUTEX_HELD(&spa_namespace_lock));
-
- (void) refcount_add(&spa->spa_refcount, tag);
-}
-
-/*
- * Remove the reference identified by 'tag' from the given spa_t.  On entry
- * the pool must have more than SPA_MINREF references, or the caller must
- * hold spa_namespace_lock (asserted below).  Nothing is freed here and the
- * spa_t stays in the namespace.
- */
-void
-spa_close(spa_t *spa, void *tag)
-{
- ASSERT(refcount_count(&spa->spa_refcount) > SPA_MINREF ||
- MUTEX_HELD(&spa_namespace_lock));
-
- (void) refcount_remove(&spa->spa_refcount, tag);
-}
-
-/*
- * Report whether the pool has no users.  "Zero" really means exactly
- * SPA_MINREF, the number of references held by an open-but-idle pool.
- * spa_namespace_lock must be held.
- */
-boolean_t
-spa_refcount_zero(spa_t *spa)
-{
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
-	if (refcount_count(&spa->spa_refcount) == SPA_MINREF)
-		return (B_TRUE);
-
-	return (B_FALSE);
-}
-
-/*
- * ==========================================================================
- * SPA spare tracking
- * ==========================================================================
- */
-
-/*
- * Spares are tracked globally due to the following constraints:
- *
- * - A spare may be part of multiple pools.
- * - A spare may be added to a pool even if it's actively in use within
- * another pool.
- * - A spare in use in any pool can only be the source of a replacement if
- * the target is a spare in the same pool.
- *
- * We keep track of all spares on the system through the use of a reference
- * counted AVL tree. When a vdev is added as a spare, or used as a replacement
- * spare, then we bump the reference count in the AVL tree. In addition, we set
- * the 'vdev_isspare' member to indicate that the device is a spare (active or
- * inactive). When a spare is made active (used to replace a device in the
- * pool), we also keep track of which pool it's been made a part of.
- *
- * The 'spa_spare_lock' protects the AVL tree. These functions are normally
- * called under the spa_namespace lock as part of vdev reconfiguration. The
- * separate spare lock exists for the status query path, which does not need to
- * be completely consistent with respect to other vdev configuration changes.
- */
-
-/* One node per spare guid in the global spa_spare_avl tree. */
-typedef struct spa_spare {
- uint64_t spare_guid; /* vdev guid; the AVL sort key */
- uint64_t spare_pool; /* guid of the pool the spare is active in, or 0 */
- avl_node_t spare_avl; /* linkage in spa_spare_avl */
- int spare_count; /* references taken by spa_spare_add() */
-} spa_spare_t;
-
-/*
- * AVL comparator for spa_spare_avl: orders spa_spare_t nodes by spare_guid
- * and returns -1, 0 or 1.
- */
-static int
-spa_spare_compare(const void *a, const void *b)
-{
-	uint64_t lhs = ((const spa_spare_t *)a)->spare_guid;
-	uint64_t rhs = ((const spa_spare_t *)b)->spare_guid;
-
-	if (lhs == rhs)
-		return (0);
-
-	return (lhs < rhs ? -1 : 1);
-}
-
-/*
- * Register 'vd' as a spare.  If its guid is already tracked the node's
- * reference count is bumped; otherwise a new node with a count of one is
- * inserted.  The vdev must not already be flagged as a spare (asserted);
- * it is flagged on return.
- */
-void
-spa_spare_add(vdev_t *vd)
-{
-	spa_spare_t search;
-	spa_spare_t *spare;
-	avl_index_t where;
-
-	mutex_enter(&spa_spare_lock);
-	ASSERT(!vd->vdev_isspare);
-
-	search.spare_guid = vd->vdev_guid;
-	spare = avl_find(&spa_spare_avl, &search, &where);
-	if (spare == NULL) {
-		/* First reference to this guid: create the tracking node. */
-		spare = kmem_zalloc(sizeof (*spare), KM_SLEEP);
-		spare->spare_guid = vd->vdev_guid;
-		spare->spare_count = 1;
-		avl_insert(&spa_spare_avl, spare, where);
-	} else {
-		spare->spare_count++;
-	}
-
-	vd->vdev_isspare = B_TRUE;
-	mutex_exit(&spa_spare_lock);
-}
-
-/*
- * Drop one reference on the spare tracked for 'vd'.  The node is removed
- * and freed when the last reference goes away; otherwise, if the spare was
- * recorded as active in vd's pool, that record is cleared.  The vdev's
- * spare flag is cleared on return.
- */
-void
-spa_spare_remove(vdev_t *vd)
-{
-	avl_index_t where;
-	spa_spare_t search;
-	spa_spare_t *spare;
-
-	mutex_enter(&spa_spare_lock);
-
-	search.spare_guid = vd->vdev_guid;
-	spare = avl_find(&spa_spare_avl, &search, &where);
-
-	ASSERT(vd->vdev_isspare);
-	ASSERT(spare != NULL);
-
-	spare->spare_count--;
-	if (spare->spare_count == 0) {
-		avl_remove(&spa_spare_avl, spare);
-		kmem_free(spare, sizeof (*spare));
-	} else if (spa_guid(vd->vdev_spa) == spare->spare_pool) {
-		/* Still referenced elsewhere, but no longer active here. */
-		spare->spare_pool = 0ULL;
-	}
-
-	vd->vdev_isspare = B_FALSE;
-	mutex_exit(&spa_spare_lock);
-}
-
-/*
- * Report whether a spare with the given guid is being tracked.  When 'pool'
- * is non-NULL it receives the guid of the pool the spare is active in, or
- * 0 if the spare is unknown or not active.
- */
-boolean_t
-spa_spare_exists(uint64_t guid, uint64_t *pool)
-{
-	spa_spare_t key, *node;
-	avl_index_t where;
-	boolean_t exists;
-
-	mutex_enter(&spa_spare_lock);
-
-	key.spare_guid = guid;
-	node = avl_find(&spa_spare_avl, &key, &where);
-	exists = (node != NULL);
-
-	if (pool != NULL)
-		*pool = exists ? node->spare_pool : 0ULL;
-
-	mutex_exit(&spa_spare_lock);
-
-	return (exists);
-}
-
-/*
- * Record that spare 'vd' is now in active use: the tracking node for
- * vd->vdev_guid gets spare_pool set to the guid of vd->vdev_spa.  The vdev
- * must already be flagged as a spare, must have a tracking node, and must
- * not be recorded as active in any pool (all three are asserted).
- */
-void
-spa_spare_activate(vdev_t *vd)
-{
- spa_spare_t search, *found;
- avl_index_t where;
-
- mutex_enter(&spa_spare_lock);
- ASSERT(vd->vdev_isspare);
-
- search.spare_guid = vd->vdev_guid;
- found = avl_find(&spa_spare_avl, &search, &where);
- ASSERT(found != NULL);
- ASSERT(found->spare_pool == 0ULL);
-
- found->spare_pool = spa_guid(vd->vdev_spa);
- mutex_exit(&spa_spare_lock);
-}
-
-/*
- * ==========================================================================
- * SPA config locking
- * ==========================================================================
- */
-
-/*
- * Acquire the config lock. The config lock is a special rwlock that allows for
- * recursive enters. Because these enters come from the same thread as well as
- * asynchronous threads working on behalf of the owner, we must unilaterally
- * allow all reads access as long as at least one reader is held (even if a
- * write is requested). This has the side effect of write starvation, but write
- * locks are extremely rare, and a solution to this problem would be
- * significantly more complex (if even possible).
- *
- * We would like to assert that the namespace lock isn't held, but this is a
- * valid use during create.
- *
- * 'rw' is RW_READER or RW_WRITER; 'tag' identifies this hold and must be
- * passed again to the matching spa_config_exit().
- */
-void
-spa_config_enter(spa_t *spa, krw_t rw, void *tag)
-{
- spa_config_lock_t *scl = &spa->spa_config_lock;
-
- mutex_enter(&scl->scl_lock);
-
- /* The thread that already owns the lock as writer re-enters at once. */
- if (scl->scl_writer != curthread) {
- if (rw == RW_READER) {
- /* Readers wait only for a writer, never for other readers. */
- while (scl->scl_writer != NULL)
- cv_wait(&scl->scl_cv, &scl->scl_lock);
- } else {
- /* A writer needs the lock idle: no writer and no holds. */
- while (scl->scl_writer != NULL ||
- !refcount_is_zero(&scl->scl_count))
- cv_wait(&scl->scl_cv, &scl->scl_lock);
- scl->scl_writer = curthread;
- }
- }
-
- /* Every enter, reader or writer, adds one hold under 'tag'. */
- (void) refcount_add(&scl->scl_count, tag);
-
- mutex_exit(&scl->scl_lock);
-}
-
-/*
- * Release the spa config lock, notifying any waiters in the process.
- * 'tag' must match the tag given to the corresponding spa_config_enter().
- * Waiters are woken, and writer ownership is cleared, only when the last
- * hold is dropped.
- */
-void
-spa_config_exit(spa_t *spa, void *tag)
-{
- spa_config_lock_t *scl = &spa->spa_config_lock;
-
- mutex_enter(&scl->scl_lock);
-
- ASSERT(!refcount_is_zero(&scl->scl_count));
- /* refcount_remove() returns the remaining count; 0 means fully idle. */
- if (refcount_remove(&scl->scl_count, tag) == 0) {
- cv_broadcast(&scl->scl_cv);
- scl->scl_writer = NULL; /* OK in either case */
- }
-
- mutex_exit(&scl->scl_lock);
-}
-
-/*
- * Query the config lock.  For RW_WRITER the answer is whether the calling
- * thread is the current writer; for any other 'rw' it is whether the lock
- * has at least one hold of any kind.
- */
-boolean_t
-spa_config_held(spa_t *spa, krw_t rw)
-{
-	spa_config_lock_t *scl = &spa->spa_config_lock;
-	boolean_t held;
-
-	mutex_enter(&scl->scl_lock);
-	held = (rw == RW_WRITER) ? (scl->scl_writer == curthread) :
-	    !refcount_is_zero(&scl->scl_count);
-	mutex_exit(&scl->scl_lock);
-
-	return (held);
-}
-
-/*
- * ==========================================================================
- * SPA vdev locking
- * ==========================================================================
- */
-
-/*
- * Lock the given spa_t for the purpose of adding or removing a vdev.
- * Grabs the global spa_namespace_lock plus the spa config lock for writing.
- * It returns the next transaction group for the spa_t, computed as the
- * last synced txg plus one.  Must be paired with spa_vdev_exit().
- */
-uint64_t
-spa_vdev_enter(spa_t *spa)
-{
- /*
- * Suspend scrub activity while we mess with the config.
- */
- spa_scrub_suspend(spa);
-
- /* Namespace lock first, then the config lock (see locking order above). */
- mutex_enter(&spa_namespace_lock);
-
- /* The spa_t itself is the hold tag; spa_vdev_exit() uses the same one. */
- spa_config_enter(spa, RW_WRITER, spa);
-
- return (spa_last_synced_txg(spa) + 1);
-}
-
-/*
- * Unlock the spa_t after adding or removing a vdev. Besides undoing the
- * locking of spa_vdev_enter(), we also want to make sure the transactions
- * have synced to disk, and then update the global configuration cache with
- * the new information.
- *
- * 'txg' must be later than the last synced txg (asserted).  'vd', if
- * non-NULL, is a vdev to free once the txg has synced.  'error' is the
- * outcome of the caller's operation; it is returned unchanged, and when it
- * is non-zero the scrub restart and txg wait are skipped.
- */
-int
-spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
-{
- int config_changed = B_FALSE;
-
- ASSERT(txg > spa_last_synced_txg(spa));
-
- /*
- * Reassess the DTLs.
- */
- vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
-
- /*
- * If the config changed, notify the scrub thread that it must restart.
- */
- if (error == 0 && !list_is_empty(&spa->spa_dirty_list)) {
- config_changed = B_TRUE;
- spa_scrub_restart(spa, txg);
- }
-
- /* Drops the writer hold taken by spa_vdev_enter() (tag is the spa_t). */
- spa_config_exit(spa, spa);
-
- /*
- * Allow scrubbing to resume.
- */
- spa_scrub_resume(spa);
-
- /*
- * Note: this txg_wait_synced() is important because it ensures
- * that there won't be more than one config change per txg.
- * This allows us to use the txg as the generation number.
- */
- if (error == 0)
- txg_wait_synced(spa->spa_dsl_pool, txg);
-
- if (vd != NULL) {
- ASSERT(!vd->vdev_detached || vd->vdev_dtl.smo_object == 0);
- vdev_free(vd);
- }
-
- /*
- * If the config changed, update the config cache.
- */
- if (config_changed)
- spa_config_sync();
-
- /* The namespace lock is released last, mirroring spa_vdev_enter(). */
- mutex_exit(&spa_namespace_lock);
-
- return (error);
-}
-
-/*
- * ==========================================================================
- * Miscellaneous functions
- * ==========================================================================
- */
-
-/*
- * Rename a spa_t.
- *
- * Opens the pool called 'name', re-keys it in the namespace tree under
- * 'newname', dirties the root vdev so the labels are rewritten, waits for
- * the sync, and then syncs the config cache.  Returns 0 on success or the
- * error from spa_open().
- *
- * NOTE(review): nothing here checks whether 'newname' is already in use
- * before avl_add() is called - confirm that callers guarantee this.
- */
-int
-spa_rename(const char *name, const char *newname)
-{
- spa_t *spa;
- int err;
-
- /*
- * Lookup the spa_t and grab the config lock for writing. We need to
- * actually open the pool so that we can sync out the necessary labels.
- * It's OK to call spa_open() with the namespace lock held because we
- * allow recursive calls for other reasons.
- */
- mutex_enter(&spa_namespace_lock);
- if ((err = spa_open(name, &spa, FTAG)) != 0) {
- mutex_exit(&spa_namespace_lock);
- return (err);
- }
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- /* The tree is keyed by name, so the node is removed and re-added. */
- avl_remove(&spa_namespace_avl, spa);
- spa_strfree(spa->spa_name);
- spa->spa_name = spa_strdup(newname);
- avl_add(&spa_namespace_avl, spa);
-
- /*
- * Sync all labels to disk with the new names by marking the root vdev
- * dirty and waiting for it to sync. It will pick up the new pool name
- * during the sync.
- */
- vdev_config_dirty(spa->spa_root_vdev);
-
- spa_config_exit(spa, FTAG);
-
- txg_wait_synced(spa->spa_dsl_pool, 0);
-
- /*
- * Sync the updated config cache.
- */
- spa_config_sync();
-
- spa_close(spa, FTAG);
-
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-
-/*
- * Determine whether a pool with the given pool_guid exists.  When
- * device_guid is non-zero the pool must additionally contain a device with
- * that guid, either in its current vdev tree or among the vdevs that are
- * in the process of being added.  spa_namespace_lock must be held.
- */
-boolean_t
-spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
-{
-	avl_tree_t *t = &spa_namespace_avl;
-	spa_t *spa;
-
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
-
-	for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) {
-		/* Pools that are uninitialized or lack a root vdev are skipped. */
-		if (spa->spa_state == POOL_STATE_UNINITIALIZED ||
-		    spa->spa_root_vdev == NULL)
-			continue;
-
-		if (spa_guid(spa) != pool_guid)
-			continue;
-
-		/* A zero device guid means "the pool alone is enough". */
-		if (device_guid == 0)
-			return (B_TRUE);
-
-		if (vdev_lookup_by_guid(spa->spa_root_vdev,
-		    device_guid) != NULL)
-			return (B_TRUE);
-
-		/*
-		 * Check any devices we may be in the process of adding.
-		 */
-		if (spa->spa_pending_vdev != NULL &&
-		    vdev_lookup_by_guid(spa->spa_pending_vdev,
-		    device_guid) != NULL)
-			return (B_TRUE);
-	}
-
-	return (B_FALSE);
-}
-
-/*
- * Return a kmem-allocated copy of the NUL-terminated string 's'.  The
- * allocation is exactly strlen(s) + 1 bytes, which is the size
- * spa_strfree() hands back to kmem_free().
- */
-char *
-spa_strdup(const char *s)
-{
-	size_t size = strlen(s) + 1;
-	char *copy = kmem_alloc(size, KM_SLEEP);
-
-	/* Copying 'size' bytes brings the terminating NUL along. */
-	bcopy(s, copy, size);
-
-	return (copy);
-}
-
-/*
- * Free a string obtained from spa_strdup().  The size handed to kmem_free()
- * is recomputed from the string's current length, so the string must still
- * have the length it was allocated with.
- */
-void
-spa_strfree(char *s)
-{
- kmem_free(s, strlen(s) + 1);
-}
-
-/*
- * Return a pseudo-random value in [0, range).  'range' must be non-zero
- * (asserted).  The result is the raw 64-bit value reduced modulo 'range'.
- */
-uint64_t
-spa_get_random(uint64_t range)
-{
-	uint64_t bits;
-
-	ASSERT(range != 0);
-
-	(void) random_get_pseudo_bytes((void *)&bits, sizeof (bits));
-
-	return (bits % range);
-}
-
-/*
- * Format a human-readable description of block pointer 'bp' into 'buf',
- * which holds 'len' bytes.  A NULL bp prints as "<NULL>" and a hole as
- * "<hole>".  Otherwise the output is the level/type/size header, one
- * "DVA[n]=<vdev:offset:asize>" entry per DVA, then checksum and compression
- * names, byte order, gang flag, birth txg, fill count and checksum words.
- */
-void
-sprintf_blkptr(char *buf, int len, const blkptr_t *bp)
-{
- int d;
-
- if (bp == NULL) {
- (void) snprintf(buf, len, "<NULL>");
- return;
- }
-
- if (BP_IS_HOLE(bp)) {
- (void) snprintf(buf, len, "<hole>");
- return;
- }
-
- (void) snprintf(buf, len, "[L%llu %s] %llxL/%llxP ",
- (u_longlong_t)BP_GET_LEVEL(bp),
- dmu_ot[BP_GET_TYPE(bp)].ot_name,
- (u_longlong_t)BP_GET_LSIZE(bp),
- (u_longlong_t)BP_GET_PSIZE(bp));
-
- /* Each later piece is appended at the current end of the string. */
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- const dva_t *dva = &bp->blk_dva[d];
- (void) snprintf(buf + strlen(buf), len - strlen(buf),
- "DVA[%d]=<%llu:%llx:%llx> ", d,
- (u_longlong_t)DVA_GET_VDEV(dva),
- (u_longlong_t)DVA_GET_OFFSET(dva),
- (u_longlong_t)DVA_GET_ASIZE(dva));
- }
-
- (void) snprintf(buf + strlen(buf), len - strlen(buf),
- "%s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx",
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
- BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",
- BP_IS_GANG(bp) ? "gang" : "contiguous",
- (u_longlong_t)bp->blk_birth,
- (u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
-}
-
-/*
- * Set the pool's freeze txg, once.  If spa_freeze_txg still has its initial
- * value (UINT64_MAX) it is set, under the writer config lock, to the last
- * synced txg plus TXG_SIZE, and the caller then waits for that txg to
- * sync.  If a freeze txg is already set this does nothing.
- */
-void
-spa_freeze(spa_t *spa)
-{
- uint64_t freeze_txg = 0;
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- if (spa->spa_freeze_txg == UINT64_MAX) {
- freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE;
- spa->spa_freeze_txg = freeze_txg;
- }
- spa_config_exit(spa, FTAG);
- /* The wait happens after the config lock has been dropped. */
- if (freeze_txg != 0)
- txg_wait_synced(spa_get_dsl(spa), freeze_txg);
-}
-
-/*
- * Report an otherwise-fatal error.  The printf-style message is passed to
- * vcmn_err() at CE_WARN level when the zfs_recover tunable is non-zero,
- * and at CE_PANIC level otherwise.
- */
-void
-zfs_panic_recover(const char *fmt, ...)
-{
-	int level = zfs_recover ? CE_WARN : CE_PANIC;
-	va_list adx;
-
-	va_start(adx, fmt);
-	vcmn_err(level, fmt, adx);
-	va_end(adx);
-}
-
-/*
- * ==========================================================================
- * Accessor functions
- * ==========================================================================
- */
-
-/* Return a pointer to the pool's spa_traverse_lock. */
-krwlock_t *
-spa_traverse_rwlock(spa_t *spa)
-{
- return (&spa->spa_traverse_lock);
-}
-
-/* Return the current value of the pool's spa_traverse_wanted field. */
-int
-spa_traverse_wanted(spa_t *spa)
-{
- return (spa->spa_traverse_wanted);
-}
-
-/* Return the dsl_pool_t attached to this spa_t (spa_dsl_pool). */
-dsl_pool_t *
-spa_get_dsl(spa_t *spa)
-{
- return (spa->spa_dsl_pool);
-}
-
-/*
- * Return a pointer to the root block pointer stored in spa_ubsync.  Note
- * that spa_set_rootblkptr() writes a different field, spa_uberblock.
- */
-blkptr_t *
-spa_get_rootblkptr(spa_t *spa)
-{
- return (&spa->spa_ubsync.ub_rootbp);
-}
-
-/*
- * Copy '*bp' into the root block pointer of spa_uberblock.  Note that
- * spa_get_rootblkptr() reads a different field, spa_ubsync.
- */
-void
-spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
-{
- spa->spa_uberblock.ub_rootbp = *bp;
-}
-
-/*
- * Copy the pool's alternate root into 'buf', which holds 'buflen' bytes.
- * The result is always NUL-terminated and is truncated if it does not fit.
- * An empty string is stored when the pool has no alternate root.  Nothing
- * is written when 'buflen' is zero.
- */
-void
-spa_altroot(spa_t *spa, char *buf, size_t buflen)
-{
-	if (buflen == 0)
-		return;
-
-	if (spa->spa_root == NULL) {
-		buf[0] = '\0';
-		return;
-	}
-
-	/*
-	 * strncpy() leaves the destination unterminated when the source is
-	 * at least as long as the limit, so copy one byte less and
-	 * terminate explicitly.
-	 */
-	(void) strncpy(buf, spa->spa_root, buflen - 1);
-	buf[buflen - 1] = '\0';
-}
-
-/* Return the current value of the pool's spa_sync_pass field. */
-int
-spa_sync_pass(spa_t *spa)
-{
- return (spa->spa_sync_pass);
-}
-
-/*
- * Return the pool's name.  The returned pointer is the spa_t's own string,
- * not a copy; spa_rename() frees and replaces it.
- */
-char *
-spa_name(spa_t *spa)
-{
- /*
- * Accessing the name requires holding either the namespace lock or the
- * config lock, both of which are required to do a rename.
- */
- ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
- spa_config_held(spa, RW_READER) || spa_config_held(spa, RW_WRITER));
-
- return (spa->spa_name);
-}
-
-/*
- * Return the pool guid: the root vdev's guid when a root vdev exists,
- * otherwise the guid saved in spa_load_guid.
- */
-uint64_t
-spa_guid(spa_t *spa)
-{
-	vdev_t *rvd = spa->spa_root_vdev;
-
-	/*
-	 * A failure to parse the config during spa_load() takes the error
-	 * path (which posts an ereport) and can land here before any root
-	 * vdev exists.  The original pool guid is kept in 'spa_load_guid'
-	 * for exactly that case.
-	 */
-	return (rvd != NULL ? rvd->vdev_guid : spa->spa_load_guid);
-}
-
-/* Return the txg recorded in spa_ubsync (ub_txg). */
-uint64_t
-spa_last_synced_txg(spa_t *spa)
-{
- return (spa->spa_ubsync.ub_txg);
-}
-
-/* Return the pool's spa_first_txg field. */
-uint64_t
-spa_first_txg(spa_t *spa)
-{
- return (spa->spa_first_txg);
-}
-
-/* Return the pool's spa_state field (a POOL_STATE_* value). */
-int
-spa_state(spa_t *spa)
-{
- return (spa->spa_state);
-}
-
-/*
- * Return the pool's freeze txg.  The value is UINT64_MAX until
- * spa_freeze() sets it.
- */
-uint64_t
-spa_freeze_txg(spa_t *spa)
-{
- return (spa->spa_freeze_txg);
-}
-
-/*
- * In the future, this may select among different metaslab classes
- * depending on the zdp. For now, there's no such distinction.
- *
- * Currently always returns the pool's spa_normal_class.
- */
-metaslab_class_t *
-spa_metaslab_class_select(spa_t *spa)
-{
- return (spa->spa_normal_class);
-}
-
-/*
- * Return how much space is allocated in the pool (i.e. the sum of all
- * asize), read from the root vdev's vs_alloc statistic.  The root vdev is
- * dereferenced without a NULL check.
- */
-uint64_t
-spa_get_alloc(spa_t *spa)
-{
- return (spa->spa_root_vdev->vdev_stat.vs_alloc);
-}
-
-/*
- * Return how much (raid-z inflated) space there is in the pool, read from
- * the root vdev's vs_space statistic.  The root vdev is dereferenced
- * without a NULL check.
- */
-uint64_t
-spa_get_space(spa_t *spa)
-{
- return (spa->spa_root_vdev->vdev_stat.vs_space);
-}
-
-/*
- * Return the amount of raid-z-deflated space in the pool.  When the pool
- * does not have spa_deflate set, the plain vs_space figure is returned
- * instead of vs_dspace.
- */
-uint64_t
-spa_get_dspace(spa_t *spa)
-{
-	return (spa->spa_deflate ?
-	    spa->spa_root_vdev->vdev_stat.vs_dspace :
-	    spa->spa_root_vdev->vdev_stat.vs_space);
-}
-
-/*
- * Return a worst-case allocated size for 'lsize' logical bytes.  'spa' is
- * not used.
- */
-/* ARGSUSED */
-uint64_t
-spa_get_asize(spa_t *spa, uint64_t lsize)
-{
- /*
- * For now, the worst case is 512-byte RAID-Z blocks, in which
- * case the space requirement is exactly 2x; so just assume that.
- * Add to this the fact that we can have up to 3 DVAs per bp, and
- * we have to multiply by a total of 6x.
- */
- return (lsize * 6);
-}
-
-/* Return the version recorded in spa_ubsync (ub_version). */
-uint64_t
-spa_version(spa_t *spa)
-{
- return (spa->spa_ubsync.ub_version);
-}
-
-/*
- * Return the maximum number of DVAs that may be allocated per block
- * pointer in this pool.
- */
-int
-spa_max_replication(spa_t *spa)
-{
-	/*
-	 * Starting with ZFS_VERSION_DITTO_BLOCKS a BP may carry more than
-	 * one allocated DVA, limited by SPA_DVAS_PER_BP and by the
-	 * spa_max_replication_override variable.  Older pools get one.
-	 */
-	if (spa_version(spa) >= ZFS_VERSION_DITTO_BLOCKS)
-		return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
-
-	return (1);
-}
-
-/*
- * Return the deflated allocated size of 'bp'.  When the pool does not have
- * spa_deflate set this is simply BP_GET_ASIZE(bp).  Otherwise it is the
- * sum, over all SPA_DVAS_PER_BP DVAs, of the DVA's asize in units of
- * (1 << SPA_MINBLOCKSHIFT) bytes multiplied by the deflate ratio of the
- * top-level vdev the DVA lives on.
- */
-uint64_t
-bp_get_dasize(spa_t *spa, const blkptr_t *bp)
-{
-	/*
-	 * Accumulate in the return type: the per-DVA product is a 64-bit
-	 * quantity and must not be narrowed through an int.
-	 */
-	uint64_t sz = 0;
-	int i;
-
-	if (!spa->spa_deflate)
-		return (BP_GET_ASIZE(bp));
-
-	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
-		vdev_t *vd =
-		    vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
-		sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >> SPA_MINBLOCKSHIFT) *
-		    vd->vdev_deflate_ratio;
-	}
-	return (sz);
-}
-
-/*
- * ==========================================================================
- * Initialization and Termination
- * ==========================================================================
- */
-
-/*
- * AVL comparator for spa_namespace_avl: orders spa_t's by spa_name and
- * normalizes the strcmp() result to -1, 0 or 1.
- */
-static int
-spa_name_compare(const void *a1, const void *a2)
-{
-	const spa_t *lhs = a1;
-	const spa_t *rhs = a2;
-	int cmp = strcmp(lhs->spa_name, rhs->spa_name);
-
-	return ((cmp > 0) - (cmp < 0));
-}
-
-/*
- * Return spa_active_count, the number of pools currently in the namespace
- * that were added with an alternate root (see spa_add()/spa_remove()).
- */
-int
-spa_busy(void)
-{
- return (spa_active_count);
-}
-
-/*
- * One-time SPA setup: create the namespace and spare locks and AVL trees,
- * record 'mode' in spa_mode, initialize the refcount, unique, zio, dmu and
- * zil subsystems, and finally load the pool configuration.
- */
-void
-spa_init(int mode)
-{
- mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
-
- /* Pools are keyed by name. */
- avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
- offsetof(spa_t, spa_avl));
-
- mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
-
- /* Spares are keyed by guid. */
- avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
- offsetof(spa_spare_t, spare_avl));
-
- spa_mode = mode;
-
- refcount_init();
- unique_init();
- zio_init();
- dmu_init();
- zil_init();
- spa_config_load();
-}
-
-/*
- * Tear down what spa_init() set up: evict every pool, shut down the zil,
- * dmu, zio and refcount subsystems in the reverse of their init order,
- * then destroy the AVL trees, condition variable and mutexes.
- *
- * NOTE(review): spa_init() calls unique_init() but there is no matching
- * teardown call here - confirm whether one is needed.
- */
-void
-spa_fini(void)
-{
- spa_evict_all();
-
- zil_fini();
- dmu_fini();
- zio_fini();
- refcount_fini();
-
- avl_destroy(&spa_namespace_avl);
- avl_destroy(&spa_spare_avl);
-
- cv_destroy(&spa_namespace_cv);
- mutex_destroy(&spa_namespace_lock);
- mutex_destroy(&spa_spare_lock);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/contrib/opensolaris/uts/common/fs/zfs/space_map.c
deleted file mode 100644
index 23313a9..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/space_map.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/zio.h>
-#include <sys/space_map.h>
-
-/*
- * Space map routines.
- * NOTE: caller is responsible for all locking.
- */
-static int
-space_map_seg_compare(const void *x1, const void *x2)
-{
- const space_seg_t *s1 = x1;
- const space_seg_t *s2 = x2;
-
- if (s1->ss_start < s2->ss_start) {
- if (s1->ss_end > s2->ss_start)
- return (0);
- return (-1);
- }
- if (s1->ss_start > s2->ss_start) {
- if (s1->ss_start < s2->ss_end)
- return (0);
- return (1);
- }
- return (0);
-}
-
-void
-space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift,
- kmutex_t *lp)
-{
- bzero(sm, sizeof (*sm));
-
- cv_init(&sm->sm_load_cv, NULL, CV_DEFAULT, NULL);
- avl_create(&sm->sm_root, space_map_seg_compare,
- sizeof (space_seg_t), offsetof(struct space_seg, ss_node));
-
- sm->sm_start = start;
- sm->sm_size = size;
- sm->sm_shift = shift;
- sm->sm_lock = lp;
-}
-
-void
-space_map_destroy(space_map_t *sm)
-{
- ASSERT(!sm->sm_loaded && !sm->sm_loading);
- VERIFY3U(sm->sm_space, ==, 0);
- avl_destroy(&sm->sm_root);
- cv_destroy(&sm->sm_load_cv);
-}
-
-void
-space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
-{
- avl_index_t where;
- space_seg_t ssearch, *ss_before, *ss_after, *ss;
- uint64_t end = start + size;
- int merge_before, merge_after;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
- VERIFY(size != 0);
- VERIFY3U(start, >=, sm->sm_start);
- VERIFY3U(end, <=, sm->sm_start + sm->sm_size);
- VERIFY(sm->sm_space + size <= sm->sm_size);
- VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
- VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
-
- ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
-
- if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
- zfs_panic_recover("zfs: allocating allocated segment"
- "(offset=%llu size=%llu)\n",
- (longlong_t)start, (longlong_t)size);
- return;
- }
-
- /* Make sure we don't overlap with either of our neighbors */
- VERIFY(ss == NULL);
-
- ss_before = avl_nearest(&sm->sm_root, where, AVL_BEFORE);
- ss_after = avl_nearest(&sm->sm_root, where, AVL_AFTER);
-
- merge_before = (ss_before != NULL && ss_before->ss_end == start);
- merge_after = (ss_after != NULL && ss_after->ss_start == end);
-
- if (merge_before && merge_after) {
- avl_remove(&sm->sm_root, ss_before);
- ss_after->ss_start = ss_before->ss_start;
- kmem_free(ss_before, sizeof (*ss_before));
- } else if (merge_before) {
- ss_before->ss_end = end;
- } else if (merge_after) {
- ss_after->ss_start = start;
- } else {
- ss = kmem_alloc(sizeof (*ss), KM_SLEEP);
- ss->ss_start = start;
- ss->ss_end = end;
- avl_insert(&sm->sm_root, ss, where);
- }
-
- sm->sm_space += size;
-}
-
-void
-space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
-{
- avl_index_t where;
- space_seg_t ssearch, *ss, *newseg;
- uint64_t end = start + size;
- int left_over, right_over;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
- VERIFY(size != 0);
- VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
- VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
-
- ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
-
- /* Make sure we completely overlap with someone */
- if (ss == NULL) {
- zfs_panic_recover("zfs: freeing free segment "
- "(offset=%llu size=%llu)",
- (longlong_t)start, (longlong_t)size);
- return;
- }
- VERIFY3U(ss->ss_start, <=, start);
- VERIFY3U(ss->ss_end, >=, end);
- VERIFY(sm->sm_space - size <= sm->sm_size);
-
- left_over = (ss->ss_start != start);
- right_over = (ss->ss_end != end);
-
- if (left_over && right_over) {
- newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP);
- newseg->ss_start = end;
- newseg->ss_end = ss->ss_end;
- ss->ss_end = start;
- avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER);
- } else if (left_over) {
- ss->ss_end = start;
- } else if (right_over) {
- ss->ss_start = end;
- } else {
- avl_remove(&sm->sm_root, ss);
- kmem_free(ss, sizeof (*ss));
- }
-
- sm->sm_space -= size;
-}
-
-int
-space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
-{
- avl_index_t where;
- space_seg_t ssearch, *ss;
- uint64_t end = start + size;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
- VERIFY(size != 0);
- VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
- VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
-
- ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
-
- return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
-}
-
-void
-space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
-{
- space_seg_t *ss;
- void *cookie = NULL;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
- if (func != NULL)
- func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
- kmem_free(ss, sizeof (*ss));
- }
- sm->sm_space = 0;
-}
-
-void
-space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
-{
- space_seg_t *ss;
-
- for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
- func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
-}
-
-void
-space_map_excise(space_map_t *sm, uint64_t start, uint64_t size)
-{
- avl_tree_t *t = &sm->sm_root;
- avl_index_t where;
- space_seg_t *ss, search;
- uint64_t end = start + size;
- uint64_t rm_start, rm_end;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- search.ss_start = start;
- search.ss_end = start;
-
- for (;;) {
- ss = avl_find(t, &search, &where);
-
- if (ss == NULL)
- ss = avl_nearest(t, where, AVL_AFTER);
-
- if (ss == NULL || ss->ss_start >= end)
- break;
-
- rm_start = MAX(ss->ss_start, start);
- rm_end = MIN(ss->ss_end, end);
-
- space_map_remove(sm, rm_start, rm_end - rm_start);
- }
-}
-
-/*
- * Replace smd with the union of smd and sms.
- */
-void
-space_map_union(space_map_t *smd, space_map_t *sms)
-{
- avl_tree_t *t = &sms->sm_root;
- space_seg_t *ss;
-
- ASSERT(MUTEX_HELD(smd->sm_lock));
-
- /*
- * For each source segment, remove any intersections with the
- * destination, then add the source segment to the destination.
- */
- for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) {
- space_map_excise(smd, ss->ss_start, ss->ss_end - ss->ss_start);
- space_map_add(smd, ss->ss_start, ss->ss_end - ss->ss_start);
- }
-}
-
-/*
- * Wait for any in-progress space_map_load() to complete.
- */
-void
-space_map_load_wait(space_map_t *sm)
-{
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- while (sm->sm_loading)
- cv_wait(&sm->sm_load_cv, sm->sm_lock);
-}
-
-/*
- * Note: space_map_load() will drop sm_lock across dmu_read() calls.
- * The caller must be OK with this.
- */
-int
-space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
- space_map_obj_t *smo, objset_t *os)
-{
- uint64_t *entry, *entry_map, *entry_map_end;
- uint64_t bufsize, size, offset, end, space;
- uint64_t mapstart = sm->sm_start;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- space_map_load_wait(sm);
-
- if (sm->sm_loaded)
- return (0);
-
- sm->sm_loading = B_TRUE;
- end = smo->smo_objsize;
- space = smo->smo_alloc;
-
- ASSERT(sm->sm_ops == NULL);
- VERIFY3U(sm->sm_space, ==, 0);
-
- if (maptype == SM_FREE) {
- space_map_add(sm, sm->sm_start, sm->sm_size);
- space = sm->sm_size - space;
- }
-
- bufsize = 1ULL << SPACE_MAP_BLOCKSHIFT;
- entry_map = zio_buf_alloc(bufsize);
-
- mutex_exit(sm->sm_lock);
- if (end > bufsize)
- dmu_prefetch(os, smo->smo_object, bufsize, end - bufsize);
- mutex_enter(sm->sm_lock);
-
- for (offset = 0; offset < end; offset += bufsize) {
- size = MIN(end - offset, bufsize);
- VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
- VERIFY(size != 0);
-
- dprintf("object=%llu offset=%llx size=%llx\n",
- smo->smo_object, offset, size);
-
- mutex_exit(sm->sm_lock);
- VERIFY3U(dmu_read(os, smo->smo_object, offset, size,
- entry_map), ==, 0);
- mutex_enter(sm->sm_lock);
-
- entry_map_end = entry_map + (size / sizeof (uint64_t));
- for (entry = entry_map; entry < entry_map_end; entry++) {
- uint64_t e = *entry;
-
- if (SM_DEBUG_DECODE(e)) /* Skip debug entries */
- continue;
-
- (SM_TYPE_DECODE(e) == maptype ?
- space_map_add : space_map_remove)(sm,
- (SM_OFFSET_DECODE(e) << sm->sm_shift) + mapstart,
- SM_RUN_DECODE(e) << sm->sm_shift);
- }
- }
- VERIFY3U(sm->sm_space, ==, space);
-
- zio_buf_free(entry_map, bufsize);
-
- sm->sm_loading = B_FALSE;
- sm->sm_loaded = B_TRUE;
- sm->sm_ops = ops;
-
- cv_broadcast(&sm->sm_load_cv);
-
- if (ops != NULL)
- ops->smop_load(sm);
-
- return (0);
-}
-
-void
-space_map_unload(space_map_t *sm)
-{
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- if (sm->sm_loaded && sm->sm_ops != NULL)
- sm->sm_ops->smop_unload(sm);
-
- sm->sm_loaded = B_FALSE;
- sm->sm_ops = NULL;
-
- space_map_vacate(sm, NULL, NULL);
-}
-
-uint64_t
-space_map_alloc(space_map_t *sm, uint64_t size)
-{
- uint64_t start;
-
- start = sm->sm_ops->smop_alloc(sm, size);
- if (start != -1ULL)
- space_map_remove(sm, start, size);
- return (start);
-}
-
-void
-space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
- sm->sm_ops->smop_claim(sm, start, size);
- space_map_remove(sm, start, size);
-}
-
-void
-space_map_free(space_map_t *sm, uint64_t start, uint64_t size)
-{
- space_map_add(sm, start, size);
- sm->sm_ops->smop_free(sm, start, size);
-}
-
-/*
- * Note: space_map_sync() will drop sm_lock across dmu_write() calls.
- */
-void
-space_map_sync(space_map_t *sm, uint8_t maptype,
- space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
-{
- spa_t *spa = dmu_objset_spa(os);
- void *cookie = NULL;
- space_seg_t *ss;
- uint64_t bufsize, start, size, run_len;
- uint64_t *entry, *entry_map, *entry_map_end;
-
- ASSERT(MUTEX_HELD(sm->sm_lock));
-
- if (sm->sm_space == 0)
- return;
-
- dprintf("object %4llu, txg %llu, pass %d, %c, count %lu, space %llx\n",
- smo->smo_object, dmu_tx_get_txg(tx), spa_sync_pass(spa),
- maptype == SM_ALLOC ? 'A' : 'F', avl_numnodes(&sm->sm_root),
- sm->sm_space);
-
- if (maptype == SM_ALLOC)
- smo->smo_alloc += sm->sm_space;
- else
- smo->smo_alloc -= sm->sm_space;
-
- bufsize = (8 + avl_numnodes(&sm->sm_root)) * sizeof (uint64_t);
- bufsize = MIN(bufsize, 1ULL << SPACE_MAP_BLOCKSHIFT);
- entry_map = zio_buf_alloc(bufsize);
- entry_map_end = entry_map + (bufsize / sizeof (uint64_t));
- entry = entry_map;
-
- *entry++ = SM_DEBUG_ENCODE(1) |
- SM_DEBUG_ACTION_ENCODE(maptype) |
- SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
- SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
-
- while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
- size = ss->ss_end - ss->ss_start;
- start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
-
- sm->sm_space -= size;
- size >>= sm->sm_shift;
-
- while (size) {
- run_len = MIN(size, SM_RUN_MAX);
-
- if (entry == entry_map_end) {
- mutex_exit(sm->sm_lock);
- dmu_write(os, smo->smo_object, smo->smo_objsize,
- bufsize, entry_map, tx);
- mutex_enter(sm->sm_lock);
- smo->smo_objsize += bufsize;
- entry = entry_map;
- }
-
- *entry++ = SM_OFFSET_ENCODE(start) |
- SM_TYPE_ENCODE(maptype) |
- SM_RUN_ENCODE(run_len);
-
- start += run_len;
- size -= run_len;
- }
- kmem_free(ss, sizeof (*ss));
- }
-
- if (entry != entry_map) {
- size = (entry - entry_map) * sizeof (uint64_t);
- mutex_exit(sm->sm_lock);
- dmu_write(os, smo->smo_object, smo->smo_objsize,
- size, entry_map, tx);
- mutex_enter(sm->sm_lock);
- smo->smo_objsize += size;
- }
-
- zio_buf_free(entry_map, bufsize);
-
- VERIFY3U(sm->sm_space, ==, 0);
-}
-
-void
-space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
-{
- VERIFY(dmu_free_range(os, smo->smo_object, 0, -1ULL, tx) == 0);
-
- smo->smo_objsize = 0;
- smo->smo_alloc = 0;
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
deleted file mode 100644
index f58ffc0..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ARC_H
-#define _SYS_ARC_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/zio.h>
-
-typedef struct arc_buf_hdr arc_buf_hdr_t;
-typedef struct arc_buf arc_buf_t;
-typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
-typedef void arc_byteswap_func_t(void *buf, size_t size);
-typedef int arc_evict_func_t(void *private);
-
-/* generic arc_done_func_t's which you can use */
-arc_done_func_t arc_bcopy_func;
-arc_done_func_t arc_getbuf_func;
-
-struct arc_buf {
- arc_buf_hdr_t *b_hdr;
- arc_buf_t *b_next;
- void *b_data;
- arc_evict_func_t *b_efunc;
- void *b_private;
-};
-
-typedef enum arc_buf_contents {
- ARC_BUFC_UNDEF, /* buffer contents undefined */
- ARC_BUFC_DATA, /* buffer contains data */
- ARC_BUFC_METADATA /* buffer contains metadata */
-} arc_buf_contents_t;
-/*
- * These are the flags we pass into calls to the arc
- */
-#define ARC_WAIT (1 << 1) /* perform I/O synchronously */
-#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */
-#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */
-#define ARC_CACHED (1 << 4) /* I/O was already in cache */
-
-arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
- arc_buf_contents_t type);
-void arc_buf_add_ref(arc_buf_t *buf, void *tag);
-int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
-int arc_buf_size(arc_buf_t *buf);
-void arc_release(arc_buf_t *buf, void *tag);
-int arc_released(arc_buf_t *buf);
-int arc_has_callback(arc_buf_t *buf);
-void arc_buf_freeze(arc_buf_t *buf);
-void arc_buf_thaw(arc_buf_t *buf);
-#ifdef ZFS_DEBUG
-int arc_referenced(arc_buf_t *buf);
-#endif
-
-int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
- arc_done_func_t *done, void *private, int priority, int flags,
- uint32_t *arc_flags, zbookmark_t *zb);
-zio_t *arc_write(zio_t *pio, spa_t *spa, int checksum, int compress,
- int ncopies, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
- arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
- int flags, zbookmark_t *zb);
-int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private, uint32_t arc_flags);
-int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
-
-void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
-int arc_buf_evict(arc_buf_t *buf);
-
-void arc_flush(void);
-void arc_tempreserve_clear(uint64_t tempreserve);
-int arc_tempreserve_space(uint64_t tempreserve);
-
-void arc_init(void);
-void arc_fini(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ARC_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h
deleted file mode 100644
index b4c8376..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/bplist.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_BPLIST_H
-#define _SYS_BPLIST_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct bplist_phys {
- /*
- * This is the bonus buffer for the dead lists. The object's
- * contents is an array of bpl_entries blkptr_t's, representing
- * a total of bpl_bytes physical space.
- */
- uint64_t bpl_entries;
- uint64_t bpl_bytes;
- uint64_t bpl_comp;
- uint64_t bpl_uncomp;
-} bplist_phys_t;
-
-#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
-
-typedef struct bplist_q {
- blkptr_t bpq_blk;
- void *bpq_next;
-} bplist_q_t;
-
-typedef struct bplist {
- kmutex_t bpl_lock;
- objset_t *bpl_mos;
- uint64_t bpl_object;
- uint8_t bpl_blockshift;
- uint8_t bpl_bpshift;
- uint8_t bpl_havecomp;
- bplist_q_t *bpl_queue;
- bplist_phys_t *bpl_phys;
- dmu_buf_t *bpl_dbuf;
- dmu_buf_t *bpl_cached_dbuf;
-} bplist_t;
-
-extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
-extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
-extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
-extern void bplist_close(bplist_t *bpl);
-extern boolean_t bplist_empty(bplist_t *bpl);
-extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
-extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
-extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
-extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
-extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
-extern int bplist_space(bplist_t *bpl,
- uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_BPLIST_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
deleted file mode 100644
index d33657b..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DBUF_H
-#define _SYS_DBUF_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/zio.h>
-#include <sys/arc.h>
-#include <sys/zfs_context.h>
-#include <sys/refcount.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DB_BONUS_BLKID (-1ULL)
-#define IN_DMU_SYNC 2
-
-/*
- * define flags for dbuf_read
- */
-
-#define DB_RF_MUST_SUCCEED (1 << 0)
-#define DB_RF_CANFAIL (1 << 1)
-#define DB_RF_HAVESTRUCT (1 << 2)
-#define DB_RF_NOPREFETCH (1 << 3)
-#define DB_RF_NEVERWAIT (1 << 4)
-#define DB_RF_CACHED (1 << 5)
-
-/*
- * The state transition diagram for dbufs looks like:
- *
- * +----> READ ----+
- * | |
- * | V
- * (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
- * | ^
- * | |
- * +----> FILL ----+
- */
-typedef enum dbuf_states {
- DB_UNCACHED,
- DB_FILL,
- DB_READ,
- DB_CACHED,
- DB_EVICTING
-} dbuf_states_t;
-
-struct objset_impl;
-struct dnode;
-struct dmu_tx;
-
-/*
- * level = 0 means the user data
- * level = 1 means the single indirect block
- * etc.
- */
-
-#define LIST_LINK_INACTIVE(link) \
- ((link)->list_next == NULL && (link)->list_prev == NULL)
-
-struct dmu_buf_impl;
-
-typedef enum override_states {
- DR_NOT_OVERRIDDEN,
- DR_IN_DMU_SYNC,
- DR_OVERRIDDEN
-} override_states_t;
-
-typedef struct dbuf_dirty_record {
- /* link on our parents dirty list */
- list_node_t dr_dirty_node;
-
- /* transaction group this data will sync in */
- uint64_t dr_txg;
-
- /* zio of outstanding write IO */
- zio_t *dr_zio;
-
- /* pointer back to our dbuf */
- struct dmu_buf_impl *dr_dbuf;
-
- /* pointer to next dirty record */
- struct dbuf_dirty_record *dr_next;
-
- /* pointer to parent dirty record */
- struct dbuf_dirty_record *dr_parent;
-
- union dirty_types {
- struct dirty_indirect {
-
- /* protect access to list */
- kmutex_t dr_mtx;
-
- /* Our list of dirty children */
- list_t dr_children;
- } di;
- struct dirty_leaf {
-
- /*
- * dr_data is set when we dirty the buffer
- * so that we can retain the pointer even if it
- * gets COW'd in a subsequent transaction group.
- */
- arc_buf_t *dr_data;
- blkptr_t dr_overridden_by;
- override_states_t dr_override_state;
- } dl;
- } dt;
-} dbuf_dirty_record_t;
-
-typedef struct dmu_buf_impl {
- /*
- * The following members are immutable, with the exception of
- * db.db_data, which is protected by db_mtx.
- */
-
- /* the publicly visible structure */
- dmu_buf_t db;
-
- /* the objset we belong to */
- struct objset_impl *db_objset;
-
- /*
- * the dnode we belong to (NULL when evicted)
- */
- struct dnode *db_dnode;
-
- /*
- * our parent buffer; if the dnode points to us directly,
- * db_parent == db_dnode->dn_dbuf
- * only accessed by sync thread ???
- * (NULL when evicted)
- */
- struct dmu_buf_impl *db_parent;
-
- /*
- * link for hash table of all dmu_buf_impl_t's
- */
- struct dmu_buf_impl *db_hash_next;
-
- /* our block number */
- uint64_t db_blkid;
-
- /*
- * Pointer to the blkptr_t which points to us. May be NULL if we
- * don't have one yet. (NULL when evicted)
- */
- blkptr_t *db_blkptr;
-
- /*
- * Our indirection level. Data buffers have db_level==0.
- * Indirect buffers which point to data buffers have
- * db_level==1. etc. Buffers which contain dnodes have
- * db_level==0, since the dnodes are stored in a file.
- */
- uint8_t db_level;
-
- /* db_mtx protects the members below */
- kmutex_t db_mtx;
-
- /*
- * Current state of the buffer
- */
- dbuf_states_t db_state;
-
- /*
- * Refcount accessed by dmu_buf_{hold,rele}.
- * If nonzero, the buffer can't be destroyed.
- * Protected by db_mtx.
- */
- refcount_t db_holds;
-
- /* buffer holding our data */
- arc_buf_t *db_buf;
-
- kcondvar_t db_changed;
- dbuf_dirty_record_t *db_data_pending;
-
- /* pointer to most recent dirty record for this buffer */
- dbuf_dirty_record_t *db_last_dirty;
-
- /*
- * Our link on the owner dnodes's dn_dbufs list.
- * Protected by its dn_dbufs_mtx.
- */
- list_node_t db_link;
-
- /* Data which is unique to data (leaf) blocks: */
-
- /* stuff we store for the user (see dmu_buf_set_user) */
- void *db_user_ptr;
- void **db_user_data_ptr_ptr;
- dmu_buf_evict_func_t *db_evict_func;
-
- uint8_t db_immediate_evict;
- uint8_t db_freed_in_flight;
-
- uint8_t db_dirtycnt;
-} dmu_buf_impl_t;
-
-/* Note: the dbuf hash table is exposed only for the mdb module */
-#define DBUF_MUTEXES 256
-#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
-typedef struct dbuf_hash_table {
- uint64_t hash_table_mask;
- dmu_buf_impl_t **hash_table;
- kmutex_t hash_mutexes[DBUF_MUTEXES];
-} dbuf_hash_table_t;
-
-
-uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
-
-dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
-dmu_buf_impl_t *dbuf_create_bonus(struct dnode *dn);
-
-dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
-dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
- void *tag);
-int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
- void *tag, dmu_buf_impl_t **dbp);
-
-void dbuf_prefetch(struct dnode *dn, uint64_t blkid);
-
-void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
-uint64_t dbuf_refcount(dmu_buf_impl_t *db);
-
-void dbuf_rele(dmu_buf_impl_t *db, void *tag);
-
-dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
-
-int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
-void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
-void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
-void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
-dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-
-void dbuf_clear(dmu_buf_impl_t *db);
-void dbuf_evict(dmu_buf_impl_t *db);
-
-void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
-void dbuf_unoverride(dbuf_dirty_record_t *dr);
-void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
-
-void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks,
- struct dmu_tx *);
-
-void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
-
-void dbuf_init(void);
-void dbuf_fini(void);
-
-#define DBUF_GET_BUFC_TYPE(db) \
- ((((db)->db_level > 0) || \
- (dmu_ot[(db)->db_dnode->dn_type].ot_metadata)) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA);
-
-#ifdef ZFS_DEBUG
-
-/*
- * There should be a ## between the string literal and fmt, to make it
- * clear that we're joining two strings together, but gcc does not
- * support that preprocessor token.
- */
-#define dprintf_dbuf(dbuf, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char __db_buf[32]; \
- uint64_t __db_obj = (dbuf)->db.db_object; \
- if (__db_obj == DMU_META_DNODE_OBJECT) \
- (void) strcpy(__db_buf, "mdn"); \
- else \
- (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
- (u_longlong_t)__db_obj); \
- dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
- "obj=%s lvl=%u blkid=%lld " fmt, \
- __db_buf, (dbuf)->db_level, \
- (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
- } \
-_NOTE(CONSTCOND) } while (0)
-
-#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
- sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
- dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
- kmem_free(__blkbuf, BP_SPRINTF_LEN); \
- } \
-_NOTE(CONSTCOND) } while (0)
-
-#define DBUF_VERIFY(db) dbuf_verify(db)
-
-#else
-
-#define dprintf_dbuf(db, fmt, ...)
-#define dprintf_dbuf_bp(db, bp, fmt, ...)
-#define DBUF_VERIFY(db)
-
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DBUF_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
deleted file mode 100644
index 8c2a1fd..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DMU_H
-#define _SYS_DMU_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This file describes the interface that the DMU provides for its
- * consumers.
- *
- * The DMU also interacts with the SPA. That interface is described in
- * dmu_spa.h.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct uio;
-struct page;
-struct vnode;
-struct spa;
-struct zilog;
-struct zio;
-struct blkptr;
-struct zap_cursor;
-struct dsl_dataset;
-struct dsl_pool;
-struct dnode;
-struct drr_begin;
-struct drr_end;
-struct zbookmark;
-struct spa;
-struct nvlist;
-struct objset_impl;
-struct file;
-
-typedef struct objset objset_t;
-typedef struct dmu_tx dmu_tx_t;
-typedef struct dsl_dir dsl_dir_t;
-
-typedef enum dmu_object_type {
- DMU_OT_NONE,
- /* general: */
- DMU_OT_OBJECT_DIRECTORY, /* ZAP */
- DMU_OT_OBJECT_ARRAY, /* UINT64 */
- DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
- DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
- DMU_OT_BPLIST, /* UINT64 */
- DMU_OT_BPLIST_HDR, /* UINT64 */
- /* spa: */
- DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
- DMU_OT_SPACE_MAP, /* UINT64 */
- /* zil: */
- DMU_OT_INTENT_LOG, /* UINT64 */
- /* dmu: */
- DMU_OT_DNODE, /* DNODE */
- DMU_OT_OBJSET, /* OBJSET */
- /* dsl: */
- DMU_OT_DSL_DIR, /* UINT64 */
- DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */
- DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */
- DMU_OT_DSL_PROPS, /* ZAP */
- DMU_OT_DSL_DATASET, /* UINT64 */
- /* zpl: */
- DMU_OT_ZNODE, /* ZNODE */
- DMU_OT_ACL, /* ACL */
- DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
- DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
- DMU_OT_MASTER_NODE, /* ZAP */
- DMU_OT_UNLINKED_SET, /* ZAP */
- /* zvol: */
- DMU_OT_ZVOL, /* UINT8 */
- DMU_OT_ZVOL_PROP, /* ZAP */
- /* other; for testing only! */
- DMU_OT_PLAIN_OTHER, /* UINT8 */
- DMU_OT_UINT64_OTHER, /* UINT64 */
- DMU_OT_ZAP_OTHER, /* ZAP */
- /* new object types: */
- DMU_OT_ERROR_LOG, /* ZAP */
- DMU_OT_SPA_HISTORY, /* UINT8 */
- DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
- DMU_OT_POOL_PROPS, /* ZAP */
-
- DMU_OT_NUMTYPES
-} dmu_object_type_t;
-
-typedef enum dmu_objset_type {
- DMU_OST_NONE,
- DMU_OST_META,
- DMU_OST_ZFS,
- DMU_OST_ZVOL,
- DMU_OST_OTHER, /* For testing only! */
- DMU_OST_ANY, /* Be careful! */
- DMU_OST_NUMTYPES
-} dmu_objset_type_t;
-
-void byteswap_uint64_array(void *buf, size_t size);
-void byteswap_uint32_array(void *buf, size_t size);
-void byteswap_uint16_array(void *buf, size_t size);
-void byteswap_uint8_array(void *buf, size_t size);
-void zap_byteswap(void *buf, size_t size);
-void zfs_acl_byteswap(void *buf, size_t size);
-void zfs_znode_byteswap(void *buf, size_t size);
-
-#define DS_MODE_NONE 0 /* invalid, to aid debugging */
-#define DS_MODE_STANDARD 1 /* normal access, no special needs */
-#define DS_MODE_PRIMARY 2 /* the "main" access, e.g. a mount */
-#define DS_MODE_EXCLUSIVE 3 /* exclusive access, e.g. to destroy */
-#define DS_MODE_LEVELS 4
-#define DS_MODE_LEVEL(x) ((x) & (DS_MODE_LEVELS - 1))
-#define DS_MODE_READONLY 0x8
-#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
-#define DS_MODE_INCONSISTENT 0x10
-#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT)
-
-#define DS_FIND_SNAPSHOTS (1<<0)
-#define DS_FIND_CHILDREN (1<<1)
-
-/*
- * The maximum number of bytes that can be accessed as part of one
- * operation, including metadata.
- */
-#define DMU_MAX_ACCESS (10<<20) /* 10MB */
-
-/*
- * Public routines to create, destroy, open, and close objsets.
- */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
- objset_t **osp);
-void dmu_objset_close(objset_t *os);
-int dmu_objset_evict_dbufs(objset_t *os, int try);
-int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent,
- void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg);
-int dmu_objset_destroy(const char *name);
-int dmu_snapshots_destroy(char *fsname, char *snapname);
-int dmu_objset_rollback(const char *name);
-int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
-int dmu_objset_rename(const char *name, const char *newname,
- boolean_t recursive);
-int dmu_objset_find(char *name, int func(char *, void *), void *arg,
- int flags);
-void dmu_objset_byteswap(void *buf, size_t size);
-
-typedef struct dmu_buf {
- uint64_t db_object; /* object that this buffer is part of */
- uint64_t db_offset; /* byte offset in this object */
- uint64_t db_size; /* size of buffer in bytes */
- void *db_data; /* data in buffer */
-} dmu_buf_t;
-
-typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
-
-/*
- * Callback function to perform byte swapping on a block.
- */
-typedef void dmu_byteswap_func_t(void *buf, size_t size);
-
-/*
- * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
- */
-#define DMU_POOL_DIRECTORY_OBJECT 1
-#define DMU_POOL_CONFIG "config"
-#define DMU_POOL_ROOT_DATASET "root_dataset"
-#define DMU_POOL_SYNC_BPLIST "sync_bplist"
-#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
-#define DMU_POOL_ERRLOG_LAST "errlog_last"
-#define DMU_POOL_SPARES "spares"
-#define DMU_POOL_DEFLATE "deflate"
-#define DMU_POOL_HISTORY "history"
-#define DMU_POOL_PROPS "pool_props"
-
-/*
- * Allocate an object from this objset. The range of object numbers
- * available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
- *
- * The transaction must be assigned to a txg. The newly allocated
- * object will be "held" in the transaction (ie. you can modify the
- * newly allocated object in this transaction).
- *
- * dmu_object_alloc() chooses an object and returns it in *objectp.
- *
- * dmu_object_claim() allocates a specific object number. If that
- * number is already allocated, it fails and returns EEXIST.
- *
- * Return 0 on success, or ENOSPC or EEXIST as specified above.
- */
-uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
-int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
-int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
- int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-
-/*
- * Free an object from this objset.
- *
- * The object's data will be freed as well (ie. you don't need to call
- * dmu_free(object, 0, -1, tx)).
- *
- * The object need not be held in the transaction.
- *
- * If there are any holds on this object's buffers (via dmu_buf_hold()),
- * or tx holds on the object (via dmu_tx_hold_object()), you can not
- * free it; it fails and returns EBUSY.
- *
- * If the object is not allocated, it fails and returns ENOENT.
- *
- * Return 0 on success, or EBUSY or ENOENT as specified above.
- */
-int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
-
-/*
- * Find the next allocated or free object.
- *
- * The objectp parameter is in-out. It will be updated to be the next
- * object which is allocated. Ignore objects which have not been
- * modified since txg.
- *
- * XXX Can only be called on a objset with no dirty data.
- *
- * Returns 0 on success, or ENOENT if there are no more objects.
- */
-int dmu_object_next(objset_t *os, uint64_t *objectp,
- boolean_t hole, uint64_t txg);
-
-/*
- * Set the data blocksize for an object.
- *
- * The object cannot have any blocks allcated beyond the first. If
- * the first block is allocated already, the new size must be greater
- * than the current block size. If these conditions are not met,
- * ENOTSUP will be returned.
- *
- * Returns 0 on success, or EBUSY if there are any holds on the object
- * contents, or ENOTSUP as described above.
- */
-int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
- int ibs, dmu_tx_t *tx);
-
-/*
- * Set the checksum property on a dnode. The new checksum algorithm will
- * apply to all newly written blocks; existing blocks will not be affected.
- */
-void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
- dmu_tx_t *tx);
-
-/*
- * Set the compress property on a dnode. The new compression algorithm will
- * apply to all newly written blocks; existing blocks will not be affected.
- */
-void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
- dmu_tx_t *tx);
-
-/*
- * Decide how many copies of a given block we should make. Can be from
- * 1 to SPA_DVAS_PER_BP.
- */
-int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
- dmu_object_type_t ot);
-/*
- * The bonus data is accessed more or less like a regular buffer.
- * You must dmu_bonus_hold() to get the buffer, which will give you a
- * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
- * data. As with any normal buffer, you must call dmu_buf_read() to
- * read db_data, dmu_buf_will_dirty() before modifying it, and the
- * object must be held in an assigned transaction before calling
- * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
- * buffer as well. You must release your hold with dmu_buf_rele().
- */
-int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
-int dmu_bonus_max(void);
-
-/*
- * Obtain the DMU buffer from the specified object which contains the
- * specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
- * that it will remain in memory. You must release the hold with
- * dmu_buf_rele(). You musn't access the dmu_buf_t after releasing your
- * hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
- *
- * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
- * on the returned buffer before reading or writing the buffer's
- * db_data. The comments for those routines describe what particular
- * operations are valid after calling them.
- *
- * The object number must be a valid, allocated object number.
- */
-int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
- void *tag, dmu_buf_t **);
-void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
-void dmu_buf_rele(dmu_buf_t *db, void *tag);
-uint64_t dmu_buf_refcount(dmu_buf_t *db);
-
-/*
- * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
- * range of an object. A pointer to an array of dmu_buf_t*'s is
- * returned (in *dbpp).
- *
- * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
- * frees the array. The hold on the array of buffers MUST be released
- * with dmu_buf_rele_array. You can NOT release the hold on each buffer
- * individually with dmu_buf_rele.
- */
-int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
-void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
-
-/*
- * Returns NULL on success, or the existing user ptr if it's already
- * been set.
- *
- * user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
- *
- * user_data_ptr_ptr should be NULL, or a pointer to a pointer which
- * will be set to db->db_data when you are allowed to access it. Note
- * that db->db_data (the pointer) can change when you do dmu_buf_read(),
- * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
- * *user_data_ptr_ptr will be set to the new value when it changes.
- *
- * If non-NULL, pageout func will be called when this buffer is being
- * excised from the cache, so that you can clean up the data structure
- * pointed to by user_ptr.
- *
- * dmu_evict_user() will call the pageout func for all buffers in a
- * objset with a given pageout func.
- */
-void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
- dmu_buf_evict_func_t *pageout_func);
-/*
- * set_user_ie is the same as set_user, but request immediate eviction
- * when hold count goes to zero.
- */
-void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
- void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
-void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
- void *user_ptr, void *user_data_ptr_ptr,
- dmu_buf_evict_func_t *pageout_func);
-void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
-
-/*
- * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
- */
-void *dmu_buf_get_user(dmu_buf_t *db);
-
-/*
- * Indicate that you are going to modify the buffer's data (db_data).
- *
- * The transaction (tx) must be assigned to a txg (ie. you've called
- * dmu_tx_assign()). The buffer's object must be held in the tx
- * (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
- */
-void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
-
-/*
- * You must create a transaction, then hold the objects which you will
- * (or might) modify as part of this transaction. Then you must assign
- * the transaction to a transaction group. Once the transaction has
- * been assigned, you can modify buffers which belong to held objects as
- * part of this transaction. You can't modify buffers before the
- * transaction has been assigned; you can't modify buffers which don't
- * belong to objects which this transaction holds; you can't hold
- * objects once the transaction has been assigned. You may hold an
- * object which you are going to free (with dmu_object_free()), but you
- * don't have to.
- *
- * You can abort the transaction before it has been assigned.
- *
- * Note that you may hold buffers (with dmu_buf_hold) at any time,
- * regardless of transaction state.
- */
-
-#define DMU_NEW_OBJECT (-1ULL)
-#define DMU_OBJECT_END (-1ULL)
-
-dmu_tx_t *dmu_tx_create(objset_t *os);
-void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
-void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
- uint64_t len);
-void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
-void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
-void dmu_tx_abort(dmu_tx_t *tx);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
-void dmu_tx_wait(dmu_tx_t *tx);
-void dmu_tx_commit(dmu_tx_t *tx);
-
-/*
- * Free up the data blocks for a defined range of a file. If size is
- * zero, the range from offset to end-of-file is freed.
- */
-int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t size, dmu_tx_t *tx);
-
-/*
- * Convenience functions.
- *
- * Canfail routines will return 0 on success, or an errno if there is a
- * nonrecoverable I/O error.
- */
-int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- void *buf);
-void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx);
-int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
-int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
- dmu_tx_t *tx);
-int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t size, struct page *pp, dmu_tx_t *tx);
-
-extern int zfs_prefetch_disable;
-
-/*
- * Asynchronously try to read in the data.
- */
-void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t len);
-
-typedef struct dmu_object_info {
- /* All sizes are in bytes. */
- uint32_t doi_data_block_size;
- uint32_t doi_metadata_block_size;
- uint64_t doi_bonus_size;
- dmu_object_type_t doi_type;
- dmu_object_type_t doi_bonus_type;
- uint8_t doi_indirection; /* 2 = dnode->indirect->data */
- uint8_t doi_checksum;
- uint8_t doi_compress;
- uint8_t doi_pad[5];
- /* Values below are number of 512-byte blocks. */
- uint64_t doi_physical_blks; /* data + metadata */
- uint64_t doi_max_block_offset;
-} dmu_object_info_t;
-
-typedef struct dmu_object_type_info {
- dmu_byteswap_func_t *ot_byteswap;
- boolean_t ot_metadata;
- char *ot_name;
-} dmu_object_type_info_t;
-
-extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
-
-/*
- * Get information on a DMU object.
- *
- * Return 0 on success or ENOENT if object is not allocated.
- *
- * If doi is NULL, just indicates whether the object exists.
- */
-int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
-void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
-void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
-void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
- u_longlong_t *nblk512);
-
-typedef struct dmu_objset_stats {
- uint64_t dds_num_clones; /* number of clones of this */
- uint64_t dds_creation_txg;
- dmu_objset_type_t dds_type;
- uint8_t dds_is_snapshot;
- uint8_t dds_inconsistent;
- char dds_clone_of[MAXNAMELEN];
-} dmu_objset_stats_t;
-
-/*
- * Get stats on a dataset.
- */
-void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
-
-/*
- * Add entries to the nvlist for all the objset's properties. See
- * zfs_prop_table[] and zfs(1m) for details on the properties.
- */
-void dmu_objset_stats(objset_t *os, struct nvlist *nv);
-
-/*
- * Get the space usage statistics for statvfs().
- *
- * refdbytes is the amount of space "referenced" by this objset.
- * availbytes is the amount of space available to this objset, taking
- * into account quotas & reservations, assuming that no other objsets
- * use the space first. These values correspond to the 'referenced' and
- * 'available' properties, described in the zfs(1m) manpage.
- *
- * usedobjs and availobjs are the number of objects currently allocated,
- * and available.
- */
-void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
- uint64_t *usedobjsp, uint64_t *availobjsp);
-
-/*
- * The fsid_guid is a 56-bit ID that can change to avoid collisions.
- * (Contrast with the ds_guid which is a 64-bit ID that will never
- * change, so there is a small probability that it will collide.)
- */
-uint64_t dmu_objset_fsid_guid(objset_t *os);
-
-int dmu_objset_is_snapshot(objset_t *os);
-
-extern struct spa *dmu_objset_spa(objset_t *os);
-extern struct zilog *dmu_objset_zil(objset_t *os);
-extern struct dsl_pool *dmu_objset_pool(objset_t *os);
-extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
-extern void dmu_objset_name(objset_t *os, char *buf);
-extern dmu_objset_type_t dmu_objset_type(objset_t *os);
-extern uint64_t dmu_objset_id(objset_t *os);
-extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
- uint64_t *id, uint64_t *offp);
-extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
- uint64_t *idp, uint64_t *offp);
-
-/*
- * Return the txg number for the given assigned transaction.
- */
-uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
-
-/*
- * Synchronous write.
- * If a parent zio is provided this function initiates a write on the
- * provided buffer as a child of the parent zio.
- * In the absense of a parent zio, the write is completed synchronously.
- * At write completion, blk is filled with the bp of the written block.
- * Note that while the data covered by this function will be on stable
- * storage when the write completes this new data does not become a
- * permanent part of the file until the associated transaction commits.
- */
-typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
-int dmu_sync(struct zio *zio, dmu_buf_t *db,
- struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
-
-/*
- * Find the next hole or data block in file starting at *off
- * Return found offset in *off. Return ESRCH for end of file.
- */
-int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
- uint64_t *off);
-
-/*
- * Initial setup and final teardown.
- */
-extern void dmu_init(void);
-extern void dmu_fini(void);
-
-typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
- uint64_t object, uint64_t offset, int len);
-void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
- dmu_traverse_cb_t cb, void *arg);
-
-int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct file *fp);
-int dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
- boolean_t force, struct file *fp, uint64_t voffset);
-
-/* CRC64 table */
-#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
-extern uint64_t zfs_crc64_table[256];
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DMU_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
deleted file mode 100644
index 807011e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DMU_IMPL_H
-#define _SYS_DMU_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/txg_impl.h>
-#include <sys/zio.h>
-#include <sys/dnode.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * This is the locking strategy for the DMU. Numbers in parenthesis are
- * cases that use that lock order, referenced below:
- *
- * ARC is self-contained
- * bplist is self-contained
- * refcount is self-contained
- * txg is self-contained (hopefully!)
- * zst_lock
- * zf_rwlock
- *
- * XXX try to improve evicting path?
- *
- * dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
- * dn_dbufs_mtx > hash_mutexes > db_mtx > leafs
- *
- * dp_config_rwlock
- * must be held before: everything
- * protects dd namespace changes
- * protects property changes globally
- * held from:
- * dsl_dir_open/r:
- * dsl_dir_create_sync/w:
- * dsl_dir_sync_destroy/w:
- * dsl_dir_rename_sync/w:
- * dsl_prop_changed_notify/r:
- *
- * os_obj_lock
- * must be held before:
- * everything except dp_config_rwlock
- * protects os_obj_next
- * held from:
- * dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
- *
- * dn_struct_rwlock
- * must be held before:
- * everything except dp_config_rwlock and os_obj_lock
- * protects structure of dnode (eg. nlevels)
- * db_blkptr can change when syncing out change to nlevels
- * dn_maxblkid
- * dn_nlevels
- * dn_*blksz*
- * phys nlevels, maxblkid, physical blkptr_t's (?)
- * held from:
- * callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
- * dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
- * dmu_tx_count_free:
- * dbuf_read_impl: db_mtx, dmu_zfetch()
- * dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
- * dbuf_new_size: db_mtx
- * dbuf_dirty: db_mtx
- * dbuf_findbp: (callers, phys? - the real need)
- * dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
- * dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
- * dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
- * dnode_sync/w (increase_indirection): db_mtx (phys)
- * dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
- * dnode_new_blkid/w: (dn_maxblkid)
- * dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
- * dnode_next_offset: (phys)
- *
- * dn_dbufs_mtx
- * must be held before:
- * db_mtx, hash_mutexes
- * protects:
- * dn_dbufs
- * dn_evicted
- * held from:
- * dmu_evict_user: db_mtx (dn_dbufs)
- * dbuf_free_range: db_mtx (dn_dbufs)
- * dbuf_remove_ref: db_mtx, callees:
- * dbuf_hash_remove: hash_mutexes, db_mtx
- * dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
- * dnode_set_blksz: (dn_dbufs)
- *
- * hash_mutexes (global)
- * must be held before:
- * db_mtx
- * protects dbuf_hash_table (global) and db_hash_next
- * held from:
- * dbuf_find: db_mtx
- * dbuf_hash_insert: db_mtx
- * dbuf_hash_remove: db_mtx
- *
- * db_mtx (meta-leaf)
- * must be held before:
- * dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
- * protects:
- * db_state
- * db_holds
- * db_buf
- * db_changed
- * db_data_pending
- * db_dirtied
- * db_link
- * db_dirty_node (??)
- * db_dirtycnt
- * db_d.*
- * db.*
- * held from:
- * dbuf_dirty: dn_mtx, dn_dirty_mtx
- * dbuf_dirty->dsl_dir_willuse_space: dd_lock
- * dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
- * dbuf_undirty: dn_dirty_mtx (db_d)
- * dbuf_write_done: dn_dirty_mtx (db_state)
- * dbuf_*
- * dmu_buf_update_user: none (db_d)
- * dmu_evict_user: none (db_d) (maybe can eliminate)
- * dbuf_find: none (db_holds)
- * dbuf_hash_insert: none (db_holds)
- * dmu_buf_read_array_impl: none (db_state, db_changed)
- * dmu_sync: none (db_dirty_node, db_d)
- * dnode_reallocate: none (db)
- *
- * dn_mtx (leaf)
- * protects:
- * dn_dirty_dbufs
- * dn_ranges
- * phys accounting
- * dn_allocated_txg
- * dn_free_txg
- * dn_assigned_txg
- * dd_assigned_tx
- * dn_notxholds
- * dn_dirtyctx
- * dn_dirtyctx_firstset
- * (dn_phys copy fields?)
- * (dn_phys contents?)
- * held from:
- * dnode_*
- * dbuf_dirty: none
- * dbuf_sync: none (phys accounting)
- * dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
- * dbuf_write_done: none (phys accounting)
- * dmu_object_info_from_dnode: none (accounting)
- * dmu_tx_commit: none
- * dmu_tx_hold_object_impl: none
- * dmu_tx_try_assign: dn_notxholds(cv)
- * dmu_tx_unassign: none
- *
- * dd_lock (leaf)
- * protects:
- * dd_prop_cbs
- * dd_sync_*
- * dd_used_bytes
- * dd_tempreserved
- * dd_space_towrite
- * dd_myname
- * dd_phys accounting?
- * held from:
- * dsl_dir_*
- * dsl_prop_changed_notify: none (dd_prop_cbs)
- * dsl_prop_register: none (dd_prop_cbs)
- * dsl_prop_unregister: none (dd_prop_cbs)
- * dsl_dataset_block_freeable: none (dd_sync_*)
- *
- * os_lock (leaf)
- * protects:
- * os_dirty_dnodes
- * os_free_dnodes
- * os_dnodes
- * os_downgraded_dbufs
- * dn_dirtyblksz
- * dn_dirty_link
- * held from:
- * dnode_create: none (os_dnodes)
- * dnode_destroy: none (os_dnodes)
- * dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
- * dnode_free: none (dn_dirtyblksz, os_*_dnodes)
- *
- * ds_lock (leaf)
- * protects:
- * ds_user_ptr
- * ds_user_evice_func
- * ds_open_refcount
- * ds_snapname
- * ds_phys accounting
- * held from:
- * dsl_dataset_*
- *
- * dr_mtx (leaf)
- * protects:
- * dr_children
- * held from:
- * dbuf_dirty
- * dbuf_undirty
- * dbuf_sync_indirect
- * dnode_new_blkid
- */
-
-struct objset;
-struct dmu_pool;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DMU_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
deleted file mode 100644
index 8293a3b..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DMU_OBJSET_H
-#define _SYS_DMU_OBJSET_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/arc.h>
-#include <sys/txg.h>
-#include <sys/zfs_context.h>
-#include <sys/dnode.h>
-#include <sys/zio.h>
-#include <sys/zil.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dsl_dataset;
-struct dmu_tx;
-struct objset_impl;
-
-typedef struct objset_phys {
- dnode_phys_t os_meta_dnode;
- zil_header_t os_zil_header;
- uint64_t os_type;
- char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
- sizeof (uint64_t)];
-} objset_phys_t;
-
-struct objset {
- struct objset_impl *os;
- int os_mode;
-};
-
-typedef struct objset_impl {
- /* Immutable: */
- struct dsl_dataset *os_dsl_dataset;
- spa_t *os_spa;
- arc_buf_t *os_phys_buf;
- objset_phys_t *os_phys;
- dnode_t *os_meta_dnode;
- zilog_t *os_zil;
- objset_t os;
- uint8_t os_checksum; /* can change, under dsl_dir's locks */
- uint8_t os_compress; /* can change, under dsl_dir's locks */
- uint8_t os_copies; /* can change, under dsl_dir's locks */
- uint8_t os_md_checksum;
- uint8_t os_md_compress;
-
- /* no lock needed: */
- struct dmu_tx *os_synctx; /* XXX sketchy */
- blkptr_t *os_rootbp;
-
- /* Protected by os_obj_lock */
- kmutex_t os_obj_lock;
- uint64_t os_obj_next;
-
- /* Protected by os_lock */
- kmutex_t os_lock;
- list_t os_dirty_dnodes[TXG_SIZE];
- list_t os_free_dnodes[TXG_SIZE];
- list_t os_dnodes;
- list_t os_downgraded_dbufs;
-} objset_impl_t;
-
-#define DMU_META_DNODE_OBJECT 0
-
-/* called from zpl */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
- objset_t **osp);
-void dmu_objset_close(objset_t *os);
-int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent,
- void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg);
-int dmu_objset_destroy(const char *name);
-int dmu_objset_rollback(const char *name);
-int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
-void dmu_objset_stats(objset_t *os, nvlist_t *nv);
-void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
-void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
- uint64_t *usedobjsp, uint64_t *availobjsp);
-uint64_t dmu_objset_fsid_guid(objset_t *os);
-int dmu_objset_find(char *name, int func(char *, void *), void *arg,
- int flags);
-void dmu_objset_byteswap(void *buf, size_t size);
-int dmu_objset_evict_dbufs(objset_t *os, int try);
-
-/* called from dsl */
-void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
-objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
- blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
-int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
- objset_impl_t **osip);
-void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DMU_OBJSET_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
deleted file mode 100644
index ea9fa6c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DMU_TRAVERSE_H
-#define _SYS_DMU_TRAVERSE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu.h>
-#include <sys/dnode.h>
-#include <sys/arc.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ADVANCE_POST 0 /* post-order traversal */
-#define ADVANCE_PRE 0x01 /* pre-order traversal */
-#define ADVANCE_PRUNE 0x02 /* prune by prev snapshot birth time */
-#define ADVANCE_DATA 0x04 /* read user data blocks */
-#define ADVANCE_HOLES 0x08 /* visit holes */
-#define ADVANCE_ZIL 0x10 /* visit intent log blocks */
-#define ADVANCE_NOLOCK 0x20 /* Don't grab SPA sync lock */
-
-#define ZB_NO_LEVEL -2
-#define ZB_MAXLEVEL 32 /* Next power of 2 >= DN_MAX_LEVELS */
-#define ZB_MAXBLKID (1ULL << 62)
-#define ZB_MAXOBJSET (1ULL << 62)
-#define ZB_MAXOBJECT (1ULL << 62)
-
-#define ZB_MOS_CACHE 0
-#define ZB_MDN_CACHE 1
-#define ZB_DN_CACHE 2
-#define ZB_DEPTH 3
-
-typedef struct zseg {
- uint64_t seg_mintxg;
- uint64_t seg_maxtxg;
- zbookmark_t seg_start;
- zbookmark_t seg_end;
- list_node_t seg_node;
-} zseg_t;
-
-typedef struct traverse_blk_cache {
- zbookmark_t bc_bookmark;
- blkptr_t bc_blkptr;
- void *bc_data;
- dnode_phys_t *bc_dnode;
- int bc_errno;
- int bc_pad1;
- uint64_t bc_pad2;
-} traverse_blk_cache_t;
-
-typedef int (blkptr_cb_t)(traverse_blk_cache_t *bc, spa_t *spa, void *arg);
-
-struct traverse_handle {
- spa_t *th_spa;
- blkptr_cb_t *th_func;
- void *th_arg;
- uint16_t th_advance;
- uint16_t th_locked;
- int th_zio_flags;
- list_t th_seglist;
- traverse_blk_cache_t th_cache[ZB_DEPTH][ZB_MAXLEVEL];
- traverse_blk_cache_t th_zil_cache;
- uint64_t th_hits;
- uint64_t th_arc_hits;
- uint64_t th_reads;
- uint64_t th_callbacks;
- uint64_t th_syncs;
- uint64_t th_restarts;
- zbookmark_t th_noread;
- zbookmark_t th_lastcb;
-};
-
-int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start,
- int advance, blkptr_cb_t func, void *arg);
-
-traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg,
- int advance, int zio_flags);
-void traverse_fini(traverse_handle_t *th);
-
-void traverse_add_dnode(traverse_handle_t *th,
- uint64_t mintxg, uint64_t maxtxg, uint64_t objset, uint64_t object);
-void traverse_add_objset(traverse_handle_t *th,
- uint64_t mintxg, uint64_t maxtxg, uint64_t objset);
-void traverse_add_pool(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg);
-
-int traverse_more(traverse_handle_t *th);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DMU_TRAVERSE_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
deleted file mode 100644
index 89f4799..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DMU_TX_H
-#define _SYS_DMU_TX_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/txg.h>
-#include <sys/refcount.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dmu_buf_impl;
-struct dmu_tx_hold;
-struct dnode_link;
-struct dsl_pool;
-struct dnode;
-struct dsl_dir;
-
-struct dmu_tx {
- /*
- * No synchronization is needed because a tx can only be handled
- * by one thread.
- */
- list_t tx_holds; /* list of dmu_tx_hold_t */
- objset_t *tx_objset;
- struct dsl_dir *tx_dir;
- struct dsl_pool *tx_pool;
- uint64_t tx_txg;
- uint64_t tx_lastsnap_txg;
- uint64_t tx_lasttried_txg;
- txg_handle_t tx_txgh;
- void *tx_tempreserve_cookie;
- struct dmu_tx_hold *tx_needassign_txh;
- uint8_t tx_anyobj;
- int tx_err;
-#ifdef ZFS_DEBUG
- uint64_t tx_space_towrite;
- uint64_t tx_space_tofree;
- uint64_t tx_space_tooverwrite;
- refcount_t tx_space_written;
- refcount_t tx_space_freed;
-#endif
-};
-
-enum dmu_tx_hold_type {
- THT_NEWOBJECT,
- THT_WRITE,
- THT_BONUS,
- THT_FREE,
- THT_ZAP,
- THT_SPACE,
- THT_NUMTYPES
-};
-
-typedef struct dmu_tx_hold {
- dmu_tx_t *txh_tx;
- list_node_t txh_node;
- struct dnode *txh_dnode;
- uint64_t txh_space_towrite;
- uint64_t txh_space_tofree;
- uint64_t txh_space_tooverwrite;
-#ifdef ZFS_DEBUG
- enum dmu_tx_hold_type txh_type;
- uint64_t txh_arg1;
- uint64_t txh_arg2;
-#endif
-} dmu_tx_hold_t;
-
-
-/*
- * These routines are defined in dmu.h, and are called by the user.
- */
-dmu_tx_t *dmu_tx_create(objset_t *dd);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
-void dmu_tx_commit(dmu_tx_t *tx);
-void dmu_tx_abort(dmu_tx_t *tx);
-uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
-void dmu_tx_wait(dmu_tx_t *tx);
-
-/*
- * These routines are defined in dmu_spa.h, and are called by the SPA.
- */
-extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
-
-/*
- * These routines are only called by the DMU.
- */
-dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
-int dmu_tx_is_syncing(dmu_tx_t *tx);
-int dmu_tx_private_ok(dmu_tx_t *tx);
-void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
-void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
-void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
-int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
-void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
-
-#ifdef ZFS_DEBUG
-#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
-#else
-#define DMU_TX_DIRTY_BUF(tx, db)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DMU_TX_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
deleted file mode 100644
index c94bced..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _DFETCH_H
-#define _DFETCH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern uint64_t zfetch_array_rd_sz;
-
-struct dnode; /* so we can reference dnode */
-
-typedef enum zfetch_dirn {
- ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
- ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
-} zfetch_dirn_t;
-
-typedef struct zstream {
- uint64_t zst_offset; /* offset of starting block in range */
- uint64_t zst_len; /* length of range, in blocks */
- zfetch_dirn_t zst_direction; /* direction of prefetch */
- uint64_t zst_stride; /* length of stride, in blocks */
- uint64_t zst_ph_offset; /* prefetch offset, in blocks */
- uint64_t zst_cap; /* prefetch limit (cap), in blocks */
- kmutex_t zst_lock; /* protects stream */
- clock_t zst_last; /* lbolt of last prefetch */
- avl_node_t zst_node; /* embed avl node here */
-} zstream_t;
-
-typedef struct zfetch {
- krwlock_t zf_rwlock; /* protects zfetch structure */
- list_t zf_stream; /* AVL tree of zstream_t's */
- struct dnode *zf_dnode; /* dnode that owns this zfetch */
- uint32_t zf_stream_cnt; /* # of active streams */
- uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
-} zfetch_t;
-
-void dmu_zfetch_init(zfetch_t *, struct dnode *);
-void dmu_zfetch_rele(zfetch_t *);
-void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DFETCH_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
deleted file mode 100644
index 327e538..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DNODE_H
-#define _SYS_DNODE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/avl.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/zio.h>
-#include <sys/refcount.h>
-#include <sys/dmu_zfetch.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Flags.
- */
-#define DNODE_MUST_BE_ALLOCATED 1
-#define DNODE_MUST_BE_FREE 2
-
-/*
- * Fixed constants.
- */
-#define DNODE_SHIFT 9 /* 512 bytes */
-#define DN_MIN_INDBLKSHIFT 10 /* 1k */
-#define DN_MAX_INDBLKSHIFT 14 /* 16k */
-#define DNODE_BLOCK_SHIFT 14 /* 16k */
-#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
-#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
-#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
-
-/*
- * Derived constants.
- */
-#define DNODE_SIZE (1 << DNODE_SHIFT)
-#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
-#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
-#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
-
-#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
-#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
-#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
-
-/* The +2 here is a cheesy way to round up */
-#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
- (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
-
-#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
- (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
-
-#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
- (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
-
-#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
-
-struct dmu_buf_impl;
-struct objset_impl;
-struct zio;
-
-enum dnode_dirtycontext {
- DN_UNDIRTIED,
- DN_DIRTY_OPEN,
- DN_DIRTY_SYNC
-};
-
-/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
-#define DNODE_FLAG_USED_BYTES (1<<0)
-
-typedef struct dnode_phys {
- uint8_t dn_type; /* dmu_object_type_t */
- uint8_t dn_indblkshift; /* ln2(indirect block size) */
- uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
- uint8_t dn_nblkptr; /* length of dn_blkptr */
- uint8_t dn_bonustype; /* type of data in bonus buffer */
- uint8_t dn_checksum; /* ZIO_CHECKSUM type */
- uint8_t dn_compress; /* ZIO_COMPRESS type */
- uint8_t dn_flags; /* DNODE_FLAG_* */
- uint16_t dn_datablkszsec; /* data block size in 512b sectors */
- uint16_t dn_bonuslen; /* length of dn_bonus */
- uint8_t dn_pad2[4];
-
- /* accounting is protected by dn_dirty_mtx */
- uint64_t dn_maxblkid; /* largest allocated block ID */
- uint64_t dn_used; /* bytes (or sectors) of disk space */
-
- uint64_t dn_pad3[4];
-
- blkptr_t dn_blkptr[1];
- uint8_t dn_bonus[DN_MAX_BONUSLEN];
-} dnode_phys_t;
-
-typedef struct dnode {
- /*
- * dn_struct_rwlock protects the structure of the dnode,
- * including the number of levels of indirection (dn_nlevels),
- * dn_maxblkid, and dn_next_*
- */
- krwlock_t dn_struct_rwlock;
-
- /*
- * Our link on dataset's dd_dnodes list.
- * Protected by dd_accounting_mtx.
- */
- list_node_t dn_link;
-
- /* immutable: */
- struct objset_impl *dn_objset;
- uint64_t dn_object;
- struct dmu_buf_impl *dn_dbuf;
- dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
-
- /*
- * Copies of stuff in dn_phys. They're valid in the open
- * context (eg. even before the dnode is first synced).
- * Where necessary, these are protected by dn_struct_rwlock.
- */
- dmu_object_type_t dn_type; /* object type */
- uint16_t dn_bonuslen; /* bonus length */
- uint8_t dn_bonustype; /* bonus type */
- uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
- uint8_t dn_checksum; /* ZIO_CHECKSUM type */
- uint8_t dn_compress; /* ZIO_COMPRESS type */
- uint8_t dn_nlevels;
- uint8_t dn_indblkshift;
- uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
- uint16_t dn_datablkszsec; /* in 512b sectors */
- uint32_t dn_datablksz; /* in bytes */
- uint64_t dn_maxblkid;
- uint8_t dn_next_nlevels[TXG_SIZE];
- uint8_t dn_next_indblkshift[TXG_SIZE];
- uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
-
- /* protected by os_lock: */
- list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
-
- /* protected by dn_mtx: */
- kmutex_t dn_mtx;
- list_t dn_dirty_records[TXG_SIZE];
- avl_tree_t dn_ranges[TXG_SIZE];
- uint64_t dn_allocated_txg;
- uint64_t dn_free_txg;
- uint64_t dn_assigned_txg;
- kcondvar_t dn_notxholds;
- enum dnode_dirtycontext dn_dirtyctx;
- uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
-
- /* protected by own devices */
- refcount_t dn_tx_holds;
- refcount_t dn_holds;
-
- kmutex_t dn_dbufs_mtx;
- list_t dn_dbufs; /* linked list of descendent dbuf_t's */
- struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
-
- /* parent IO for current sync write */
- zio_t *dn_zio;
-
- /* holds prefetch structure */
- struct zfetch dn_zfetch;
-} dnode_t;
-
-typedef struct free_range {
- avl_node_t fr_node;
- uint64_t fr_blkid;
- uint64_t fr_nblks;
-} free_range_t;
-
-dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
- uint64_t object);
-void dnode_special_close(dnode_t *dn);
-
-int dnode_hold(struct objset_impl *dd, uint64_t object,
- void *ref, dnode_t **dnp);
-int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
- void *ref, dnode_t **dnp);
-void dnode_add_ref(dnode_t *dn, void *ref);
-void dnode_rele(dnode_t *dn, void *ref);
-void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
-void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
-void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-void dnode_free(dnode_t *dn, dmu_tx_t *tx);
-void dnode_byteswap(dnode_phys_t *dnp);
-void dnode_buf_byteswap(void *buf, size_t size);
-void dnode_verify(dnode_t *dn);
-int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
-uint64_t dnode_current_max_length(dnode_t *dn);
-void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
-void dnode_clear_range(dnode_t *dn, uint64_t blkid,
- uint64_t nblks, dmu_tx_t *tx);
-void dnode_diduse_space(dnode_t *dn, int64_t space);
-void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
-void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx);
-uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
-void dnode_init(void);
-void dnode_fini(void);
-int dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *off, int minlvl,
- uint64_t blkfill, uint64_t txg);
-int dnode_evict_dbufs(dnode_t *dn, int try);
-
-#ifdef ZFS_DEBUG
-
-/*
- * There should be a ## between the string literal and fmt, to make it
- * clear that we're joining two strings together, but that piece of shit
- * gcc doesn't support that preprocessor token.
- */
-#define dprintf_dnode(dn, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char __db_buf[32]; \
- uint64_t __db_obj = (dn)->dn_object; \
- if (__db_obj == DMU_META_DNODE_OBJECT) \
- (void) strcpy(__db_buf, "mdn"); \
- else \
- (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
- (u_longlong_t)__db_obj);\
- dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
- __db_buf, __VA_ARGS__); \
- } \
-_NOTE(CONSTCOND) } while (0)
-
-#define DNODE_VERIFY(dn) dnode_verify(dn)
-#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
-
-#else
-
-#define dprintf_dnode(db, fmt, ...)
-#define DNODE_VERIFY(dn)
-#define FREE_VERIFY(db, start, end, tx)
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DNODE_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
deleted file mode 100644
index 8cfc1dc..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DSL_DATASET_H
-#define _SYS_DSL_DATASET_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/zio.h>
-#include <sys/bplist.h>
-#include <sys/dsl_synctask.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dsl_dataset;
-struct dsl_dir;
-struct dsl_pool;
-
-typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
-
-#define DS_FLAG_INCONSISTENT (1ULL<<0)
-/*
- * NB: nopromote can not yet be set, but we want support for it in this
- * on-disk version, so that we don't need to upgrade for it later. It
- * will be needed when we implement 'zfs split' (where the split off
- * clone should not be promoted).
- */
-#define DS_FLAG_NOPROMOTE (1ULL<<1)
-
-typedef struct dsl_dataset_phys {
- uint64_t ds_dir_obj;
- uint64_t ds_prev_snap_obj;
- uint64_t ds_prev_snap_txg;
- uint64_t ds_next_snap_obj;
- uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */
- uint64_t ds_num_children; /* clone/snap children; ==0 for head */
- uint64_t ds_creation_time; /* seconds since 1970 */
- uint64_t ds_creation_txg;
- uint64_t ds_deadlist_obj;
- uint64_t ds_used_bytes;
- uint64_t ds_compressed_bytes;
- uint64_t ds_uncompressed_bytes;
- uint64_t ds_unique_bytes; /* only relevant to snapshots */
- /*
- * The ds_fsid_guid is a 56-bit ID that can change to avoid
- * collisions. The ds_guid is a 64-bit ID that will never
- * change, so there is a small probability that it will collide.
- */
- uint64_t ds_fsid_guid;
- uint64_t ds_guid;
- uint64_t ds_flags;
- blkptr_t ds_bp;
- uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
-} dsl_dataset_phys_t;
-
-typedef struct dsl_dataset {
- /* Immutable: */
- struct dsl_dir *ds_dir;
- dsl_dataset_phys_t *ds_phys;
- dmu_buf_t *ds_dbuf;
- uint64_t ds_object;
-
- /* only used in syncing context: */
- struct dsl_dataset *ds_prev; /* only valid for non-snapshots */
-
- /* has internal locking: */
- bplist_t ds_deadlist;
-
- /* protected by lock on pool's dp_dirty_datasets list */
- txg_node_t ds_dirty_link;
- list_node_t ds_synced_link;
-
- /*
- * ds_phys->ds_<accounting> is also protected by ds_lock.
- * Protected by ds_lock:
- */
- kmutex_t ds_lock;
- void *ds_user_ptr;
- dsl_dataset_evict_func_t *ds_user_evict_func;
- uint64_t ds_open_refcount;
-
- /* no locking; only for making guesses */
- uint64_t ds_trysnap_txg;
-
- /* Protected by ds_lock; keep at end of struct for better locality */
- char ds_snapname[MAXNAMELEN];
-} dsl_dataset_t;
-
-#define dsl_dataset_is_snapshot(ds) \
- ((ds)->ds_phys->ds_num_children != 0)
-
-int dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
- void *tag, dsl_dataset_t **dsp);
-int dsl_dataset_open(const char *name, int mode, void *tag,
- dsl_dataset_t **dsp);
-int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj,
- const char *tail, int mode, void *tag, dsl_dataset_t **);
-void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag);
-uint64_t dsl_dataset_create_sync(dsl_dir_t *pds,
- const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx);
-int dsl_dataset_destroy(const char *name);
-int dsl_snapshots_destroy(char *fsname, char *snapname);
-dsl_checkfunc_t dsl_dataset_snapshot_check;
-dsl_syncfunc_t dsl_dataset_snapshot_sync;
-int dsl_dataset_rollback(dsl_dataset_t *ds);
-int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
-int dsl_dataset_promote(const char *name);
-
-void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
- void *p, dsl_dataset_evict_func_t func);
-void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
-
-blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
-void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
-
-spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
-
-void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
-
-void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
-void dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
- dmu_tx_t *tx);
-int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
-uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
-
-void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
-void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
-void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
-void dsl_dataset_space(dsl_dataset_t *ds,
- uint64_t *refdbytesp, uint64_t *availbytesp,
- uint64_t *usedobjsp, uint64_t *availobjsp);
-uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
-
-void dsl_dataset_create_root(struct dsl_pool *dp, uint64_t *ddobjp,
- dmu_tx_t *tx);
-
-int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
-
-#ifdef ZFS_DEBUG
-#define dprintf_ds(ds, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
- dsl_dataset_name(ds, __ds_name); \
- dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
- kmem_free(__ds_name, MAXNAMELEN); \
- } \
-_NOTE(CONSTCOND) } while (0)
-#else
-#define dprintf_ds(dd, fmt, ...)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DSL_DATASET_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
deleted file mode 100644
index e0595d3..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DSL_DIR_H
-#define _SYS_DSL_DIR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dsl_pool.h>
-#include <sys/dsl_synctask.h>
-#include <sys/refcount.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dsl_dataset;
-
-typedef struct dsl_dir_phys {
- uint64_t dd_creation_time; /* not actually used */
- uint64_t dd_head_dataset_obj;
- uint64_t dd_parent_obj;
- uint64_t dd_clone_parent_obj;
- uint64_t dd_child_dir_zapobj;
- /*
- * how much space our children are accounting for; for leaf
- * datasets, == physical space used by fs + snaps
- */
- uint64_t dd_used_bytes;
- uint64_t dd_compressed_bytes;
- uint64_t dd_uncompressed_bytes;
- /* Administrative quota setting */
- uint64_t dd_quota;
- /* Administrative reservation setting */
- uint64_t dd_reserved;
- uint64_t dd_props_zapobj;
- uint64_t dd_pad[21]; /* pad out to 256 bytes for good measure */
-} dsl_dir_phys_t;
-
-struct dsl_dir {
- /* These are immutable; no lock needed: */
- uint64_t dd_object;
- dsl_dir_phys_t *dd_phys;
- dmu_buf_t *dd_dbuf;
- dsl_pool_t *dd_pool;
-
- /* protected by lock on pool's dp_dirty_dirs list */
- txg_node_t dd_dirty_link;
-
- /* protected by dp_config_rwlock */
- dsl_dir_t *dd_parent;
-
- /* Protected by dd_lock */
- kmutex_t dd_lock;
- list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
-
- /* Accounting */
- /* reflects any changes to dd_phys->dd_used_bytes made this syncing */
- int64_t dd_used_bytes;
- /* gross estimate of space used by in-flight tx's */
- uint64_t dd_tempreserved[TXG_SIZE];
- /* amount of space we expect to write; == amount of dirty data */
- int64_t dd_space_towrite[TXG_SIZE];
-
- /* protected by dd_lock; keep at end of struct for better locality */
- char dd_myname[MAXNAMELEN];
-};
-
-void dsl_dir_close(dsl_dir_t *dd, void *tag);
-int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
-int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
- const char **tailp);
-int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
- const char *tail, void *tag, dsl_dir_t **);
-void dsl_dir_name(dsl_dir_t *dd, char *buf);
-int dsl_dir_namelen(dsl_dir_t *dd);
-int dsl_dir_is_private(dsl_dir_t *dd);
-uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx);
-void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx);
-dsl_checkfunc_t dsl_dir_destroy_check;
-dsl_syncfunc_t dsl_dir_destroy_sync;
-void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
-uint64_t dsl_dir_space_available(dsl_dir_t *dd,
- dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
-void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
-void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
-int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx);
-void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
-void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
-void dsl_dir_diduse_space(dsl_dir_t *dd,
- int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
-int dsl_dir_set_quota(const char *ddname, uint64_t quota);
-int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
-int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
-int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
-
-/* internal reserved dir name */
-#define MOS_DIR_NAME "$MOS"
-
-#ifdef ZFS_DEBUG
-#define dprintf_dd(dd, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \
- KM_SLEEP); \
- dsl_dir_name(dd, __ds_name); \
- dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
- kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \
- } \
-_NOTE(CONSTCOND) } while (0)
-#else
-#define dprintf_dd(dd, fmt, ...)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DSL_DIR_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
deleted file mode 100644
index f7ec67a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DSL_POOL_H
-#define _SYS_DSL_POOL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/txg_impl.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct objset;
-struct dsl_dir;
-
-typedef struct dsl_pool {
- /* Immutable */
- spa_t *dp_spa;
- struct objset *dp_meta_objset;
- struct dsl_dir *dp_root_dir;
- struct dsl_dir *dp_mos_dir;
- uint64_t dp_root_dir_obj;
-
- /* No lock needed - sync context only */
- blkptr_t dp_meta_rootbp;
- list_t dp_synced_objsets;
-
- /* Has its own locking */
- tx_state_t dp_tx;
- txg_list_t dp_dirty_datasets;
- txg_list_t dp_dirty_dirs;
- txg_list_t dp_sync_tasks;
-
- /*
- * Protects administrative changes (properties, namespace).
- * It is only held for write in syncing context. Therefore
- * syncing context does not need to ever have it for read, since
- * nobody else could possibly have it for write.
- */
- krwlock_t dp_config_rwlock;
-} dsl_pool_t;
-
-int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
-void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, uint64_t txg);
-void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
-void dsl_pool_zil_clean(dsl_pool_t *dp);
-int dsl_pool_sync_context(dsl_pool_t *dp);
-uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DSL_POOL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
deleted file mode 100644
index d2debff..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DSL_PROP_H
-#define _SYS_DSL_PROP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/dsl_pool.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dsl_dataset;
-
-/* The callback func may not call into the DMU or DSL! */
-typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
-
-typedef struct dsl_prop_cb_record {
- list_node_t cbr_node; /* link on dd_prop_cbs */
- struct dsl_dataset *cbr_ds;
- const char *cbr_propname;
- dsl_prop_changed_cb_t *cbr_func;
- void *cbr_arg;
-} dsl_prop_cb_record_t;
-
-int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
- dsl_prop_changed_cb_t *callback, void *cbarg);
-int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
- dsl_prop_changed_cb_t *callback, void *cbarg);
-int dsl_prop_numcb(struct dsl_dataset *ds);
-
-int dsl_prop_get(const char *ddname, const char *propname,
- int intsz, int numints, void *buf, char *setpoint);
-int dsl_prop_get_integer(const char *ddname, const char *propname,
- uint64_t *valuep, char *setpoint);
-int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
-
-int dsl_prop_set(const char *ddname, const char *propname,
- int intsz, int numints, const void *buf);
-int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
- int intsz, int numints, const void *buf);
-
-void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
-void dsl_prop_nvlist_add_string(nvlist_t *nv,
- zfs_prop_t prop, const char *value);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DSL_PROP_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
deleted file mode 100644
index e695b18..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DSL_SYNCTASK_H
-#define _SYS_DSL_SYNCTASK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/txg.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct dsl_pool;
-
-typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
-typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
-
-typedef struct dsl_sync_task {
- list_node_t dst_node;
- dsl_checkfunc_t *dst_checkfunc;
- dsl_syncfunc_t *dst_syncfunc;
- void *dst_arg1;
- void *dst_arg2;
- int dst_err;
-} dsl_sync_task_t;
-
-typedef struct dsl_sync_task_group {
- txg_node_t dstg_node;
- list_t dstg_tasks;
- struct dsl_pool *dstg_pool;
- uint64_t dstg_txg;
- int dstg_err;
- int dstg_space;
-} dsl_sync_task_group_t;
-
-dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
-void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
- dsl_checkfunc_t *, dsl_syncfunc_t *,
- void *arg1, void *arg2, int blocks_modified);
-int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
-
-int dsl_sync_task_do(struct dsl_pool *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DSL_SYNCTASK_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
deleted file mode 100644
index 095dd3c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_METASLAB_H
-#define _SYS_METASLAB_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/space_map.h>
-#include <sys/txg.h>
-#include <sys/zio.h>
-#include <sys/avl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct metaslab_class metaslab_class_t;
-typedef struct metaslab_group metaslab_group_t;
-
-extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
- uint64_t start, uint64_t size, uint64_t txg);
-extern void metaslab_fini(metaslab_t *msp);
-extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
-extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
-
-extern int metaslab_alloc(spa_t *spa, uint64_t psize, blkptr_t *bp,
- int ncopies, uint64_t txg, blkptr_t *hintbp, boolean_t hintbp_avoid);
-extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
- boolean_t now);
-extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
-
-extern metaslab_class_t *metaslab_class_create(void);
-extern void metaslab_class_destroy(metaslab_class_t *mc);
-extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
-extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
-
-extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
- vdev_t *vd);
-extern void metaslab_group_destroy(metaslab_group_t *mg);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_METASLAB_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
deleted file mode 100644
index 5980cbc..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_METASLAB_IMPL_H
-#define _SYS_METASLAB_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/metaslab.h>
-#include <sys/space_map.h>
-#include <sys/vdev.h>
-#include <sys/txg.h>
-#include <sys/avl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct metaslab_class {
- metaslab_group_t *mc_rotor;
- uint64_t mc_allocated;
-};
-
-struct metaslab_group {
- kmutex_t mg_lock;
- avl_tree_t mg_metaslab_tree;
- uint64_t mg_aliquot;
- int64_t mg_bias;
- metaslab_class_t *mg_class;
- vdev_t *mg_vd;
- metaslab_group_t *mg_prev;
- metaslab_group_t *mg_next;
-};
-
-/*
- * Each metaslab's free space is tracked in a space map object in the MOS,
- * which is only updated in syncing context. Each time we sync a txg,
- * we append the allocs and frees from that txg to the space map object.
- * When the txg is done syncing, metaslab_sync_done() updates ms_smo
- * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
- */
-struct metaslab {
- kmutex_t ms_lock; /* metaslab lock */
- space_map_obj_t ms_smo; /* synced space map object */
- space_map_obj_t ms_smo_syncing; /* syncing space map object */
- space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
- space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
- space_map_t ms_map; /* in-core free space map */
- uint64_t ms_weight; /* weight vs. others in group */
- metaslab_group_t *ms_group; /* metaslab group */
- avl_node_t ms_group_node; /* node in metaslab group tree */
- txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_METASLAB_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
deleted file mode 100644
index 4de1cae..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_REFCOUNT_H
-#define _SYS_REFCOUNT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/list.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * If the reference is held only by the calling function and not any
- * particular object, use FTAG (which is a string) for the holder_tag.
- * Otherwise, use the object that holds the reference.
- */
-#define FTAG ((char *)__func__)
-
-#if defined(DEBUG) || !defined(_KERNEL)
-typedef struct reference {
- list_node_t ref_link;
- void *ref_holder;
- uint64_t ref_number;
- uint8_t *ref_removed;
-} reference_t;
-
-typedef struct refcount {
- kmutex_t rc_mtx;
- list_t rc_list;
- list_t rc_removed;
- int64_t rc_count;
- int64_t rc_removed_count;
-} refcount_t;
-
-/* Note: refcount_t should be initialized to zero before use. */
-
-void refcount_create(refcount_t *rc);
-void refcount_destroy(refcount_t *rc);
-void refcount_destroy_many(refcount_t *rc, uint64_t number);
-int refcount_is_zero(refcount_t *rc);
-int64_t refcount_count(refcount_t *rc);
-int64_t refcount_add(refcount_t *rc, void *holder_tag);
-int64_t refcount_remove(refcount_t *rc, void *holder_tag);
-int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
-int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
-
-void refcount_init(void);
-void refcount_fini(void);
-
-#else /* !DEBUG && _KERNEL */
-/* Untracked variant: a bare 64-bit counter; holder tags are ignored. */
-typedef struct refcount {
- uint64_t rc_count;
-} refcount_t;
-
-#define refcount_create(rc) ((rc)->rc_count = 0)
-#define refcount_destroy(rc) ((rc)->rc_count = 0)
-#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
-#define refcount_is_zero(rc) ((rc)->rc_count == 0)
-#define refcount_count(rc) ((rc)->rc_count)
-#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
-#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
-#define refcount_add_many(rc, number, holder) \
- atomic_add_64_nv(&(rc)->rc_count, (number))
-#define refcount_remove_many(rc, number, holder) \
- atomic_add_64_nv(&(rc)->rc_count, -(number)) /* negate the whole argument */
-
-#define refcount_init()
-#define refcount_fini()
-
-#endif /* DEBUG || !_KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_REFCOUNT_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
deleted file mode 100644
index f0eb2e1..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SPA_H
-#define _SYS_SPA_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/avl.h>
-#include <sys/zfs_context.h>
-#include <sys/nvpair.h>
-#include <sys/sysmacros.h>
-#include <sys/types.h>
-#include <sys/fs/zfs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Forward references that lots of things need.
- */
-typedef struct spa spa_t;
-typedef struct vdev vdev_t;
-typedef struct metaslab metaslab_t;
-typedef struct zilog zilog_t;
-typedef struct traverse_handle traverse_handle_t;
-struct dsl_pool;
-
-/*
- * General-purpose 32-bit and 64-bit bitfield encodings.
- */
-#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len))
-#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len))
-#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low))
-#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low))
-
-#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
-#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
-/* SET rewrites the field in place; x must be an lvalue and is evaluated twice. */
-#define BF32_SET(x, low, len, val) \
- ((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
-#define BF64_SET(x, low, len, val) \
- ((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))
-
-#define BF32_GET_SB(x, low, len, shift, bias) \
- ((BF32_GET(x, low, len) + (bias)) << (shift))
-#define BF64_GET_SB(x, low, len, shift, bias) \
- ((BF64_GET(x, low, len) + (bias)) << (shift))
-
-#define BF32_SET_SB(x, low, len, shift, bias, val) \
- BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
-#define BF64_SET_SB(x, low, len, shift, bias, val) \
- BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
-
-/*
- * We currently support nine block sizes, from 512 bytes to 128K.
- * We could go higher, but the benefits are near-zero and the cost
- * of COWing a giant block to modify one byte would become excessive.
- */
-#define SPA_MINBLOCKSHIFT 9
-#define SPA_MAXBLOCKSHIFT 17
-#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
-#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
-
-#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
-
-/*
- * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
- * The ASIZE encoding should be at least 64 times larger (6 more bits)
- * to support up to 4-way RAID-Z mirror mode with worst-case gang block
- * overhead, three DVAs per bp, plus one more bit in case we do anything
- * else that expands the ASIZE.
- */
-#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */
-#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
-#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
-
-/*
- * All SPA data is represented by 128-bit data virtual addresses (DVAs).
- * The members of the dva_t should be considered opaque outside the SPA.
- */
-typedef struct dva {
- uint64_t dva_word[2];
-} dva_t;
-
-/*
- * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
- */
-typedef struct zio_cksum {
- uint64_t zc_word[4];
-} zio_cksum_t;
-
-/*
- * Each block is described by its DVAs, time of birth, checksum, etc.
- * The word-by-word, bit-by-bit layout of the blkptr is as follows:
- *
- * 64 56 48 40 32 24 16 8 0
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | vdev1 | GRID | ASIZE |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 1 |G| offset1 |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | vdev2 | GRID | ASIZE |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 3 |G| offset2 |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 4 | vdev3 | GRID | ASIZE |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 5 |G| offset3 |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 7 | padding |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 8 | padding |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * 9 | padding |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * a | birth txg |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * b | fill count |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * c | checksum[0] |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * d | checksum[1] |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * e | checksum[2] |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- * f | checksum[3] |
- * +-------+-------+-------+-------+-------+-------+-------+-------+
- *
- * Legend:
- *
- * vdev virtual device ID
- * offset offset into virtual device
- * LSIZE logical size
- * PSIZE physical size (after compression)
- * ASIZE allocated size (including RAID-Z parity and gang block headers)
- * GRID RAID-Z layout information (reserved for future use)
- * cksum checksum function
- * comp compression function
- * G gang block indicator
- * E endianness
- * type DMU object type
- * lvl level of indirection
- * birth txg transaction group in which the block was born
- * fill count number of non-zero blocks under this bp
- * checksum[4] 256-bit checksum of the data this bp describes
- */
-typedef struct blkptr {
- dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
- uint64_t blk_prop; /* size, compression, type, etc */
- uint64_t blk_pad[3]; /* Extra space for the future */
- uint64_t blk_birth; /* transaction group at birth */
- uint64_t blk_fill; /* fill count */
- zio_cksum_t blk_cksum; /* 256-bit checksum */
-} blkptr_t;
-
-#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
-#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
-
-/*
- * Macros to get and set fields in a bp or DVA.
- */
-#define DVA_GET_ASIZE(dva) \
- BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
-#define DVA_SET_ASIZE(dva, x) \
- BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
-
-#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
-#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
-
-#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
-#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
-
-#define DVA_GET_OFFSET(dva) \
- BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
-#define DVA_SET_OFFSET(dva, x) \
- BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
-
-#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1)
-#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
-
-#define BP_GET_LSIZE(bp) \
- (BP_IS_HOLE(bp) ? 0 : \
- BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
-#define BP_SET_LSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
-
-#define BP_GET_PSIZE(bp) \
- BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
-#define BP_SET_PSIZE(bp, x) \
- BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
-
-#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
-#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
-
-#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
-#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
-
-#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
-#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
-
-#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
-#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
-
-#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
-#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
-
-#define BP_GET_ASIZE(bp) \
- (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
-
-#define BP_GET_UCSIZE(bp) \
- ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
- BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) /* pure expression: no trailing ';' */
-
-#define BP_GET_NDVAS(bp) \
- (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
- !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- !!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
-
-#define BP_COUNT_GANG(bp) \
- (DVA_GET_GANG(&(bp)->blk_dva[0]) + \
- DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2]))
-
-#define DVA_EQUAL(dva1, dva2) \
- ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
- (dva1)->dva_word[0] == (dva2)->dva_word[0])
-
-#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
- (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
- ((zc1).zc_word[1] - (zc2).zc_word[1]) | \
- ((zc1).zc_word[2] - (zc2).zc_word[2]) | \
- ((zc1).zc_word[3] - (zc2).zc_word[3])))
-
-
-#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
-
-#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
-{ \
- (zcp)->zc_word[0] = w0; \
- (zcp)->zc_word[1] = w1; \
- (zcp)->zc_word[2] = w2; \
- (zcp)->zc_word[3] = w3; \
-}
-
-#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
-#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
-#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
-#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
-
-#define BP_ZERO(bp) \
-{ \
- (bp)->blk_dva[0].dva_word[0] = 0; \
- (bp)->blk_dva[0].dva_word[1] = 0; \
- (bp)->blk_dva[1].dva_word[0] = 0; \
- (bp)->blk_dva[1].dva_word[1] = 0; \
- (bp)->blk_dva[2].dva_word[0] = 0; \
- (bp)->blk_dva[2].dva_word[1] = 0; \
- (bp)->blk_prop = 0; \
- (bp)->blk_pad[0] = 0; \
- (bp)->blk_pad[1] = 0; \
- (bp)->blk_pad[2] = 0; \
- (bp)->blk_birth = 0; \
- (bp)->blk_fill = 0; \
- ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
-}
-
-/*
- * Note: the byteorder is either 0 or -1, both of which are palindromes.
- * This simplifies the endianness handling a bit.
- */
-#if BYTE_ORDER == _BIG_ENDIAN
-#define ZFS_HOST_BYTEORDER (0ULL)
-#else
-#define ZFS_HOST_BYTEORDER (-1ULL)
-#endif
-
-#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
-
-#define BP_SPRINTF_LEN 320
-
-#include <sys/dmu.h>
-
-#define BP_GET_BUFC_TYPE(bp) \
- (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA) /* pure expression: no trailing ';' */
-/*
- * Routines found in spa.c
- */
-
-/* state manipulation functions */
-extern int spa_open(const char *pool, spa_t **, void *tag);
-extern int spa_get_stats(const char *pool, nvlist_t **config,
- char *altroot, size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, const char *altroot);
-extern int spa_import(const char *pool, nvlist_t *config, const char *altroot);
-extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
-extern int spa_destroy(char *pool);
-extern int spa_export(char *pool, nvlist_t **oldconfig);
-extern int spa_reset(char *pool);
-extern void spa_async_request(spa_t *spa, int flag);
-extern void spa_async_suspend(spa_t *spa);
-extern void spa_async_resume(spa_t *spa);
-extern spa_t *spa_inject_addref(char *pool);
-extern void spa_inject_delref(spa_t *spa);
-
-#define SPA_ASYNC_REOPEN 0x01
-#define SPA_ASYNC_REPLACE_DONE 0x02
-#define SPA_ASYNC_SCRUB 0x04
-#define SPA_ASYNC_RESILVER 0x08
-#define SPA_ASYNC_CONFIG_UPDATE 0x10
-
-/* device manipulation */
-extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
-extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
- int replacing);
-extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
-extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
-extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
-
-/* spare state (which is global across all pools) */
-extern void spa_spare_add(vdev_t *vd);
-extern void spa_spare_remove(vdev_t *vd);
-extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool);
-extern void spa_spare_activate(vdev_t *vd);
-
-/* scrubbing */
-extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
-extern void spa_scrub_suspend(spa_t *spa);
-extern void spa_scrub_resume(spa_t *spa);
-extern void spa_scrub_restart(spa_t *spa, uint64_t txg);
-
-/* spa syncing */
-extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
-extern void spa_sync_allpools(void);
-
-/*
- * SPA configuration functions in spa_config.c
- */
-
-#define SPA_CONFIG_UPDATE_POOL 0
-#define SPA_CONFIG_UPDATE_VDEVS 1
-
-extern void spa_config_sync(void);
-extern void spa_config_load(void);
-extern nvlist_t *spa_all_configs(uint64_t *);
-extern void spa_config_set(spa_t *spa, nvlist_t *config);
-extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
- int getstats);
-extern void spa_config_update(spa_t *spa, int what);
-
-/*
- * Miscellaneous SPA routines in spa_misc.c
- */
-
-/* Namespace manipulation */
-extern spa_t *spa_lookup(const char *name);
-extern spa_t *spa_add(const char *name, const char *altroot);
-extern void spa_remove(spa_t *spa);
-extern spa_t *spa_next(spa_t *prev);
-
-/* Refcount functions */
-extern void spa_open_ref(spa_t *spa, void *tag);
-extern void spa_close(spa_t *spa, void *tag);
-extern boolean_t spa_refcount_zero(spa_t *spa);
-
-/* Pool configuration lock */
-extern void spa_config_enter(spa_t *spa, krw_t rw, void *tag);
-extern void spa_config_exit(spa_t *spa, void *tag);
-extern boolean_t spa_config_held(spa_t *spa, krw_t rw);
-
-/* Pool vdev add/remove lock */
-extern uint64_t spa_vdev_enter(spa_t *spa);
-extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
-
-/* Accessor functions */
-extern krwlock_t *spa_traverse_rwlock(spa_t *spa);
-extern int spa_traverse_wanted(spa_t *spa);
-extern struct dsl_pool *spa_get_dsl(spa_t *spa);
-extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
-extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
-extern void spa_altroot(spa_t *, char *, size_t);
-extern int spa_sync_pass(spa_t *spa);
-extern char *spa_name(spa_t *spa);
-extern uint64_t spa_guid(spa_t *spa);
-extern uint64_t spa_last_synced_txg(spa_t *spa);
-extern uint64_t spa_first_txg(spa_t *spa);
-extern uint64_t spa_version(spa_t *spa);
-extern int spa_state(spa_t *spa);
-extern uint64_t spa_freeze_txg(spa_t *spa);
-struct metaslab_class;
-extern struct metaslab_class *spa_metaslab_class_select(spa_t *spa);
-extern uint64_t spa_get_alloc(spa_t *spa);
-extern uint64_t spa_get_space(spa_t *spa);
-extern uint64_t spa_get_dspace(spa_t *spa);
-extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
-extern uint64_t spa_version(spa_t *spa);
-extern int spa_max_replication(spa_t *spa);
-extern int spa_busy(void);
-
-/* Miscellaneous support routines */
-extern int spa_rename(const char *oldname, const char *newname);
-extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
-extern char *spa_strdup(const char *);
-extern void spa_strfree(char *);
-extern uint64_t spa_get_random(uint64_t range);
-extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
-extern void spa_freeze(spa_t *spa);
-extern void spa_upgrade(spa_t *spa);
-extern void spa_evict_all(void);
-extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
-extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
-extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
-
-/* history logging */
-extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
-extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
- char *his_buf);
-extern int spa_history_log(spa_t *spa, const char *his_buf,
- uint64_t pool_create);
-
-/* error handling */
-struct zbookmark;
-struct zio;
-extern void spa_log_error(spa_t *spa, struct zio *zio);
-extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
- struct zio *zio, uint64_t stateoroffset, uint64_t length);
-extern void zfs_post_ok(spa_t *spa, vdev_t *vd);
-extern uint64_t spa_get_errlog_size(spa_t *spa);
-extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
-extern void spa_errlog_rotate(spa_t *spa);
-extern void spa_errlog_drain(spa_t *spa);
-extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
-extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
-
-/* Initialization and termination */
-extern void spa_init(int flags);
-extern void spa_fini(void);
-
-/* properties */
-extern int spa_set_props(spa_t *spa, nvlist_t *nvp);
-extern int spa_get_props(spa_t *spa, nvlist_t **nvp);
-extern void spa_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
-extern boolean_t spa_has_bootfs(spa_t *spa);
-
-#ifdef ZFS_DEBUG
-#define dprintf_bp(bp, fmt, ...) do { \
- if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
- sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
- dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
- kmem_free(__blkbuf, BP_SPRINTF_LEN); \
- } \
-_NOTE(CONSTCOND) } while (0)
-#else
-#define dprintf_bp(bp, fmt, ...)
-#endif
-
-extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SPA_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
deleted file mode 100644
index 8c57123..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SPA_IMPL_H
-#define _SYS_SPA_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/vdev.h>
-#include <sys/metaslab.h>
-#include <sys/dmu.h>
-#include <sys/dsl_pool.h>
-#include <sys/uberblock_impl.h>
-#include <sys/zfs_context.h>
-#include <sys/avl.h>
-#include <sys/refcount.h>
-#include <sys/bplist.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct spa_config_lock {
- kmutex_t scl_lock;
- refcount_t scl_count;
- kthread_t *scl_writer;
- kcondvar_t scl_cv;
-} spa_config_lock_t;
-
-typedef struct spa_error_entry {
- zbookmark_t se_bookmark;
- char *se_name;
- avl_node_t se_avl;
-} spa_error_entry_t;
-
-typedef struct spa_history_phys {
- uint64_t sh_pool_create_len; /* ending offset of zpool create */
- uint64_t sh_phys_max_off; /* physical EOF */
- uint64_t sh_bof; /* logical BOF */
- uint64_t sh_eof; /* logical EOF */
- uint64_t sh_records_lost; /* num of records overwritten */
-} spa_history_phys_t;
-
-typedef struct spa_props {
- nvlist_t *spa_props_nvp;
- list_node_t spa_list_node;
-} spa_props_t;
-
-struct spa {
- /*
- * Fields protected by spa_namespace_lock.
- */
- char *spa_name; /* pool name */
- avl_node_t spa_avl; /* node in spa_namespace_avl */
- nvlist_t *spa_config; /* last synced config */
- nvlist_t *spa_config_syncing; /* currently syncing config */
- uint64_t spa_config_txg; /* txg of last config change */
- kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */
- int spa_sync_pass; /* iterate-to-convergence */
- int spa_state; /* pool state */
- int spa_inject_ref; /* injection references */
- uint8_t spa_traverse_wanted; /* traverse lock wanted */
- uint8_t spa_sync_on; /* sync threads are running */
- spa_load_state_t spa_load_state; /* current load operation */
- taskq_t *spa_zio_issue_taskq[ZIO_TYPES];
- taskq_t *spa_zio_intr_taskq[ZIO_TYPES];
- dsl_pool_t *spa_dsl_pool;
- metaslab_class_t *spa_normal_class; /* normal data class */
- uint64_t spa_first_txg; /* first txg after spa_open() */
- uint64_t spa_final_txg; /* txg of export/destroy */
- uint64_t spa_freeze_txg; /* freeze pool at this txg */
- objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
- txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
- vdev_t *spa_root_vdev; /* top-level vdev container */
- uint64_t spa_load_guid; /* initial guid for spa_load */
- list_t spa_dirty_list; /* vdevs with dirty labels */
- uint64_t spa_spares_object; /* MOS object for spare list */
- nvlist_t *spa_sparelist; /* cached spare config */
- vdev_t **spa_spares; /* available hot spares */
- int spa_nspares; /* number of hot spares */
- boolean_t spa_sync_spares; /* sync the spares list */
- uint64_t spa_config_object; /* MOS object for pool config */
- uint64_t spa_syncing_txg; /* txg currently syncing */
- uint64_t spa_sync_bplist_obj; /* object for deferred frees */
- bplist_t spa_sync_bplist; /* deferred-free bplist */
- krwlock_t spa_traverse_lock; /* traverse vs. spa_sync() */
- uberblock_t spa_ubsync; /* last synced uberblock */
- uberblock_t spa_uberblock; /* current uberblock */
- kmutex_t spa_scrub_lock; /* resilver/scrub lock */
- kthread_t *spa_scrub_thread; /* scrub/resilver thread */
- traverse_handle_t *spa_scrub_th; /* scrub traverse handle */
- uint64_t spa_scrub_restart_txg; /* need to restart */
- uint64_t spa_scrub_mintxg; /* min txg we'll scrub */
- uint64_t spa_scrub_maxtxg; /* max txg we'll scrub */
- uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
- uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
- uint64_t spa_scrub_errors; /* scrub I/O error count */
- int spa_scrub_suspended; /* tell scrubber to suspend */
- kcondvar_t spa_scrub_cv; /* scrub thread state change */
- kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
- uint8_t spa_scrub_stop; /* tell scrubber to stop */
- uint8_t spa_scrub_active; /* active or suspended? */
- uint8_t spa_scrub_type; /* type of scrub we're doing */
- uint8_t spa_scrub_finished; /* indicator to rotate logs */
- kmutex_t spa_async_lock; /* protect async state */
- kthread_t *spa_async_thread; /* thread doing async task */
- int spa_async_suspended; /* async tasks suspended */
- kcondvar_t spa_async_cv; /* wait for thread_exit() */
- uint16_t spa_async_tasks; /* async task mask */
- char *spa_root; /* alternate root directory */
- kmutex_t spa_uberblock_lock; /* vdev_uberblock_load_done() */
- uint64_t spa_ena; /* spa-wide ereport ENA */
- boolean_t spa_last_open_failed; /* true if last open faled */
- kmutex_t spa_errlog_lock; /* error log lock */
- uint64_t spa_errlog_last; /* last error log object */
- uint64_t spa_errlog_scrub; /* scrub error log object */
- kmutex_t spa_errlist_lock; /* error list/ereport lock */
- avl_tree_t spa_errlist_last; /* last error list */
- avl_tree_t spa_errlist_scrub; /* scrub error list */
- uint64_t spa_deflate; /* should we deflate? */
- uint64_t spa_history; /* history object */
- kmutex_t spa_history_lock; /* history lock */
- vdev_t *spa_pending_vdev; /* pending vdev additions */
- nvlist_t **spa_pending_spares; /* pending spare additions */
- uint_t spa_pending_nspares; /* # pending spares */
- kmutex_t spa_props_lock; /* property lock */
- uint64_t spa_pool_props_object; /* object for properties */
- uint64_t spa_bootfs; /* default boot filesystem */
- /*
- * spa_refcnt must be the last element because it changes size based on
- * compilation options. In order for the MDB module to function
- * correctly, the other fields must remain in the same location.
- */
- spa_config_lock_t spa_config_lock; /* configuration changes */
- refcount_t spa_refcount; /* number of opens */
-};
-
-extern const char *spa_config_dir;
-extern kmutex_t spa_namespace_lock;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SPA_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
deleted file mode 100644
index db9daef..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SPACE_MAP_H
-#define _SYS_SPACE_MAP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/avl.h>
-#include <sys/dmu.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct space_map_ops space_map_ops_t;
-
-typedef struct space_map {
- avl_tree_t sm_root; /* AVL tree of map segments */
- uint64_t sm_space; /* sum of all segments in the map */
- uint64_t sm_start; /* start of map */
- uint64_t sm_size; /* size of map */
- uint8_t sm_shift; /* unit shift */
- uint8_t sm_pad[3]; /* unused */
- uint8_t sm_loaded; /* map loaded? */
- uint8_t sm_loading; /* map loading? */
- kcondvar_t sm_load_cv; /* map load completion */
- space_map_ops_t *sm_ops; /* space map block picker ops vector */
- void *sm_ppd; /* picker-private data */
- kmutex_t *sm_lock; /* pointer to lock that protects map */
-} space_map_t;
-
-typedef struct space_seg {
- avl_node_t ss_node; /* AVL node */
- uint64_t ss_start; /* starting offset of this segment */
- uint64_t ss_end; /* ending offset (non-inclusive) */
-} space_seg_t;
-
-typedef struct space_map_obj {
- uint64_t smo_object; /* on-disk space map object */
- uint64_t smo_objsize; /* size of the object */
- uint64_t smo_alloc; /* space allocated from the map */
-} space_map_obj_t;
-
-struct space_map_ops {
- void (*smop_load)(space_map_t *sm);
- void (*smop_unload)(space_map_t *sm);
- uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
- void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
- void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
-};
-
-/*
- * debug entry
- *
- * 1 3 10 50
- * ,---+--------+------------+---------------------------------.
- * | 1 | action | syncpass | txg (lower bits) |
- * `---+--------+------------+---------------------------------'
- * 63 62 60 59 50 49 0
- *
- *
- *
- * non-debug entry
- *
- * 1 47 1 15
- * ,-----------------------------------------------------------.
- * | 0 | offset (sm_shift units) | type | run |
- * `-----------------------------------------------------------'
- * 63 62 17 16 15 0
- */
-
-/* All this stuff takes and returns bytes */
-#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
-#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
-#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
-#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
-#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
-#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
-#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
-#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
-
-#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
-#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
-
-#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
-#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
-
-#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
-#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
-
-#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
-
-#define SM_ALLOC 0x0
-#define SM_FREE 0x1
-
-/*
- * The data for a given space map can be kept on blocks of any size.
- * Larger blocks entail fewer i/o operations, but they also cause the
- * DMU to keep more data in-core, and also to waste more i/o bandwidth
- * when only a few blocks have changed since the last transaction group.
- * This could use a lot more research, but for now, set the freelist
- * block size to 4k (2^12).
- */
-#define SPACE_MAP_BLOCKSHIFT 12
-
-typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
-
-extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
- uint8_t shift, kmutex_t *lp);
-extern void space_map_destroy(space_map_t *sm);
-extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
-extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
-extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
-extern void space_map_vacate(space_map_t *sm,
- space_map_func_t *func, space_map_t *mdest);
-extern void space_map_walk(space_map_t *sm,
- space_map_func_t *func, space_map_t *mdest);
-extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
-extern void space_map_union(space_map_t *smd, space_map_t *sms);
-
-extern void space_map_load_wait(space_map_t *sm);
-extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
- uint8_t maptype, space_map_obj_t *smo, objset_t *os);
-extern void space_map_unload(space_map_t *sm);
-
-extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
-extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
-extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
-
-extern void space_map_sync(space_map_t *sm, uint8_t maptype,
- space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
-extern void space_map_truncate(space_map_obj_t *smo,
- objset_t *os, dmu_tx_t *tx);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SPACE_MAP_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
deleted file mode 100644
index dae129c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_TXG_H
-#define _SYS_TXG_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */
-#define TXG_SIZE 4 /* next power of 2 */
-#define TXG_MASK (TXG_SIZE - 1) /* mask for size */
-#define TXG_INITIAL TXG_SIZE /* initial txg */
-#define TXG_IDX (txg & TXG_MASK)
-
-#define TXG_WAIT 1ULL
-#define TXG_NOWAIT 2ULL
-
-typedef struct tx_cpu tx_cpu_t;
-
-typedef struct txg_handle {
- tx_cpu_t *th_cpu;
- uint64_t th_txg;
-} txg_handle_t;
-
-typedef struct txg_node {
- struct txg_node *tn_next[TXG_SIZE];
- uint8_t tn_member[TXG_SIZE];
-} txg_node_t;
-
-typedef struct txg_list {
- kmutex_t tl_lock;
- size_t tl_offset;
- txg_node_t *tl_head[TXG_SIZE];
-} txg_list_t;
-
-struct dsl_pool;
-
-extern void txg_init(struct dsl_pool *dp, uint64_t txg);
-extern void txg_fini(struct dsl_pool *dp);
-extern void txg_sync_start(struct dsl_pool *dp);
-extern void txg_sync_stop(struct dsl_pool *dp);
-extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
-extern void txg_rele_to_quiesce(txg_handle_t *txghp);
-extern void txg_rele_to_sync(txg_handle_t *txghp);
-extern void txg_suspend(struct dsl_pool *dp);
-extern void txg_resume(struct dsl_pool *dp);
-
-/*
- * Wait until the given transaction group has finished syncing.
- * Try to make this happen as soon as possible (eg. kick off any
- * necessary syncs immediately). If txg==0, wait for the currently open
- * txg to finish syncing.
- */
-extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
-
-/*
- * Wait until the given transaction group, or one after it, is
- * the open transaction group. Try to make this happen as soon
- * as possible (eg. kick off any necessary syncs immediately).
- * If txg == 0, wait for the next open txg.
- */
-extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
-
-/*
- * Returns TRUE if we are "backed up" waiting for the syncing
- * transaction to complete; otherwise returns FALSE.
- */
-extern int txg_stalled(struct dsl_pool *dp);
-
-/*
- * Per-txg object lists.
- */
-
-#define TXG_CLEAN(txg) ((txg) - 1)
-
-extern void txg_list_create(txg_list_t *tl, size_t offset);
-extern void txg_list_destroy(txg_list_t *tl);
-extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
-extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
-extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
-extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
-extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
-extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
-extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_TXG_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
deleted file mode 100644
index 45a138a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_TXG_IMPL_H
-#define _SYS_TXG_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/txg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct tx_cpu {
- kmutex_t tc_lock;
- kcondvar_t tc_cv[TXG_SIZE];
- uint64_t tc_count[TXG_SIZE];
- char tc_pad[16];
-};
-
-typedef struct tx_state {
- tx_cpu_t *tx_cpu; /* protects right to enter txg */
- kmutex_t tx_sync_lock; /* protects tx_state_t */
- krwlock_t tx_suspend;
- uint64_t tx_open_txg; /* currently open txg id */
- uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
- uint64_t tx_syncing_txg; /* currently syncing txg id */
- uint64_t tx_synced_txg; /* last synced txg id */
-
- uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
- uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
-
- kcondvar_t tx_sync_more_cv;
- kcondvar_t tx_sync_done_cv;
- kcondvar_t tx_quiesce_more_cv;
- kcondvar_t tx_quiesce_done_cv;
- kcondvar_t tx_timeout_exit_cv;
- kcondvar_t tx_exit_cv; /* wait for all threads to exit */
-
- uint8_t tx_threads; /* number of threads */
- uint8_t tx_exiting; /* set when we're exiting */
-
- kthread_t *tx_sync_thread;
- kthread_t *tx_quiesce_thread;
- kthread_t *tx_timelimit_thread;
-} tx_state_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_TXG_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock.h
deleted file mode 100644
index 93d936a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_UBERBLOCK_H
-#define _SYS_UBERBLOCK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/vdev.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct uberblock uberblock_t;
-
-extern int uberblock_verify(uberblock_t *ub);
-extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_UBERBLOCK_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
deleted file mode 100644
index ab0f2dc..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/uberblock_impl.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_UBERBLOCK_IMPL_H
-#define _SYS_UBERBLOCK_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/uberblock.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * The uberblock version is incremented whenever an incompatible on-disk
- * format change is made to the SPA, DMU, or ZAP.
- *
- * Note: the first two fields should never be moved. When a storage pool
- * is opened, the uberblock must be read off the disk before the version
- * can be checked. If the ub_version field is moved, we may not detect
- * version mismatch. If the ub_magic field is moved, applications that
- * expect the magic number in the first word won't work.
- */
-#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
-#define UBERBLOCK_SHIFT 10 /* up to 1K */
-
-struct uberblock {
- uint64_t ub_magic; /* UBERBLOCK_MAGIC */
- uint64_t ub_version; /* ZFS_VERSION */
- uint64_t ub_txg; /* txg of last sync */
- uint64_t ub_guid_sum; /* sum of all vdev guids */
- uint64_t ub_timestamp; /* UTC time of last sync */
- blkptr_t ub_rootbp; /* MOS objset_phys_t */
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_UBERBLOCK_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h
deleted file mode 100644
index c8c177e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/unique.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_UNIQUE_H
-#define _SYS_UNIQUE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The number of significant bits in each unique value. */
-#define UNIQUE_BITS 56
-
-void unique_init(void);
-
-/* Return a new unique value. */
-uint64_t unique_create(void);
-
-/* Return a unique value, which equals the one passed in if possible. */
-uint64_t unique_insert(uint64_t value);
-
-/* Indicate that this value no longer needs to be uniquified against. */
-void unique_remove(uint64_t value);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_UNIQUE_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
deleted file mode 100644
index 3120811..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_VDEV_H
-#define _SYS_VDEV_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu.h>
-#include <sys/space_map.h>
-#include <sys/fs/zfs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern boolean_t zfs_nocacheflush;
-
-/*
- * Fault injection modes.
- */
-#define VDEV_FAULT_NONE 0
-#define VDEV_FAULT_RANDOM 1
-#define VDEV_FAULT_COUNT 2
-
-extern int vdev_open(vdev_t *);
-extern int vdev_validate(vdev_t *);
-extern void vdev_close(vdev_t *);
-extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
-extern void vdev_init(vdev_t *, uint64_t txg);
-extern void vdev_reopen(vdev_t *);
-extern int vdev_validate_spare(vdev_t *);
-
-extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
-extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
-extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
-extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
-extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
- int scrub_done);
-
-extern const char *vdev_description(vdev_t *vd);
-
-extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
-extern void vdev_metaslab_fini(vdev_t *vd);
-
-extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
-extern void vdev_stat_update(zio_t *zio);
-extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
- boolean_t complete);
-extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
-extern void vdev_propagate_state(vdev_t *vd);
-extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
- vdev_aux_t aux);
-
-extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
- int64_t alloc_delta);
-
-extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
-
-extern void vdev_io_start(zio_t *zio);
-extern void vdev_io_done(zio_t *zio);
-
-extern int vdev_online(spa_t *spa, uint64_t guid);
-extern int vdev_offline(spa_t *spa, uint64_t guid, int istmp);
-extern void vdev_clear(spa_t *spa, vdev_t *vd);
-
-extern int vdev_error_inject(vdev_t *vd, zio_t *zio);
-extern int vdev_is_dead(vdev_t *vd);
-
-extern void vdev_cache_init(vdev_t *vd);
-extern void vdev_cache_fini(vdev_t *vd);
-extern int vdev_cache_read(zio_t *zio);
-extern void vdev_cache_write(zio_t *zio);
-
-extern void vdev_queue_init(vdev_t *vd);
-extern void vdev_queue_fini(vdev_t *vd);
-extern zio_t *vdev_queue_io(zio_t *zio);
-extern void vdev_queue_io_done(zio_t *zio);
-
-extern void vdev_config_dirty(vdev_t *vd);
-extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
-
-extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
- boolean_t getstats, boolean_t isspare);
-
-/*
- * Label routines
- */
-struct uberblock;
-extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
-extern nvlist_t *vdev_label_read_config(vdev_t *vd);
-extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
-
-typedef enum {
- VDEV_LABEL_CREATE, /* create/add a new device */
- VDEV_LABEL_REPLACE, /* replace an existing device */
- VDEV_LABEL_SPARE, /* add a new hot spare */
- VDEV_LABEL_REMOVE /* remove an existing device */
-} vdev_labeltype_t;
-
-extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_VDEV_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h
deleted file mode 100644
index 95536a7..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_disk.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_VDEV_DISK_H
-#define _SYS_VDEV_DISK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/vdev.h>
-#ifdef _KERNEL
-#include <sys/sunldi.h>
-#include <sys/sunddi.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct vdev_disk {
- ddi_devid_t vd_devid;
- char *vd_minor;
- ldi_handle_t vd_lh;
-} vdev_disk_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_VDEV_DISK_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_file.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_file.h
deleted file mode 100644
index cd49673..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_file.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_VDEV_FILE_H
-#define _SYS_VDEV_FILE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/vdev.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct vdev_file {
- vnode_t *vf_vnode;
-} vdev_file_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_VDEV_FILE_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
deleted file mode 100644
index aba7567..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_VDEV_IMPL_H
-#define _SYS_VDEV_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/avl.h>
-#include <sys/dmu.h>
-#include <sys/metaslab.h>
-#include <sys/nvpair.h>
-#include <sys/space_map.h>
-#include <sys/vdev.h>
-#include <sys/dkio.h>
-#include <sys/uberblock_impl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Virtual device descriptors.
- *
- * All storage pool operations go through the virtual device framework,
- * which provides data replication and I/O scheduling.
- */
-
-/*
- * Forward declarations that lots of things need.
- */
-typedef struct vdev_queue vdev_queue_t;
-typedef struct vdev_cache vdev_cache_t;
-typedef struct vdev_cache_entry vdev_cache_entry_t;
-
-/*
- * Virtual device operations
- */
-typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
-typedef void vdev_close_func_t(vdev_t *vd);
-typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
-typedef void vdev_io_start_func_t(zio_t *zio);
-typedef void vdev_io_done_func_t(zio_t *zio);
-typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
-
-typedef struct vdev_ops {
- vdev_open_func_t *vdev_op_open;
- vdev_close_func_t *vdev_op_close;
- vdev_asize_func_t *vdev_op_asize;
- vdev_io_start_func_t *vdev_op_io_start;
- vdev_io_done_func_t *vdev_op_io_done;
- vdev_state_change_func_t *vdev_op_state_change;
- char vdev_op_type[16];
- boolean_t vdev_op_leaf;
-} vdev_ops_t;
-
-/*
- * Virtual device properties
- */
-struct vdev_cache_entry {
- char *ve_data;
- uint64_t ve_offset;
- uint64_t ve_lastused;
- avl_node_t ve_offset_node;
- avl_node_t ve_lastused_node;
- uint32_t ve_hits;
- uint16_t ve_missed_update;
- zio_t *ve_fill_io;
-};
-
-struct vdev_cache {
- avl_tree_t vc_offset_tree;
- avl_tree_t vc_lastused_tree;
- kmutex_t vc_lock;
-};
-
-struct vdev_queue {
- avl_tree_t vq_deadline_tree;
- avl_tree_t vq_read_tree;
- avl_tree_t vq_write_tree;
- avl_tree_t vq_pending_tree;
- kmutex_t vq_lock;
-};
-
-/*
- * Virtual device descriptor
- */
-struct vdev {
- /*
- * Common to all vdev types.
- */
- uint64_t vdev_id; /* child number in vdev parent */
- uint64_t vdev_guid; /* unique ID for this vdev */
- uint64_t vdev_guid_sum; /* self guid + all child guids */
- uint64_t vdev_asize; /* allocatable device capacity */
- uint64_t vdev_ashift; /* block alignment shift */
- uint64_t vdev_state; /* see VDEV_STATE_* #defines */
- uint64_t vdev_prevstate; /* used when reopening a vdev */
- vdev_ops_t *vdev_ops; /* vdev operations */
- spa_t *vdev_spa; /* spa for this vdev */
- void *vdev_tsd; /* type-specific data */
- vdev_t *vdev_top; /* top-level vdev */
- vdev_t *vdev_parent; /* parent vdev */
- vdev_t **vdev_child; /* array of children */
- uint64_t vdev_children; /* number of children */
- space_map_t vdev_dtl_map; /* dirty time log in-core state */
- space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
- vdev_stat_t vdev_stat; /* virtual device statistics */
-
- /*
- * Top-level vdev state.
- */
- uint64_t vdev_ms_array; /* metaslab array object */
- uint64_t vdev_ms_shift; /* metaslab size shift */
- uint64_t vdev_ms_count; /* number of metaslabs */
- metaslab_group_t *vdev_mg; /* metaslab group */
- metaslab_t **vdev_ms; /* metaslab array */
- txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
- txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
- txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
- uint8_t vdev_reopen_wanted; /* async reopen wanted? */
- list_node_t vdev_dirty_node; /* config dirty list */
- uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
-
- /*
- * Leaf vdev state.
- */
- uint64_t vdev_psize; /* physical device capacity */
- space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
- txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
- uint64_t vdev_wholedisk; /* true if this is a whole disk */
- uint64_t vdev_offline; /* device taken offline? */
- uint64_t vdev_nparity; /* number of parity devices for raidz */
- char *vdev_path; /* vdev path (if any) */
- char *vdev_devid; /* vdev devid (if any) */
- uint64_t vdev_fault_arg; /* fault injection parameter */
- int vdev_fault_mask; /* zio types to fault */
- uint8_t vdev_fault_mode; /* fault injection mode */
- uint8_t vdev_cache_active; /* vdev_cache and vdev_queue */
- uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
- uint8_t vdev_detached; /* device detached? */
- uint64_t vdev_isspare; /* was a hot spare */
- vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
- vdev_cache_t vdev_cache; /* physical block cache */
- uint64_t vdev_not_present; /* not present during import */
- hrtime_t vdev_last_try; /* last reopen time */
- boolean_t vdev_nowritecache; /* true if flushwritecache failed */
-
- /*
- * For DTrace to work in userland (libzpool) context, these fields must
- * remain at the end of the structure. DTrace will use the kernel's
- * CTF definition for 'struct vdev', and since the size of a kmutex_t is
- * larger in userland, the offsets of the remaining fields would be
- * incorrect.
- */
- kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,scrub} */
- kmutex_t vdev_stat_lock; /* vdev_stat */
-};
-
-#define VDEV_SKIP_SIZE (8 << 10)
-#define VDEV_BOOT_HEADER_SIZE (8 << 10)
-#define VDEV_PHYS_SIZE (112 << 10)
-#define VDEV_UBERBLOCK_RING (128 << 10)
-
-#define VDEV_UBERBLOCK_SHIFT(vd) \
- MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
-#define VDEV_UBERBLOCK_COUNT(vd) \
- (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
-#define VDEV_UBERBLOCK_OFFSET(vd, n) \
- offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
-#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
-
-/* ZFS boot block */
-#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
-#define VDEV_BOOT_VERSION 1 /* version number */
-
-typedef struct vdev_boot_header {
- uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
- uint64_t vb_version; /* VDEV_BOOT_VERSION */
- uint64_t vb_offset; /* start offset (bytes) */
- uint64_t vb_size; /* size (bytes) */
- char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
-} vdev_boot_header_t;
-
-typedef struct vdev_phys {
- char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
- zio_block_tail_t vp_zbt;
-} vdev_phys_t;
-
-typedef struct vdev_label {
- char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
- vdev_boot_header_t vl_boot_header; /* 8K */
- vdev_phys_t vl_vdev_phys; /* 112K */
- char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
-} vdev_label_t; /* 256K total */
-
-/*
- * vdev_dirty() flags
- */
-#define VDD_METASLAB 0x01
-#define VDD_DTL 0x02
-
-/*
- * Size and offset of embedded boot loader region on each label.
- * The total size of the first two labels plus the boot area is 4MB.
- */
-#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t))
-#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
-
-/*
- * Size of label regions at the start and end of each leaf device.
- */
-#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
-#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
-#define VDEV_LABELS 4
-
-#define VDEV_ALLOC_LOAD 0
-#define VDEV_ALLOC_ADD 1
-#define VDEV_ALLOC_SPARE 2
-
-/*
- * Allocate or free a vdev
- */
-extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
- vdev_t *parent, uint_t id, int alloctype);
-extern void vdev_free(vdev_t *vd);
-
-/*
- * Add or remove children and parents
- */
-extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
-extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
-extern void vdev_compact_children(vdev_t *pvd);
-extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
-extern void vdev_remove_parent(vdev_t *cvd);
-
-/*
- * vdev load and sync
- */
-extern void vdev_load(vdev_t *vd);
-extern void vdev_sync(vdev_t *vd, uint64_t txg);
-extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
-extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
-
-/*
- * Available vdev types.
- */
-extern vdev_ops_t vdev_root_ops;
-extern vdev_ops_t vdev_mirror_ops;
-extern vdev_ops_t vdev_replacing_ops;
-extern vdev_ops_t vdev_raidz_ops;
-#ifdef _KERNEL
-extern vdev_ops_t vdev_geom_ops;
-#else
-extern vdev_ops_t vdev_disk_ops;
-extern vdev_ops_t vdev_file_ops;
-#endif
-extern vdev_ops_t vdev_missing_ops;
-extern vdev_ops_t vdev_spare_ops;
-
-/*
- * Common size functions
- */
-extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
-extern uint64_t vdev_get_rsize(vdev_t *vd);
-
-/*
- * zdb uses this tunable, so it must be declared here to make lint happy.
- */
-extern int zfs_vdev_cache_size;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_VDEV_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
deleted file mode 100644
index f89d938..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZAP_H
-#define _SYS_ZAP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ZAP - ZFS Attribute Processor
- *
- * The ZAP is a module which sits on top of the DMU (Data Management
- * Unit) and implements a higher-level storage primitive using DMU
- * objects. Its primary consumer is the ZPL (ZFS POSIX Layer).
- *
- * A "zapobj" is a DMU object which the ZAP uses to store attributes.
- * Users should use only zap routines to access a zapobj - they should
- * not access the DMU object directly using DMU routines.
- *
- * The attributes stored in a zapobj are name-value pairs. The name is
- * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
- * the terminating NUL). The value is an array of integers, which may be
- * 1, 2, 4, or 8 bytes long. The total space used by the array (number
- * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
- * Note that an 8-byte integer value can be used to store the location
- * (object number) of another dmu object (which may be itself a zapobj).
- * Note that you can use a zero-length attribute to store a single bit
- * of information - the attribute is present or not.
- *
- * The ZAP routines are thread-safe. However, you must observe the
- * DMU's restriction that a transaction may not be operated on
- * concurrently.
- *
- * Any of the routines that return an int may return an I/O error (EIO
- * or ECHECKSUM).
- *
- *
- * Implementation / Performance Notes:
- *
- * The ZAP is intended to operate most efficiently on attributes with
- * short (49 bytes or less) names and single 8-byte values, for which
- * the microzap will be used. The ZAP should be efficient enough so
- * that the user does not need to cache these attributes.
- *
- * The ZAP's locking scheme makes its routines thread-safe. Operations
- * on different zapobjs will be processed concurrently. Operations on
- * the same zapobj which only read data will be processed concurrently.
- * Operations on the same zapobj which modify data will be processed
- * concurrently when there are many attributes in the zapobj (because
- * the ZAP uses per-block locking - more than 128 * (number of cpus)
- * small attributes will suffice).
- */
-
-/*
- * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
- * strings) for the names of attributes, rather than a byte string
- * bounded by an explicit length. If some day we want to support names
- * in character sets which have embedded zeros (eg. UTF-16, UTF-32),
- * we'll have to add routines for using length-bounded strings.
- */
-
-#include <sys/dmu.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZAP_MAXNAMELEN 256
-#define ZAP_MAXVALUELEN 1024
-
-/*
- * Create a new zapobj with no attributes and return its object number.
- */
-uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-
-/*
- * Create a new zapobj with no attributes from the given (unallocated)
- * object number.
- */
-int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-
-/*
- * The zapobj passed in must be a valid ZAP object for all of the
- * following routines.
- */
-
-/*
- * Destroy this zapobj and all its attributes.
- *
- * Frees the object number using dmu_object_free.
- */
-int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
-
-/*
- * Manipulate attributes.
- *
- * 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
- */
-
-/*
- * Retrieve the contents of the attribute with the given name.
- *
- * If the requested attribute does not exist, the call will fail and
- * return ENOENT.
- *
- * If 'integer_size' is smaller than the attribute's integer size, the
- * call will fail and return EINVAL.
- *
- * If 'integer_size' is equal to or larger than the attribute's integer
- * size, the call will succeed and return 0. When converting to a
- * larger integer size, the integers will be treated as unsigned (ie. no
- * sign-extension will be performed).
- *
- * 'num_integers' is the length (in integers) of 'buf'.
- *
- * If the attribute is longer than the buffer, as many integers as will
- * fit will be transferred to 'buf'. If the entire attribute was not
- * transferred, the call will return EOVERFLOW.
- */
-int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
- uint64_t integer_size, uint64_t num_integers, void *buf);
-
-/*
- * Create an attribute with the given name and value.
- *
- * If an attribute with the given name already exists, the call will
- * fail and return EEXIST.
- */
-int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
- int integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx);
-
-/*
- * Set the attribute with the given name to the given value. If an
- * attribute with the given name does not exist, it will be created. If
- * an attribute with the given name already exists, the previous value
- * will be overwritten. The integer_size may be different from the
- * existing attribute's integer size, in which case the attribute's
- * integer size will be updated to the new value.
- */
-int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
-
-/*
- * Get the length (in integers) and the integer size of the specified
- * attribute.
- *
- * If the requested attribute does not exist, the call will fail and
- * return ENOENT.
- */
-int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
- uint64_t *integer_size, uint64_t *num_integers);
-
-/*
- * Remove the specified attribute.
- *
- * If the specified attribute does not exist, the call will fail and
- * return ENOENT.
- */
-int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
-
-/*
- * Returns (in *count) the number of attributes in the specified zap
- * object.
- */
-int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
-
-
-/*
- * Returns (in name) the name of the entry whose value
- * (za_first_integer) is value, or ENOENT if not found. The string
- * pointed to by name must be at least 256 bytes long.
- */
-int zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name);
-
-struct zap;
-struct zap_leaf;
-typedef struct zap_cursor {
- /* This structure is opaque! */
- objset_t *zc_objset;
- struct zap *zc_zap;
- struct zap_leaf *zc_leaf;
- uint64_t zc_zapobj;
- uint64_t zc_hash;
- uint32_t zc_cd;
-} zap_cursor_t;
-
-typedef struct {
- int za_integer_length;
- uint64_t za_num_integers;
- uint64_t za_first_integer; /* no sign extension for <8byte ints */
- char za_name[MAXNAMELEN];
-} zap_attribute_t;
-
-/*
- * The interface for listing all the attributes of a zapobj can be
- * thought of as a cursor moving down a list of the attributes one by
- * one. The cookie returned by the zap_cursor_serialize routine is
- * persistent across system calls (and across reboot, even).
- */
-
-/*
- * Initialize a zap cursor, pointing to the "first" attribute of the
- * zapobj. You must _fini the cursor when you are done with it.
- */
-void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
-void zap_cursor_fini(zap_cursor_t *zc);
-
-/*
- * Get the attribute currently pointed to by the cursor. Returns
- * ENOENT if at the end of the attributes.
- */
-int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
-
-/*
- * Advance the cursor to the next attribute.
- */
-void zap_cursor_advance(zap_cursor_t *zc);
-
-/*
- * Get a persistent cookie pointing to the current position of the zap
- * cursor. The low 4 bits in the cookie are always zero, and thus can
- * be used to differentiate a serialized cookie from a different type
- * of value. The cookie will be less than 2^32 as long as there are
- * fewer than 2^22 (4.2 million) entries in the zap object.
- */
-uint64_t zap_cursor_serialize(zap_cursor_t *zc);
-
-/*
- * Initialize a zap cursor pointing to the position recorded by
- * zap_cursor_serialize (in the "serialized" argument). You can also
- * use a "serialized" argument of 0 to start at the beginning of the
- * zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
- * zap_cursor_init(...).)
- */
-void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
- uint64_t zapobj, uint64_t serialized);
-
-
-#define ZAP_HISTOGRAM_SIZE 10
-
-typedef struct zap_stats {
- /*
- * Size of the pointer table (in number of entries).
- * This is always a power of 2, or zero if it's a microzap.
- * In general, it should be considerably greater than zs_num_leafs.
- */
- uint64_t zs_ptrtbl_len;
-
- uint64_t zs_blocksize; /* size of zap blocks */
-
- /*
- * The number of blocks used. Note that some blocks may be
- * wasted because old ptrtbl's and large name/value blocks are
- * not reused. (Although their space is reclaimed, we don't
- * reuse those offsets in the object.)
- */
- uint64_t zs_num_blocks;
-
- /*
- * Pointer table values from zap_ptrtbl in the zap_phys_t
- */
- uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
- uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
- uint64_t zs_ptrtbl_zt_blk; /* starting block number */
- uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
- uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
-
- /*
- * Values of the other members of the zap_phys_t
- */
- uint64_t zs_block_type; /* ZBT_HEADER */
- uint64_t zs_magic; /* ZAP_MAGIC */
- uint64_t zs_num_leafs; /* The number of leaf blocks */
- uint64_t zs_num_entries; /* The number of zap entries */
- uint64_t zs_salt; /* salt to stir into hash function */
-
- /*
- * Histograms. For all histograms, the last index
- * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
- * than what can be represented. For example
- * zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
- * of leafs with 45 or more entries.
- */
-
- /*
- * zs_leafs_with_2n_pointers[n] is the number of leafs with
- * 2^n pointers to it.
- */
- uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
-
- /*
- * zs_blocks_with_n5_entries[n] is the number of leafs with
- * [n*5, (n+1)*5) entries. In the current implementation, there
- * can be at most 55 entries in any block, but there may be
- * fewer if the name or value is large, or the block is not
- * completely full.
- */
- uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
-
- /*
- * zs_blocks_n_tenths_full[n] is the number of leafs whose
- * fullness is in the range [n/10, (n+1)/10).
- */
- uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
-
- /*
- * zs_entries_using_n_chunks[n] is the number of entries which
- * consume n 24-byte chunks. (Note, large names/values only use
- * one chunk, but contribute to zs_num_blocks_large.)
- */
- uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
-
- /*
- * zs_buckets_with_n_entries[n] is the number of buckets (each
- * leaf has 64 buckets) with n entries.
- * zs_buckets_with_n_entries[1] should be very close to
- * zs_num_entries.
- */
- uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
-} zap_stats_t;
-
-/*
- * Get statistics about a ZAP object. Note: you need to be aware of the
- * internal implementation of the ZAP to correctly interpret some of the
- * statistics. This interface shouldn't be relied on unless you really
- * know what you're doing.
- */
-int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZAP_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
deleted file mode 100644
index 4e43f4a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZAP_IMPL_H
-#define _SYS_ZAP_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zap.h>
-#include <sys/zfs_context.h>
-#include <sys/avl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int fzap_default_block_shift;
-
-#define ZAP_MAGIC 0x2F52AB2ABULL
-
-#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
-
-#define ZAP_MAXCD (uint32_t)(-1)
-#define ZAP_HASHBITS 28
-#define MZAP_ENT_LEN 64
-#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
-#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
-#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
-
-typedef struct mzap_ent_phys {
- uint64_t mze_value;
- uint32_t mze_cd;
- uint16_t mze_pad; /* in case we want to chain them someday */
- char mze_name[MZAP_NAME_LEN];
-} mzap_ent_phys_t;
-
-typedef struct mzap_phys {
- uint64_t mz_block_type; /* ZBT_MICRO */
- uint64_t mz_salt;
- uint64_t mz_pad[6];
- mzap_ent_phys_t mz_chunk[1];
- /* actually variable size depending on block size */
-} mzap_phys_t;
-
-typedef struct mzap_ent {
- avl_node_t mze_node;
- int mze_chunkid;
- uint64_t mze_hash;
- mzap_ent_phys_t mze_phys;
-} mzap_ent_t;
-
-
-/*
- * The (fat) zap is stored in one object. It is an array of
- * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
- *
- * ptrtbl fits in first block:
- * [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
- *
- * ptrtbl too big for first block:
- * [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
- *
- */
-
-struct dmu_buf;
-struct zap_leaf;
-
-#define ZBT_LEAF ((1ULL << 63) + 0)
-#define ZBT_HEADER ((1ULL << 63) + 1)
-#define ZBT_MICRO ((1ULL << 63) + 3)
-/* any other values are ptrtbl blocks */
-
-/*
- * the embedded pointer table takes up half a block:
- * block size / entry size (2^3) / 2
- */
-#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
-
-/*
- * The embedded pointer table starts half-way through the block. Since
- * the pointer table itself is half the block, it starts at (64-bit)
- * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
- */
-#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
- ((uint64_t *)(zap)->zap_f.zap_phys) \
- [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
-
-/*
- * TAKE NOTE:
- * If zap_phys_t is modified, zap_byteswap() must be modified.
- */
-typedef struct zap_phys {
- uint64_t zap_block_type; /* ZBT_HEADER */
- uint64_t zap_magic; /* ZAP_MAGIC */
-
- struct zap_table_phys {
- uint64_t zt_blk; /* starting block number */
- uint64_t zt_numblks; /* number of blocks */
- uint64_t zt_shift; /* bits to index it */
- uint64_t zt_nextblk; /* next (larger) copy start block */
- uint64_t zt_blks_copied; /* number source blocks copied */
- } zap_ptrtbl;
-
- uint64_t zap_freeblk; /* the next free block */
- uint64_t zap_num_leafs; /* number of leafs */
- uint64_t zap_num_entries; /* number of entries */
- uint64_t zap_salt; /* salt to stir into hash function */
- /*
- * This structure is followed by padding, and then the embedded
- * pointer table. The embedded pointer table takes up second
- * half of the block. It is accessed using the
- * ZAP_EMBEDDED_PTRTBL_ENT() macro.
- */
-} zap_phys_t;
-
-typedef struct zap_table_phys zap_table_phys_t;
-
-typedef struct zap {
- objset_t *zap_objset;
- uint64_t zap_object;
- struct dmu_buf *zap_dbuf;
- krwlock_t zap_rwlock;
- int zap_ismicro;
- uint64_t zap_salt;
- union {
- struct {
- zap_phys_t *zap_phys;
-
- /*
- * zap_num_entries_mtx protects
- * zap_num_entries
- */
- kmutex_t zap_num_entries_mtx;
- int zap_block_shift;
- } zap_fat;
- struct {
- mzap_phys_t *zap_phys;
- int16_t zap_num_entries;
- int16_t zap_num_chunks;
- int16_t zap_alloc_next;
- avl_tree_t zap_avl;
- } zap_micro;
- } zap_u;
-} zap_t;
-
-#define zap_f zap_u.zap_fat
-#define zap_m zap_u.zap_micro
-
-uint64_t zap_hash(zap_t *zap, const char *name);
-int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
- krw_t lti, int fatreader, zap_t **zapp);
-void zap_unlockdir(zap_t *zap);
-void zap_evict(dmu_buf_t *db, void *vmzap);
-
-#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
-
-void fzap_byteswap(void *buf, size_t size);
-int fzap_count(zap_t *zap, uint64_t *count);
-int fzap_lookup(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers, void *buf);
-int fzap_add(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx);
-int fzap_update(zap_t *zap, const char *name,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
-int fzap_length(zap_t *zap, const char *name,
- uint64_t *integer_size, uint64_t *num_integers);
-int fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx);
-int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
-void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
-void zap_put_leaf(struct zap_leaf *l);
-
-int fzap_add_cd(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers,
- const void *val, uint32_t cd, dmu_tx_t *tx);
-void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZAP_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
deleted file mode 100644
index 147fb72..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZAP_LEAF_H
-#define _SYS_ZAP_LEAF_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct zap;
-
-#define ZAP_LEAF_MAGIC 0x2AB1EAF
-
-/* chunk size = 24 bytes */
-#define ZAP_LEAF_CHUNKSIZE 24
-
-/*
- * The amount of space available for chunks is:
- * block size (1<<l->l_bs) - hash entry size (2) * number of hash
- * entries - header space (2*chunksize)
- */
-#define ZAP_LEAF_NUMCHUNKS(l) \
- (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
- ZAP_LEAF_CHUNKSIZE - 2)
-
-/*
- * The amount of space within the chunk available for the array is:
- * chunk size - space for type (1) - space for next pointer (2)
- */
-#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
-
-#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
- (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
-
-/*
- * Low water mark: when there are only this many chunks free, start
- * growing the ptrtbl. Ideally, this should be larger than a
- * "reasonably-sized" entry. 20 chunks is more than enough for the
- * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
- * while still being only around 3% for 16k blocks.
- */
-#define ZAP_LEAF_LOW_WATER (20)
-
-/*
- * The leaf hash table has block size / 2^5 (32) number of entries,
- * which should be more than enough for the maximum number of entries,
- * which is less than block size / CHUNKSIZE (24) / minimum number of
- * chunks per entry (3).
- */
-#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
-#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
-
-/*
- * The chunks start immediately after the hash table. The end of the
- * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
- * chunk_t.
- */
-#define ZAP_LEAF_CHUNK(l, idx) \
- ((zap_leaf_chunk_t *) \
- ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
-#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
-
-typedef enum zap_chunk_type {
- ZAP_CHUNK_FREE = 253,
- ZAP_CHUNK_ENTRY = 252,
- ZAP_CHUNK_ARRAY = 251,
- ZAP_CHUNK_TYPE_MAX = 250
-} zap_chunk_type_t;
-
-/*
- * TAKE NOTE:
- * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
- */
-typedef struct zap_leaf_phys {
- struct zap_leaf_header {
- uint64_t lh_block_type; /* ZBT_LEAF */
- uint64_t lh_pad1;
- uint64_t lh_prefix; /* hash prefix of this leaf */
- uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
- uint16_t lh_nfree; /* number of free chunks */
- uint16_t lh_nentries; /* number of entries */
- uint16_t lh_prefix_len; /* num bits used to id this leaf */
-
-/* above is accessible to zap, below is zap_leaf private */
-
- uint16_t lh_freelist; /* chunk head of free list */
- uint8_t lh_pad2[12];
- } l_hdr; /* 48 bytes, i.e. two 24-byte chunks */
-
- /*
- * The header is followed by a hash table with
- * ZAP_LEAF_HASH_NUMENTRIES(l) entries. The hash table is
- * followed by an array of ZAP_LEAF_NUMCHUNKS(l)
- * zap_leaf_chunk structures. These structures are accessed
- * with the ZAP_LEAF_CHUNK() macro.
- */
-
- uint16_t l_hash[1]; /* first hash entry; table extends past it */
-} zap_leaf_phys_t;
-
-typedef union zap_leaf_chunk {
- struct zap_leaf_entry {
- uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
- uint8_t le_int_size; /* size of ints */
- uint16_t le_next; /* next entry in hash chain */
- uint16_t le_name_chunk; /* first chunk of the name */
- uint16_t le_name_length; /* bytes in name, incl null */
- uint16_t le_value_chunk; /* first chunk of the value */
- uint16_t le_value_length; /* value length in ints */
- uint32_t le_cd; /* collision differentiator */
- uint64_t le_hash; /* hash value of the name */
- } l_entry;
- struct zap_leaf_array {
- uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
- uint8_t la_array[ZAP_LEAF_ARRAY_BYTES];
- uint16_t la_next; /* next blk or CHAIN_END */
- } l_array;
- struct zap_leaf_free {
- uint8_t lf_type; /* always ZAP_CHUNK_FREE */
- uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];
- uint16_t lf_next; /* next in free list, or CHAIN_END */
- } l_free;
-} zap_leaf_chunk_t;
-
-typedef struct zap_leaf {
- krwlock_t l_rwlock; /* only used on head of chain */
- uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
- int l_bs; /* block size shift */
- dmu_buf_t *l_dbuf;
- zap_leaf_phys_t *l_phys;
-} zap_leaf_t;
-
-
-typedef struct zap_entry_handle {
- /* below is set by zap_leaf.c and is public to zap.c */
- uint64_t zeh_num_integers;
- uint64_t zeh_hash;
- uint32_t zeh_cd;
- uint8_t zeh_integer_size;
-
- /* below is private to zap_leaf.c */
- uint16_t zeh_fakechunk;
- uint16_t *zeh_chunkp;
- zap_leaf_t *zeh_leaf;
-} zap_entry_handle_t;
-
-/*
- * Return a handle to the named entry, or ENOENT if not found. The hash
- * value must equal zap_hash(name).
- */
-extern int zap_leaf_lookup(zap_leaf_t *l,
- const char *name, uint64_t h, zap_entry_handle_t *zeh);
-
-/*
- * Return a handle to the entry with this hash+cd, or the entry with the
- * next closest hash+cd.
- */
-extern int zap_leaf_lookup_closest(zap_leaf_t *l,
- uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
-
-/*
- * Read the first num_integers in the attribute. Integer size
- * conversion will be done without sign extension. Return EINVAL if
- * integer_size is too small. Return EOVERFLOW if there are more than
- * num_integers in the attribute.
- */
-extern int zap_entry_read(const zap_entry_handle_t *zeh,
- uint8_t integer_size, uint64_t num_integers, void *buf);
-
-extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
- uint16_t buflen, char *buf);
-
-/*
- * Replace the value of an existing entry.
- *
- * zap_entry_update may fail if it runs out of space (ENOSPC).
- */
-extern int zap_entry_update(zap_entry_handle_t *zeh,
- uint8_t integer_size, uint64_t num_integers, const void *buf);
-
-/*
- * Remove an entry.
- */
-extern void zap_entry_remove(zap_entry_handle_t *zeh);
-
-/*
- * Create an entry. An equal entry must not exist, and this entry must
- * belong in this leaf (according to its hash value). Fills in the
- * entry handle on success. Returns 0 on success or ENOSPC on failure.
- */
-extern int zap_entry_create(zap_leaf_t *l,
- const char *name, uint64_t h, uint32_t cd,
- uint8_t integer_size, uint64_t num_integers, const void *buf,
- zap_entry_handle_t *zeh);
-
-/*
- * Other stuff.
- */
-
-extern void zap_leaf_init(zap_leaf_t *l);
-extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
-extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl);
-extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZAP_LEAF_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
deleted file mode 100644
index 3250b76..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_acl.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_ACL_H
-#define _SYS_FS_ZFS_ACL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef _KERNEL
-#include <sys/cred.h>
-#endif
-#include <sys/acl.h>
-#include <sys/dmu.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct znode_phys;
-
-#define ACCESS_UNDETERMINED (-1) /* parenthesized: safe in any expression */
-
-#define ACE_SLOT_CNT 6
-
-typedef struct zfs_znode_acl {
- uint64_t z_acl_extern_obj; /* ext acl pieces */
- uint32_t z_acl_count; /* Number of ACEs */
- uint16_t z_acl_version; /* acl version */
- uint16_t z_acl_pad; /* pad */
- ace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
-} zfs_znode_acl_t;
-
-#define ACL_DATA_ALLOCED 0x1
-
-/*
- * Max ACL size is a prepended deny entry for every entry + the
- * canonical six tacked on the end.
- */
-#define MAX_ACL_SIZE (MAX_ACL_ENTRIES * 2 + 6)
-
-typedef struct zfs_acl {
- int z_slots; /* number of allocated slots for ACEs */
- int z_acl_count;
- uint_t z_state;
- ace_t *z_acl;
-} zfs_acl_t;
-
-#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
-
-/*
- * Property values for acl_mode and acl_inherit.
- *
- * acl_mode can take discard, noallow, groupmask and passthrough,
- * whereas acl_inherit has secure instead of groupmask.
- */
-
-#define ZFS_ACL_DISCARD 0
-#define ZFS_ACL_NOALLOW 1
-#define ZFS_ACL_GROUPMASK 2
-#define ZFS_ACL_PASSTHROUGH 3
-#define ZFS_ACL_SECURE 4
-
-struct znode;
-
-#ifdef _KERNEL
-void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
- dmu_tx_t *, cred_t *);
-#ifdef TODO
-int zfs_getacl(struct znode *, vsecattr_t *, cred_t *);
-#endif
-int zfs_mode_update(struct znode *, uint64_t, dmu_tx_t *);
-#ifdef TODO
-int zfs_setacl(struct znode *, vsecattr_t *, cred_t *);
-#endif
-void zfs_acl_rele(void *);
-void zfs_ace_byteswap(ace_t *, int);
-extern int zfs_zaccess(struct znode *, int, cred_t *);
-extern int zfs_zaccess_rwx(struct znode *, mode_t, cred_t *);
-extern int zfs_acl_access(struct znode *, int, cred_t *);
-int zfs_acl_chmod_setattr(struct znode *, uint64_t, dmu_tx_t *);
-int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
-int zfs_zaccess_rename(struct znode *, struct znode *,
- struct znode *, struct znode *, cred_t *cr);
-int zfs_zaccess_v4_perm(struct znode *, int, cred_t *);
-void zfs_acl_free(zfs_acl_t *);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_ACL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
deleted file mode 100644
index 4deeb3c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZFS_CONTEXT_H
-#define _SYS_ZFS_CONTEXT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/param.h>
-#include <sys/stdint.h>
-#include <sys/note.h>
-#include <sys/kernel.h>
-#include <sys/debug.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/sysmacros.h>
-#include <sys/bitmap.h>
-#include <sys/cmn_err.h>
-#include <sys/kmem.h>
-#include <sys/taskq.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/mutex.h>
-#include <sys/rwlock.h>
-#include <sys/random.h>
-#include <sys/byteorder.h>
-#include <sys/systm.h>
-#include <sys/list.h>
-#include <sys/uio.h>
-#include <sys/dirent.h>
-#include <sys/time.h>
-#include <sys/uio.h>
-#include <sys/fcntl.h>
-#include <sys/limits.h>
-#include <sys/string.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/cred.h>
-#include <sys/sdt.h>
-#include <sys/file.h>
-#include <sys/vfs.h>
-#include <sys/sysctl.h>
-#include <sys/sbuf.h>
-#include <sys/priv.h>
-#include <sys/kdb.h>
-#include <sys/ktr.h>
-#include <sys/stack.h>
-#include <sys/lockf.h>
-#include <sys/policy.h>
-#include <sys/zone.h>
-#include <sys/eventhandler.h>
-#include <sys/misc.h>
-#include <sys/zfs_debug.h>
-
-#include <machine/stdarg.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-/* Clash: vm_map.h #defines the two names below, which vdev_cache.c also uses. */
-#ifdef min_offset
-#undef min_offset
-#endif
-#ifdef max_offset
-#undef max_offset
-#endif
-#include <vm/vm_extern.h>
-#include <vm/vnode_pager.h>
-
-#define CPU_SEQID (curcpu)
-
-#ifdef __cplusplus
-}
-#endif
-
-extern int zfs_debug_level;
-extern struct mtx zfs_debug_mtx;
-#define ZFS_LOG(lvl, ...) do { \
- if (((lvl) & 0xff) <= zfs_debug_level) { /* low byte: verbosity */ \
- mtx_lock(&zfs_debug_mtx); /* held across all printf calls */ \
- printf("%s:%u[%d]: ", __func__, __LINE__, (lvl)); \
- printf(__VA_ARGS__); \
- printf("\n"); \
- if ((lvl) & 0x100) /* bit 0x100: also dump a backtrace */ \
- kdb_backtrace(); \
- mtx_unlock(&zfs_debug_mtx); \
- } \
-} while (0)
-
-#endif /* _SYS_ZFS_CONTEXT_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
deleted file mode 100644
index a676533..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZFS_CTLDIR_H
-#define _ZFS_CTLDIR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/vnode.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_znode.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZFS_CTLDIR_NAME ".zfs"
-
-#define zfs_has_ctldir(zdp) \
- ((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
- ((zdp)->z_zfsvfs->z_ctldir != NULL))
-#define zfs_show_ctldir(zdp) \
- (zfs_has_ctldir(zdp) && \
- ((zdp)->z_zfsvfs->z_show_ctldir))
-
-void zfsctl_create(zfsvfs_t *);
-void zfsctl_destroy(zfsvfs_t *);
-vnode_t *zfsctl_root(znode_t *);
-void zfsctl_init(void);
-void zfsctl_fini(void);
-
-int zfsctl_rename_snapshot(const char *from, const char *to);
-int zfsctl_destroy_snapshot(const char *snapname, int force);
-int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
-
-int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
- int flags, vnode_t *rdir, cred_t *cr);
-
-int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
-
-#define ZFSCTL_INO_ROOT 0x1
-#define ZFSCTL_INO_SNAPDIR 0x2
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZFS_CTLDIR_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
deleted file mode 100644
index 450ac1c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZFS_DEBUG_H
-#define _SYS_ZFS_DEBUG_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-/*
- * ZFS debugging
- */
-
-#if defined(DEBUG) || !defined(_KERNEL)
-#define ZFS_DEBUG
-#endif
-
-extern int zfs_flags;
-
-#define ZFS_DEBUG_DPRINTF 0x0001
-#define ZFS_DEBUG_DBUF_VERIFY 0x0002
-#define ZFS_DEBUG_DNODE_VERIFY 0x0004
-#define ZFS_DEBUG_SNAPNAMES 0x0008
-#define ZFS_DEBUG_MODIFY 0x0010
-
-#ifdef ZFS_DEBUG
-extern void __dprintf(const char *file, const char *func,
- int line, const char *fmt, ...);
-#define dprintf(...) /* void expression: cannot capture a caller's else */ \
- ((zfs_flags & ZFS_DEBUG_DPRINTF) ? \
- __dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__) : (void)0)
-#else
-#define dprintf(...) ((void)0)
-#endif /* ZFS_DEBUG */
-
-extern void zfs_panic_recover(const char *fmt, ...);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZFS_DEBUG_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
deleted file mode 100644
index f60d614..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_DIR_H
-#define _SYS_FS_ZFS_DIR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/dmu.h>
-#include <sys/zfs_znode.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* zfs_dirent_lock() flags */
-#define ZNEW 0x0001 /* entry should not exist */
-#define ZEXISTS 0x0002 /* entry should exist */
-#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
-#define ZXATTR 0x0008 /* we want the xattr dir */
-#define ZRENAMING 0x0010 /* znode is being renamed */
-
-/* mknode flags */
-#define IS_ROOT_NODE 0x01 /* create a root node */
-#define IS_XATTR 0x02 /* create an extended attribute node */
-#define IS_REPLAY 0x04 /* we are replaying intent log */
-
-extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
- int);
-extern void zfs_dirent_unlock(zfs_dirlock_t *);
-extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
-extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
- boolean_t *);
-extern int zfs_dirlook(znode_t *, char *, vnode_t **);
-extern void zfs_mknode(znode_t *, vattr_t *, uint64_t *,
- dmu_tx_t *, cred_t *, uint_t, znode_t **, int);
-extern void zfs_rmnode(znode_t *);
-extern boolean_t zfs_dirempty(znode_t *);
-extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
-extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
-extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
-extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
-extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_DIR_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
deleted file mode 100644
index 61a0a9e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZFS_IOCTL_H
-#define _SYS_ZFS_IOCTL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/cred.h>
-#include <sys/dmu.h>
-#include <sys/zio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Property values for snapdir
- */
-#define ZFS_SNAPDIR_HIDDEN 0
-#define ZFS_SNAPDIR_VISIBLE 1
-
-#define DMU_BACKUP_VERSION (1ULL)
-#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
-
-/*
- * zfs ioctl command structure
- */
-typedef struct dmu_replay_record {
- enum {
- DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
- DRR_WRITE, DRR_FREE, DRR_END,
- } drr_type;
- uint32_t drr_pad;
- union {
- struct drr_begin {
- uint64_t drr_magic;
- uint64_t drr_version;
- uint64_t drr_creation_time;
- dmu_objset_type_t drr_type;
- uint32_t drr_pad;
- uint64_t drr_toguid;
- uint64_t drr_fromguid;
- char drr_toname[MAXNAMELEN];
- } drr_begin;
- struct drr_end {
- zio_cksum_t drr_checksum;
- } drr_end;
- struct drr_object {
- uint64_t drr_object;
- dmu_object_type_t drr_type;
- dmu_object_type_t drr_bonustype;
- uint32_t drr_blksz;
- uint32_t drr_bonuslen;
- uint8_t drr_checksum;
- uint8_t drr_compress;
- uint8_t drr_pad[6];
- /* bonus content follows */
- } drr_object;
- struct drr_freeobjects {
- uint64_t drr_firstobj;
- uint64_t drr_numobjs;
- } drr_freeobjects;
- struct drr_write {
- uint64_t drr_object;
- dmu_object_type_t drr_type;
- uint32_t drr_pad;
- uint64_t drr_offset;
- uint64_t drr_length;
- /* content follows */
- } drr_write;
- struct drr_free {
- uint64_t drr_object;
- uint64_t drr_offset;
- uint64_t drr_length;
- } drr_free;
- } drr_u;
-} dmu_replay_record_t;
-
-typedef struct zinject_record {
- uint64_t zi_objset;
- uint64_t zi_object;
- uint64_t zi_start;
- uint64_t zi_end;
- uint64_t zi_guid;
- uint32_t zi_level;
- uint32_t zi_error;
- uint64_t zi_type;
- uint32_t zi_freq;
-} zinject_record_t;
-
-#define ZINJECT_NULL 0x1
-#define ZINJECT_FLUSH_ARC 0x2
-#define ZINJECT_UNLOAD_SPA 0x4
-
-typedef struct zfs_cmd {
- char zc_name[MAXPATHLEN];
- char zc_value[MAXPATHLEN * 2];
- uint64_t zc_guid;
- uint64_t zc_nvlist_src; /* really (char *) */
- uint64_t zc_nvlist_src_size;
- uint64_t zc_nvlist_dst; /* really (char *) */
- uint64_t zc_nvlist_dst_size;
- uint64_t zc_cookie;
- uint64_t zc_cred;
- uint64_t zc_dev;
- uint64_t zc_objset_type;
- uint64_t zc_history; /* really (char *) */
- uint64_t zc_history_len;
- uint64_t zc_history_offset;
- uint64_t zc_obj;
- uint64_t zc_jailid;
- dmu_objset_stats_t zc_objset_stats;
- struct drr_begin zc_begin_record;
- zinject_record_t zc_inject_record;
-} zfs_cmd_t;
-
-#ifdef _KERNEL
-typedef struct zfs_create_data {
- cred_t *zc_cred;
- dev_t zc_dev;
- nvlist_t *zc_props;
-} zfs_create_data_t;
-#endif
-
-#define ZVOL_MAX_MINOR (1 << 16)
-#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
-
-#ifdef _KERNEL
-
-extern int zfs_secpolicy_write(const char *dataset, cred_t *cr);
-extern int zfs_busy(void);
-extern int zfs_unmount_snap(char *, void *);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZFS_IOCTL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
deleted file mode 100644
index f302b66..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_RLOCK_H
-#define _SYS_FS_ZFS_RLOCK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _KERNEL
-
-#include <sys/zfs_znode.h>
-
-typedef enum {
- RL_READER,
- RL_WRITER,
- RL_APPEND
-} rl_type_t;
-
-typedef struct rl {
- znode_t *r_zp; /* znode this lock applies to */
- avl_node_t r_node; /* avl node link */
- uint64_t r_off; /* file range offset */
- uint64_t r_len; /* file range length */
- uint_t r_cnt; /* range reference count in tree */
- rl_type_t r_type; /* range type */
- kcondvar_t r_wr_cv; /* cv for waiting writers */
- kcondvar_t r_rd_cv; /* cv for waiting readers */
- uint8_t r_proxy; /* acting for original range */
- uint8_t r_write_wanted; /* writer wants to lock this range */
- uint8_t r_read_wanted; /* reader wants to lock this range */
-} rl_t;
-
-/*
- * Lock a range (offset, length) as either shared (RL_READER)
- * or exclusive (RL_WRITER or RL_APPEND). RL_APPEND is a special type
- * that is converted to RL_WRITER with the locked range starting at the
- * end of file. zfs_range_lock() returns the range lock structure.
- */
-rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
-
-/*
- * Unlock range and destroy range lock structure.
- */
-void zfs_range_unlock(rl_t *rl);
-
-/*
- * Reduce range locked as RL_WRITER from whole file to specified range.
- * Asserts the whole file was previously locked.
- */
-void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
-
-/*
- * AVL comparison function used to compare range locks
- */
-int zfs_range_compare(const void *arg1, const void *arg2);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_RLOCK_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
deleted file mode 100644
index aa82cc1..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_VFSOPS_H
-#define _SYS_FS_ZFS_VFSOPS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/list.h>
-#include <sys/vfs.h>
-#include <sys/zil.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct zfsvfs zfsvfs_t;
-
-struct zfsvfs {
- vfs_t *z_vfs; /* generic fs struct */
- zfsvfs_t *z_parent; /* parent fs */
- objset_t *z_os; /* objset reference */
- uint64_t z_root; /* id of root znode */
- uint64_t z_unlinkedobj; /* id of unlinked zapobj */
- uint64_t z_max_blksz; /* maximum block size for files */
- uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
- zilog_t *z_log; /* intent log pointer */
- uint_t z_acl_mode; /* acl chmod/mode behavior */
- uint_t z_acl_inherit; /* acl inheritance behavior */
- boolean_t z_atime; /* enable atimes mount option */
- boolean_t z_unmounted1; /* unmounted phase 1 */
- boolean_t z_unmounted2; /* unmounted phase 2 */
- uint32_t z_op_cnt; /* vnode/vfs operations ref count */
- krwlock_t z_um_lock; /* rw lock for umount phase 2 */
- list_t z_all_znodes; /* all vnodes in the fs */
- kmutex_t z_znodes_lock; /* lock for z_all_znodes */
- vnode_t *z_ctldir; /* .zfs directory pointer */
- boolean_t z_show_ctldir; /* expose .zfs in the root dir */
- boolean_t z_issnap; /* true if this is a snapshot */
-#define ZFS_OBJ_MTX_SZ 64
- kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
-};
-
-/*
- * The total file ID size is limited to 12 bytes (including the length
- * field) in the NFSv2 protocol. For historical reasons, this same limit
- * is currently being imposed by the Solaris NFSv3 implementation...
- * although the protocol actually permits a maximum of 64 bytes. It will
- * not be possible to expand beyond 12 bytes without abandoning support
- * of NFSv2 and making some changes to the Solaris NFSv3 implementation.
- *
- * For the time being, we will partition up the available space as follows:
- * 2 bytes fid length (required)
- * 6 bytes object number (48 bits)
- * 4 bytes generation number (32 bits)
- * We reserve only 48 bits for the object number, as this is the limit
- * currently defined and imposed by the DMU.
- */
-typedef struct zfid_short {
- uint16_t zf_len;
- uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */
- uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
-} zfid_short_t;
-
-typedef struct zfid_long {
- zfid_short_t z_fid;
- uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */
- uint8_t zf_setgen[2]; /* gen[i] = gen >> (8 * i) */
-} zfid_long_t;
-
-#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
-#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_VFSOPS_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
deleted file mode 100644
index c9c317e..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_ZNODE_H
-#define _SYS_FS_ZFS_ZNODE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef _KERNEL
-#include <sys/list.h>
-#include <sys/dmu.h>
-#include <sys/zfs_vfsops.h>
-#endif
-#include <sys/zfs_acl.h>
-#include <sys/zil.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Define special zfs pflags
- */
-#define ZFS_XATTR 0x1 /* is an extended attribute */
-#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
-#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
-
-#define MASTER_NODE_OBJ 1
-
-/*
- * special attributes for master node.
- */
-
-#define ZFS_FSID "FSID"
-#define ZFS_UNLINKED_SET "DELETE_QUEUE"
-#define ZFS_ROOT_OBJ "ROOT"
-#define ZPL_VERSION_OBJ "VERSION"
-#define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE"
-#define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS"
-
-#define ZFS_FLAG_BLOCKPERPAGE 0x1
-#define ZFS_FLAG_NOGROWBLOCKS 0x2
-
-/*
- * ZPL version - rev'd whenever an incompatible on-disk format change
- * occurs. Independent of SPA/DMU/ZAP versioning.
- */
-
-#define ZPL_VERSION 1ULL
-
-#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
-
-/* Path component length */
-/*
- * The generic fs code uses MAXNAMELEN to represent
- * what the largest component length is. Unfortunately,
- * this length includes the terminating NUL. ZFS needs
- * to tell the users via pathconf() and statvfs() what the
- * true maximum length of a component is, excluding the NUL.
- */
-#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
-
-/*
- * The directory entry has the type (currently unused on Solaris) in the
- * top 4 bits, and the object number in the low 48 bits. The "middle"
- * 12 bits are unused.
- */
-/*
- * Accessors for the packed 64-bit directory entry value.
- *
- * ZFS_DIRENT_TYPE(de)       extracts the 4-bit type from bits 60-63.
- * ZFS_DIRENT_OBJ(de)        extracts the 48-bit object number from bits 0-47.
- * ZFS_DIRENT_MAKE(type,obj) packs a type and an object number into one value.
- *
- * Every macro argument is parenthesized so that callers may pass arbitrary
- * expressions (e.g. "a | b" or "cond ? x : y") without the shift or the
- * bitwise-or binding to only part of the argument.
- */
-#define ZFS_DIRENT_TYPE(de) BF64_GET((de), 60, 4)
-#define ZFS_DIRENT_OBJ(de) BF64_GET((de), 0, 48)
-#define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)(type) << 60) | (obj))
-
-
-/*
- * This is the persistent portion of the znode. It is stored
- * in the "bonus buffer" of the file. Short symbolic links
- * are also stored in the bonus buffer.
- */
-typedef struct znode_phys {
- uint64_t zp_atime[2]; /* 0 - last file access time */
- uint64_t zp_mtime[2]; /* 16 - last file modification time */
- uint64_t zp_ctime[2]; /* 32 - last file change time */
- uint64_t zp_crtime[2]; /* 48 - creation time */
- uint64_t zp_gen; /* 64 - generation (txg of creation) */
- uint64_t zp_mode; /* 72 - file mode bits */
- uint64_t zp_size; /* 80 - size of file */
- uint64_t zp_parent; /* 88 - directory parent (`..') */
- uint64_t zp_links; /* 96 - number of links to file */
- uint64_t zp_xattr; /* 104 - DMU object for xattrs */
- uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
- uint64_t zp_flags; /* 120 - persistent flags */
- uint64_t zp_uid; /* 128 - file owner */
- uint64_t zp_gid; /* 136 - owning group */
- uint64_t zp_pad[4]; /* 144 - future */
- zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */
- /*
- * Data may pad out any remaining bytes in the znode buffer, eg:
- *
- * |<---------------------- dnode_phys (512) ------------------------>|
- * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
- * |<---- znode (264) ---->|<---- data (56) ---->|
- *
- * At present, we only use this space to store symbolic links.
- */
-} znode_phys_t;
-
-/*
- * Directory entry locks control access to directory entries.
- * They are used to protect creates, deletes, and renames.
- * Each directory znode has a mutex and a list of locked names.
- */
-#ifdef _KERNEL
-typedef struct zfs_dirlock {
- char *dl_name; /* directory entry being locked */
- uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
- uint16_t dl_namesize; /* set if dl_name was allocated */
- kcondvar_t dl_cv; /* wait for entry to be unlocked */
- struct znode *dl_dzp; /* directory znode */
- struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
-} zfs_dirlock_t;
-
-typedef struct znode {
- struct zfsvfs *z_zfsvfs; /* zfsvfs whose z_hold_mtx[] ZFS_OBJ_MUTEX() indexes */
- vnode_t *z_vnode; /* vnode for this znode; what ZTOV() yields */
- uint64_t z_id; /* object ID for this znode */
- kmutex_t z_lock; /* znode modification lock */
- krwlock_t z_map_lock; /* page map lock */
- krwlock_t z_parent_lock; /* parent lock for directories */
- krwlock_t z_name_lock; /* "master" lock for dirent locks */
- zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
- kmutex_t z_range_lock; /* protects changes to z_range_avl */
- avl_tree_t z_range_avl; /* avl tree of file range locks */
- uint8_t z_unlinked; /* file has been unlinked */
- uint8_t z_atime_dirty; /* atime needs to be synced */
- uint8_t z_dbuf_held; /* Is z_dbuf already held? */
- uint8_t z_zn_prefetch; /* Prefetch znodes? */
- uint_t z_blksz; /* block size in bytes */
- uint_t z_seq; /* modification sequence number */
- uint64_t z_mapcnt; /* number of pages mapped to file */
- uint64_t z_last_itx; /* last ZIL itx on this znode */
- uint32_t z_sync_cnt; /* synchronous open count */
- kmutex_t z_acl_lock; /* acl data lock */
- list_node_t z_link_node; /* all znodes in fs link */
- struct lockf *z_lockf; /* Head of byte-level lock list. */
- /*
- * These are dmu managed fields.
- */
- znode_phys_t *z_phys; /* pointer to persistent znode */
- dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
-} znode_t;
-
-
-/*
- * Range locking rules
- * --------------------
- * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
- * file range needs to be locked as RL_WRITER. Only then can the pages be
- * freed etc and zp_size reset. zp_size must be set within range lock.
- * 2. For writes and punching holes (zfs_write & zfs_space) just the range
- * being written or freed needs to be locked as RL_WRITER.
- * Multiple writes at the end of the file must coordinate zp_size updates
- * to ensure data isn't lost. A compare and swap loop is currently used
- * to ensure the file size is at least the offset last written.
- * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
- * read needs to be locked as RL_READER. A check against zp_size can then
- * be made for reading beyond end of file.
- */
-
-/*
- * Convert between znode pointers and vnode pointers
- */
-#define ZTOV(ZP) ((ZP)->z_vnode)
-#define VTOZ(VP) ((znode_t *)(VP)->v_data)
-
-/*
- * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
- * ZFS_EXIT() must be called before exiting the vop.
- */
-#define ZFS_ENTER(zfsvfs) \
- { \
- atomic_add_32(&(zfsvfs)->z_op_cnt, 1); \
- if ((zfsvfs)->z_unmounted1) { \
- ZFS_EXIT(zfsvfs); \
- return (EIO); \
- } \
- }
-#define ZFS_EXIT(zfsvfs) atomic_add_32(&(zfsvfs)->z_op_cnt, -1)
-
-/*
- * Macros for dealing with dmu_buf_hold
- */
-/*
- * ZFS_OBJ_HASH(obj_num)
- *	Map an object number to an index into z_hold_mtx[] by masking with
- *	(ZFS_OBJ_MTX_SZ - 1); this relies on ZFS_OBJ_MTX_SZ being a power
- *	of two.
- * ZFS_OBJ_MUTEX(zp)
- *	Address of the hold mutex that covers znode zp (hashed on z_id).
- * ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) / ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num)
- *	mutex_enter() / mutex_exit() on the hold mutex for obj_num.
- *
- * All arguments are parenthesized so that expression arguments bind as a
- * unit, and neither HOLD macro ends in ';' -- the caller supplies it, which
- * keeps "if (x) ZFS_OBJ_HOLD_ENTER(...); else ..." well formed.
- */
-#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
-#define ZFS_OBJ_MUTEX(zp) \
- (&(zp)->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH((zp)->z_id)])
-#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
- mutex_enter(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
-
-#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
- mutex_exit(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
-
-/*
- * Macros to encode/decode ZFS stored time values from/to struct timespec
- */
-#define ZFS_TIME_ENCODE(tp, stmp) \
-{ \
- stmp[0] = (uint64_t)(tp)->tv_sec; \
- stmp[1] = (uint64_t)(tp)->tv_nsec; \
-}
-
-#define ZFS_TIME_DECODE(tp, stmp) \
-{ \
- (tp)->tv_sec = (time_t)stmp[0]; \
- (tp)->tv_nsec = (long)stmp[1]; \
-}
-
-/*
- * Timestamp defines
- */
-#define ACCESSED (AT_ATIME)
-#define STATE_CHANGED (AT_CTIME)
-#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
-
-#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
- if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
- zfs_time_stamper(zp, ACCESSED, NULL)
-
-extern int zfs_init_fs(zfsvfs_t *, znode_t **, cred_t *);
-extern void zfs_set_dataprop(objset_t *);
-extern void zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx);
-extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
-extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
-extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
-extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
-extern void zfs_znode_init(void);
-extern void zfs_znode_fini(void);
-extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
-extern void zfs_zinactive(znode_t *);
-extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
-extern void zfs_znode_free(znode_t *);
-/*
- * Neither function takes arguments.  In C an empty "()" declares a function
- * with unspecified parameters and disables argument checking; "(void)" makes
- * these real prototypes.
- */
-extern void zfs_remove_op_tables(void);
-extern int zfs_create_op_tables(void);
-extern dev_t zfs_cmpldev(uint64_t);
-
-extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name);
-extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, char *name);
-extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name);
-extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name, char *link);
-extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
-extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, offset_t off, ssize_t len, int ioflag);
-extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, uint64_t off, uint64_t len);
-extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, vattr_t *vap, uint_t mask_applied);
-#ifndef ZFS_NO_ACL
-extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, int aclcnt, ace_t *z_ace);
-#endif
-
-extern zil_get_data_t zfs_get_data;
-extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
-extern int zfsfstype;
-
-#endif /* _KERNEL */
-
-extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_ZNODE_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
deleted file mode 100644
index 947ba9f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZIL_H
-#define _SYS_ZIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dmu.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Intent log format:
- *
- * Each objset has its own intent log. The log header (zil_header_t)
- * for objset N's intent log is kept in the Nth object of the SPA's
- * intent_log objset. The log header points to a chain of log blocks,
- * each of which contains log records (i.e., transactions) followed by
- * a log block trailer (zil_trailer_t). The format of a log record
- * depends on the record (or transaction) type, but all records begin
- * with a common structure that defines the type, length, and txg.
- */
-
-/*
- * Intent log header - this on disk structure holds fields to manage
- * the log. All fields are 64 bit to easily handle cross architectures.
- */
-typedef struct zil_header {
- uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
- uint64_t zh_replay_seq; /* highest replayed sequence number */
- blkptr_t zh_log; /* log chain */
- uint64_t zh_claim_seq; /* highest claimed sequence number */
- uint64_t zh_pad[5];
-} zil_header_t;
-
-/*
- * Log block trailer - structure at the end of the header and each log block
- *
- * The zit_bt contains a zbt_cksum which for the intent log is
- * the sequence number of this log block. A seq of 0 is invalid.
- * The zbt_cksum is checked by the SPA against the sequence
- * number passed in the blk_cksum field of the blkptr_t
- */
-typedef struct zil_trailer {
- uint64_t zit_pad;
- blkptr_t zit_next_blk; /* next block in chain */
- uint64_t zit_nused; /* bytes in log block used */
- zio_block_tail_t zit_bt; /* block trailer */
-} zil_trailer_t;
-
-#define ZIL_MIN_BLKSZ 4096ULL
-#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
-#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
-
-/*
- * The words of a log block checksum.
- */
-#define ZIL_ZC_GUID_0 0
-#define ZIL_ZC_GUID_1 1
-#define ZIL_ZC_OBJSET 2
-#define ZIL_ZC_SEQ 3
-
-/*
- * Intent log transaction types and record structures
- */
-#define TX_CREATE 1 /* Create file */
-#define TX_MKDIR 2 /* Make directory */
-#define TX_MKXATTR 3 /* Make XATTR directory */
-#define TX_SYMLINK 4 /* Create symbolic link to a file */
-#define TX_REMOVE 5 /* Remove file */
-#define TX_RMDIR 6 /* Remove directory */
-#define TX_LINK 7 /* Create hard link to a file */
-#define TX_RENAME 8 /* Rename a file */
-#define TX_WRITE 9 /* File write */
-#define TX_TRUNCATE 10 /* Truncate a file */
-#define TX_SETATTR 11 /* Set file attributes */
-#define TX_ACL 12 /* Set acl */
-#define TX_MAX_TYPE 13 /* Max transaction type */
-
-/*
- * Format of log records.
- * The fields are carefully defined to allow them to be aligned
- * and sized the same on sparc & intel architectures.
- * Each log record has a common structure at the beginning.
- *
- * Note, lrc_seq holds two different sequence numbers. Whilst in memory
- * it contains the transaction sequence number. The log record on
- * disk holds the sequence number of all log records which is used to
- * ensure we don't replay the same record. The two sequence numbers are
- * different because the transactions can now be pushed out of order.
- */
-typedef struct { /* common log record header */
- uint64_t lrc_txtype; /* intent log transaction type */
- uint64_t lrc_reclen; /* transaction record length */
- uint64_t lrc_txg; /* dmu transaction group number */
- uint64_t lrc_seq; /* see comment above */
-} lr_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_doid; /* object id of directory */
- uint64_t lr_foid; /* object id of created file object */
- uint64_t lr_mode; /* mode of object */
- uint64_t lr_uid; /* uid of object */
- uint64_t lr_gid; /* gid of object */
- uint64_t lr_gen; /* generation (txg of creation) */
- uint64_t lr_crtime[2]; /* creation time */
- uint64_t lr_rdev; /* rdev of object to create */
- /* name of object to create follows this */
- /* for symlinks, link content follows name */
-} lr_create_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_doid; /* obj id of directory */
- /* name of object to remove follows this */
-} lr_remove_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_doid; /* obj id of directory */
- uint64_t lr_link_obj; /* obj id of link */
- /* name of object to link follows this */
-} lr_link_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_sdoid; /* obj id of source directory */
- uint64_t lr_tdoid; /* obj id of target directory */
- /* 2 strings: names of source and destination follow this */
-} lr_rename_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_foid; /* file object to write */
- uint64_t lr_offset; /* offset to write to */
- uint64_t lr_length; /* user data length to write */
- uint64_t lr_blkoff; /* offset represented by lr_blkptr */
- blkptr_t lr_blkptr; /* spa block pointer for replay */
- /* write data will follow for small writes */
-} lr_write_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_foid; /* object id of file to truncate */
- uint64_t lr_offset; /* offset to truncate from */
- uint64_t lr_length; /* length to truncate */
-} lr_truncate_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_foid; /* file object to change attributes */
- uint64_t lr_mask; /* mask of attributes to set */
- uint64_t lr_mode; /* mode to set */
- uint64_t lr_uid; /* uid to set */
- uint64_t lr_gid; /* gid to set */
- uint64_t lr_size; /* size to set */
- uint64_t lr_atime[2]; /* access time */
- uint64_t lr_mtime[2]; /* modification time */
-} lr_setattr_t;
-
-typedef struct {
- lr_t lr_common; /* common portion of log record */
- uint64_t lr_foid; /* obj id of file */
- uint64_t lr_aclcnt; /* number of acl entries */
- /* lr_aclcnt number of ace_t entries follow this */
-} lr_acl_t;
-
-/*
- * ZIL structure definitions, interface function prototype and globals.
- */
-
-/*
- * ZFS intent log transaction structure
- */
-typedef enum {
- WR_INDIRECT, /* indirect - a large write: dmu_sync() the data */
- /* and put the blkptr in the log, rather than the actual data */
- WR_COPIED, /* immediate - data is copied into lr_write_t */
- WR_NEED_COPY, /* immediate - data needs to be copied if pushed */
-} itx_wr_state_t;
-
-typedef struct itx {
- list_node_t itx_node; /* linkage on zl_itx_list */
- void *itx_private; /* type-specific opaque data */
- itx_wr_state_t itx_wr_state; /* write state */
- uint8_t itx_sync; /* synchronous transaction */
- lr_t itx_lr; /* common part of log record */
- /* followed by type-specific part of lr_xx_t and its immediate data */
-} itx_t;
-
-
-/*
- * zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
- * to handle the cleanup of the dmu_sync() buffer write
- */
-typedef struct {
- zilog_t *zgd_zilog; /* zilog */
- blkptr_t *zgd_bp; /* block pointer */
- struct rl *zgd_rl; /* range lock */
-} zgd_t;
-
-
-typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
- uint64_t txg);
-typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
- uint64_t txg);
-typedef int zil_replay_func_t();
-typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
-
-extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
-
-extern void zil_init(void);
-extern void zil_fini(void);
-
-extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
-extern void zil_free(zilog_t *zilog);
-
-extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
-extern void zil_close(zilog_t *zilog);
-
-extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE]);
-extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
-
-extern itx_t *zil_itx_create(int txtype, size_t lrsize);
-extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
-
-extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
-
-extern int zil_claim(char *osname, void *txarg);
-extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
-extern void zil_clean(zilog_t *zilog);
-extern int zil_is_committed(zilog_t *zilog);
-
-extern int zil_suspend(zilog_t *zilog);
-extern void zil_resume(zilog_t *zilog);
-
-extern void zil_add_vdev(zilog_t *zilog, uint64_t vdev);
-
-extern int zil_disable;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZIL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
deleted file mode 100644
index 3ecf4e4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZIL_IMPL_H
-#define _SYS_ZIL_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zil.h>
-#include <sys/dmu_objset.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Log write buffer.
- */
-typedef struct lwb {
- zilog_t *lwb_zilog; /* back pointer to log struct */
- blkptr_t lwb_blk; /* on disk address of this log blk */
- int lwb_nused; /* # used bytes in buffer */
- int lwb_sz; /* size of block and buffer */
- char *lwb_buf; /* log write buffer */
- zio_t *lwb_zio; /* zio for this buffer */
- uint64_t lwb_max_txg; /* highest txg in this lwb */
- txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
- list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
-} lwb_t;
-
-/*
- * Vdev flushing: We use a bit map of size ZIL_VDEV_BMSZ bytes.
- * Any vdev numbers beyond that use a linked list of zil_vdev_t structures.
- */
-
-#define ZIL_VDEV_BMSZ 16 /* 16 * 8 = 128 vdevs */
-typedef struct zil_vdev {
- uint64_t vdev; /* device written */
- list_node_t vdev_seq_node; /* zilog->zl_vdev_list linkage */
-} zil_vdev_t;
-
-/*
- * Stable storage intent log management structure. One per dataset.
- */
-struct zilog {
- kmutex_t zl_lock; /* protects most zilog_t fields */
- struct dsl_pool *zl_dmu_pool; /* DSL pool */
- spa_t *zl_spa; /* handle for read/write log */
- const zil_header_t *zl_header; /* log header buffer */
- objset_t *zl_os; /* object set we're logging */
- zil_get_data_t *zl_get_data; /* callback to get object content */
- zio_t *zl_root_zio; /* log writer root zio */
- uint64_t zl_itx_seq; /* next itx sequence number */
- uint64_t zl_commit_seq; /* committed up to this number */
- uint64_t zl_lr_seq; /* log record sequence number */
- uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
- uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
- uint32_t zl_suspend; /* log suspend count */
- kcondvar_t zl_cv_writer; /* log writer thread completion */
- kcondvar_t zl_cv_suspend; /* log suspend completion */
- uint8_t zl_suspending; /* log is currently suspending */
- uint8_t zl_keep_first; /* keep first log block in destroy */
- uint8_t zl_stop_replay; /* don't replay any further */
- uint8_t zl_stop_sync; /* for debugging */
- uint8_t zl_writer; /* boolean: write setup in progress */
- uint8_t zl_log_error; /* boolean: log write error */
- list_t zl_itx_list; /* in-memory itx list */
- uint64_t zl_itx_list_sz; /* total size of records on list */
- uint64_t zl_cur_used; /* current commit log size used */
- uint64_t zl_prev_used; /* previous commit log size used */
- list_t zl_lwb_list; /* in-flight log write list */
- list_t zl_vdev_list; /* list of [vdev, seq] pairs */
- uint8_t zl_vdev_bmap[ZIL_VDEV_BMSZ]; /* bitmap of vdevs */
- taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
- avl_tree_t zl_dva_tree; /* track DVAs during log parse */
- clock_t zl_replay_time; /* lbolt of when replay started */
- uint64_t zl_replay_blks; /* number of log blocks replayed */
-};
-
-typedef struct zil_dva_node {
- dva_t zn_dva;
- avl_node_t zn_node;
-} zil_dva_node_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZIL_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
deleted file mode 100644
index b026ae6..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZIO_H
-#define _ZIO_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/avl.h>
-#include <sys/dkio.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio_impl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
-
-typedef struct zio_block_tail {
- uint64_t zbt_magic; /* for validation, endianness */
- zio_cksum_t zbt_cksum; /* 256-bit checksum */
-} zio_block_tail_t;
-
-/*
- * Gang block headers are self-checksumming and contain an array
- * of block pointers.
- */
-#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
-#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
-#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
- sizeof (zio_block_tail_t) - \
- (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
- sizeof (uint64_t))
-
-#define ZIO_GET_IOSIZE(zio) \
- (BP_IS_GANG((zio)->io_bp) ? \
- SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
-
-typedef struct zio_gbh {
- blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
- uint64_t zg_filler[SPA_GBH_FILLER];
- zio_block_tail_t zg_tail;
-} zio_gbh_phys_t;
-
-enum zio_checksum {
- ZIO_CHECKSUM_INHERIT = 0,
- ZIO_CHECKSUM_ON,
- ZIO_CHECKSUM_OFF,
- ZIO_CHECKSUM_LABEL,
- ZIO_CHECKSUM_GANG_HEADER,
- ZIO_CHECKSUM_ZILOG,
- ZIO_CHECKSUM_FLETCHER_2,
- ZIO_CHECKSUM_FLETCHER_4,
- ZIO_CHECKSUM_SHA256,
- ZIO_CHECKSUM_FUNCTIONS
-};
-
-#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
-#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
-
-enum zio_compress {
- ZIO_COMPRESS_INHERIT = 0,
- ZIO_COMPRESS_ON,
- ZIO_COMPRESS_OFF,
- ZIO_COMPRESS_LZJB,
- ZIO_COMPRESS_EMPTY,
- ZIO_COMPRESS_GZIP_1,
- ZIO_COMPRESS_GZIP_2,
- ZIO_COMPRESS_GZIP_3,
- ZIO_COMPRESS_GZIP_4,
- ZIO_COMPRESS_GZIP_5,
- ZIO_COMPRESS_GZIP_6,
- ZIO_COMPRESS_GZIP_7,
- ZIO_COMPRESS_GZIP_8,
- ZIO_COMPRESS_GZIP_9,
- ZIO_COMPRESS_FUNCTIONS
-};
-
-#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
-#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
-
-#define ZIO_PRIORITY_NOW (zio_priority_table[0])
-#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
-#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
-#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
-#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
-#define ZIO_PRIORITY_FREE (zio_priority_table[5])
-#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
-#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
-#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
-#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
-#define ZIO_PRIORITY_TABLE_SIZE 10
-
-#define ZIO_FLAG_MUSTSUCCEED 0x00000
-#define ZIO_FLAG_CANFAIL 0x00001
-#define ZIO_FLAG_FAILFAST 0x00002
-#define ZIO_FLAG_CONFIG_HELD 0x00004
-#define ZIO_FLAG_CONFIG_GRABBED 0x00008
-
-#define ZIO_FLAG_DONT_CACHE 0x00010
-#define ZIO_FLAG_DONT_QUEUE 0x00020
-#define ZIO_FLAG_DONT_PROPAGATE 0x00040
-#define ZIO_FLAG_DONT_RETRY 0x00080
-
-#define ZIO_FLAG_PHYSICAL 0x00100
-#define ZIO_FLAG_IO_BYPASS 0x00200
-#define ZIO_FLAG_IO_REPAIR 0x00400
-#define ZIO_FLAG_SPECULATIVE 0x00800
-
-#define ZIO_FLAG_RESILVER 0x01000
-#define ZIO_FLAG_SCRUB 0x02000
-#define ZIO_FLAG_SCRUB_THREAD 0x04000
-#define ZIO_FLAG_SUBBLOCK 0x08000
-
-#define ZIO_FLAG_NOBOOKMARK 0x10000
-#define ZIO_FLAG_USER 0x20000
-
-#define ZIO_FLAG_METADATA 0x40000
-
-#define ZIO_FLAG_GANG_INHERIT \
- (ZIO_FLAG_CANFAIL | \
- ZIO_FLAG_FAILFAST | \
- ZIO_FLAG_CONFIG_HELD | \
- ZIO_FLAG_DONT_RETRY | \
- ZIO_FLAG_IO_REPAIR | \
- ZIO_FLAG_SPECULATIVE | \
- ZIO_FLAG_RESILVER | \
- ZIO_FLAG_SCRUB | \
- ZIO_FLAG_SCRUB_THREAD)
-
-#define ZIO_FLAG_VDEV_INHERIT \
- (ZIO_FLAG_GANG_INHERIT | \
- ZIO_FLAG_DONT_CACHE | \
- ZIO_FLAG_PHYSICAL)
-
-/*
- * We'll take the EILSEQ (Illegal byte sequence) errno
- * to indicate checksum errors.
- */
-#define ECKSUM EILSEQ
-
-typedef struct zio zio_t;
-typedef void zio_done_func_t(zio_t *zio);
-
-extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
-extern char *zio_type_name[ZIO_TYPES];
-
-/*
- * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
- * identifies any block in the pool. By convention, the meta-objset (MOS)
- * is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
- * level -1 of the meta-dnode, and intent log blocks (which are chained
- * off the root block) have blkid == sequence number. In summary:
- *
- * mos is objset 0
- * meta-dnode is object 0
- * root block is <objset, 0, -1, 0>
- * intent log is <objset, 0, -1, ZIL sequence number>
- *
- * Note: this structure is called a bookmark because its first purpose was
- * to remember where to resume a pool-wide traverse. The absolute ordering
- * for block visitation during traversal is defined in compare_bookmark().
- *
- * Note: this structure is passed between userland and the kernel.
- * Therefore it must not change size or alignment between 32/64 bit
- * compilation options.
- */
-typedef struct zbookmark {
- uint64_t zb_objset;
- uint64_t zb_object;
- int64_t zb_level;
- uint64_t zb_blkid;
-} zbookmark_t;
-
-struct zio {
- /* Core information about this I/O */
- zio_t *io_parent;
- zio_t *io_root;
- spa_t *io_spa;
- zbookmark_t io_bookmark;
- enum zio_checksum io_checksum;
- enum zio_compress io_compress;
- int io_ndvas;
- uint64_t io_txg;
- blkptr_t *io_bp;
- blkptr_t io_bp_copy;
- zio_t *io_child;
- zio_t *io_sibling_prev;
- zio_t *io_sibling_next;
- zio_transform_t *io_transform_stack;
- zio_t *io_logical;
-
- /* Callback info */
- zio_done_func_t *io_ready;
- zio_done_func_t *io_done;
- void *io_private;
- blkptr_t io_bp_orig;
-
- /* Data represented by this I/O */
- void *io_data;
- uint64_t io_size;
-
- /* Stuff for the vdev stack */
- vdev_t *io_vd;
- void *io_vsd;
- uint64_t io_offset;
- uint64_t io_deadline;
- uint64_t io_timestamp;
- avl_node_t io_offset_node;
- avl_node_t io_deadline_node;
- avl_tree_t *io_vdev_tree;
- zio_t *io_delegate_list;
- zio_t *io_delegate_next;
-
- /* Internal pipeline state */
- int io_flags;
- enum zio_type io_type;
- enum zio_stage io_stage;
- uint8_t io_stalled;
- uint8_t io_priority;
- struct dk_callback io_dk_callback;
- int io_cmd;
- int io_retries;
- int io_error;
- uint32_t io_numerrors;
- uint32_t io_pipeline;
- uint32_t io_async_stages;
- uint64_t io_children_notready;
- uint64_t io_children_notdone;
- void *io_waiter;
- kmutex_t io_lock;
- kcondvar_t io_cv;
-
- /* FMA state */
- uint64_t io_ena;
-};
-
-extern zio_t *zio_null(zio_t *pio, spa_t *spa,
- zio_done_func_t *done, void *private, int flags);
-
-extern zio_t *zio_root(spa_t *spa,
- zio_done_func_t *done, void *private, int flags);
-
-extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
- uint64_t size, zio_done_func_t *done, void *private,
- int priority, int flags, zbookmark_t *zb);
-
-extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress,
- int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
- int flags, zbookmark_t *zb);
-
-extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
- uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *done, void *private, int priority, int flags,
- zbookmark_t *zb);
-
-extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private);
-
-extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private);
-
-extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *private, int priority, int flags);
-
-extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, int flags);
-
-extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, int flags);
-
-extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
- blkptr_t *old_bp, uint64_t txg);
-extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
-
-extern int zio_wait(zio_t *zio);
-extern void zio_nowait(zio_t *zio);
-
-extern void *zio_buf_alloc(size_t size);
-extern void zio_buf_free(void *buf, size_t size);
-extern void *zio_data_buf_alloc(size_t size);
-extern void zio_data_buf_free(void *buf, size_t size);
-
-/*
- * Move an I/O to the next stage of the pipeline and execute that stage.
- * There's no locking on io_stage because there's no legitimate way for
- * multiple threads to be attempting to process the same I/O.
- */
-extern void zio_next_stage(zio_t *zio);
-extern void zio_next_stage_async(zio_t *zio);
-extern void zio_wait_children_done(zio_t *zio);
-
-/*
- * Delegate I/O to a child vdev.
- */
-extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
- uint64_t offset, void *data, uint64_t size, int type, int priority,
- int flags, zio_done_func_t *done, void *private);
-
-extern void zio_vdev_io_bypass(zio_t *zio);
-extern void zio_vdev_io_reissue(zio_t *zio);
-extern void zio_vdev_io_redone(zio_t *zio);
-
-extern void zio_checksum_verified(zio_t *zio);
-extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp);
-
-extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
-extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
-
-boolean_t zio_should_retry(zio_t *zio);
-
-/*
- * Initial setup and teardown.
- */
-extern void zio_init(void);
-extern void zio_fini(void);
-
-/*
- * Fault injection
- */
-struct zinject_record;
-extern uint32_t zio_injection_enabled;
-extern int zio_inject_fault(char *name, int flags, int *id,
- struct zinject_record *record);
-extern int zio_inject_list_next(int *id, char *name, size_t buflen,
- struct zinject_record *record);
-extern int zio_clear_fault(int id);
-extern int zio_handle_fault_injection(zio_t *zio, int error);
-extern int zio_handle_device_injection(vdev_t *vd, int error);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZIO_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
deleted file mode 100644
index bb7bd41..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZIO_CHECKSUM_H
-#define _SYS_ZIO_CHECKSUM_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Signature for checksum functions.
- */
-typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
-
-/*
- * Information about each checksum function.
- */
-typedef struct zio_checksum_info {
- zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
- int ci_correctable; /* number of correctable bits */
- int ci_zbt; /* uses zio block tail? */
- char *ci_name; /* descriptive name */
-} zio_checksum_info_t;
-
-extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
-
-/*
- * Checksum routines.
- */
-extern zio_checksum_t fletcher_2_native;
-extern zio_checksum_t fletcher_4_native;
-extern zio_checksum_t fletcher_4_incremental_native;
-
-extern zio_checksum_t fletcher_2_byteswap;
-extern zio_checksum_t fletcher_4_byteswap;
-extern zio_checksum_t fletcher_4_incremental_byteswap;
-
-extern zio_checksum_t zio_checksum_SHA256;
-
-extern void zio_checksum(uint_t checksum, zio_cksum_t *zcp,
- void *data, uint64_t size);
-extern int zio_checksum_error(zio_t *zio);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZIO_CHECKSUM_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
deleted file mode 100644
index 66ee8d4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZIO_COMPRESS_H
-#define _SYS_ZIO_COMPRESS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Common signature for all zio compress/decompress functions.
- */
-typedef size_t zio_compress_func_t(void *src, void *dst,
- size_t s_len, size_t d_len, int);
-typedef int zio_decompress_func_t(void *src, void *dst,
- size_t s_len, size_t d_len, int);
-
-/*
- * Information about each compression function.
- */
-typedef struct zio_compress_info {
- zio_compress_func_t *ci_compress; /* compression function */
- zio_decompress_func_t *ci_decompress; /* decompression function */
- int ci_level; /* level parameter */
- char *ci_name; /* algorithm name */
-} zio_compress_info_t;
-
-extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
-
-/*
- * Compression routines.
- */
-extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
- int level);
-extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
- int level);
-extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
- int level);
-extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
- int level);
-
-/*
- * Compress and decompress data if necessary.
- */
-extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
- void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
-extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
- void *dest, uint64_t destsize);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZIO_COMPRESS_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
deleted file mode 100644
index d2ddbc3..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZIO_IMPL_H
-#define _ZIO_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/zio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * I/O Groups: pipeline stage definitions.
- */
-
-typedef enum zio_stage {
- ZIO_STAGE_OPEN = 0, /* RWFCI */
- ZIO_STAGE_WAIT_CHILDREN_READY, /* RWFCI */
-
- ZIO_STAGE_WRITE_COMPRESS, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
-
- ZIO_STAGE_GANG_PIPELINE, /* -WFC- */
-
- ZIO_STAGE_GET_GANG_HEADER, /* -WFC- */
- ZIO_STAGE_REWRITE_GANG_MEMBERS, /* -W--- */
- ZIO_STAGE_FREE_GANG_MEMBERS, /* --F-- */
- ZIO_STAGE_CLAIM_GANG_MEMBERS, /* ---C- */
-
- ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
- ZIO_STAGE_DVA_FREE, /* --F-- */
- ZIO_STAGE_DVA_CLAIM, /* ---C- */
-
- ZIO_STAGE_GANG_CHECKSUM_GENERATE, /* -W--- */
-
- ZIO_STAGE_READY, /* RWFCI */
-
- ZIO_STAGE_VDEV_IO_START, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
-
- ZIO_STAGE_WAIT_CHILDREN_DONE, /* RWFCI */
-
- ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
- ZIO_STAGE_READ_GANG_MEMBERS, /* R---- */
- ZIO_STAGE_READ_DECOMPRESS, /* R---- */
-
- ZIO_STAGE_DONE /* RWFCI */
-} zio_stage_t;
-
-/*
- * The stages for which there's some performance value in going async.
- * When compression is enabled, ZIO_STAGE_WRITE_COMPRESS is ORed in as well.
- */
-#define ZIO_ASYNC_PIPELINE_STAGES \
- ((1U << ZIO_STAGE_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_VDEV_IO_DONE) | \
- (1U << ZIO_STAGE_CHECKSUM_VERIFY) | \
- (1U << ZIO_STAGE_READ_DECOMPRESS))
-
-#define ZIO_VDEV_IO_PIPELINE \
- ((1U << ZIO_STAGE_VDEV_IO_START) | \
- (1U << ZIO_STAGE_VDEV_IO_DONE) | \
- (1U << ZIO_STAGE_VDEV_IO_ASSESS))
-
-#define ZIO_READ_PHYS_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_CHECKSUM_VERIFY) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_READ_PIPELINE \
- ZIO_READ_PHYS_PIPELINE
-
-#define ZIO_WRITE_PHYS_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_WRITE_COMMON_PIPELINE \
- ZIO_WRITE_PHYS_PIPELINE
-
-#define ZIO_WRITE_PIPELINE \
- ((1U << ZIO_STAGE_WRITE_COMPRESS) | \
- ZIO_WRITE_COMMON_PIPELINE)
-
-#define ZIO_GANG_STAGES \
- ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_CLAIM_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE) | \
- (1U << ZIO_STAGE_READ_GANG_MEMBERS))
-
-#define ZIO_REWRITE_PIPELINE \
- ((1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE) | \
- ZIO_WRITE_COMMON_PIPELINE)
-
-#define ZIO_WRITE_ALLOCATE_PIPELINE \
- ((1U << ZIO_STAGE_DVA_ALLOCATE) | \
- ZIO_WRITE_COMMON_PIPELINE)
-
-#define ZIO_GANG_FREE_STAGES \
- ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS))
-
-#define ZIO_FREE_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_FREE_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_DVA_FREE) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_CLAIM_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_GANG_PIPELINE) | \
- (1U << ZIO_STAGE_GET_GANG_HEADER) | \
- (1U << ZIO_STAGE_CLAIM_GANG_MEMBERS) | \
- (1U << ZIO_STAGE_DVA_CLAIM) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_IOCTL_PIPELINE \
- ((1U << ZIO_STAGE_OPEN) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- ZIO_VDEV_IO_PIPELINE | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_WAIT_FOR_CHILDREN_PIPELINE \
- ((1U << ZIO_STAGE_WAIT_CHILDREN_READY) | \
- (1U << ZIO_STAGE_READY) | \
- (1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_WAIT_FOR_CHILDREN_DONE_PIPELINE \
- ((1U << ZIO_STAGE_WAIT_CHILDREN_DONE) | \
- (1U << ZIO_STAGE_DONE))
-
-#define ZIO_VDEV_CHILD_PIPELINE \
- (ZIO_WAIT_FOR_CHILDREN_DONE_PIPELINE | \
- ZIO_VDEV_IO_PIPELINE)
-
-#define ZIO_ERROR_PIPELINE_MASK \
- ZIO_WAIT_FOR_CHILDREN_PIPELINE
-
-typedef struct zio_transform zio_transform_t;
-struct zio_transform {
- void *zt_data;
- uint64_t zt_size;
- uint64_t zt_bufsize;
- zio_transform_t *zt_next;
-};
-
-extern void zio_inject_init(void);
-extern void zio_inject_fini(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZIO_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h b/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
deleted file mode 100644
index df85824..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ZVOL_H
-#define _SYS_ZVOL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _KERNEL
-extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
-extern int zvol_check_volblocksize(uint64_t volblocksize);
-extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
-extern void zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx);
-extern int zvol_create_minor(const char *, dev_t);
-extern int zvol_remove_minor(const char *);
-extern int zvol_set_volsize(const char *, dev_t, uint64_t);
-extern int zvol_set_volblocksize(const char *, uint64_t);
-
-extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
-extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
-#ifndef __FreeBSD__
-extern int zvol_strategy(buf_t *bp);
-extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
-extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
-extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
-extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
-#endif
-extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
- int *rvalp);
-extern int zvol_busy(void);
-extern void zvol_init(void);
-extern void zvol_fini(void);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ZVOL_H */
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/txg.c b/sys/contrib/opensolaris/uts/common/fs/zfs/txg.c
deleted file mode 100644
index 844beb6..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/txg.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/txg_impl.h>
-#include <sys/dmu_impl.h>
-#include <sys/dsl_pool.h>
-#include <sys/callb.h>
-
-/*
- * Pool-wide transaction groups.
- */
-
-static void txg_sync_thread(void *arg);
-static void txg_quiesce_thread(void *arg);
-static void txg_timelimit_thread(void *arg);
-
-int txg_time = 5; /* max 5 seconds worth of delta per txg */
-
-/*
- * Prepare the txg subsystem.
- */
-void
-txg_init(dsl_pool_t *dp, uint64_t txg)
-{
- tx_state_t *tx = &dp->dp_tx;
- int c, i;
- bzero(tx, sizeof (tx_state_t));
-
- tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
- for (c = 0; c < max_ncpus; c++) {
- mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
- for (i = 0; i < TXG_SIZE; i++)
- cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT, NULL);
- }
-
- rw_init(&tx->tx_suspend, NULL, RW_DEFAULT, NULL);
- mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&tx->tx_timeout_exit_cv, NULL, CV_DEFAULT, NULL);
- cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL);
-
- tx->tx_open_txg = txg;
-}
-
-/*
- * Close down the txg subsystem.
- */
-void
-txg_fini(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
- int c, i;
-
- ASSERT(tx->tx_threads == 0);
-
- cv_destroy(&tx->tx_exit_cv);
- cv_destroy(&tx->tx_timeout_exit_cv);
- cv_destroy(&tx->tx_quiesce_done_cv);
- cv_destroy(&tx->tx_quiesce_more_cv);
- cv_destroy(&tx->tx_sync_done_cv);
- cv_destroy(&tx->tx_sync_more_cv);
- rw_destroy(&tx->tx_suspend);
- mutex_destroy(&tx->tx_sync_lock);
-
- for (c = 0; c < max_ncpus; c++) {
- for (i = 0; i < TXG_SIZE; i++)
- cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
- mutex_destroy(&tx->tx_cpu[c].tc_lock);
- }
-
- kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
-
- bzero(tx, sizeof (tx_state_t));
-}
-
-/*
- * Start syncing transaction groups.
- */
-void
-txg_sync_start(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
-
- mutex_enter(&tx->tx_sync_lock);
-
- dprintf("pool %p\n", dp);
-
- ASSERT(tx->tx_threads == 0);
-
- tx->tx_threads = 3;
-
- tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
- dp, 0, &p0, TS_RUN, minclsyspri);
-
- tx->tx_sync_thread = thread_create(NULL, 0, txg_sync_thread,
- dp, 0, &p0, TS_RUN, minclsyspri);
-
- tx->tx_timelimit_thread = thread_create(NULL, 0, txg_timelimit_thread,
- dp, 0, &p0, TS_RUN, minclsyspri);
-
- mutex_exit(&tx->tx_sync_lock);
-}
-
-static void
-txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr)
-{
- CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG);
- mutex_enter(&tx->tx_sync_lock);
-}
-
-static void
-txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp)
-{
- ASSERT(*tpp != NULL);
- *tpp = NULL;
- tx->tx_threads--;
- cv_broadcast(&tx->tx_exit_cv);
- CALLB_CPR_EXIT(cpr); /* drops &tx->tx_sync_lock */
- thread_exit();
-}
-
-static void
-txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, int secmax)
-{
- CALLB_CPR_SAFE_BEGIN(cpr);
-
- if (secmax)
- (void) cv_timedwait(cv, &tx->tx_sync_lock, secmax * hz);
- else
- cv_wait(cv, &tx->tx_sync_lock);
-
- CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
-}
-
-/*
- * Stop syncing transaction groups.
- */
-void
-txg_sync_stop(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
-
- dprintf("pool %p\n", dp);
- /*
- * Finish off any work in progress.
- */
- ASSERT(tx->tx_threads == 3);
- txg_wait_synced(dp, 0);
-
- /*
- * Wake all 3 sync threads (one per state) and wait for them to die.
- */
- mutex_enter(&tx->tx_sync_lock);
-
- ASSERT(tx->tx_threads == 3);
-
- tx->tx_exiting = 1;
-
- cv_broadcast(&tx->tx_quiesce_more_cv);
- cv_broadcast(&tx->tx_quiesce_done_cv);
- cv_broadcast(&tx->tx_sync_more_cv);
- cv_broadcast(&tx->tx_timeout_exit_cv);
-
- while (tx->tx_threads != 0)
- cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock);
-
- tx->tx_exiting = 0;
-
- mutex_exit(&tx->tx_sync_lock);
-}
-
-uint64_t
-txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
-{
- tx_state_t *tx = &dp->dp_tx;
- tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID];
- uint64_t txg;
-
- mutex_enter(&tc->tc_lock);
-
- txg = tx->tx_open_txg;
- tc->tc_count[txg & TXG_MASK]++;
-
- th->th_cpu = tc;
- th->th_txg = txg;
-
- return (txg);
-}
-
-void
-txg_rele_to_quiesce(txg_handle_t *th)
-{
- tx_cpu_t *tc = th->th_cpu;
-
- mutex_exit(&tc->tc_lock);
-}
-
-void
-txg_rele_to_sync(txg_handle_t *th)
-{
- tx_cpu_t *tc = th->th_cpu;
- int g = th->th_txg & TXG_MASK;
-
- mutex_enter(&tc->tc_lock);
- ASSERT(tc->tc_count[g] != 0);
- if (--tc->tc_count[g] == 0)
- cv_broadcast(&tc->tc_cv[g]);
- mutex_exit(&tc->tc_lock);
-
- th->th_cpu = NULL; /* defensive */
-}
-
-static void
-txg_quiesce(dsl_pool_t *dp, uint64_t txg)
-{
- tx_state_t *tx = &dp->dp_tx;
- int g = txg & TXG_MASK;
- int c;
-
- /*
- * Grab all tx_cpu locks so nobody else can get into this txg.
- */
- for (c = 0; c < max_ncpus; c++)
- mutex_enter(&tx->tx_cpu[c].tc_lock);
-
- ASSERT(txg == tx->tx_open_txg);
- tx->tx_open_txg++;
-
- /*
- * Now that we've incremented tx_open_txg, we can let threads
- * enter the next transaction group.
- */
- for (c = 0; c < max_ncpus; c++)
- mutex_exit(&tx->tx_cpu[c].tc_lock);
-
- /*
- * Quiesce the transaction group by waiting for everyone to txg_exit().
- */
- for (c = 0; c < max_ncpus; c++) {
- tx_cpu_t *tc = &tx->tx_cpu[c];
- mutex_enter(&tc->tc_lock);
- while (tc->tc_count[g] != 0)
- cv_wait(&tc->tc_cv[g], &tc->tc_lock);
- mutex_exit(&tc->tc_lock);
- }
-}
-
-static void
-txg_sync_thread(void *arg)
-{
- dsl_pool_t *dp = arg;
- tx_state_t *tx = &dp->dp_tx;
- callb_cpr_t cpr;
-
- txg_thread_enter(tx, &cpr);
-
- for (;;) {
- uint64_t txg;
-
- /*
- * We sync when there's someone waiting on us, or the
- * quiesce thread has handed off a txg to us.
- */
- while (!tx->tx_exiting &&
- tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
- tx->tx_quiesced_txg == 0) {
- dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
- tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
- txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, 0);
- }
-
- /*
- * Wait until the quiesce thread hands off a txg to us,
- * prompting it to do so if necessary.
- */
- while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) {
- if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
- tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
- cv_broadcast(&tx->tx_quiesce_more_cv);
- txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
- }
-
- if (tx->tx_exiting)
- txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);
-
- rw_enter(&tx->tx_suspend, RW_WRITER);
-
- /*
- * Consume the quiesced txg which has been handed off to
- * us. This may cause the quiescing thread to now be
- * able to quiesce another txg, so we must signal it.
- */
- txg = tx->tx_quiesced_txg;
- tx->tx_quiesced_txg = 0;
- tx->tx_syncing_txg = txg;
- cv_broadcast(&tx->tx_quiesce_more_cv);
- rw_exit(&tx->tx_suspend);
-
- dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
- txg, tx->tx_quiesce_txg_waiting,
- tx->tx_sync_txg_waiting);
- mutex_exit(&tx->tx_sync_lock);
- spa_sync(dp->dp_spa, txg);
- mutex_enter(&tx->tx_sync_lock);
- rw_enter(&tx->tx_suspend, RW_WRITER);
- tx->tx_synced_txg = txg;
- tx->tx_syncing_txg = 0;
- rw_exit(&tx->tx_suspend);
- cv_broadcast(&tx->tx_sync_done_cv);
- }
-}
-
-static void
-txg_quiesce_thread(void *arg)
-{
- dsl_pool_t *dp = arg;
- tx_state_t *tx = &dp->dp_tx;
- callb_cpr_t cpr;
-
- txg_thread_enter(tx, &cpr);
-
- for (;;) {
- uint64_t txg;
-
- /*
- * We quiesce when there's someone waiting on us.
- * However, we can only have one txg in "quiescing" or
- * "quiesced, waiting to sync" state. So we wait until
- * the "quiesced, waiting to sync" txg has been consumed
- * by the sync thread.
- */
- while (!tx->tx_exiting &&
- (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting ||
- tx->tx_quiesced_txg != 0))
- txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0);
-
- if (tx->tx_exiting)
- txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread);
-
- txg = tx->tx_open_txg;
- dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
- txg, tx->tx_quiesce_txg_waiting,
- tx->tx_sync_txg_waiting);
- mutex_exit(&tx->tx_sync_lock);
- txg_quiesce(dp, txg);
- mutex_enter(&tx->tx_sync_lock);
-
- /*
- * Hand this txg off to the sync thread.
- */
- dprintf("quiesce done, handing off txg %llu\n", txg);
- tx->tx_quiesced_txg = txg;
- cv_broadcast(&tx->tx_sync_more_cv);
- cv_broadcast(&tx->tx_quiesce_done_cv);
- }
-}
-
-void
-txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
-{
- tx_state_t *tx = &dp->dp_tx;
-
- mutex_enter(&tx->tx_sync_lock);
- ASSERT(tx->tx_threads == 3);
- if (txg == 0)
- txg = tx->tx_open_txg;
- if (tx->tx_sync_txg_waiting < txg)
- tx->tx_sync_txg_waiting = txg;
- dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
- txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
- while (tx->tx_synced_txg < txg) {
- dprintf("broadcasting sync more "
- "tx_synced=%llu waiting=%llu dp=%p\n",
- tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
- cv_broadcast(&tx->tx_sync_more_cv);
- cv_wait(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
- }
- mutex_exit(&tx->tx_sync_lock);
-}
-
-void
-txg_wait_open(dsl_pool_t *dp, uint64_t txg)
-{
- tx_state_t *tx = &dp->dp_tx;
-
- mutex_enter(&tx->tx_sync_lock);
- ASSERT(tx->tx_threads == 3);
- if (txg == 0)
- txg = tx->tx_open_txg + 1;
- if (tx->tx_quiesce_txg_waiting < txg)
- tx->tx_quiesce_txg_waiting = txg;
- dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
- txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
- while (tx->tx_open_txg < txg) {
- cv_broadcast(&tx->tx_quiesce_more_cv);
- cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
- }
- mutex_exit(&tx->tx_sync_lock);
-}
-
-static void
-txg_timelimit_thread(void *arg)
-{
- dsl_pool_t *dp = arg;
- tx_state_t *tx = &dp->dp_tx;
- callb_cpr_t cpr;
-
- txg_thread_enter(tx, &cpr);
-
- while (!tx->tx_exiting) {
- uint64_t txg = tx->tx_open_txg + 1;
-
- txg_thread_wait(tx, &cpr, &tx->tx_timeout_exit_cv, txg_time);
-
- if (tx->tx_quiesce_txg_waiting < txg)
- tx->tx_quiesce_txg_waiting = txg;
-
- while (!tx->tx_exiting && tx->tx_open_txg < txg) {
- dprintf("pushing out %llu\n", txg);
- cv_broadcast(&tx->tx_quiesce_more_cv);
- txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
- }
- }
- txg_thread_exit(tx, &cpr, &tx->tx_timelimit_thread);
-}
-
-int
-txg_stalled(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
- return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg);
-}
-
-void
-txg_suspend(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
- /* XXX some code paths suspend when they are already suspended! */
- rw_enter(&tx->tx_suspend, RW_READER);
-}
-
-void
-txg_resume(dsl_pool_t *dp)
-{
- tx_state_t *tx = &dp->dp_tx;
- rw_exit(&tx->tx_suspend);
-}
-
-/*
- * Per-txg object lists.
- */
-void
-txg_list_create(txg_list_t *tl, size_t offset)
-{
- int t;
-
- mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);
-
- tl->tl_offset = offset;
-
- for (t = 0; t < TXG_SIZE; t++)
- tl->tl_head[t] = NULL;
-}
-
-void
-txg_list_destroy(txg_list_t *tl)
-{
- int t;
-
- for (t = 0; t < TXG_SIZE; t++)
- ASSERT(txg_list_empty(tl, t));
-
- mutex_destroy(&tl->tl_lock);
-}
-
-int
-txg_list_empty(txg_list_t *tl, uint64_t txg)
-{
- return (tl->tl_head[txg & TXG_MASK] == NULL);
-}
-
-/*
- * Add an entry to the list.
- * Returns 0 if it's a new entry, 1 if it's already there.
- */
-int
-txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
- int already_on_list;
-
- mutex_enter(&tl->tl_lock);
- already_on_list = tn->tn_member[t];
- if (!already_on_list) {
- tn->tn_member[t] = 1;
- tn->tn_next[t] = tl->tl_head[t];
- tl->tl_head[t] = tn;
- }
- mutex_exit(&tl->tl_lock);
-
- return (already_on_list);
-}
-
-/*
- * Remove the head of the list and return it.
- */
-void *
-txg_list_remove(txg_list_t *tl, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn;
- void *p = NULL;
-
- mutex_enter(&tl->tl_lock);
- if ((tn = tl->tl_head[t]) != NULL) {
- p = (char *)tn - tl->tl_offset;
- tl->tl_head[t] = tn->tn_next[t];
- tn->tn_next[t] = NULL;
- tn->tn_member[t] = 0;
- }
- mutex_exit(&tl->tl_lock);
-
- return (p);
-}
-
-/*
- * Remove a specific item from the list and return it.
- */
-void *
-txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn, **tp;
-
- mutex_enter(&tl->tl_lock);
-
- for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
- if ((char *)tn - tl->tl_offset == p) {
- *tp = tn->tn_next[t];
- tn->tn_next[t] = NULL;
- tn->tn_member[t] = 0;
- mutex_exit(&tl->tl_lock);
- return (p);
- }
- }
-
- mutex_exit(&tl->tl_lock);
-
- return (NULL);
-}
-
-int
-txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
-
- return (tn->tn_member[t]);
-}
-
-/*
- * Walk a txg list -- only safe if you know it's not changing.
- */
-void *
-txg_list_head(txg_list_t *tl, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn = tl->tl_head[t];
-
- return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
-}
-
-void *
-txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
-{
- int t = txg & TXG_MASK;
- txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
-
- tn = tn->tn_next[t];
-
- return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/uberblock.c b/sys/contrib/opensolaris/uts/common/fs/zfs/uberblock.c
deleted file mode 100644
index 34d7e0c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/uberblock.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/uberblock_impl.h>
-#include <sys/vdev_impl.h>
-
-int
-uberblock_verify(uberblock_t *ub)
-{
- if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC))
- byteswap_uint64_array(ub, sizeof (uberblock_t));
-
- if (ub->ub_magic != UBERBLOCK_MAGIC)
- return (EINVAL);
-
- return (0);
-}
-
-/*
- * Update the uberblock and return a boolean value indicating whether
- * anything changed in this transaction group.
- */
-int
-uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
-{
- ASSERT(ub->ub_txg < txg);
-
- /*
- * We explicitly do not set ub_version here, so that older versions
- * continue to be written with the previous uberblock version.
- */
- ub->ub_magic = UBERBLOCK_MAGIC;
- ub->ub_txg = txg;
- ub->ub_guid_sum = rvd->vdev_guid_sum;
- ub->ub_timestamp = gethrestime_sec();
-
- return (ub->ub_rootbp.blk_birth == txg);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/unique.c b/sys/contrib/opensolaris/uts/common/fs/zfs/unique.c
deleted file mode 100644
index b52e729..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/unique.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/avl.h>
-#include <sys/unique.h>
-
-static avl_tree_t unique_avl;
-static kmutex_t unique_mtx; /* Lock never initialized. */
-SX_SYSINIT(unique, &unique_mtx, "unique lock");
-
-typedef struct unique {
- avl_node_t un_link;
- uint64_t un_value;
-} unique_t;
-
-#define UNIQUE_MASK ((1ULL << UNIQUE_BITS) - 1)
-
-static int
-unique_compare(const void *a, const void *b)
-{
- const unique_t *una = a;
- const unique_t *unb = b;
-
- if (una->un_value < unb->un_value)
- return (-1);
- if (una->un_value > unb->un_value)
- return (+1);
- return (0);
-}
-
-void
-unique_init(void)
-{
- avl_create(&unique_avl, unique_compare,
- sizeof (unique_t), offsetof(unique_t, un_link));
-}
-
-uint64_t
-unique_create(void)
-{
- return (unique_insert(0));
-}
-
-uint64_t
-unique_insert(uint64_t value)
-{
- avl_index_t idx;
- unique_t *un = kmem_alloc(sizeof (unique_t), KM_SLEEP);
-
- un->un_value = value;
-
- mutex_enter(&unique_mtx);
- while (un->un_value == 0 || un->un_value & ~UNIQUE_MASK ||
- avl_find(&unique_avl, un, &idx)) {
- mutex_exit(&unique_mtx);
- (void) random_get_pseudo_bytes((void*)&un->un_value,
- sizeof (un->un_value));
- un->un_value &= UNIQUE_MASK;
- mutex_enter(&unique_mtx);
- }
-
- avl_insert(&unique_avl, un, idx);
- mutex_exit(&unique_mtx);
-
- return (un->un_value);
-}
-
-void
-unique_remove(uint64_t value)
-{
- unique_t un_tofind;
- unique_t *un;
-
- un_tofind.un_value = value;
- mutex_enter(&unique_mtx);
- un = avl_find(&unique_avl, &un_tofind, NULL);
- if (un != NULL) {
- avl_remove(&unique_avl, un);
- kmem_free(un, sizeof (unique_t));
- }
- mutex_exit(&unique_mtx);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev.c
deleted file mode 100644
index b966099..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ /dev/null
@@ -1,1915 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/fm/fs/zfs.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/dmu.h>
-#include <sys/dmu_tx.h>
-#include <sys/vdev_impl.h>
-#include <sys/uberblock_impl.h>
-#include <sys/metaslab.h>
-#include <sys/metaslab_impl.h>
-#include <sys/space_map.h>
-#include <sys/zio.h>
-#include <sys/zap.h>
-#include <sys/fs/zfs.h>
-
-SYSCTL_DECL(_vfs_zfs);
-SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
-
-/*
- * Virtual device management.
- */
-
-static vdev_ops_t *vdev_ops_table[] = {
- &vdev_root_ops,
- &vdev_raidz_ops,
- &vdev_mirror_ops,
- &vdev_replacing_ops,
- &vdev_spare_ops,
-#ifdef _KERNEL
- &vdev_geom_ops,
-#else
- &vdev_disk_ops,
- &vdev_file_ops,
-#endif
- &vdev_missing_ops,
- NULL
-};
-
-/* maximum scrub/resilver I/O queue */
-int zfs_scrub_limit = 70;
-
-/*
- * Given a vdev type, return the appropriate ops vector.
- */
-static vdev_ops_t *
-vdev_getops(const char *type)
-{
- vdev_ops_t *ops, **opspp;
-
- for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++)
- if (strcmp(ops->vdev_op_type, type) == 0)
- break;
-
- return (ops);
-}
-
-/*
- * Default asize function: return the MAX of psize with the asize of
- * all children. This is what's used by anything other than RAID-Z.
- */
-uint64_t
-vdev_default_asize(vdev_t *vd, uint64_t psize)
-{
- uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
- uint64_t csize;
- uint64_t c;
-
- for (c = 0; c < vd->vdev_children; c++) {
- csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
- asize = MAX(asize, csize);
- }
-
- return (asize);
-}
-
-/*
- * Get the replaceable or attachable device size.
- * If the parent is a mirror or raidz, the replaceable size is the minimum
- * psize of all its children. For the rest, just return our own psize.
- *
- * e.g.
- * psize rsize
- * root - -
- * mirror/raidz - -
- * disk1 20g 20g
- * disk2 40g 20g
- * disk3 80g 80g
- */
-uint64_t
-vdev_get_rsize(vdev_t *vd)
-{
- vdev_t *pvd, *cvd;
- uint64_t c, rsize;
-
- pvd = vd->vdev_parent;
-
- /*
- * If our parent is NULL or the root, just return our own psize.
- */
- if (pvd == NULL || pvd->vdev_parent == NULL)
- return (vd->vdev_psize);
-
- rsize = 0;
-
- for (c = 0; c < pvd->vdev_children; c++) {
- cvd = pvd->vdev_child[c];
- rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1;
- }
-
- return (rsize);
-}
-
-vdev_t *
-vdev_lookup_top(spa_t *spa, uint64_t vdev)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- if (vdev < rvd->vdev_children)
- return (rvd->vdev_child[vdev]);
-
- return (NULL);
-}
-
-vdev_t *
-vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
-{
- int c;
- vdev_t *mvd;
-
- if (vd->vdev_guid == guid)
- return (vd);
-
- for (c = 0; c < vd->vdev_children; c++)
- if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
- NULL)
- return (mvd);
-
- return (NULL);
-}
-
-void
-vdev_add_child(vdev_t *pvd, vdev_t *cvd)
-{
- size_t oldsize, newsize;
- uint64_t id = cvd->vdev_id;
- vdev_t **newchild;
-
- ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER));
- ASSERT(cvd->vdev_parent == NULL);
-
- cvd->vdev_parent = pvd;
-
- if (pvd == NULL)
- return;
-
- ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL);
-
- oldsize = pvd->vdev_children * sizeof (vdev_t *);
- pvd->vdev_children = MAX(pvd->vdev_children, id + 1);
- newsize = pvd->vdev_children * sizeof (vdev_t *);
-
- newchild = kmem_zalloc(newsize, KM_SLEEP);
- if (pvd->vdev_child != NULL) {
- bcopy(pvd->vdev_child, newchild, oldsize);
- kmem_free(pvd->vdev_child, oldsize);
- }
-
- pvd->vdev_child = newchild;
- pvd->vdev_child[id] = cvd;
-
- cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd);
- ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL);
-
- /*
- * Walk up all ancestors to update guid sum.
- */
- for (; pvd != NULL; pvd = pvd->vdev_parent)
- pvd->vdev_guid_sum += cvd->vdev_guid_sum;
-
- if (cvd->vdev_ops->vdev_op_leaf)
- cvd->vdev_spa->spa_scrub_maxinflight += zfs_scrub_limit;
-}
-
-void
-vdev_remove_child(vdev_t *pvd, vdev_t *cvd)
-{
- int c;
- uint_t id = cvd->vdev_id;
-
- ASSERT(cvd->vdev_parent == pvd);
-
- if (pvd == NULL)
- return;
-
- ASSERT(id < pvd->vdev_children);
- ASSERT(pvd->vdev_child[id] == cvd);
-
- pvd->vdev_child[id] = NULL;
- cvd->vdev_parent = NULL;
-
- for (c = 0; c < pvd->vdev_children; c++)
- if (pvd->vdev_child[c])
- break;
-
- if (c == pvd->vdev_children) {
- kmem_free(pvd->vdev_child, c * sizeof (vdev_t *));
- pvd->vdev_child = NULL;
- pvd->vdev_children = 0;
- }
-
- /*
- * Walk up all ancestors to update guid sum.
- */
- for (; pvd != NULL; pvd = pvd->vdev_parent)
- pvd->vdev_guid_sum -= cvd->vdev_guid_sum;
-
- if (cvd->vdev_ops->vdev_op_leaf)
- cvd->vdev_spa->spa_scrub_maxinflight -= zfs_scrub_limit;
-}
-
-/*
- * Remove any holes in the child array.
- */
-void
-vdev_compact_children(vdev_t *pvd)
-{
- vdev_t **newchild, *cvd;
- int oldc = pvd->vdev_children;
- int newc, c;
-
- ASSERT(spa_config_held(pvd->vdev_spa, RW_WRITER));
-
- for (c = newc = 0; c < oldc; c++)
- if (pvd->vdev_child[c])
- newc++;
-
- newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP);
-
- for (c = newc = 0; c < oldc; c++) {
- if ((cvd = pvd->vdev_child[c]) != NULL) {
- newchild[newc] = cvd;
- cvd->vdev_id = newc++;
- }
- }
-
- kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *));
- pvd->vdev_child = newchild;
- pvd->vdev_children = newc;
-}
-
-/*
- * Allocate and minimally initialize a vdev_t.
- */
-static vdev_t *
-vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
-{
- vdev_t *vd;
-
- vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP);
-
- if (spa->spa_root_vdev == NULL) {
- ASSERT(ops == &vdev_root_ops);
- spa->spa_root_vdev = vd;
- }
-
- if (guid == 0) {
- if (spa->spa_root_vdev == vd) {
- /*
- * The root vdev's guid will also be the pool guid,
- * which must be unique among all pools.
- */
- while (guid == 0 || spa_guid_exists(guid, 0))
- guid = spa_get_random(-1ULL);
- } else {
- /*
- * Any other vdev's guid must be unique within the pool.
- */
- while (guid == 0 ||
- spa_guid_exists(spa_guid(spa), guid))
- guid = spa_get_random(-1ULL);
- }
- ASSERT(!spa_guid_exists(spa_guid(spa), guid));
- }
-
- vd->vdev_spa = spa;
- vd->vdev_id = id;
- vd->vdev_guid = guid;
- vd->vdev_guid_sum = guid;
- vd->vdev_ops = ops;
- vd->vdev_state = VDEV_STATE_CLOSED;
-
- mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
- space_map_create(&vd->vdev_dtl_map, 0, -1ULL, 0, &vd->vdev_dtl_lock);
- space_map_create(&vd->vdev_dtl_scrub, 0, -1ULL, 0, &vd->vdev_dtl_lock);
- txg_list_create(&vd->vdev_ms_list,
- offsetof(struct metaslab, ms_txg_node));
- txg_list_create(&vd->vdev_dtl_list,
- offsetof(struct vdev, vdev_dtl_node));
- vd->vdev_stat.vs_timestamp = gethrtime();
-
- return (vd);
-}
-
-/*
- * Free a vdev_t that has been removed from service.
- */
-static void
-vdev_free_common(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
-
- if (vd->vdev_path)
- spa_strfree(vd->vdev_path);
- if (vd->vdev_devid)
- spa_strfree(vd->vdev_devid);
-
- if (vd->vdev_isspare)
- spa_spare_remove(vd);
-
- txg_list_destroy(&vd->vdev_ms_list);
- txg_list_destroy(&vd->vdev_dtl_list);
- mutex_enter(&vd->vdev_dtl_lock);
- space_map_unload(&vd->vdev_dtl_map);
- space_map_destroy(&vd->vdev_dtl_map);
- space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
- space_map_destroy(&vd->vdev_dtl_scrub);
- mutex_exit(&vd->vdev_dtl_lock);
- mutex_destroy(&vd->vdev_dtl_lock);
- mutex_destroy(&vd->vdev_stat_lock);
-
- if (vd == spa->spa_root_vdev)
- spa->spa_root_vdev = NULL;
-
- kmem_free(vd, sizeof (vdev_t));
-}
-
-/*
- * Allocate a new vdev. The 'alloctype' is used to control whether we are
- * creating a new vdev or loading an existing one - the behavior is slightly
- * different for each case.
- */
-int
-vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
- int alloctype)
-{
- vdev_ops_t *ops;
- char *type;
- uint64_t guid = 0;
- vdev_t *vd;
-
- ASSERT(spa_config_held(spa, RW_WRITER));
-
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
- return (EINVAL);
-
- if ((ops = vdev_getops(type)) == NULL)
- return (EINVAL);
-
- /*
- * If this is a load, get the vdev guid from the nvlist.
- * Otherwise, vdev_alloc_common() will generate one for us.
- */
- if (alloctype == VDEV_ALLOC_LOAD) {
- uint64_t label_id;
-
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
- label_id != id)
- return (EINVAL);
-
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
- return (EINVAL);
- } else if (alloctype == VDEV_ALLOC_SPARE) {
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
- return (EINVAL);
- }
-
- /*
- * The first allocated vdev must be of type 'root'.
- */
- if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL)
- return (EINVAL);
-
- vd = vdev_alloc_common(spa, id, guid, ops);
-
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
- vd->vdev_path = spa_strdup(vd->vdev_path);
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
- vd->vdev_devid = spa_strdup(vd->vdev_devid);
-
- /*
- * Set the nparity propery for RAID-Z vdevs.
- */
- if (ops == &vdev_raidz_ops) {
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
- &vd->vdev_nparity) == 0) {
- /*
- * Currently, we can only support 2 parity devices.
- */
- if (vd->vdev_nparity > 2)
- return (EINVAL);
- /*
- * Older versions can only support 1 parity device.
- */
- if (vd->vdev_nparity == 2 &&
- spa_version(spa) < ZFS_VERSION_RAID6)
- return (ENOTSUP);
-
- } else {
- /*
- * We require the parity to be specified for SPAs that
- * support multiple parity levels.
- */
- if (spa_version(spa) >= ZFS_VERSION_RAID6)
- return (EINVAL);
-
- /*
- * Otherwise, we default to 1 parity device for RAID-Z.
- */
- vd->vdev_nparity = 1;
- }
- } else {
- vd->vdev_nparity = 0;
- }
-
- /*
- * Set the whole_disk property. If it's not specified, leave the value
- * as -1.
- */
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &vd->vdev_wholedisk) != 0)
- vd->vdev_wholedisk = -1ULL;
-
- /*
- * Look for the 'not present' flag. This will only be set if the device
- * was not present at the time of import.
- */
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
- &vd->vdev_not_present);
-
- /*
- * Get the alignment requirement.
- */
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
-
- /*
- * If we're a top-level vdev, try to load the allocation parameters.
- */
- if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
- &vd->vdev_ms_array);
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
- &vd->vdev_ms_shift);
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE,
- &vd->vdev_asize);
- }
-
- /*
- * If we're a leaf vdev, try to load the DTL object and offline state.
- */
- if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) {
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
- &vd->vdev_dtl.smo_object);
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
- &vd->vdev_offline);
- }
-
- /*
- * Add ourselves to the parent's list of children.
- */
- vdev_add_child(parent, vd);
-
- *vdp = vd;
-
- return (0);
-}
-
-void
-vdev_free(vdev_t *vd)
-{
- int c;
-
- /*
- * vdev_free() implies closing the vdev first. This is simpler than
- * trying to ensure complicated semantics for all callers.
- */
- vdev_close(vd);
-
- ASSERT(!list_link_active(&vd->vdev_dirty_node));
-
- /*
- * Free all children.
- */
- for (c = 0; c < vd->vdev_children; c++)
- vdev_free(vd->vdev_child[c]);
-
- ASSERT(vd->vdev_child == NULL);
- ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
-
- /*
- * Discard allocation state.
- */
- if (vd == vd->vdev_top)
- vdev_metaslab_fini(vd);
-
- ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
- ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0);
- ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
-
- /*
- * Remove this vdev from its parent's child list.
- */
- vdev_remove_child(vd->vdev_parent, vd);
-
- ASSERT(vd->vdev_parent == NULL);
-
- vdev_free_common(vd);
-}
-
-/*
- * Transfer top-level vdev state from svd to tvd.
- */
-static void
-vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
-{
- spa_t *spa = svd->vdev_spa;
- metaslab_t *msp;
- vdev_t *vd;
- int t;
-
- ASSERT(tvd == tvd->vdev_top);
-
- tvd->vdev_ms_array = svd->vdev_ms_array;
- tvd->vdev_ms_shift = svd->vdev_ms_shift;
- tvd->vdev_ms_count = svd->vdev_ms_count;
-
- svd->vdev_ms_array = 0;
- svd->vdev_ms_shift = 0;
- svd->vdev_ms_count = 0;
-
- tvd->vdev_mg = svd->vdev_mg;
- tvd->vdev_ms = svd->vdev_ms;
-
- svd->vdev_mg = NULL;
- svd->vdev_ms = NULL;
-
- if (tvd->vdev_mg != NULL)
- tvd->vdev_mg->mg_vd = tvd;
-
- tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc;
- tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space;
- tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace;
-
- svd->vdev_stat.vs_alloc = 0;
- svd->vdev_stat.vs_space = 0;
- svd->vdev_stat.vs_dspace = 0;
-
- for (t = 0; t < TXG_SIZE; t++) {
- while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
- (void) txg_list_add(&tvd->vdev_ms_list, msp, t);
- while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL)
- (void) txg_list_add(&tvd->vdev_dtl_list, vd, t);
- if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t))
- (void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t);
- }
-
- if (list_link_active(&svd->vdev_dirty_node)) {
- vdev_config_clean(svd);
- vdev_config_dirty(tvd);
- }
-
- tvd->vdev_reopen_wanted = svd->vdev_reopen_wanted;
- svd->vdev_reopen_wanted = 0;
-
- tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio;
- svd->vdev_deflate_ratio = 0;
-}
-
-static void
-vdev_top_update(vdev_t *tvd, vdev_t *vd)
-{
- int c;
-
- if (vd == NULL)
- return;
-
- vd->vdev_top = tvd;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_top_update(tvd, vd->vdev_child[c]);
-}
-
-/*
- * Add a mirror/replacing vdev above an existing vdev.
- */
-vdev_t *
-vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
-{
- spa_t *spa = cvd->vdev_spa;
- vdev_t *pvd = cvd->vdev_parent;
- vdev_t *mvd;
-
- ASSERT(spa_config_held(spa, RW_WRITER));
-
- mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);
-
- mvd->vdev_asize = cvd->vdev_asize;
- mvd->vdev_ashift = cvd->vdev_ashift;
- mvd->vdev_state = cvd->vdev_state;
-
- vdev_remove_child(pvd, cvd);
- vdev_add_child(pvd, mvd);
- cvd->vdev_id = mvd->vdev_children;
- vdev_add_child(mvd, cvd);
- vdev_top_update(cvd->vdev_top, cvd->vdev_top);
-
- if (mvd == mvd->vdev_top)
- vdev_top_transfer(cvd, mvd);
-
- return (mvd);
-}
-
-/*
- * Remove a 1-way mirror/replacing vdev from the tree.
- */
-void
-vdev_remove_parent(vdev_t *cvd)
-{
- vdev_t *mvd = cvd->vdev_parent;
- vdev_t *pvd = mvd->vdev_parent;
-
- ASSERT(spa_config_held(cvd->vdev_spa, RW_WRITER));
-
- ASSERT(mvd->vdev_children == 1);
- ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
- mvd->vdev_ops == &vdev_replacing_ops ||
- mvd->vdev_ops == &vdev_spare_ops);
- cvd->vdev_ashift = mvd->vdev_ashift;
-
- vdev_remove_child(mvd, cvd);
- vdev_remove_child(pvd, mvd);
- cvd->vdev_id = mvd->vdev_id;
- vdev_add_child(pvd, cvd);
- /*
- * If we created a new toplevel vdev, then we need to change the child's
- * vdev GUID to match the old toplevel vdev. Otherwise, we could have
- * detached an offline device, and when we go to import the pool we'll
- * think we have two toplevel vdevs, instead of a different version of
- * the same toplevel vdev.
- */
- if (cvd->vdev_top == cvd) {
- pvd->vdev_guid_sum -= cvd->vdev_guid;
- cvd->vdev_guid_sum -= cvd->vdev_guid;
- cvd->vdev_guid = mvd->vdev_guid;
- cvd->vdev_guid_sum += mvd->vdev_guid;
- pvd->vdev_guid_sum += cvd->vdev_guid;
- }
- vdev_top_update(cvd->vdev_top, cvd->vdev_top);
-
- if (cvd == cvd->vdev_top)
- vdev_top_transfer(mvd, cvd);
-
- ASSERT(mvd->vdev_children == 0);
- vdev_free(mvd);
-}
-
-int
-vdev_metaslab_init(vdev_t *vd, uint64_t txg)
-{
- spa_t *spa = vd->vdev_spa;
- objset_t *mos = spa->spa_meta_objset;
- metaslab_class_t *mc = spa_metaslab_class_select(spa);
- uint64_t m;
- uint64_t oldc = vd->vdev_ms_count;
- uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
- metaslab_t **mspp;
- int error;
-
- if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
- return (0);
-
- dprintf("%s oldc %llu newc %llu\n", vdev_description(vd), oldc, newc);
-
- ASSERT(oldc <= newc);
-
- if (vd->vdev_mg == NULL)
- vd->vdev_mg = metaslab_group_create(mc, vd);
-
- mspp = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP);
-
- if (oldc != 0) {
- bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp));
- kmem_free(vd->vdev_ms, oldc * sizeof (*mspp));
- }
-
- vd->vdev_ms = mspp;
- vd->vdev_ms_count = newc;
-
- for (m = oldc; m < newc; m++) {
- space_map_obj_t smo = { 0, 0, 0 };
- if (txg == 0) {
- uint64_t object = 0;
- error = dmu_read(mos, vd->vdev_ms_array,
- m * sizeof (uint64_t), sizeof (uint64_t), &object);
- if (error)
- return (error);
- if (object != 0) {
- dmu_buf_t *db;
- error = dmu_bonus_hold(mos, object, FTAG, &db);
- if (error)
- return (error);
- ASSERT3U(db->db_size, ==, sizeof (smo));
- bcopy(db->db_data, &smo, db->db_size);
- ASSERT3U(smo.smo_object, ==, object);
- dmu_buf_rele(db, FTAG);
- }
- }
- vd->vdev_ms[m] = metaslab_init(vd->vdev_mg, &smo,
- m << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg);
- }
-
- return (0);
-}
-
-void
-vdev_metaslab_fini(vdev_t *vd)
-{
- uint64_t m;
- uint64_t count = vd->vdev_ms_count;
-
- if (vd->vdev_ms != NULL) {
- for (m = 0; m < count; m++)
- if (vd->vdev_ms[m] != NULL)
- metaslab_fini(vd->vdev_ms[m]);
- kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *));
- vd->vdev_ms = NULL;
- }
-}
-
-/*
- * Prepare a virtual device for access.
- */
-int
-vdev_open(vdev_t *vd)
-{
- int error;
- int c;
- uint64_t osize = 0;
- uint64_t asize, psize;
- uint64_t ashift = 0;
-
- ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
- vd->vdev_state == VDEV_STATE_CANT_OPEN ||
- vd->vdev_state == VDEV_STATE_OFFLINE);
-
- if (vd->vdev_fault_mode == VDEV_FAULT_COUNT)
- vd->vdev_fault_arg >>= 1;
- else
- vd->vdev_fault_mode = VDEV_FAULT_NONE;
-
- vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
-
- if (vd->vdev_ops->vdev_op_leaf) {
- vdev_cache_init(vd);
- vdev_queue_init(vd);
- vd->vdev_cache_active = B_TRUE;
- }
-
- if (vd->vdev_offline) {
- ASSERT(vd->vdev_children == 0);
- vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
- return (ENXIO);
- }
-
- error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
-
- if (zio_injection_enabled && error == 0)
- error = zio_handle_device_injection(vd, ENXIO);
-
- dprintf("%s = %d, osize %llu, state = %d\n",
- vdev_description(vd), error, osize, vd->vdev_state);
-
- if (error) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- vd->vdev_stat.vs_aux);
- return (error);
- }
-
- vd->vdev_state = VDEV_STATE_HEALTHY;
-
- for (c = 0; c < vd->vdev_children; c++)
- if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
- VDEV_AUX_NONE);
- break;
- }
-
- osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));
-
- if (vd->vdev_children == 0) {
- if (osize < SPA_MINDEVSIZE) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_TOO_SMALL);
- return (EOVERFLOW);
- }
- psize = osize;
- asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
- } else {
- if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE -
- (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_TOO_SMALL);
- return (EOVERFLOW);
- }
- psize = 0;
- asize = osize;
- }
-
- vd->vdev_psize = psize;
-
- if (vd->vdev_asize == 0) {
- /*
- * This is the first-ever open, so use the computed values.
- * For testing purposes, a higher ashift can be requested.
- */
- vd->vdev_asize = asize;
- vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
- } else {
- /*
- * Make sure the alignment requirement hasn't increased.
- */
- if (ashift > vd->vdev_top->vdev_ashift) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_LABEL);
- return (EINVAL);
- }
-
- /*
- * Make sure the device hasn't shrunk.
- */
- if (asize < vd->vdev_asize) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_LABEL);
- return (EINVAL);
- }
-
- /*
- * If all children are healthy and the asize has increased,
- * then we've experienced dynamic LUN growth.
- */
- if (vd->vdev_state == VDEV_STATE_HEALTHY &&
- asize > vd->vdev_asize) {
- vd->vdev_asize = asize;
- }
- }
-
- /*
- * If this is a top-level vdev, compute the raidz-deflation
- * ratio. Note, we hard-code in 128k (1<<17) because it is the
- * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
- * changes, this algorithm must never change, or we will
- * inconsistently account for existing bp's.
- */
- if (vd->vdev_top == vd) {
- vd->vdev_deflate_ratio = (1<<17) /
- (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
- }
-
- /*
- * This allows the ZFS DE to close cases appropriately. If a device
- * goes away and later returns, we want to close the associated case.
- * But it's not enough to simply post this only when a device goes from
- * CANT_OPEN -> HEALTHY. If we reboot the system and the device is
- * back, we also need to close the case (otherwise we will try to replay
- * it). So we have to post this notifier every time. Since this only
- * occurs during pool open or error recovery, this should not be an
- * issue.
- */
- zfs_post_ok(vd->vdev_spa, vd);
-
- return (0);
-}
-
-/*
- * Called once the vdevs are all opened, this routine validates the label
- * contents. This needs to be done before vdev_load() so that we don't
- * inadvertently do repair I/Os to the wrong device, and so that vdev_reopen()
- * won't succeed if the device has been changed underneath.
- *
- * This function will only return failure if one of the vdevs indicates that it
- * has since been destroyed or exported. This is only possible if
- * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state
- * will be updated but the function will return 0.
- */
-int
-vdev_validate(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
- int c;
- nvlist_t *label;
- uint64_t guid;
- uint64_t state;
-
- for (c = 0; c < vd->vdev_children; c++)
- if (vdev_validate(vd->vdev_child[c]) != 0)
- return (EBADF);
-
- /*
- * If the device has already failed, or was marked offline, don't do
- * any further validation. Otherwise, label I/O will fail and we will
- * overwrite the previous state.
- */
- if (vd->vdev_ops->vdev_op_leaf && !vdev_is_dead(vd)) {
-
- if ((label = vdev_label_read_config(vd)) == NULL) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_LABEL);
- return (0);
- }
-
- if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
- &guid) != 0 || guid != spa_guid(spa)) {
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- nvlist_free(label);
- return (0);
- }
-
- if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
- &guid) != 0 || guid != vd->vdev_guid) {
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- nvlist_free(label);
- return (0);
- }
-
- if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
- &state) != 0) {
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- nvlist_free(label);
- return (0);
- }
-
- nvlist_free(label);
-
- if (spa->spa_load_state == SPA_LOAD_OPEN &&
- state != POOL_STATE_ACTIVE)
- return (EBADF);
- }
-
- /*
- * If we were able to open and validate a vdev that was previously
- * marked permanently unavailable, clear that state now.
- */
- if (vd->vdev_not_present)
- vd->vdev_not_present = 0;
-
- return (0);
-}
-
-/*
- * Close a virtual device.
- */
-void
-vdev_close(vdev_t *vd)
-{
- vd->vdev_ops->vdev_op_close(vd);
-
- if (vd->vdev_cache_active) {
- vdev_cache_fini(vd);
- vdev_queue_fini(vd);
- vd->vdev_cache_active = B_FALSE;
- }
-
- /*
- * We record the previous state before we close it, so that if we are
- * doing a reopen(), we don't generate FMA ereports if we notice that
- * it's still faulted.
- */
- vd->vdev_prevstate = vd->vdev_state;
-
- if (vd->vdev_offline)
- vd->vdev_state = VDEV_STATE_OFFLINE;
- else
- vd->vdev_state = VDEV_STATE_CLOSED;
- vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
-}
-
-void
-vdev_reopen(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
-
- ASSERT(spa_config_held(spa, RW_WRITER));
-
- vdev_close(vd);
- (void) vdev_open(vd);
-
- /*
- * Call vdev_validate() here to make sure we have the same device.
- * Otherwise, a device with an invalid label could be successfully
- * opened in response to vdev_reopen().
- *
- * The downside to this is that if the user is simply experimenting by
- * overwriting an entire disk, we'll fault the device rather than
- * demonstrate self-healing capabilities. On the other hand, with
- * proper FMA integration, the series of errors we'd see from the device
- * would result in a faulted device anyway. Given that this doesn't
- * model any real-world corruption, it's better to catch this here and
- * correctly identify that the device has either changed beneath us, or
- * is corrupted beyond recognition.
- */
- (void) vdev_validate(vd);
-
- /*
- * Reassess root vdev's health.
- */
- vdev_propagate_state(spa->spa_root_vdev);
-}
-
-int
-vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing)
-{
- int error;
-
- /*
- * Normally, partial opens (e.g. of a mirror) are allowed.
- * For a create, however, we want to fail the request if
- * there are any components we can't open.
- */
- error = vdev_open(vd);
-
- if (error || vd->vdev_state != VDEV_STATE_HEALTHY) {
- vdev_close(vd);
- return (error ? error : ENXIO);
- }
-
- /*
- * Recursively initialize all labels.
- */
- if ((error = vdev_label_init(vd, txg, isreplacing ?
- VDEV_LABEL_REPLACE : VDEV_LABEL_CREATE)) != 0) {
- vdev_close(vd);
- return (error);
- }
-
- return (0);
-}
-
-/*
- * The is the latter half of vdev_create(). It is distinct because it
- * involves initiating transactions in order to do metaslab creation.
- * For creation, we want to try to create all vdevs at once and then undo it
- * if anything fails; this is much harder if we have pending transactions.
- */
-void
-vdev_init(vdev_t *vd, uint64_t txg)
-{
- /*
- * Aim for roughly 200 metaslabs per vdev.
- */
- vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
- vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
-
- /*
- * Initialize the vdev's metaslabs. This can't fail because
- * there's nothing to read when creating all new metaslabs.
- */
- VERIFY(vdev_metaslab_init(vd, txg) == 0);
-}
-
-void
-vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
-{
- ASSERT(vd == vd->vdev_top);
- ASSERT(ISP2(flags));
-
- if (flags & VDD_METASLAB)
- (void) txg_list_add(&vd->vdev_ms_list, arg, txg);
-
- if (flags & VDD_DTL)
- (void) txg_list_add(&vd->vdev_dtl_list, arg, txg);
-
- (void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg);
-}
-
-void
-vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size)
-{
- mutex_enter(sm->sm_lock);
- if (!space_map_contains(sm, txg, size))
- space_map_add(sm, txg, size);
- mutex_exit(sm->sm_lock);
-}
-
-int
-vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size)
-{
- int dirty;
-
- /*
- * Quick test without the lock -- covers the common case that
- * there are no dirty time segments.
- */
- if (sm->sm_space == 0)
- return (0);
-
- mutex_enter(sm->sm_lock);
- dirty = space_map_contains(sm, txg, size);
- mutex_exit(sm->sm_lock);
-
- return (dirty);
-}
-
-/*
- * Reassess DTLs after a config change or scrub completion.
- */
-void
-vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
-{
- spa_t *spa = vd->vdev_spa;
- int c;
-
- ASSERT(spa_config_held(spa, RW_WRITER));
-
- if (vd->vdev_children == 0) {
- mutex_enter(&vd->vdev_dtl_lock);
- /*
- * We've successfully scrubbed everything up to scrub_txg.
- * Therefore, excise all old DTLs up to that point, then
- * fold in the DTLs for everything we couldn't scrub.
- */
- if (scrub_txg != 0) {
- space_map_excise(&vd->vdev_dtl_map, 0, scrub_txg);
- space_map_union(&vd->vdev_dtl_map, &vd->vdev_dtl_scrub);
- }
- if (scrub_done)
- space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
- mutex_exit(&vd->vdev_dtl_lock);
- if (txg != 0)
- vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
- return;
- }
-
- /*
- * Make sure the DTLs are always correct under the scrub lock.
- */
- if (vd == spa->spa_root_vdev)
- mutex_enter(&spa->spa_scrub_lock);
-
- mutex_enter(&vd->vdev_dtl_lock);
- space_map_vacate(&vd->vdev_dtl_map, NULL, NULL);
- space_map_vacate(&vd->vdev_dtl_scrub, NULL, NULL);
- mutex_exit(&vd->vdev_dtl_lock);
-
- for (c = 0; c < vd->vdev_children; c++) {
- vdev_t *cvd = vd->vdev_child[c];
- vdev_dtl_reassess(cvd, txg, scrub_txg, scrub_done);
- mutex_enter(&vd->vdev_dtl_lock);
- space_map_union(&vd->vdev_dtl_map, &cvd->vdev_dtl_map);
- space_map_union(&vd->vdev_dtl_scrub, &cvd->vdev_dtl_scrub);
- mutex_exit(&vd->vdev_dtl_lock);
- }
-
- if (vd == spa->spa_root_vdev)
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-static int
-vdev_dtl_load(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
- space_map_obj_t *smo = &vd->vdev_dtl;
- objset_t *mos = spa->spa_meta_objset;
- dmu_buf_t *db;
- int error;
-
- ASSERT(vd->vdev_children == 0);
-
- if (smo->smo_object == 0)
- return (0);
-
- if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0)
- return (error);
-
- ASSERT3U(db->db_size, ==, sizeof (*smo));
- bcopy(db->db_data, smo, db->db_size);
- dmu_buf_rele(db, FTAG);
-
- mutex_enter(&vd->vdev_dtl_lock);
- error = space_map_load(&vd->vdev_dtl_map, NULL, SM_ALLOC, smo, mos);
- mutex_exit(&vd->vdev_dtl_lock);
-
- return (error);
-}
-
-void
-vdev_dtl_sync(vdev_t *vd, uint64_t txg)
-{
- spa_t *spa = vd->vdev_spa;
- space_map_obj_t *smo = &vd->vdev_dtl;
- space_map_t *sm = &vd->vdev_dtl_map;
- objset_t *mos = spa->spa_meta_objset;
- space_map_t smsync;
- kmutex_t smlock;
- dmu_buf_t *db;
- dmu_tx_t *tx;
-
- dprintf("%s in txg %llu pass %d\n",
- vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa));
-
- tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
-
- if (vd->vdev_detached) {
- if (smo->smo_object != 0) {
- int err = dmu_object_free(mos, smo->smo_object, tx);
- ASSERT3U(err, ==, 0);
- smo->smo_object = 0;
- }
- dmu_tx_commit(tx);
- dprintf("detach %s committed in txg %llu\n",
- vdev_description(vd), txg);
- return;
- }
-
- if (smo->smo_object == 0) {
- ASSERT(smo->smo_objsize == 0);
- ASSERT(smo->smo_alloc == 0);
- smo->smo_object = dmu_object_alloc(mos,
- DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
- DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
- ASSERT(smo->smo_object != 0);
- vdev_config_dirty(vd->vdev_top);
- }
-
- mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL);
-
- space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift,
- &smlock);
-
- mutex_enter(&smlock);
-
- mutex_enter(&vd->vdev_dtl_lock);
- space_map_walk(sm, space_map_add, &smsync);
- mutex_exit(&vd->vdev_dtl_lock);
-
- space_map_truncate(smo, mos, tx);
- space_map_sync(&smsync, SM_ALLOC, smo, mos, tx);
-
- space_map_destroy(&smsync);
-
- mutex_exit(&smlock);
- mutex_destroy(&smlock);
-
- VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
- ASSERT3U(db->db_size, ==, sizeof (*smo));
- bcopy(smo, db->db_data, db->db_size);
- dmu_buf_rele(db, FTAG);
-
- dmu_tx_commit(tx);
-}
-
-void
-vdev_load(vdev_t *vd)
-{
- int c;
-
- /*
- * Recursively load all children.
- */
- for (c = 0; c < vd->vdev_children; c++)
- vdev_load(vd->vdev_child[c]);
-
- /*
- * If this is a top-level vdev, initialize its metaslabs.
- */
- if (vd == vd->vdev_top &&
- (vd->vdev_ashift == 0 || vd->vdev_asize == 0 ||
- vdev_metaslab_init(vd, 0) != 0))
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
-
- /*
- * If this is a leaf vdev, load its DTL.
- */
- if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0)
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
-}
-
-/*
- * This special case of vdev_spare() is used for hot spares. Its sole purpose
- * is to set the vdev state for the associated vdev. To do this, we make sure
- * that we can open the underlying device, then try to read the label, and make
- * sure that the label is sane and that it hasn't been repurposed to another
- * pool.
- */
-int
-vdev_validate_spare(vdev_t *vd)
-{
- nvlist_t *label;
- uint64_t guid, version;
- uint64_t state;
-
- if ((label = vdev_label_read_config(vd)) == NULL) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- return (-1);
- }
-
- if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
- version > ZFS_VERSION ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
- guid != vd->vdev_guid ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- nvlist_free(label);
- return (-1);
- }
-
- spa_spare_add(vd);
-
- /*
- * We don't actually check the pool state here. If it's in fact in
- * use by another pool, we update this fact on the fly when requested.
- */
- nvlist_free(label);
- return (0);
-}
-
-void
-vdev_sync_done(vdev_t *vd, uint64_t txg)
-{
- metaslab_t *msp;
-
- dprintf("%s txg %llu\n", vdev_description(vd), txg);
-
- while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
- metaslab_sync_done(msp, txg);
-}
-
-void
-vdev_sync(vdev_t *vd, uint64_t txg)
-{
- spa_t *spa = vd->vdev_spa;
- vdev_t *lvd;
- metaslab_t *msp;
- dmu_tx_t *tx;
-
- dprintf("%s txg %llu pass %d\n",
- vdev_description(vd), (u_longlong_t)txg, spa_sync_pass(spa));
-
- if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) {
- ASSERT(vd == vd->vdev_top);
- tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
- vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset,
- DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx);
- ASSERT(vd->vdev_ms_array != 0);
- vdev_config_dirty(vd);
- dmu_tx_commit(tx);
- }
-
- while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) {
- metaslab_sync(msp, txg);
- (void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg));
- }
-
- while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL)
- vdev_dtl_sync(lvd, txg);
-
- (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg));
-}
-
-uint64_t
-vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
-{
- return (vd->vdev_ops->vdev_op_asize(vd, psize));
-}
-
-void
-vdev_io_start(zio_t *zio)
-{
- zio->io_vd->vdev_ops->vdev_op_io_start(zio);
-}
-
-void
-vdev_io_done(zio_t *zio)
-{
- zio->io_vd->vdev_ops->vdev_op_io_done(zio);
-}
-
-const char *
-vdev_description(vdev_t *vd)
-{
- if (vd == NULL || vd->vdev_ops == NULL)
- return ("<unknown>");
-
- if (vd->vdev_path != NULL)
- return (vd->vdev_path);
-
- if (vd->vdev_parent == NULL)
- return (spa_name(vd->vdev_spa));
-
- return (vd->vdev_ops->vdev_op_type);
-}
-
-int
-vdev_online(spa_t *spa, uint64_t guid)
-{
- vdev_t *rvd, *vd;
- uint64_t txg;
-
- txg = spa_vdev_enter(spa);
-
- rvd = spa->spa_root_vdev;
-
- if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL)
- return (spa_vdev_exit(spa, NULL, txg, ENODEV));
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- dprintf("ONLINE: %s\n", vdev_description(vd));
-
- vd->vdev_offline = B_FALSE;
- vd->vdev_tmpoffline = B_FALSE;
- vdev_reopen(vd->vdev_top);
-
- vdev_config_dirty(vd->vdev_top);
-
- (void) spa_vdev_exit(spa, NULL, txg, 0);
-
- VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER, B_TRUE) == 0);
-
- return (0);
-}
-
-int
-vdev_offline(spa_t *spa, uint64_t guid, int istmp)
-{
- vdev_t *rvd, *vd;
- uint64_t txg;
-
- txg = spa_vdev_enter(spa);
-
- rvd = spa->spa_root_vdev;
-
- if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL)
- return (spa_vdev_exit(spa, NULL, txg, ENODEV));
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
- dprintf("OFFLINE: %s\n", vdev_description(vd));
-
- /*
- * If the device isn't already offline, try to offline it.
- */
- if (!vd->vdev_offline) {
- /*
- * If this device's top-level vdev has a non-empty DTL,
- * don't allow the device to be offlined.
- *
- * XXX -- make this more precise by allowing the offline
- * as long as the remaining devices don't have any DTL holes.
- */
- if (vd->vdev_top->vdev_dtl_map.sm_space != 0)
- return (spa_vdev_exit(spa, NULL, txg, EBUSY));
-
- /*
- * Offline this device and reopen its top-level vdev.
- * If this action results in the top-level vdev becoming
- * unusable, undo it and fail the request.
- */
- vd->vdev_offline = B_TRUE;
- vdev_reopen(vd->vdev_top);
- if (vdev_is_dead(vd->vdev_top)) {
- vd->vdev_offline = B_FALSE;
- vdev_reopen(vd->vdev_top);
- return (spa_vdev_exit(spa, NULL, txg, EBUSY));
- }
- }
-
- vd->vdev_tmpoffline = istmp;
-
- vdev_config_dirty(vd->vdev_top);
-
- return (spa_vdev_exit(spa, NULL, txg, 0));
-}
-
-/*
- * Clear the error counts associated with this vdev. Unlike vdev_online() and
- * vdev_offline(), we assume the spa config is locked. We also clear all
- * children. If 'vd' is NULL, then the user wants to clear all vdevs.
- */
-void
-vdev_clear(spa_t *spa, vdev_t *vd)
-{
- int c;
-
- if (vd == NULL)
- vd = spa->spa_root_vdev;
-
- vd->vdev_stat.vs_read_errors = 0;
- vd->vdev_stat.vs_write_errors = 0;
- vd->vdev_stat.vs_checksum_errors = 0;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_clear(spa, vd->vdev_child[c]);
-}
-
-int
-vdev_is_dead(vdev_t *vd)
-{
- return (vd->vdev_state <= VDEV_STATE_CANT_OPEN);
-}
-
-int
-vdev_error_inject(vdev_t *vd, zio_t *zio)
-{
- int error = 0;
-
- if (vd->vdev_fault_mode == VDEV_FAULT_NONE)
- return (0);
-
- if (((1ULL << zio->io_type) & vd->vdev_fault_mask) == 0)
- return (0);
-
- switch (vd->vdev_fault_mode) {
- case VDEV_FAULT_RANDOM:
- if (spa_get_random(vd->vdev_fault_arg) == 0)
- error = EIO;
- break;
-
- case VDEV_FAULT_COUNT:
- if ((int64_t)--vd->vdev_fault_arg <= 0)
- vd->vdev_fault_mode = VDEV_FAULT_NONE;
- error = EIO;
- break;
- }
-
- if (error != 0) {
- dprintf("returning %d for type %d on %s state %d offset %llx\n",
- error, zio->io_type, vdev_description(vd),
- vd->vdev_state, zio->io_offset);
- }
-
- return (error);
-}
-
-/*
- * Get statistics for the given vdev.
- */
-void
-vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
-{
- vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
- int c, t;
-
- mutex_enter(&vd->vdev_stat_lock);
- bcopy(&vd->vdev_stat, vs, sizeof (*vs));
- vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
- vs->vs_state = vd->vdev_state;
- vs->vs_rsize = vdev_get_rsize(vd);
- mutex_exit(&vd->vdev_stat_lock);
-
- /*
- * If we're getting stats on the root vdev, aggregate the I/O counts
- * over all top-level vdevs (i.e. the direct children of the root).
- */
- if (vd == rvd) {
- for (c = 0; c < rvd->vdev_children; c++) {
- vdev_t *cvd = rvd->vdev_child[c];
- vdev_stat_t *cvs = &cvd->vdev_stat;
-
- mutex_enter(&vd->vdev_stat_lock);
- for (t = 0; t < ZIO_TYPES; t++) {
- vs->vs_ops[t] += cvs->vs_ops[t];
- vs->vs_bytes[t] += cvs->vs_bytes[t];
- }
- vs->vs_read_errors += cvs->vs_read_errors;
- vs->vs_write_errors += cvs->vs_write_errors;
- vs->vs_checksum_errors += cvs->vs_checksum_errors;
- vs->vs_scrub_examined += cvs->vs_scrub_examined;
- vs->vs_scrub_errors += cvs->vs_scrub_errors;
- mutex_exit(&vd->vdev_stat_lock);
- }
- }
-}
-
-void
-vdev_stat_update(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_t *pvd;
- uint64_t txg = zio->io_txg;
- vdev_stat_t *vs = &vd->vdev_stat;
- zio_type_t type = zio->io_type;
- int flags = zio->io_flags;
-
- if (zio->io_error == 0) {
- if (!(flags & ZIO_FLAG_IO_BYPASS)) {
- mutex_enter(&vd->vdev_stat_lock);
- vs->vs_ops[type]++;
- vs->vs_bytes[type] += zio->io_size;
- mutex_exit(&vd->vdev_stat_lock);
- }
- if ((flags & ZIO_FLAG_IO_REPAIR) &&
- zio->io_delegate_list == NULL) {
- mutex_enter(&vd->vdev_stat_lock);
- if (flags & ZIO_FLAG_SCRUB_THREAD)
- vs->vs_scrub_repaired += zio->io_size;
- else
- vs->vs_self_healed += zio->io_size;
- mutex_exit(&vd->vdev_stat_lock);
- }
- return;
- }
-
- if (flags & ZIO_FLAG_SPECULATIVE)
- return;
-
- if (!vdev_is_dead(vd)) {
- mutex_enter(&vd->vdev_stat_lock);
- if (type == ZIO_TYPE_READ) {
- if (zio->io_error == ECKSUM)
- vs->vs_checksum_errors++;
- else
- vs->vs_read_errors++;
- }
- if (type == ZIO_TYPE_WRITE)
- vs->vs_write_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
-
- if (type == ZIO_TYPE_WRITE) {
- if (txg == 0 || vd->vdev_children != 0)
- return;
- if (flags & ZIO_FLAG_SCRUB_THREAD) {
- ASSERT(flags & ZIO_FLAG_IO_REPAIR);
- for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
- vdev_dtl_dirty(&pvd->vdev_dtl_scrub, txg, 1);
- }
- if (!(flags & ZIO_FLAG_IO_REPAIR)) {
- if (vdev_dtl_contains(&vd->vdev_dtl_map, txg, 1))
- return;
- vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
- for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
- vdev_dtl_dirty(&pvd->vdev_dtl_map, txg, 1);
- }
- }
-}
-
-void
-vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
-{
- int c;
- vdev_stat_t *vs = &vd->vdev_stat;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_scrub_stat_update(vd->vdev_child[c], type, complete);
-
- mutex_enter(&vd->vdev_stat_lock);
-
- if (type == POOL_SCRUB_NONE) {
- /*
- * Update completion and end time. Leave everything else alone
- * so we can report what happened during the previous scrub.
- */
- vs->vs_scrub_complete = complete;
- vs->vs_scrub_end = gethrestime_sec();
- } else {
- vs->vs_scrub_type = type;
- vs->vs_scrub_complete = 0;
- vs->vs_scrub_examined = 0;
- vs->vs_scrub_repaired = 0;
- vs->vs_scrub_errors = 0;
- vs->vs_scrub_start = gethrestime_sec();
- vs->vs_scrub_end = 0;
- }
-
- mutex_exit(&vd->vdev_stat_lock);
-}
-
-/*
- * Update the in-core space usage stats for this vdev and the root vdev.
- */
-void
-vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
-{
- ASSERT(vd == vd->vdev_top);
- int64_t dspace_delta = space_delta;
-
- do {
- if (vd->vdev_ms_count) {
- /*
- * If this is a top-level vdev, apply the
- * inverse of its psize-to-asize (ie. RAID-Z)
- * space-expansion factor. We must calculate
- * this here and not at the root vdev because
- * the root vdev's psize-to-asize is simply the
- * max of its children's, thus not accurate
- * enough for us.
- */
- ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
- dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
- vd->vdev_deflate_ratio;
- }
-
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_space += space_delta;
- vd->vdev_stat.vs_alloc += alloc_delta;
- vd->vdev_stat.vs_dspace += dspace_delta;
- mutex_exit(&vd->vdev_stat_lock);
- } while ((vd = vd->vdev_parent) != NULL);
-}
-
-/*
- * Mark a top-level vdev's config as dirty, placing it on the dirty list
- * so that it will be written out next time the vdev configuration is synced.
- * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs.
- */
-void
-vdev_config_dirty(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
- vdev_t *rvd = spa->spa_root_vdev;
- int c;
-
- /*
- * The dirty list is protected by the config lock. The caller must
- * either hold the config lock as writer, or must be the sync thread
- * (which holds the lock as reader). There's only one sync thread,
- * so this is sufficient to ensure mutual exclusion.
- */
- ASSERT(spa_config_held(spa, RW_WRITER) ||
- dsl_pool_sync_context(spa_get_dsl(spa)));
-
- if (vd == rvd) {
- for (c = 0; c < rvd->vdev_children; c++)
- vdev_config_dirty(rvd->vdev_child[c]);
- } else {
- ASSERT(vd == vd->vdev_top);
-
- if (!list_link_active(&vd->vdev_dirty_node))
- list_insert_head(&spa->spa_dirty_list, vd);
- }
-}
-
-void
-vdev_config_clean(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
-
- ASSERT(spa_config_held(spa, RW_WRITER) ||
- dsl_pool_sync_context(spa_get_dsl(spa)));
-
- ASSERT(list_link_active(&vd->vdev_dirty_node));
- list_remove(&spa->spa_dirty_list, vd);
-}
-
-void
-vdev_propagate_state(vdev_t *vd)
-{
- vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
- int degraded = 0, faulted = 0;
- int corrupted = 0;
- int c;
- vdev_t *child;
-
- for (c = 0; c < vd->vdev_children; c++) {
- child = vd->vdev_child[c];
- if (child->vdev_state <= VDEV_STATE_CANT_OPEN)
- faulted++;
- else if (child->vdev_state == VDEV_STATE_DEGRADED)
- degraded++;
-
- if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA)
- corrupted++;
- }
-
- vd->vdev_ops->vdev_op_state_change(vd, faulted, degraded);
-
- /*
- * Root special: if there is a toplevel vdev that cannot be
- * opened due to corrupted metadata, then propagate the root
- * vdev's aux state as 'corrupt' rather than 'insufficient
- * replicas'.
- */
- if (corrupted && vd == rvd && rvd->vdev_state == VDEV_STATE_CANT_OPEN)
- vdev_set_state(rvd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
-}
-
-/*
- * Set a vdev's state. If this is during an open, we don't update the parent
- * state, because we're in the process of opening children depth-first.
- * Otherwise, we propagate the change to the parent.
- *
- * If this routine places a device in a faulted state, an appropriate ereport is
- * generated.
- */
-void
-vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
-{
- uint64_t save_state;
-
- if (state == vd->vdev_state) {
- vd->vdev_stat.vs_aux = aux;
- return;
- }
-
- save_state = vd->vdev_state;
-
- vd->vdev_state = state;
- vd->vdev_stat.vs_aux = aux;
-
- /*
- * If we are setting the vdev state to anything but an open state, then
- * always close the underlying device. Otherwise, we keep accessible
- * but invalid devices open forever. We don't call vdev_close() itself,
- * because that implies some extra checks (offline, etc) that we don't
- * want here. This is limited to leaf devices, because otherwise
- * closing the device will affect other children.
- */
- if (vdev_is_dead(vd) && vd->vdev_ops->vdev_op_leaf)
- vd->vdev_ops->vdev_op_close(vd);
-
- if (state == VDEV_STATE_CANT_OPEN) {
- /*
- * If we fail to open a vdev during an import, we mark it as
- * "not available", which signifies that it was never there to
- * begin with. Failure to open such a device is not considered
- * an error.
- */
- if (vd->vdev_spa->spa_load_state == SPA_LOAD_IMPORT &&
- vd->vdev_ops->vdev_op_leaf)
- vd->vdev_not_present = 1;
-
- /*
- * Post the appropriate ereport. If the 'prevstate' field is
- * set to something other than VDEV_STATE_UNKNOWN, it indicates
- * that this is part of a vdev_reopen(). In this case, we don't
- * want to post the ereport if the device was already in the
- * CANT_OPEN state beforehand.
- */
- if (vd->vdev_prevstate != state && !vd->vdev_not_present &&
- vd != vd->vdev_spa->spa_root_vdev) {
- const char *class;
-
- switch (aux) {
- case VDEV_AUX_OPEN_FAILED:
- class = FM_EREPORT_ZFS_DEVICE_OPEN_FAILED;
- break;
- case VDEV_AUX_CORRUPT_DATA:
- class = FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA;
- break;
- case VDEV_AUX_NO_REPLICAS:
- class = FM_EREPORT_ZFS_DEVICE_NO_REPLICAS;
- break;
- case VDEV_AUX_BAD_GUID_SUM:
- class = FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM;
- break;
- case VDEV_AUX_TOO_SMALL:
- class = FM_EREPORT_ZFS_DEVICE_TOO_SMALL;
- break;
- case VDEV_AUX_BAD_LABEL:
- class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL;
- break;
- default:
- class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
- }
-
- zfs_ereport_post(class, vd->vdev_spa,
- vd, NULL, save_state, 0);
- }
- }
-
- if (isopen)
- return;
-
- if (vd->vdev_parent != NULL)
- vdev_propagate_state(vd->vdev_parent);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
deleted file mode 100644
index 4e419b6..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-
-/*
- * Virtual device read-ahead caching.
- *
- * This file implements a simple LRU read-ahead cache. When the DMU reads
- * a given block, it will often want other, nearby blocks soon thereafter.
- * We take advantage of this by reading a larger disk region and caching
- * the result. In the best case, this can turn 256 back-to-back 512-byte
- * reads into a single 128k read followed by 255 cache hits; this reduces
- * latency dramatically. In the worst case, it can turn an isolated 512-byte
- * read into a 128k read, which doesn't affect latency all that much but is
- * terribly wasteful of bandwidth. A more intelligent version of the cache
- * could keep track of access patterns and not do read-ahead unless it sees
- * at least two temporally close I/Os to the same region. It could also
- * take advantage of semantic information about the I/O. And it could use
- * something faster than an AVL tree; that was chosen solely for convenience.
- *
- * There are five cache operations: allocate, fill, read, write, evict.
- *
- * (1) Allocate. This reserves a cache entry for the specified region.
- * We separate the allocate and fill operations so that multiple threads
- * don't generate I/O for the same cache miss.
- *
- * (2) Fill. When the I/O for a cache miss completes, the fill routine
- * places the data in the previously allocated cache entry.
- *
- * (3) Read. Read data from the cache.
- *
- * (4) Write. Update cache contents after write completion.
- *
- * (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry
- * if the total cache size exceeds zfs_vdev_cache_size.
- */
-
-/*
- * These tunables are for performance analysis.
- */
-/*
- * All i/os smaller than zfs_vdev_cache_max will be turned into
- * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
- * track buffer). At most zfs_vdev_cache_size bytes will be kept in each
- * vdev's vdev_cache.
- */
-int zfs_vdev_cache_max = 1<<14;
-int zfs_vdev_cache_size = 10ULL << 20;
-int zfs_vdev_cache_bshift = 16;
-
-SYSCTL_DECL(_vfs_zfs_vdev);
-SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
-TUNABLE_INT("vfs.zfs.vdev.cache.max", &zfs_vdev_cache_max);
-SYSCTL_INT(_vfs_zfs_vdev_cache, OID_AUTO, max, CTLFLAG_RDTUN,
- &zfs_vdev_cache_max, 0, "Maximum I/O request size that increase read size");
-TUNABLE_INT("vfs.zfs.vdev.cache.size", &zfs_vdev_cache_size);
-SYSCTL_INT(_vfs_zfs_vdev_cache, OID_AUTO, size, CTLFLAG_RDTUN,
- &zfs_vdev_cache_size, 0, "Size of VDEV cache");
-
-#define VCBS (1 << zfs_vdev_cache_bshift)
-
-static int
-vdev_cache_offset_compare(const void *a1, const void *a2)
-{
- const vdev_cache_entry_t *ve1 = a1;
- const vdev_cache_entry_t *ve2 = a2;
-
- if (ve1->ve_offset < ve2->ve_offset)
- return (-1);
- if (ve1->ve_offset > ve2->ve_offset)
- return (1);
- return (0);
-}
-
-static int
-vdev_cache_lastused_compare(const void *a1, const void *a2)
-{
- const vdev_cache_entry_t *ve1 = a1;
- const vdev_cache_entry_t *ve2 = a2;
-
- if (ve1->ve_lastused < ve2->ve_lastused)
- return (-1);
- if (ve1->ve_lastused > ve2->ve_lastused)
- return (1);
-
- /*
- * Among equally old entries, sort by offset to ensure uniqueness.
- */
- return (vdev_cache_offset_compare(a1, a2));
-}
-
-/*
- * Evict the specified entry from the cache.
- */
-static void
-vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
-{
- ASSERT(MUTEX_HELD(&vc->vc_lock));
- ASSERT(ve->ve_fill_io == NULL);
- ASSERT(ve->ve_data != NULL);
-
- dprintf("evicting %p, off %llx, LRU %llu, age %lu, hits %u, stale %u\n",
- vc, ve->ve_offset, ve->ve_lastused, LBOLT - ve->ve_lastused,
- ve->ve_hits, ve->ve_missed_update);
-
- avl_remove(&vc->vc_lastused_tree, ve);
- avl_remove(&vc->vc_offset_tree, ve);
- zio_buf_free(ve->ve_data, VCBS);
- kmem_free(ve, sizeof (vdev_cache_entry_t));
-}
-
-/*
- * Allocate an entry in the cache. At this point we don't have the data,
- * we're just creating a placeholder so that multiple threads don't all
- * go off and read the same blocks.
- */
-static vdev_cache_entry_t *
-vdev_cache_allocate(zio_t *zio)
-{
- vdev_cache_t *vc = &zio->io_vd->vdev_cache;
- uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
- vdev_cache_entry_t *ve;
-
- ASSERT(MUTEX_HELD(&vc->vc_lock));
-
- if (zfs_vdev_cache_size == 0)
- return (NULL);
-
- /*
- * If adding a new entry would exceed the cache size,
- * evict the oldest entry (LRU).
- */
- if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
- zfs_vdev_cache_size) {
- ve = avl_first(&vc->vc_lastused_tree);
- if (ve->ve_fill_io != NULL) {
- dprintf("can't evict in %p, still filling\n", vc);
- return (NULL);
- }
- ASSERT(ve->ve_hits != 0);
- vdev_cache_evict(vc, ve);
- }
-
- ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
- ve->ve_offset = offset;
- ve->ve_lastused = LBOLT;
- ve->ve_data = zio_buf_alloc(VCBS);
-
- avl_add(&vc->vc_offset_tree, ve);
- avl_add(&vc->vc_lastused_tree, ve);
-
- return (ve);
-}
-
-static void
-vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
-{
- uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
-
- ASSERT(MUTEX_HELD(&vc->vc_lock));
- ASSERT(ve->ve_fill_io == NULL);
-
- if (ve->ve_lastused != LBOLT) {
- avl_remove(&vc->vc_lastused_tree, ve);
- ve->ve_lastused = LBOLT;
- avl_add(&vc->vc_lastused_tree, ve);
- }
-
- ve->ve_hits++;
- bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size);
-}
-
-/*
- * Fill a previously allocated cache entry with data.
- */
-static void
-vdev_cache_fill(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_cache_t *vc = &vd->vdev_cache;
- vdev_cache_entry_t *ve = zio->io_private;
- zio_t *dio;
-
- ASSERT(zio->io_size == VCBS);
-
- /*
- * Add data to the cache.
- */
- mutex_enter(&vc->vc_lock);
-
- ASSERT(ve->ve_fill_io == zio);
- ASSERT(ve->ve_offset == zio->io_offset);
- ASSERT(ve->ve_data == zio->io_data);
-
- ve->ve_fill_io = NULL;
-
- /*
- * Even if this cache line was invalidated by a missed write update,
- * any reads that were queued up before the missed update are still
- * valid, so we can satisfy them from this line before we evict it.
- */
- for (dio = zio->io_delegate_list; dio; dio = dio->io_delegate_next)
- vdev_cache_hit(vc, ve, dio);
-
- if (zio->io_error || ve->ve_missed_update)
- vdev_cache_evict(vc, ve);
-
- mutex_exit(&vc->vc_lock);
-
- while ((dio = zio->io_delegate_list) != NULL) {
- zio->io_delegate_list = dio->io_delegate_next;
- dio->io_delegate_next = NULL;
- dio->io_error = zio->io_error;
- zio_next_stage(dio);
- }
-}
-
-/*
- * Read data from the cache. Returns 0 on cache hit, errno on a miss.
- */
-int
-vdev_cache_read(zio_t *zio)
-{
- vdev_cache_t *vc = &zio->io_vd->vdev_cache;
- vdev_cache_entry_t *ve, ve_search;
- uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
- uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
- zio_t *fio;
-
- ASSERT(zio->io_type == ZIO_TYPE_READ);
-
- if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
- return (EINVAL);
-
- if (zio->io_size > zfs_vdev_cache_max)
- return (EOVERFLOW);
-
- /*
- * If the I/O straddles two or more cache blocks, don't cache it.
- */
- if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1, VCBS))
- return (EXDEV);
-
- ASSERT(cache_phase + zio->io_size <= VCBS);
-
- mutex_enter(&vc->vc_lock);
-
- ve_search.ve_offset = cache_offset;
- ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
-
- if (ve != NULL) {
- if (ve->ve_missed_update) {
- mutex_exit(&vc->vc_lock);
- return (ESTALE);
- }
-
- if ((fio = ve->ve_fill_io) != NULL) {
- zio->io_delegate_next = fio->io_delegate_list;
- fio->io_delegate_list = zio;
- zio_vdev_io_bypass(zio);
- mutex_exit(&vc->vc_lock);
- return (0);
- }
-
- vdev_cache_hit(vc, ve, zio);
- zio_vdev_io_bypass(zio);
-
- mutex_exit(&vc->vc_lock);
- zio_next_stage(zio);
- return (0);
- }
-
- ve = vdev_cache_allocate(zio);
-
- if (ve == NULL) {
- mutex_exit(&vc->vc_lock);
- return (ENOMEM);
- }
-
- fio = zio_vdev_child_io(zio, NULL, zio->io_vd, cache_offset,
- ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL,
- ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
- ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK,
- vdev_cache_fill, ve);
-
- ve->ve_fill_io = fio;
- fio->io_delegate_list = zio;
- zio_vdev_io_bypass(zio);
-
- mutex_exit(&vc->vc_lock);
- zio_nowait(fio);
-
- return (0);
-}
-
-/*
- * Update cache contents upon write completion.
- */
-void
-vdev_cache_write(zio_t *zio)
-{
- vdev_cache_t *vc = &zio->io_vd->vdev_cache;
- vdev_cache_entry_t *ve, ve_search;
- uint64_t io_start = zio->io_offset;
- uint64_t io_end = io_start + zio->io_size;
- uint64_t min_offset = P2ALIGN(io_start, VCBS);
- uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
- avl_index_t where;
-
- ASSERT(zio->io_type == ZIO_TYPE_WRITE);
-
- mutex_enter(&vc->vc_lock);
-
- ve_search.ve_offset = min_offset;
- ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
-
- if (ve == NULL)
- ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);
-
- while (ve != NULL && ve->ve_offset < max_offset) {
- uint64_t start = MAX(ve->ve_offset, io_start);
- uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
-
- if (ve->ve_fill_io != NULL) {
- ve->ve_missed_update = 1;
- } else {
- bcopy((char *)zio->io_data + start - io_start,
- ve->ve_data + start - ve->ve_offset, end - start);
- }
- ve = AVL_NEXT(&vc->vc_offset_tree, ve);
- }
- mutex_exit(&vc->vc_lock);
-}
-
-void
-vdev_cache_init(vdev_t *vd)
-{
- vdev_cache_t *vc = &vd->vdev_cache;
-
- mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL);
-
- avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare,
- sizeof (vdev_cache_entry_t),
- offsetof(struct vdev_cache_entry, ve_offset_node));
-
- avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
- sizeof (vdev_cache_entry_t),
- offsetof(struct vdev_cache_entry, ve_lastused_node));
-}
-
-void
-vdev_cache_fini(vdev_t *vd)
-{
- vdev_cache_t *vc = &vd->vdev_cache;
- vdev_cache_entry_t *ve;
-
- mutex_enter(&vc->vc_lock);
- while ((ve = avl_first(&vc->vc_offset_tree)) != NULL)
- vdev_cache_evict(vc, ve);
- mutex_exit(&vc->vc_lock);
-
- avl_destroy(&vc->vc_offset_tree);
- avl_destroy(&vc->vc_lastused_tree);
-
- mutex_destroy(&vc->vc_lock);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
deleted file mode 100644
index b965b1c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_disk.h>
-#include <sys/vdev_impl.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio.h>
-#include <sys/sunldi.h>
-
-/*
- * Virtual device vector for disks.
- */
-
-extern ldi_ident_t zfs_li;
-
-typedef struct vdev_disk_buf {
- buf_t vdb_buf;
- zio_t *vdb_io;
-} vdev_disk_buf_t;
-
-static int
-vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
-{
- vdev_disk_t *dvd;
- struct dk_minfo dkm;
- int error;
-
- /*
- * We must have a pathname, and it must be absolute.
- */
- if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
- vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- return (EINVAL);
- }
-
- dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
-
- /*
- * When opening a disk device, we want to preserve the user's original
- * intent. We always want to open the device by the path the user gave
- * us, even if it is one of multiple paths to the save device. But we
- * also want to be able to survive disks being removed/recabled.
- * Therefore the sequence of opening devices is:
- *
- * 1. Try opening the device by path. For legacy pools without the
- * 'whole_disk' property, attempt to fix the path by appending 's0'.
- *
- * 2. If the devid of the device matches the stored value, return
- * success.
- *
- * 3. Otherwise, the device may have moved. Try opening the device
- * by the devid instead.
- *
- */
- if (vd->vdev_devid != NULL) {
- if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
- &dvd->vd_minor) != 0) {
- vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- return (EINVAL);
- }
- }
-
- error = EINVAL; /* presume failure */
-
- if (vd->vdev_path != NULL) {
- ddi_devid_t devid;
-
- if (vd->vdev_wholedisk == -1ULL) {
- size_t len = strlen(vd->vdev_path) + 3;
- char *buf = kmem_alloc(len, KM_SLEEP);
- ldi_handle_t lh;
-
- (void) snprintf(buf, len, "%ss0", vd->vdev_path);
-
- if (ldi_open_by_name(buf, spa_mode, kcred,
- &lh, zfs_li) == 0) {
- spa_strfree(vd->vdev_path);
- vd->vdev_path = buf;
- vd->vdev_wholedisk = 1ULL;
- (void) ldi_close(lh, spa_mode, kcred);
- } else {
- kmem_free(buf, len);
- }
- }
-
- error = ldi_open_by_name(vd->vdev_path, spa_mode, kcred,
- &dvd->vd_lh, zfs_li);
-
- /*
- * Compare the devid to the stored value.
- */
- if (error == 0 && vd->vdev_devid != NULL &&
- ldi_get_devid(dvd->vd_lh, &devid) == 0) {
- if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
- error = EINVAL;
- (void) ldi_close(dvd->vd_lh, spa_mode, kcred);
- dvd->vd_lh = NULL;
- }
- ddi_devid_free(devid);
- }
-
- /*
- * If we succeeded in opening the device, but 'vdev_wholedisk'
- * is not yet set, then this must be a slice.
- */
- if (error == 0 && vd->vdev_wholedisk == -1ULL)
- vd->vdev_wholedisk = 0;
- }
-
- /*
- * If we were unable to open by path, or the devid check fails, open by
- * devid instead.
- */
- if (error != 0 && vd->vdev_devid != NULL)
- error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
- spa_mode, kcred, &dvd->vd_lh, zfs_li);
-
- if (error) {
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (error);
- }
-
- /*
- * Determine the actual size of the device.
- */
- if (ldi_get_size(dvd->vd_lh, psize) != 0) {
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (EINVAL);
- }
-
- /*
- * If we own the whole disk, try to enable disk write caching.
- * We ignore errors because it's OK if we can't do it.
- */
- if (vd->vdev_wholedisk == 1) {
- int wce = 1;
- (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
- FKIOCTL, kcred, NULL);
- }
-
- /*
- * Determine the device's minimum transfer size.
- * If the ioctl isn't supported, assume DEV_BSIZE.
- */
- if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
- FKIOCTL, kcred, NULL) != 0)
- dkm.dki_lbsize = DEV_BSIZE;
-
- *ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
-
- /*
- * Clear the nowritecache bit, so that on a vdev_reopen() we will
- * try again.
- */
- vd->vdev_nowritecache = B_FALSE;
-
- return (0);
-}
-
-static void
-vdev_disk_close(vdev_t *vd)
-{
- vdev_disk_t *dvd = vd->vdev_tsd;
-
- if (dvd == NULL)
- return;
-
- dprintf("removing disk %s, devid %s\n",
- vd->vdev_path ? vd->vdev_path : "<none>",
- vd->vdev_devid ? vd->vdev_devid : "<none>");
-
- if (dvd->vd_minor != NULL)
- ddi_devid_str_free(dvd->vd_minor);
-
- if (dvd->vd_devid != NULL)
- ddi_devid_free(dvd->vd_devid);
-
- if (dvd->vd_lh != NULL)
- (void) ldi_close(dvd->vd_lh, spa_mode, kcred);
-
- kmem_free(dvd, sizeof (vdev_disk_t));
- vd->vdev_tsd = NULL;
-}
-
-static void
-vdev_disk_io_intr(buf_t *bp)
-{
- vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
- zio_t *zio = vdb->vdb_io;
-
- if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0)
- zio->io_error = EIO;
-
- kmem_free(vdb, sizeof (vdev_disk_buf_t));
-
- zio_next_stage_async(zio);
-}
-
-static void
-vdev_disk_ioctl_done(void *zio_arg, int error)
-{
- zio_t *zio = zio_arg;
-
- zio->io_error = error;
-
- zio_next_stage_async(zio);
-}
-
-static void
-vdev_disk_io_start(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_disk_t *dvd = vd->vdev_tsd;
- vdev_disk_buf_t *vdb;
- buf_t *bp;
- int flags, error;
-
- if (zio->io_type == ZIO_TYPE_IOCTL) {
- zio_vdev_io_bypass(zio);
-
- /* XXPOLICY */
- if (vdev_is_dead(vd)) {
- zio->io_error = ENXIO;
- zio_next_stage_async(zio);
- return;
- }
-
- switch (zio->io_cmd) {
-
- case DKIOCFLUSHWRITECACHE:
-
- if (zfs_nocacheflush)
- break;
-
- if (vd->vdev_nowritecache) {
- zio->io_error = ENOTSUP;
- break;
- }
-
- zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done;
- zio->io_dk_callback.dkc_cookie = zio;
-
- error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
- (uintptr_t)&zio->io_dk_callback,
- FKIOCTL, kcred, NULL);
-
- if (error == 0) {
- /*
- * The ioctl will be done asychronously,
- * and will call vdev_disk_ioctl_done()
- * upon completion.
- */
- return;
- } else if (error == ENOTSUP) {
- /*
- * If we get ENOTSUP, we know that no future
- * attempts will ever succeed. In this case we
- * set a persistent bit so that we don't bother
- * with the ioctl in the future.
- */
- vd->vdev_nowritecache = B_TRUE;
- }
- zio->io_error = error;
-
- break;
-
- default:
- zio->io_error = ENOTSUP;
- }
-
- zio_next_stage_async(zio);
- return;
- }
-
- if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
- return;
-
- if ((zio = vdev_queue_io(zio)) == NULL)
- return;
-
- flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
- flags |= B_BUSY | B_NOCACHE;
- if (zio->io_flags & ZIO_FLAG_FAILFAST)
- flags |= B_FAILFAST;
-
- vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
-
- vdb->vdb_io = zio;
- bp = &vdb->vdb_buf;
-
- bioinit(bp);
- bp->b_flags = flags;
- bp->b_bcount = zio->io_size;
- bp->b_un.b_addr = zio->io_data;
- bp->b_lblkno = lbtodb(zio->io_offset);
- bp->b_bufsize = zio->io_size;
- bp->b_iodone = (int (*)())vdev_disk_io_intr;
-
- /* XXPOLICY */
- error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
- if (error) {
- zio->io_error = error;
- bioerror(bp, error);
- bp->b_resid = bp->b_bcount;
- bp->b_iodone(bp);
- return;
- }
-
- error = ldi_strategy(dvd->vd_lh, bp);
- /* ldi_strategy() will return non-zero only on programming errors */
- ASSERT(error == 0);
-}
-
-static void
-vdev_disk_io_done(zio_t *zio)
-{
- vdev_queue_io_done(zio);
-
- if (zio->io_type == ZIO_TYPE_WRITE)
- vdev_cache_write(zio);
-
- if (zio_injection_enabled && zio->io_error == 0)
- zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
-
- zio_next_stage(zio);
-}
-
-vdev_ops_t vdev_disk_ops = {
- vdev_disk_open,
- vdev_disk_close,
- vdev_default_asize,
- vdev_disk_io_start,
- vdev_disk_io_done,
- NULL,
- VDEV_TYPE_DISK, /* name of this vdev type */
- B_TRUE /* leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
deleted file mode 100644
index b8e79f8..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_file.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/fs/zfs.h>
-
-/*
- * Virtual device vector for files.
- */
-
-static int
-vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
-{
- vdev_file_t *vf;
- vnode_t *vp;
- vattr_t vattr;
- int error;
-
- /*
- * We must have a pathname, and it must be absolute.
- */
- if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
- vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- return (EINVAL);
- }
-
- vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
-
- /*
- * We always open the files from the root of the global zone, even if
- * we're in a local zone. If the user has gotten to this point, the
- * administrator has already decided that the pool should be available
- * to local zone users, so the underlying devices should be as well.
- */
- ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
- error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, spa_mode | FOFFMAX,
- 0, &vp, 0, 0, rootdir);
-
- if (error) {
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (error);
- }
-
- vf->vf_vnode = vp;
-
-#ifdef _KERNEL
- /*
- * Make sure it's a regular file.
- */
- if (vp->v_type != VREG) {
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (ENODEV);
- }
-#endif
-
- /*
- * Determine the physical size of the file.
- */
- vattr.va_mask = AT_SIZE;
- error = VOP_GETATTR(vp, &vattr, 0, kcred);
- if (error) {
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (error);
- }
-
- *psize = vattr.va_size;
- *ashift = SPA_MINBLOCKSHIFT;
-
- return (0);
-}
-
-static void
-vdev_file_close(vdev_t *vd)
-{
- vdev_file_t *vf = vd->vdev_tsd;
-
- if (vf == NULL)
- return;
-
- if (vf->vf_vnode != NULL) {
- (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
- (void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred);
- VN_RELE(vf->vf_vnode);
- }
-
- kmem_free(vf, sizeof (vdev_file_t));
- vd->vdev_tsd = NULL;
-}
-
-static void
-vdev_file_io_start(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_file_t *vf = vd->vdev_tsd;
- ssize_t resid;
- int error;
-
- if (zio->io_type == ZIO_TYPE_IOCTL) {
- zio_vdev_io_bypass(zio);
-
- /* XXPOLICY */
- if (vdev_is_dead(vd)) {
- zio->io_error = ENXIO;
- zio_next_stage_async(zio);
- return;
- }
-
- switch (zio->io_cmd) {
- case DKIOCFLUSHWRITECACHE:
- zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
- kcred);
- dprintf("fsync(%s) = %d\n", vdev_description(vd),
- zio->io_error);
- break;
- default:
- zio->io_error = ENOTSUP;
- }
-
- zio_next_stage_async(zio);
- return;
- }
-
- /*
- * In the kernel, don't bother double-caching, but in userland,
- * we want to test the vdev_cache code.
- */
-#ifndef _KERNEL
- if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
- return;
-#endif
-
- if ((zio = vdev_queue_io(zio)) == NULL)
- return;
-
- /* XXPOLICY */
- error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
- if (error) {
- zio->io_error = error;
- zio_next_stage_async(zio);
- return;
- }
-
- zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
- UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
- zio->io_size, zio->io_offset, UIO_SYSSPACE,
- 0, RLIM64_INFINITY, kcred, &resid);
-
- if (resid != 0 && zio->io_error == 0)
- zio->io_error = ENOSPC;
-
- zio_next_stage_async(zio);
-}
-
-static void
-vdev_file_io_done(zio_t *zio)
-{
- vdev_queue_io_done(zio);
-
-#ifndef _KERNEL
- if (zio->io_type == ZIO_TYPE_WRITE)
- vdev_cache_write(zio);
-#endif
-
- if (zio_injection_enabled && zio->io_error == 0)
- zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
-
- zio_next_stage(zio);
-}
-
-vdev_ops_t vdev_file_ops = {
- vdev_file_open,
- vdev_file_close,
- vdev_default_asize,
- vdev_file_io_start,
- vdev_file_io_done,
- NULL,
- VDEV_TYPE_FILE, /* name of this vdev type */
- B_TRUE /* leaf vdev */
-};
-
-/*
- * From userland we access disks just like files.
- */
-#ifndef _KERNEL
-
-vdev_ops_t vdev_disk_ops = {
- vdev_file_open,
- vdev_file_close,
- vdev_default_asize,
- vdev_file_io_start,
- vdev_file_io_done,
- NULL,
- VDEV_TYPE_DISK, /* name of this vdev type */
- B_TRUE /* leaf vdev */
-};
-
-#endif
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
deleted file mode 100644
index eebc911..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/bio.h>
-#include <sys/disk.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio.h>
-#include <geom/geom.h>
-#include <geom/geom_int.h>
-
-/*
- * Virtual device vector for GEOM.
- */
-
-struct g_class zfs_vdev_class = {
- .name = "ZFS::VDEV",
- .version = G_VERSION,
-};
-
-DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
-
-typedef struct vdev_geom_ctx {
- struct g_consumer *gc_consumer;
- int gc_state;
- struct bio_queue_head gc_queue;
- struct mtx gc_queue_mtx;
-} vdev_geom_ctx_t;
-
-static void
-vdev_geom_release(vdev_t *vd)
-{
- vdev_geom_ctx_t *ctx;
-
- ctx = vd->vdev_tsd;
- vd->vdev_tsd = NULL;
-
- mtx_lock(&ctx->gc_queue_mtx);
- ctx->gc_state = 1;
- wakeup_one(&ctx->gc_queue);
- while (ctx->gc_state != 2)
- msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
- mtx_unlock(&ctx->gc_queue_mtx);
- mtx_destroy(&ctx->gc_queue_mtx);
- kmem_free(ctx, sizeof(*ctx));
-}
-
-static void
-vdev_geom_orphan(struct g_consumer *cp)
-{
- struct g_geom *gp;
- vdev_t *vd;
- int error;
-
- g_topology_assert();
-
- vd = cp->private;
- gp = cp->geom;
- error = cp->provider->error;
-
- ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
- if (cp->acr + cp->acw + cp->ace > 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
- g_detach(cp);
- g_destroy_consumer(cp);
- /* Destroy geom if there are no consumers left. */
- if (LIST_EMPTY(&gp->consumer)) {
- ZFS_LOG(1, "Destroyed geom %s.", gp->name);
- g_wither_geom(gp, error);
- }
- vdev_geom_release(vd);
- /* Both methods below work, but in a bit different way. */
-#if 0
- vd->vdev_reopen_wanted = 1;
-#else
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, vd->vdev_stat.vs_aux);
-#endif
-}
-
-static struct g_consumer *
-vdev_geom_attach(struct g_provider *pp, int write)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
-
- g_topology_assert();
-
- ZFS_LOG(1, "Attaching to %s.", pp->name);
- /* Do we have geom already? No? Create one. */
- LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
- if (gp->flags & G_GEOM_WITHER)
- continue;
- if (strcmp(gp->name, "zfs::vdev") != 0)
- continue;
- break;
- }
- if (gp == NULL) {
- gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
- gp->orphan = vdev_geom_orphan;
- cp = g_new_consumer(gp);
- if (g_attach(cp, pp) != 0) {
- g_wither_geom(gp, ENXIO);
- return (NULL);
- }
- if (g_access(cp, 1, write, 1) != 0) {
- g_wither_geom(gp, ENXIO);
- return (NULL);
- }
- ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
- } else {
- /* Check if we are already connected to this provider. */
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- if (cp->provider == pp) {
- ZFS_LOG(1, "Found consumer for %s.", pp->name);
- break;
- }
- }
- if (cp == NULL) {
- cp = g_new_consumer(gp);
- if (g_attach(cp, pp) != 0) {
- g_destroy_consumer(cp);
- return (NULL);
- }
- if (g_access(cp, 1, write, 1) != 0) {
- g_detach(cp);
- g_destroy_consumer(cp);
- return (NULL);
- }
- ZFS_LOG(1, "Created consumer for %s.", pp->name);
- } else {
- if (g_access(cp, 1, cp->acw > 0 ? 0 : write, 1) != 0)
- return (NULL);
- ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
- }
- }
- return (cp);
-}
-
-static void
-vdev_geom_detach(void *arg, int flag __unused)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
-
- g_topology_assert();
- cp = arg;
- gp = cp->geom;
-
- ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
- g_access(cp, -1, 0, -1);
- /* Destroy consumer on last close. */
- if (cp->acr == 0 && cp->ace == 0) {
- ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
- if (cp->acw > 0)
- g_access(cp, 0, -cp->acw, 0);
- g_detach(cp);
- g_destroy_consumer(cp);
- }
- /* Destroy geom if there are no consumers left. */
- if (LIST_EMPTY(&gp->consumer)) {
- ZFS_LOG(1, "Destroyed geom %s.", gp->name);
- g_wither_geom(gp, ENXIO);
- }
-}
-
-static void
-vdev_geom_worker(void *arg)
-{
- vdev_geom_ctx_t *ctx;
- zio_t *zio;
- struct bio *bp;
-
- ctx = arg;
- for (;;) {
- mtx_lock(&ctx->gc_queue_mtx);
- bp = bioq_takefirst(&ctx->gc_queue);
- if (bp == NULL) {
- if (ctx->gc_state == 1) {
- ctx->gc_state = 2;
- wakeup_one(&ctx->gc_state);
- mtx_unlock(&ctx->gc_queue_mtx);
- kproc_exit(0);
- }
- msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
- PRIBIO | PDROP, "vgeom:io", 0);
- continue;
- }
- mtx_unlock(&ctx->gc_queue_mtx);
- zio = bp->bio_caller1;
- zio->io_error = bp->bio_error;
- if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
- vdev_t *vd;
-
- /*
- * If we get ENOTSUP, we know that no future
- * attempts will ever succeed. In this case we
- * set a persistent bit so that we don't bother
- * with the ioctl in the future.
- */
- vd = zio->io_vd;
- vd->vdev_nowritecache = B_TRUE;
- }
- g_destroy_bio(bp);
- zio_next_stage_async(zio);
- }
-}
-
-static char *
-vdev_geom_get_id(struct g_consumer *cp)
-{
- char *id;
- int len;
-
- g_topology_assert_not();
- len = DISK_IDENT_SIZE;
- id = kmem_zalloc(len, KM_SLEEP);
- if (g_io_getattr("GEOM::ident", cp, &len, id) != 0) {
- kmem_free(id, DISK_IDENT_SIZE);
- return (NULL);
- }
- return (id);
-}
-
-static void
-vdev_geom_free_id(char *id)
-{
-
- if (id != NULL)
- kmem_free(id, DISK_IDENT_SIZE);
-}
-
-struct vdev_geom_find {
- const char *id;
- int write;
- struct g_consumer *cp;
-};
-
-static void
-vdev_geom_taste_orphan(struct g_consumer *cp)
-{
-
- KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
- cp->provider->name));
-}
-
-static void
-vdev_geom_attach_by_id_event(void *arg, int flags __unused)
-{
- struct vdev_geom_find *ap;
- struct g_class *mp;
- struct g_geom *gp, *zgp;
- struct g_provider *pp;
- struct g_consumer *zcp;
- char *id;
-
- g_topology_assert();
-
- ap = arg;
-
- zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
- /* This orphan function should be never called. */
- zgp->orphan = vdev_geom_taste_orphan;
- zcp = g_new_consumer(zgp);
-
- LIST_FOREACH(mp, &g_classes, class) {
- if (mp == &zfs_vdev_class)
- continue;
- LIST_FOREACH(gp, &mp->geom, geom) {
- if (gp->flags & G_GEOM_WITHER)
- continue;
- LIST_FOREACH(pp, &gp->provider, provider) {
- if (pp->flags & G_PF_WITHER)
- continue;
- g_attach(zcp, pp);
- if (g_access(zcp, 1, 0, 0) != 0) {
- g_detach(zcp);
- continue;
- }
- g_topology_unlock();
- id = vdev_geom_get_id(zcp);
- g_topology_lock();
- g_access(zcp, -1, 0, 0);
- g_detach(zcp);
- if (id == NULL || strcmp(id, ap->id) != 0) {
- vdev_geom_free_id(id);
- continue;
- }
- vdev_geom_free_id(id);
- ap->cp = vdev_geom_attach(pp, ap->write);
- if (ap->cp == NULL) {
- printf("ZFS WARNING: Cannot open %s "
- "for writting.\n", pp->name);
- continue;
- }
- goto end;
- }
- }
- }
- ap->cp = NULL;
-end:
- g_destroy_consumer(zcp);
- g_destroy_geom(zgp);
-}
-
-static struct g_consumer *
-vdev_geom_attach_by_id(const char *id, int write)
-{
- struct vdev_geom_find *ap;
- struct g_consumer *cp;
-
- ap = kmem_zalloc(sizeof(*ap), KM_SLEEP);
- ap->id = id;
- ap->write = write;
- g_waitfor_event(vdev_geom_attach_by_id_event, ap, M_WAITOK, NULL);
- cp = ap->cp;
- kmem_free(ap, sizeof(*ap));
- return (cp);
-}
-
-static int
-vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
-{
- vdev_geom_ctx_t *ctx;
- struct g_provider *pp;
- struct g_consumer *cp;
- char *id = NULL;
- int owned;
-
- /*
- * We must have a pathname, and it must be absolute.
- */
- if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
- vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- return (EINVAL);
- }
-
- if ((owned = mtx_owned(&Giant)))
- mtx_unlock(&Giant);
- cp = NULL;
- g_topology_lock();
- pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
- if (pp != NULL) {
- ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
- cp = vdev_geom_attach(pp, !!(spa_mode & FWRITE));
- if (cp != NULL && vd->vdev_devid != NULL) {
- g_topology_unlock();
- id = vdev_geom_get_id(cp);
- g_topology_lock();
- if (id == NULL || strcmp(id, vd->vdev_devid) != 0) {
- vdev_geom_detach(cp, 0);
- cp = NULL;
- ZFS_LOG(1, "ID mismatch for provider %s: "
- "[%s]!=[%s].", vd->vdev_path,
- vd->vdev_devid, id);
- goto next;
- }
- ZFS_LOG(1, "ID match for provider %s.", vd->vdev_path);
- }
- }
-next:
- g_topology_unlock();
- vdev_geom_free_id(id);
- if (cp == NULL && vd->vdev_devid != NULL) {
- ZFS_LOG(1, "Searching by ID [%s].", vd->vdev_devid);
- cp = vdev_geom_attach_by_id(vd->vdev_devid,
- !!(spa_mode & FWRITE));
- if (cp != NULL) {
- size_t len = strlen(cp->provider->name) + 6; /* 6 == strlen("/dev/") + 1 */
- char *buf = kmem_alloc(len, KM_SLEEP);
-
- snprintf(buf, len, "/dev/%s", cp->provider->name);
- spa_strfree(vd->vdev_path);
- vd->vdev_path = buf;
-
- ZFS_LOG(1, "Attach by ID [%s] succeeded, provider %s.",
- vd->vdev_devid, vd->vdev_path);
- }
- }
- if (owned)
- mtx_lock(&Giant);
- if (cp == NULL) {
- ZFS_LOG(1, "Provider %s (id=[%s]) not found.", vd->vdev_path,
- vd->vdev_devid);
- vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- return (EACCES);
- }
- pp = cp->provider;
-
- /*
- * Determine the actual size of the device.
- */
- *psize = pp->mediasize;
-
- /*
- * Determine the device's minimum transfer size.
- */
- *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
-
- /*
- * Clear the nowritecache bit, so that on a vdev_reopen() we will
- * try again.
- */
- vd->vdev_nowritecache = B_FALSE;
-
- cp->private = vd;
-
- ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
- bioq_init(&ctx->gc_queue);
- mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
- ctx->gc_consumer = cp;
- ctx->gc_state = 0;
-
- vd->vdev_tsd = ctx;
-
- kproc_create(vdev_geom_worker, ctx, NULL, 0, 0, "vdev:worker %s",
- pp->name);
-
- return (0);
-}
-
-static void
-vdev_geom_close(vdev_t *vd)
-{
- vdev_geom_ctx_t *ctx;
- struct g_consumer *cp;
-
- if ((ctx = vd->vdev_tsd) == NULL)
- return;
- if ((cp = ctx->gc_consumer) == NULL)
- return;
- vdev_geom_release(vd);
- g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
-}
-
-static void
-vdev_geom_io_intr(struct bio *bp)
-{
- vdev_geom_ctx_t *ctx;
- zio_t *zio;
-
- zio = bp->bio_caller1;
- ctx = zio->io_vd->vdev_tsd;
-
- mtx_lock(&ctx->gc_queue_mtx);
- bioq_insert_tail(&ctx->gc_queue, bp);
- wakeup_one(&ctx->gc_queue);
- mtx_unlock(&ctx->gc_queue_mtx);
-}
-
-static void
-vdev_geom_io_start(zio_t *zio)
-{
- vdev_t *vd;
- vdev_geom_ctx_t *ctx;
- struct g_consumer *cp;
- struct bio *bp;
- int error;
-
- cp = NULL;
-
- vd = zio->io_vd;
- ctx = vd->vdev_tsd;
- if (ctx != NULL)
- cp = ctx->gc_consumer;
-
- if (zio->io_type == ZIO_TYPE_IOCTL) {
- zio_vdev_io_bypass(zio);
-
- /* XXPOLICY */
- if (vdev_is_dead(vd)) {
- zio->io_error = ENXIO;
- zio_next_stage_async(zio);
- return;
- }
-
- switch (zio->io_cmd) {
-
- case DKIOCFLUSHWRITECACHE:
- if (vd->vdev_nowritecache) {
- zio->io_error = ENOTSUP;
- break;
- }
-
- goto sendreq;
- default:
- zio->io_error = ENOTSUP;
- }
-
- zio_next_stage_async(zio);
- return;
- }
-
- if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
- return;
-
- if ((zio = vdev_queue_io(zio)) == NULL)
- return;
-
-sendreq:
-
- error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
- if (error == 0 && cp == NULL)
- error = ENXIO;
- if (error) {
- zio->io_error = error;
- zio_next_stage_async(zio);
- return;
- }
-
- bp = g_alloc_bio();
- bp->bio_caller1 = zio;
- switch (zio->io_type) {
- case ZIO_TYPE_READ:
- case ZIO_TYPE_WRITE:
- bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
- bp->bio_data = zio->io_data;
- bp->bio_offset = zio->io_offset;
- bp->bio_length = zio->io_size;
- break;
- case ZIO_TYPE_IOCTL:
- bp->bio_cmd = BIO_FLUSH;
- bp->bio_data = NULL;
- bp->bio_offset = cp->provider->mediasize;
- bp->bio_length = 0;
- break;
- }
- bp->bio_done = vdev_geom_io_intr;
-
- g_io_request(bp, cp);
-}
-
-static void
-vdev_geom_io_done(zio_t *zio)
-{
- vdev_queue_io_done(zio);
-
- if (zio->io_type == ZIO_TYPE_WRITE)
- vdev_cache_write(zio);
-
- if (zio_injection_enabled && zio->io_error == 0)
- zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
-
- zio_next_stage(zio);
-}
-
-vdev_ops_t vdev_geom_ops = {
- vdev_geom_open,
- vdev_geom_close,
- vdev_default_asize,
- vdev_geom_io_start,
- vdev_geom_io_done,
- NULL,
- VDEV_TYPE_DISK, /* name of this vdev type */
- B_TRUE /* leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
deleted file mode 100644
index 9d9f555..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Virtual Device Labels
- * ---------------------
- *
- * The vdev label serves several distinct purposes:
- *
- * 1. Uniquely identify this device as part of a ZFS pool and confirm its
- * identity within the pool.
- *
- * 2. Verify that all the devices given in a configuration are present
- * within the pool.
- *
- * 3. Determine the uberblock for the pool.
- *
- * 4. In case of an import operation, determine the configuration of the
- * toplevel vdev of which it is a part.
- *
- * 5. If an import operation cannot find all the devices in the pool,
- * provide enough information to the administrator to determine which
- * devices are missing.
- *
- * It is important to note that while the kernel is responsible for writing the
- * label, it only consumes the information in the first three cases. The
- * latter information is only consumed in userland when determining the
- * configuration to import a pool.
- *
- *
- * Label Organization
- * ------------------
- *
- * Before describing the contents of the label, it's important to understand how
- * the labels are written and updated with respect to the uberblock.
- *
- * When the pool configuration is altered, either because it was newly created
- * or a device was added, we want to update all the labels such that we can deal
- * with fatal failure at any point. To this end, each disk has two labels which
- * are updated before and after the uberblock is synced. Assuming we have
- * labels and an uberblock with the following transaction groups:
- *
- * L1 UB L2
- * +------+ +------+ +------+
- * | | | | | |
- * | t10 | | t10 | | t10 |
- * | | | | | |
- * +------+ +------+ +------+
- *
- * In this stable state, the labels and the uberblock were all updated within
- * the same transaction group (10). Each label is mirrored and checksummed, so
- * that we can detect when we fail partway through writing the label.
- *
- * In order to identify which labels are valid, the labels are written in the
- * following manner:
- *
- * 1. For each vdev, update 'L1' to the new label
- * 2. Update the uberblock
- * 3. For each vdev, update 'L2' to the new label
- *
- * Given arbitrary failure, we can determine the correct label to use based on
- * the transaction group. If we fail after updating L1 but before updating the
- * UB, we will notice that L1's transaction group is greater than the uberblock,
- * so L2 must be valid. If we fail after writing the uberblock but before
- * writing L2, we will notice that L2's transaction group is less than L1, and
- * therefore L1 is valid.
- *
- * Another added complexity is that not every label is updated when the config
- * is synced. If we add a single device, we do not want to have to re-write
- * every label for every device in the pool. This means that both L1 and L2 may
- * be older than the pool uberblock, because the necessary information is stored
- * on another vdev.
- *
- *
- * On-disk Format
- * --------------
- *
- * The vdev label consists of two distinct parts, and is wrapped within the
- * vdev_label_t structure. The label includes 8k of padding to permit legacy
- * VTOC disk labels, but is otherwise ignored.
- *
- * The first half of the label is a packed nvlist which contains pool wide
- * properties, per-vdev properties, and configuration information. It is
- * described in more detail below.
- *
- * The latter half of the label consists of a redundant array of uberblocks.
- * These uberblocks are updated whenever a transaction group is committed,
- * or when the configuration is updated. When a pool is loaded, we scan each
- * vdev for the 'best' uberblock.
- *
- *
- * Configuration Information
- * -------------------------
- *
- * The nvlist describing the pool and vdev contains the following elements:
- *
- * version ZFS on-disk version
- * name Pool name
- * state Pool state
- * txg Transaction group in which this label was written
- * pool_guid Unique identifier for this pool
- * vdev_tree An nvlist describing vdev tree.
- *
- * Each leaf device label also contains the following:
- *
- * top_guid Unique ID for top-level vdev in which this is contained
- * guid Unique ID for the leaf vdev
- *
- * The 'vs' configuration follows the format described in 'spa_config.c'.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/dmu.h>
-#include <sys/zap.h>
-#include <sys/vdev.h>
-#include <sys/vdev_impl.h>
-#include <sys/uberblock_impl.h>
-#include <sys/metaslab.h>
-#include <sys/zio.h>
-#include <sys/fs/zfs.h>
-
-/*
- * Basic routines to read and write from a vdev label.
- * Used throughout the rest of this file.
- */
-uint64_t
-vdev_label_offset(uint64_t psize, int l, uint64_t offset)
-{
- ASSERT(offset < sizeof (vdev_label_t));
-
- return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
- 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
-}
-
-static void
-vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private)
-{
- ASSERT(vd->vdev_children == 0);
-
- zio_nowait(zio_read_phys(zio, vd,
- vdev_label_offset(vd->vdev_psize, l, offset),
- size, buf, ZIO_CHECKSUM_LABEL, done, private,
- ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE));
-}
-
-static void
-vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private)
-{
- ASSERT(vd->vdev_children == 0);
-
- zio_nowait(zio_write_phys(zio, vd,
- vdev_label_offset(vd->vdev_psize, l, offset),
- size, buf, ZIO_CHECKSUM_LABEL, done, private,
- ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL));
-}
-
-/*
- * Generate the nvlist representing this vdev's config.
- */
-nvlist_t *
-vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
- boolean_t isspare)
-{
- nvlist_t *nv = NULL;
-
- VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
- vd->vdev_ops->vdev_op_type) == 0);
- if (!isspare)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
- == 0);
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
-
- if (vd->vdev_path != NULL)
- VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PATH,
- vd->vdev_path) == 0);
-
- if (vd->vdev_devid != NULL)
- VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID,
- vd->vdev_devid) == 0);
-
- if (vd->vdev_nparity != 0) {
- ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
- VDEV_TYPE_RAIDZ) == 0);
-
- /*
- * Make sure someone hasn't managed to sneak a fancy new vdev
- * into a crufty old storage pool.
- */
- ASSERT(vd->vdev_nparity == 1 ||
- (vd->vdev_nparity == 2 &&
- spa_version(spa) >= ZFS_VERSION_RAID6));
-
- /*
- * Note that we'll add the nparity tag even on storage pools
- * that only support a single parity device -- older software
- * will just ignore it.
- */
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY,
- vd->vdev_nparity) == 0);
- }
-
- if (vd->vdev_wholedisk != -1ULL)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- vd->vdev_wholedisk) == 0);
-
- if (vd->vdev_not_present)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0);
-
- if (vd->vdev_isspare)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
-
- if (!isspare && vd == vd->vdev_top) {
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
- vd->vdev_ms_array) == 0);
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
- vd->vdev_ms_shift) == 0);
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT,
- vd->vdev_ashift) == 0);
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
- vd->vdev_asize) == 0);
- }
-
- if (vd->vdev_dtl.smo_object != 0)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
- vd->vdev_dtl.smo_object) == 0);
-
- if (getstats) {
- vdev_stat_t vs;
- vdev_get_stats(vd, &vs);
- VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0);
- }
-
- if (!vd->vdev_ops->vdev_op_leaf) {
- nvlist_t **child;
- int c;
-
- child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
- KM_SLEEP);
-
- for (c = 0; c < vd->vdev_children; c++)
- child[c] = vdev_config_generate(spa, vd->vdev_child[c],
- getstats, isspare);
-
- VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- child, vd->vdev_children) == 0);
-
- for (c = 0; c < vd->vdev_children; c++)
- nvlist_free(child[c]);
-
- kmem_free(child, vd->vdev_children * sizeof (nvlist_t *));
-
- } else {
- if (vd->vdev_offline && !vd->vdev_tmpoffline)
- VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE,
- B_TRUE) == 0);
- else
- (void) nvlist_remove(nv, ZPOOL_CONFIG_OFFLINE,
- DATA_TYPE_UINT64);
- }
-
- return (nv);
-}
-
-nvlist_t *
-vdev_label_read_config(vdev_t *vd)
-{
- spa_t *spa = vd->vdev_spa;
- nvlist_t *config = NULL;
- vdev_phys_t *vp;
- zio_t *zio;
- int l;
-
- ASSERT(spa_config_held(spa, RW_READER));
-
- if (vdev_is_dead(vd))
- return (NULL);
-
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
-
- for (l = 0; l < VDEV_LABELS; l++) {
-
- zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CONFIG_HELD);
-
- vdev_label_read(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL);
-
- if (zio_wait(zio) == 0 &&
- nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
- &config, 0) == 0)
- break;
-
- if (config != NULL) {
- nvlist_free(config);
- config = NULL;
- }
- }
-
- zio_buf_free(vp, sizeof (vdev_phys_t));
-
- return (config);
-}
-
-/*
- * Determine if a device is in use. The 'spare_guid' parameter will be filled
- * in with the device guid if this spare is active elsewhere on the system.
- */
-static boolean_t
-vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
- uint64_t *spare_guid)
-{
- spa_t *spa = vd->vdev_spa;
- uint64_t state, pool_guid, device_guid, txg, spare_pool;
- uint64_t vdtxg = 0;
- nvlist_t *label;
-
- if (spare_guid)
- *spare_guid = 0ULL;
-
- /*
- * Read the label, if any, and perform some basic sanity checks.
- */
- if ((label = vdev_label_read_config(vd)) == NULL)
- return (B_FALSE);
-
- (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
- &vdtxg);
-
- if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
- &state) != 0 ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
- &device_guid) != 0) {
- nvlist_free(label);
- return (B_FALSE);
- }
-
- if (state != POOL_STATE_SPARE &&
- (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
- &pool_guid) != 0 ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0)) {
- nvlist_free(label);
- return (B_FALSE);
- }
-
- nvlist_free(label);
-
- /*
- * Check to see if this device indeed belongs to the pool it claims to
- * be a part of. The only way this is allowed is if the device is a hot
- * spare (which we check for later on).
- */
- if (state != POOL_STATE_SPARE &&
- !spa_guid_exists(pool_guid, device_guid) &&
- !spa_spare_exists(device_guid, NULL))
- return (B_FALSE);
-
- /*
- * If the transaction group is zero, then this is an initialized (but
- * unused) label. This is only an error if the create transaction
- * on-disk is the same as the one we're using now, in which case the
- * user has attempted to add the same vdev multiple times in the same
- * transaction.
- */
- if (state != POOL_STATE_SPARE && txg == 0 && vdtxg == crtxg)
- return (B_TRUE);
-
- /*
- * Check to see if this is a spare device. We do an explicit check for
- * spa_has_spare() here because it may be on our pending list of spares
- * to add.
- */
- if (spa_spare_exists(device_guid, &spare_pool) ||
- spa_has_spare(spa, device_guid)) {
- if (spare_guid)
- *spare_guid = device_guid;
-
- switch (reason) {
- case VDEV_LABEL_CREATE:
- return (B_TRUE);
-
- case VDEV_LABEL_REPLACE:
- return (!spa_has_spare(spa, device_guid) ||
- spare_pool != 0ULL);
-
- case VDEV_LABEL_SPARE:
- return (spa_has_spare(spa, device_guid));
- }
- }
-
- /*
- * If the device is marked ACTIVE, then this device is in use by another
- * pool on the system.
- */
- return (state == POOL_STATE_ACTIVE);
-}
-
-/*
- * Initialize a vdev label. We check to make sure each leaf device is not in
- * use, and writable. We put down an initial label which we will later
- * overwrite with a complete label. Note that it's important to do this
- * sequentially, not in parallel, so that we catch cases of multiple use of the
- * same leaf vdev in the vdev we're creating -- e.g. mirroring a disk with
- * itself.
- */
-int
-vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
-{
- spa_t *spa = vd->vdev_spa;
- nvlist_t *label;
- vdev_phys_t *vp;
- vdev_boot_header_t *vb;
- uberblock_t *ub;
- zio_t *zio;
- int l, c, n;
- char *buf;
- size_t buflen;
- int error;
- uint64_t spare_guid;
-
- ASSERT(spa_config_held(spa, RW_WRITER));
-
- for (c = 0; c < vd->vdev_children; c++)
- if ((error = vdev_label_init(vd->vdev_child[c],
- crtxg, reason)) != 0)
- return (error);
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return (0);
-
- /*
- * Dead vdevs cannot be initialized.
- */
- if (vdev_is_dead(vd))
- return (EIO);
-
- /*
- * Determine if the vdev is in use.
- */
- if (reason != VDEV_LABEL_REMOVE &&
- vdev_inuse(vd, crtxg, reason, &spare_guid))
- return (EBUSY);
-
- ASSERT(reason != VDEV_LABEL_REMOVE ||
- vdev_inuse(vd, crtxg, reason, NULL));
-
- /*
- * If this is a request to add or replace a spare that is in use
- * elsewhere on the system, then we must update the guid (which was
- * initialized to a random value) to reflect the actual GUID (which is
- * shared between multiple pools).
- */
- if (reason != VDEV_LABEL_REMOVE && spare_guid != 0ULL) {
- vdev_t *pvd = vd->vdev_parent;
-
- for (; pvd != NULL; pvd = pvd->vdev_parent) {
- pvd->vdev_guid_sum -= vd->vdev_guid;
- pvd->vdev_guid_sum += spare_guid;
- }
-
- vd->vdev_guid = vd->vdev_guid_sum = spare_guid;
-
- /*
- * If this is a replacement, then we want to fallthrough to the
- * rest of the code. If we're adding a spare, then it's already
- * labelled appropriately and we can just return.
- */
- if (reason == VDEV_LABEL_SPARE)
- return (0);
- ASSERT(reason == VDEV_LABEL_REPLACE);
- }
-
- /*
- * Initialize its label.
- */
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
-
- /*
- * Generate a label describing the pool and our top-level vdev.
- * We mark it as being from txg 0 to indicate that it's not
- * really part of an active pool just yet. The labels will
- * be written again with a meaningful txg by spa_sync().
- */
- if (reason == VDEV_LABEL_SPARE ||
- (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) {
- /*
- * For inactive hot spares, we generate a special label that
- * identifies as a mutually shared hot spare. We write the
- * label if we are adding a hot spare, or if we are removing an
- * active hot spare (in which case we want to revert the
- * labels).
- */
- VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
- POOL_STATE_SPARE) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
- } else {
- label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
-
- /*
- * Add our creation time. This allows us to detect multiple
- * vdev uses as described above, and automatically expires if we
- * fail.
- */
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
- crtxg) == 0);
- }
-
- buf = vp->vp_nvlist;
- buflen = sizeof (vp->vp_nvlist);
-
- error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
- if (error != 0) {
- nvlist_free(label);
- zio_buf_free(vp, sizeof (vdev_phys_t));
- /* EFAULT means nvlist_pack ran out of room */
- return (error == EFAULT ? ENAMETOOLONG : EINVAL);
- }
-
- /*
- * Initialize boot block header.
- */
- vb = zio_buf_alloc(sizeof (vdev_boot_header_t));
- bzero(vb, sizeof (vdev_boot_header_t));
- vb->vb_magic = VDEV_BOOT_MAGIC;
- vb->vb_version = VDEV_BOOT_VERSION;
- vb->vb_offset = VDEV_BOOT_OFFSET;
- vb->vb_size = VDEV_BOOT_SIZE;
-
- /*
- * Initialize uberblock template.
- */
- ub = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd));
- bzero(ub, VDEV_UBERBLOCK_SIZE(vd));
- *ub = spa->spa_uberblock;
- ub->ub_txg = 0;
-
- /*
- * Write everything in parallel.
- */
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
-
- for (l = 0; l < VDEV_LABELS; l++) {
-
- vdev_label_write(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL);
-
- vdev_label_write(zio, vd, l, vb,
- offsetof(vdev_label_t, vl_boot_header),
- sizeof (vdev_boot_header_t), NULL, NULL);
-
- for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
- vdev_label_write(zio, vd, l, ub,
- VDEV_UBERBLOCK_OFFSET(vd, n),
- VDEV_UBERBLOCK_SIZE(vd), NULL, NULL);
- }
- }
-
- error = zio_wait(zio);
-
- nvlist_free(label);
- zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
- zio_buf_free(vb, sizeof (vdev_boot_header_t));
- zio_buf_free(vp, sizeof (vdev_phys_t));
-
- /*
- * If this vdev hasn't been previously identified as a spare, then we
- * mark it as such only if a) we are labelling it as a spare, or b) it
- * exists as a spare elsewhere in the system.
- */
- if (error == 0 && !vd->vdev_isspare &&
- (reason == VDEV_LABEL_SPARE ||
- spa_spare_exists(vd->vdev_guid, NULL)))
- spa_spare_add(vd);
-
- return (error);
-}
-
-/*
- * ==========================================================================
- * uberblock load/sync
- * ==========================================================================
- */
-
-/*
- * Consider the following situation: txg is safely synced to disk. We've
- * written the first uberblock for txg + 1, and then we lose power. When we
- * come back up, we fail to see the uberblock for txg + 1 because, say,
- * it was on a mirrored device and the replica to which we wrote txg + 1
- * is now offline. If we then make some changes and sync txg + 1, and then
- * the missing replica comes back, then for a few seconds we'll have two
- * conflicting uberblocks on disk with the same txg. The solution is simple:
- * among uberblocks with equal txg, choose the one with the latest timestamp.
- */
-static int
-vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
-{
- if (ub1->ub_txg < ub2->ub_txg)
- return (-1);
- if (ub1->ub_txg > ub2->ub_txg)
- return (1);
-
- if (ub1->ub_timestamp < ub2->ub_timestamp)
- return (-1);
- if (ub1->ub_timestamp > ub2->ub_timestamp)
- return (1);
-
- return (0);
-}
-
-static void
-vdev_uberblock_load_done(zio_t *zio)
-{
- uberblock_t *ub = zio->io_data;
- uberblock_t *ubbest = zio->io_private;
- spa_t *spa = zio->io_spa;
-
- ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
-
- if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
- mutex_enter(&spa->spa_uberblock_lock);
- if (vdev_uberblock_compare(ub, ubbest) > 0)
- *ubbest = *ub;
- mutex_exit(&spa->spa_uberblock_lock);
- }
-
- zio_buf_free(zio->io_data, zio->io_size);
-}
-
-void
-vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
-{
- int l, c, n;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return;
-
- if (vdev_is_dead(vd))
- return;
-
- for (l = 0; l < VDEV_LABELS; l++) {
- for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
- vdev_label_read(zio, vd, l,
- zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
- VDEV_UBERBLOCK_OFFSET(vd, n),
- VDEV_UBERBLOCK_SIZE(vd),
- vdev_uberblock_load_done, ubbest);
- }
- }
-}
-
-/*
- * Write the uberblock to all labels of all leaves of the specified vdev.
- * We only get credit for writes to known-visible vdevs; see spa_vdev_add().
- */
-static void
-vdev_uberblock_sync_done(zio_t *zio)
-{
- uint64_t *good_writes = zio->io_root->io_private;
-
- if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
- atomic_add_64(good_writes, 1);
-}
-
-static void
-vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, uint64_t txg)
-{
- int l, c, n;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_sync(zio, ub, vd->vdev_child[c], txg);
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return;
-
- if (vdev_is_dead(vd))
- return;
-
- n = txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
-
- ASSERT(ub->ub_txg == txg);
-
- for (l = 0; l < VDEV_LABELS; l++)
- vdev_label_write(zio, vd, l, ub,
- VDEV_UBERBLOCK_OFFSET(vd, n),
- VDEV_UBERBLOCK_SIZE(vd),
- vdev_uberblock_sync_done, NULL);
-
- dprintf("vdev %s in txg %llu\n", vdev_description(vd), txg);
-}
-
-static int
-vdev_uberblock_sync_tree(spa_t *spa, uberblock_t *ub, vdev_t *vd, uint64_t txg)
-{
- uberblock_t *ubbuf;
- size_t size = vd->vdev_top ? VDEV_UBERBLOCK_SIZE(vd) : SPA_MAXBLOCKSIZE;
- uint64_t *good_writes;
- zio_t *zio;
- int error;
-
- ubbuf = zio_buf_alloc(size);
- bzero(ubbuf, size);
- *ubbuf = *ub;
-
- good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
-
- zio = zio_root(spa, NULL, good_writes,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
-
- vdev_uberblock_sync(zio, ubbuf, vd, txg);
-
- error = zio_wait(zio);
-
- if (error && *good_writes != 0) {
- dprintf("partial success: good_writes = %llu\n", *good_writes);
- error = 0;
- }
-
- /*
- * It's possible to have no good writes and no error if every vdev is in
- * the CANT_OPEN state.
- */
- if (*good_writes == 0 && error == 0)
- error = EIO;
-
- kmem_free(good_writes, sizeof (uint64_t));
- zio_buf_free(ubbuf, size);
-
- return (error);
-}
-
-/*
- * Sync out an individual vdev.
- */
-static void
-vdev_sync_label_done(zio_t *zio)
-{
- uint64_t *good_writes = zio->io_root->io_private;
-
- if (zio->io_error == 0)
- atomic_add_64(good_writes, 1);
-}
-
-static void
-vdev_sync_label(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
-{
- nvlist_t *label;
- vdev_phys_t *vp;
- char *buf;
- size_t buflen;
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_sync_label(zio, vd->vdev_child[c], l, txg);
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return;
-
- if (vdev_is_dead(vd))
- return;
-
- /*
- * Generate a label describing the top-level config to which we belong.
- */
- label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE);
-
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
-
- buf = vp->vp_nvlist;
- buflen = sizeof (vp->vp_nvlist);
-
- if (nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP) == 0)
- vdev_label_write(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t),
- vdev_sync_label_done, NULL);
-
- zio_buf_free(vp, sizeof (vdev_phys_t));
- nvlist_free(label);
-
- dprintf("%s label %d txg %llu\n", vdev_description(vd), l, txg);
-}
-
-static int
-vdev_sync_labels(vdev_t *vd, int l, uint64_t txg)
-{
- uint64_t *good_writes;
- zio_t *zio;
- int error;
-
- ASSERT(vd == vd->vdev_top);
-
- good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
-
- zio = zio_root(vd->vdev_spa, NULL, good_writes,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
-
- /*
- * Recursively kick off writes to all labels.
- */
- vdev_sync_label(zio, vd, l, txg);
-
- error = zio_wait(zio);
-
- if (error && *good_writes != 0) {
- dprintf("partial success: good_writes = %llu\n", *good_writes);
- error = 0;
- }
-
- if (*good_writes == 0 && error == 0)
- error = ENODEV;
-
- kmem_free(good_writes, sizeof (uint64_t));
-
- return (error);
-}
-
-/*
- * Sync the entire vdev configuration.
- *
- * The order of operations is carefully crafted to ensure that
- * if the system panics or loses power at any time, the state on disk
- * is still transactionally consistent. The in-line comments below
- * describe the failure semantics at each stage.
- *
- * Moreover, it is designed to be idempotent: if vdev_config_sync() fails
- * at any time, you can just call it again, and it will resume its work.
- */
-int
-vdev_config_sync(vdev_t *uvd, uint64_t txg)
-{
- spa_t *spa = uvd->vdev_spa;
- uberblock_t *ub = &spa->spa_uberblock;
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- zio_t *zio;
- int l, error;
-
- ASSERT(ub->ub_txg <= txg);
-
- /*
- * If this isn't a resync due to I/O errors, and nothing changed
- * in this transaction group, and the vdev configuration hasn't changed,
- * then there's nothing to do.
- */
- if (ub->ub_txg < txg && uberblock_update(ub, rvd, txg) == B_FALSE &&
- list_is_empty(&spa->spa_dirty_list)) {
- dprintf("nothing to sync in %s in txg %llu\n",
- spa_name(spa), txg);
- return (0);
- }
-
- if (txg > spa_freeze_txg(spa))
- return (0);
-
- ASSERT(txg <= spa->spa_final_txg);
-
- dprintf("syncing %s txg %llu\n", spa_name(spa), txg);
-
- /*
- * Flush the write cache of every disk that's been written to
- * in this transaction group. This ensures that all blocks
- * written in this txg will be committed to stable storage
- * before any uberblock that references them.
- */
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
- for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
- vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg))) {
- zio_nowait(zio_ioctl(zio, spa, vd, DKIOCFLUSHWRITECACHE,
- NULL, NULL, ZIO_PRIORITY_NOW,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY));
- }
- (void) zio_wait(zio);
-
- /*
- * Sync out the even labels (L0, L2) for every dirty vdev. If the
- * system dies in the middle of this process, that's OK: all of the
- * even labels that made it to disk will be newer than any uberblock,
- * and will therefore be considered invalid. The odd labels (L1, L3),
- * which have not yet been touched, will still be valid.
- */
- for (vd = list_head(&spa->spa_dirty_list); vd != NULL;
- vd = list_next(&spa->spa_dirty_list, vd)) {
- for (l = 0; l < VDEV_LABELS; l++) {
- if (l & 1)
- continue;
- if ((error = vdev_sync_labels(vd, l, txg)) != 0)
- return (error);
- }
- }
-
- /*
- * Flush the new labels to disk. This ensures that all even-label
- * updates are committed to stable storage before the uberblock update.
- */
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
- for (vd = list_head(&spa->spa_dirty_list); vd != NULL;
- vd = list_next(&spa->spa_dirty_list, vd)) {
- zio_nowait(zio_ioctl(zio, spa, vd, DKIOCFLUSHWRITECACHE,
- NULL, NULL, ZIO_PRIORITY_NOW,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY));
- }
- (void) zio_wait(zio);
-
- /*
- * Sync the uberblocks to all vdevs in the tree specified by uvd.
- * If the system dies in the middle of this step, there are two cases
- * to consider, and the on-disk state is consistent either way:
- *
- * (1) If none of the new uberblocks made it to disk, then the
- * previous uberblock will be the newest, and the odd labels
- * (which had not yet been touched) will be valid with respect
- * to that uberblock.
- *
- * (2) If one or more new uberblocks made it to disk, then they
- * will be the newest, and the even labels (which had all
- * been successfully committed) will be valid with respect
- * to the new uberblocks.
- */
- if ((error = vdev_uberblock_sync_tree(spa, ub, uvd, txg)) != 0)
- return (error);
-
- /*
- * Flush the uberblocks to disk. This ensures that the odd labels
- * are no longer needed (because the new uberblocks and the even
- * labels are safely on disk), so it is safe to overwrite them.
- */
- (void) zio_wait(zio_ioctl(NULL, spa, uvd, DKIOCFLUSHWRITECACHE,
- NULL, NULL, ZIO_PRIORITY_NOW,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY));
-
- /*
- * Sync out odd labels for every dirty vdev. If the system dies
- * in the middle of this process, the even labels and the new
- * uberblocks will suffice to open the pool. The next time
- * the pool is opened, the first thing we'll do -- before any
- * user data is modified -- is mark every vdev dirty so that
- * all labels will be brought up to date.
- */
- for (vd = list_head(&spa->spa_dirty_list); vd != NULL;
- vd = list_next(&spa->spa_dirty_list, vd)) {
- for (l = 0; l < VDEV_LABELS; l++) {
- if ((l & 1) == 0)
- continue;
- if ((error = vdev_sync_labels(vd, l, txg)) != 0)
- return (error);
- }
- }
-
- /*
- * Flush the new labels to disk. This ensures that all odd-label
- * updates are committed to stable storage before the next
- * transaction group begins.
- */
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL);
- for (vd = list_head(&spa->spa_dirty_list); vd != NULL;
- vd = list_next(&spa->spa_dirty_list, vd)) {
- zio_nowait(zio_ioctl(zio, spa, vd, DKIOCFLUSHWRITECACHE,
- NULL, NULL, ZIO_PRIORITY_NOW,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY));
- }
- (void) zio_wait(zio);
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
deleted file mode 100644
index 73d1a83..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/fs/zfs.h>
-
-/*
- * Virtual device vector for mirroring.
- */
-
-typedef struct mirror_child { /* per-child bookkeeping for one mirror I/O */
- vdev_t *mc_vd; /* vdev the child I/O is issued to */
- uint64_t mc_offset; /* offset passed to the child I/O */
- int mc_error; /* child I/O result, or ENXIO/ESTALE when passed over */
- short mc_tried; /* set once a child I/O completed, or the vdev is dead */
- short mc_skipped; /* set when child selection passed over this child */
-} mirror_child_t;
-
-typedef struct mirror_map { /* per-zio state, stored in zio->io_vsd */
- int mm_children; /* number of valid entries in mm_child[] */
- int mm_replacing; /* nonzero for replacing and spare vdevs */
- int mm_preferred; /* index child selection starts from */
- int mm_root; /* B_TRUE when built from the block pointer's DVAs */
- mirror_child_t mm_child[1]; /* really mm_children long; see map_alloc */
-} mirror_map_t;
-
-int vdev_mirror_shift = 21; /* preferred child = (io_offset >> shift) % children */
-
-/*
- * Build the mirror map for 'zio' and store it in zio->io_vsd.
- *
- * When zio->io_vd is NULL the map gets one entry per DVA of zio->io_bp
- * and mm_root is set; otherwise it gets one entry per child of
- * zio->io_vd.  The map is allocated with KM_SLEEP and is released by
- * vdev_mirror_map_free().  Returns the new map.
- */
-static mirror_map_t *
-vdev_mirror_map_alloc(zio_t *zio)
-{
-	vdev_t *pvd = zio->io_vd;
-	mirror_map_t *map;
-	mirror_child_t *child;
-	int n, i;
-
-	if (pvd != NULL) {
-		/* One entry for each child of this vdev. */
-		n = pvd->vdev_children;
-
-		map = kmem_zalloc(offsetof(mirror_map_t, mm_child[n]),
-		    KM_SLEEP);
-		map->mm_children = n;
-		map->mm_replacing = (pvd->vdev_ops == &vdev_replacing_ops ||
-		    pvd->vdev_ops == &vdev_spare_ops);
-		if (map->mm_replacing) {
-			map->mm_preferred = 0;
-		} else {
-			map->mm_preferred =
-			    (zio->io_offset >> vdev_mirror_shift) % n;
-		}
-		map->mm_root = B_FALSE;
-
-		for (i = 0; i < map->mm_children; i++) {
-			child = &map->mm_child[i];
-			child->mc_vd = pvd->vdev_child[i];
-			child->mc_offset = zio->io_offset;
-		}
-	} else {
-		/* No vdev given: one entry for each DVA in the block pointer. */
-		dva_t *dva = zio->io_bp->blk_dva;
-		spa_t *spa = zio->io_spa;
-		int pick;
-
-		n = BP_GET_NDVAS(zio->io_bp);
-
-		map = kmem_zalloc(offsetof(mirror_map_t, mm_child[n]),
-		    KM_SLEEP);
-		map->mm_children = n;
-		map->mm_replacing = B_FALSE;
-		map->mm_preferred = spa_get_random(n);
-		map->mm_root = B_TRUE;
-
-		/*
-		 * If a lower-index DVA lives on the same vdev as the one
-		 * just picked, prefer it instead: it was likely allocated
-		 * from the primary metaslab in use at the time, so it is
-		 * more likely to have locality with single-copy data.
-		 */
-		pick = map->mm_preferred;
-		for (i = pick - 1; i >= 0; i--) {
-			if (DVA_GET_VDEV(&dva[i]) == DVA_GET_VDEV(&dva[pick]))
-				map->mm_preferred = i;
-		}
-
-		for (i = 0; i < map->mm_children; i++) {
-			child = &map->mm_child[i];
-
-			child->mc_vd = vdev_lookup_top(spa,
-			    DVA_GET_VDEV(&dva[i]));
-			child->mc_offset = DVA_GET_OFFSET(&dva[i]);
-		}
-	}
-
-	zio->io_vsd = map;
-	return (map);
-}
-
-/*
- * Release the mirror map hanging off zio->io_vsd and clear the pointer.
- * The size freed is recomputed from mm_children, matching the allocation
- * in vdev_mirror_map_alloc().
- */
-static void
-vdev_mirror_map_free(zio_t *zio)
-{
-	mirror_map_t *map = zio->io_vsd;
-	size_t map_size = offsetof(mirror_map_t, mm_child[map->mm_children]);
-
-	kmem_free(map, map_size);
-	zio->io_vsd = NULL;
-}
-
-/*
- * Open every child of 'vd'.
- *
- * For each child that opens, *asize is lowered to that child's
- * vdev_asize if it is smaller and *ashift is raised to its vdev_ashift
- * if it is larger.  Returns EINVAL (aux VDEV_AUX_BAD_LABEL) when there
- * are no children, the last child error (aux VDEV_AUX_NO_REPLICAS) when
- * every child failed to open, and 0 otherwise.
- */
-static int
-vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
-{
-	vdev_t *child;
-	uint64_t idx;
-	int failed = 0;
-	int err, last_err = 0;
-
-	if (vd->vdev_children == 0) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
-	}
-
-	for (idx = 0; idx < vd->vdev_children; idx++) {
-		child = vd->vdev_child[idx];
-		err = vdev_open(child);
-
-		if (err == 0) {
-			/*
-			 * The -1/+1 makes an incoming *asize of 0 wrap to
-			 * the largest value, so the first child wins.
-			 */
-			*asize = MIN(*asize - 1, child->vdev_asize - 1) + 1;
-			*ashift = MAX(*ashift, child->vdev_ashift);
-		} else {
-			last_err = err;
-			failed++;
-		}
-	}
-
-	if (failed == vd->vdev_children) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
-		return (last_err);
-	}
-
-	return (0);
-}
-
-/*
- * Close every child of 'vd', in index order.
- */
-static void
-vdev_mirror_close(vdev_t *vd)
-{
-	uint64_t idx = 0;
-
-	while (idx < vd->vdev_children) {
-		vdev_close(vd->vdev_child[idx]);
-		idx++;
-	}
-}
-
-/*
- * Completion callback for an ordinary child I/O: record the result in
- * the mirror_child_t passed as io_private and mark the child as tried.
- */
-static void
-vdev_mirror_child_done(zio_t *zio)
-{
-	mirror_child_t *child = zio->io_private;
-
-	child->mc_tried = 1;
-	child->mc_skipped = 0;
-	child->mc_error = zio->io_error;
-}
-
-/*
- * Completion callback for a scrubbing child read.
- *
- * A successful read copies its data into the parent's buffer under the
- * parent's io_lock.  The child's private buffer is freed in every case,
- * and the result is recorded in the mirror_child_t passed as io_private.
- */
-static void
-vdev_mirror_scrub_done(zio_t *zio)
-{
-	mirror_child_t *child = zio->io_private;
-
-	if (zio->io_error == 0) {
-		zio_t *parent = zio->io_parent;
-
-		mutex_enter(&parent->io_lock);
-		ASSERT3U(zio->io_size, >=, parent->io_size);
-		bcopy(zio->io_data, parent->io_data, parent->io_size);
-		mutex_exit(&parent->io_lock);
-	}
-
-	/* The buffer was allocated per child in vdev_mirror_io_start(). */
-	zio_buf_free(zio->io_data, zio->io_size);
-
-	child->mc_error = zio->io_error;
-	child->mc_tried = 1;
-	child->mc_skipped = 0;
-}
-
-/*
- * Completion callback for the repair root I/O: its io_private is the
- * parent zio whose mirror map can now be released.
- */
-static void
-vdev_mirror_repair_done(zio_t *zio)
-{
-	zio_t *owner = zio->io_private;
-
-	ASSERT(owner == zio->io_parent);
-	vdev_mirror_map_free(owner);
-}
-
-/*
- * Pick the child to read from next.
- *
- * Prefer a child whose DTL does not contain the block's txg; failing
- * that, fall back to any child that has not been tried yet.  Returns the
- * child's index in mm_child[], or -1 when no candidate is left.
- */
-static int
-vdev_mirror_child_select(zio_t *zio)
-{
-	mirror_map_t *map = zio->io_vsd;
-	mirror_child_t *child;
-	uint64_t txg = zio->io_txg;
-	int seen, idx;
-
-	ASSERT(zio->io_bp == NULL || zio->io_bp->blk_birth == txg);
-
-	/*
-	 * Walk all children once, starting at the preferred one and
-	 * wrapping around.  Children known to be completely inaccessible
-	 * (vdev_is_dead() returns B_TRUE) are marked as tried without
-	 * issuing any I/O.
-	 */
-	idx = map->mm_preferred;
-	for (seen = 0; seen < map->mm_children; seen++, idx++) {
-		if (idx >= map->mm_children)
-			idx = 0;
-		child = &map->mm_child[idx];
-
-		if (child->mc_tried || child->mc_skipped)
-			continue;
-
-		if (!vdev_is_dead(child->mc_vd)) {
-			if (!vdev_dtl_contains(&child->mc_vd->vdev_dtl_map,
-			    txg, 1))
-				return (idx);
-			child->mc_error = ESTALE;
-			child->mc_skipped = 1;
-		} else {
-			child->mc_error = ENXIO;
-			child->mc_tried = 1;	/* don't even try */
-			child->mc_skipped = 1;
-		}
-	}
-
-	/*
-	 * Every device is either missing or has this txg in its DTL.
-	 * Settle for any child that has not been tried yet.
-	 */
-	for (idx = 0; idx < map->mm_children; idx++) {
-		if (!map->mm_child[idx].mc_tried)
-			return (idx);
-	}
-
-	/*
-	 * Every child failed.  There's no place left to look.
-	 */
-	return (-1);
-}
-
-/*
- * Start stage for mirror-type vdevs: build the mirror map, issue the
- * child I/Os, then wait for them with zio_wait_children_done().
- *
- *  - scrubbing reads on a non-replacing vdev go to every child, each
- *    with its own buffer;
- *  - other reads go to the single child picked by
- *    vdev_mirror_child_select(), or to none if it returns -1;
- *  - writes go to every child, except resilvering writes on a replacing
- *    vdev whose first child has no DTL entry for this txg, which go to
- *    the last child only.
- */
-static void
-vdev_mirror_io_start(zio_t *zio)
-{
-	mirror_map_t *map = vdev_mirror_map_alloc(zio);
-	mirror_child_t *child;
-	int idx, count;
-
-	if (zio->io_type != ZIO_TYPE_READ) {
-		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
-
-		/*
-		 * If this is a resilvering I/O to a replacing vdev,
-		 * only the last child should be written -- unless the
-		 * first child happens to have a DTL entry here as well.
-		 * All other writes go to all children.
-		 */
-		if ((zio->io_flags & ZIO_FLAG_RESILVER) &&
-		    map->mm_replacing &&
-		    !vdev_dtl_contains(&map->mm_child[0].mc_vd->vdev_dtl_map,
-		    zio->io_txg, 1)) {
-			idx = map->mm_children - 1;
-			count = 1;
-		} else {
-			idx = 0;
-			count = map->mm_children;
-		}
-	} else if ((zio->io_flags & ZIO_FLAG_SCRUB) && !map->mm_replacing) {
-		/*
-		 * Scrubbing reads are issued to all children, each with
-		 * a freshly allocated buffer.  Any child that succeeds
-		 * copies its data into zio->io_data from
-		 * vdev_mirror_scrub_done().
-		 */
-		for (idx = 0; idx < map->mm_children; idx++) {
-			child = &map->mm_child[idx];
-			zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
-			    child->mc_vd, child->mc_offset,
-			    zio_buf_alloc(zio->io_size), zio->io_size,
-			    zio->io_type, zio->io_priority,
-			    ZIO_FLAG_CANFAIL,
-			    vdev_mirror_scrub_done, child));
-		}
-		zio_wait_children_done(zio);
-		return;
-	} else {
-		/*
-		 * Normal reads use a single child, if one is available.
-		 */
-		idx = vdev_mirror_child_select(zio);
-		count = (idx >= 0);
-	}
-
-	for (; count > 0; count--, idx++) {
-		child = &map->mm_child[idx];
-		zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
-		    child->mc_vd, child->mc_offset,
-		    zio->io_data, zio->io_size, zio->io_type,
-		    zio->io_priority, ZIO_FLAG_CANFAIL,
-		    vdev_mirror_child_done, child));
-	}
-
-	zio_wait_children_done(zio);
-}
-
-/*
- * Completion stage for mirror-type vdevs.
- *
- * Tallies the per-child results recorded in the mirror map, then:
- *  - writes: succeed if at least one child succeeded; the map is freed
- *    and the zio advances;
- *  - reads with no good copy: retry on the next child chosen by
- *    vdev_mirror_child_select() until none is left;
- *  - reads with a good copy on a writable pool: when there were
- *    unexpected errors, or the I/O is a resilver, or a scrub of a
- *    replacing vdev, rewrite the good data to the children that need it;
- *    the map is then freed by vdev_mirror_repair_done();
- *  - otherwise: free the map and advance to the next stage.
- */
-static void
-vdev_mirror_io_done(zio_t *zio)
-{
-	mirror_map_t *mm = zio->io_vsd;
-	mirror_child_t *mc;
-	int c;
-	int good_copies = 0;
-	int unexpected_errors = 0;
-
-	zio->io_error = 0;
-	zio->io_numerrors = 0;
-
-	for (c = 0; c < mm->mm_children; c++) {
-		mc = &mm->mm_child[c];
-
-		if (mc->mc_tried && mc->mc_error == 0) {
-			good_copies++;
-			continue;
-		}
-
-		/*
-		 * We preserve any EIOs because those may be worth retrying;
-		 * whereas ECKSUM and ENXIO are more likely to be persistent.
-		 */
-		if (mc->mc_error) {
-			if (zio->io_error != EIO)
-				zio->io_error = mc->mc_error;
-			/* Skipped children never had an I/O issued. */
-			if (!mc->mc_skipped)
-				unexpected_errors++;
-			zio->io_numerrors++;
-		}
-	}
-
-	if (zio->io_type == ZIO_TYPE_WRITE) {
-		/*
-		 * XXX -- for now, treat partial writes as success.
-		 * XXX -- For a replacing vdev, we need to make sure the
-		 * new child succeeds.
-		 */
-		/* XXPOLICY */
-		if (good_copies != 0)
-			zio->io_error = 0;
-		vdev_mirror_map_free(zio);
-		zio_next_stage(zio);
-		return;
-	}
-
-	ASSERT(zio->io_type == ZIO_TYPE_READ);
-
-	/*
-	 * If we don't have a good copy yet, keep trying other children.
-	 */
-	/* XXPOLICY */
-	if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
-		ASSERT(c >= 0 && c < mm->mm_children);
-		mc = &mm->mm_child[c];
-		dprintf("retrying i/o (err=%d) on child %s\n",
-		    zio->io_error, vdev_description(mc->mc_vd));
-		zio->io_error = 0;
-		zio_vdev_io_redone(zio);
-		zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
-		    mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size,
-		    ZIO_TYPE_READ, zio->io_priority, ZIO_FLAG_CANFAIL,
-		    vdev_mirror_child_done, mc));
-		zio_wait_children_done(zio);
-		return;
-	}
-
-	/* XXPOLICY */
-	if (good_copies)
-		zio->io_error = 0;
-	else
-		ASSERT(zio->io_error != 0);
-
-	if (good_copies && (spa_mode & FWRITE) &&
-	    (unexpected_errors ||
-	    (zio->io_flags & ZIO_FLAG_RESILVER) ||
-	    ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) {
-		zio_t *rio;
-
-		/*
-		 * Use the good data we have in hand to repair damaged children.
-		 *
-		 * We issue all repair I/Os as children of 'rio' to arrange
-		 * that vdev_mirror_map_free(zio) will be invoked after all
-		 * repairs complete, but before we advance to the next stage.
-		 */
-		rio = zio_null(zio, zio->io_spa,
-		    vdev_mirror_repair_done, zio, ZIO_FLAG_CANFAIL);
-
-		for (c = 0; c < mm->mm_children; c++) {
-			/*
-			 * Don't rewrite known good children.
-			 * Not only is it unnecessary, it could
-			 * actually be harmful: if the system lost
-			 * power while rewriting the only good copy,
-			 * there would be no good copies left!
-			 */
-			mc = &mm->mm_child[c];
-
-			if (mc->mc_error == 0) {
-				if (mc->mc_tried)
-					continue;
-				/*
-				 * Untried child: outside of a scrub, leave
-				 * it alone unless its DTL covers this txg.
-				 */
-				if (!(zio->io_flags & ZIO_FLAG_SCRUB) &&
-				    !vdev_dtl_contains(&mc->mc_vd->vdev_dtl_map,
-				    zio->io_txg, 1))
-					continue;
-				mc->mc_error = ESTALE;
-			}
-
-			/* mc_offset is uint64_t; cast to match %llx. */
-			dprintf("resilvered %s @ 0x%llx error %d\n",
-			    vdev_description(mc->mc_vd),
-			    (unsigned long long)mc->mc_offset,
-			    mc->mc_error);
-
-			zio_nowait(zio_vdev_child_io(rio, zio->io_bp, mc->mc_vd,
-			    mc->mc_offset, zio->io_data, zio->io_size,
-			    ZIO_TYPE_WRITE, zio->io_priority,
-			    ZIO_FLAG_IO_REPAIR | ZIO_FLAG_CANFAIL |
-			    ZIO_FLAG_DONT_PROPAGATE, NULL, NULL));
-		}
-
-		zio_nowait(rio);
-		zio_wait_children_done(zio);
-		return;
-	}
-
-	vdev_mirror_map_free(zio);
-	zio_next_stage(zio);
-}
-
-/*
- * Derive the state of 'vd' from the number of faulted and degraded
- * children: all faulted means it cannot open, any faulted or degraded
- * means degraded, and otherwise it is healthy.
- */
-static void
-vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded)
-{
-	if (faulted == vd->vdev_children) {
-		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
-		    VDEV_AUX_NO_REPLICAS);
-		return;
-	}
-
-	if (degraded + faulted != 0) {
-		vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED,
-		    VDEV_AUX_NONE);
-		return;
-	}
-
-	vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
-}
-
-vdev_ops_t vdev_mirror_ops = { /* positional initializer for mirror vdevs */
- vdev_mirror_open, /* opens every child; fails only if none opens */
- vdev_mirror_close, /* closes every child */
- vdev_default_asize, /* defined outside this file */
- vdev_mirror_io_start, /* builds the mirror map and issues child I/Os */
- vdev_mirror_io_done, /* tallies child results; retries or repairs reads */
- vdev_mirror_state_change, /* sets state from faulted/degraded counts */
- VDEV_TYPE_MIRROR, /* name of this vdev type */
- B_FALSE /* not a leaf vdev */
-};
-
-vdev_ops_t vdev_replacing_ops = { /* same handlers as vdev_mirror_ops */
- vdev_mirror_open,
- vdev_mirror_close,
- vdev_default_asize,
- vdev_mirror_io_start, /* map_alloc sets mm_replacing for this ops vector */
- vdev_mirror_io_done,
- vdev_mirror_state_change,
- VDEV_TYPE_REPLACING, /* name of this vdev type */
- B_FALSE /* not a leaf vdev */
-};
-
-vdev_ops_t vdev_spare_ops = { /* same handlers as vdev_mirror_ops */
- vdev_mirror_open,
- vdev_mirror_close,
- vdev_default_asize,
- vdev_mirror_io_start, /* map_alloc sets mm_replacing for this ops vector */
- vdev_mirror_io_done,
- vdev_mirror_state_change,
- VDEV_TYPE_SPARE, /* name of this vdev type */
- B_FALSE /* not a leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
deleted file mode 100644
index b35f4a5..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * The 'missing' vdev is a special vdev type used only during import. It
- * signifies a placeholder in the root vdev for some vdev that we know is
- * missing. We pass it down to the kernel to allow the rest of the
- * configuration to be parsed and an attempt made to open all available devices.
- * Because its GUID is always 0, we know that the guid sum will mismatch and we
- * won't be able to open the pool anyway.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/fs/zfs.h>
-#include <sys/zio.h>
-
-/* ARGSUSED */
-static int
-vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
-{
- /*
- * Really this should just fail. But then the root vdev will be in the
- * faulted state with VDEV_AUX_NO_REPLICAS, when what we really want is
- * VDEV_AUX_BAD_GUID_SUM. So we pretend to succeed, knowing that we
- * will fail the GUID sum check before ever trying to open the pool.
- *
- * 'vd' is not examined; the minimum device size and block shift are
- * reported and 0 is always returned.
- */
- *psize = SPA_MINDEVSIZE;
- *ashift = SPA_MINBLOCKSHIFT;
- return (0);
-}
-
-/* ARGSUSED */
-static void
-vdev_missing_close(vdev_t *vd)
-{ /* intentionally empty: vdev_missing_open() acquires nothing to release */
-}
-
-/* ARGSUSED */
-static void
-vdev_missing_io_start(zio_t *zio)
-{
- zio->io_error = ENOTSUP; /* every I/O to a missing vdev fails */
- zio_next_stage_async(zio); /* hand the failed zio on without doing I/O */
-}
-
-/* ARGSUSED */
-static void
-vdev_missing_io_done(zio_t *zio)
-{
- zio_next_stage(zio); /* nothing to post-process; just advance the zio */
-}
-
-vdev_ops_t vdev_missing_ops = { /* positional initializer for 'missing' vdevs */
- vdev_missing_open, /* always succeeds with minimum size and shift */
- vdev_missing_close, /* no-op */
- vdev_default_asize, /* defined outside this file */
- vdev_missing_io_start, /* fails every I/O with ENOTSUP */
- vdev_missing_io_done, /* advances the zio */
- NULL, /* slot filled by a state-change handler in the mirror ops vectors */
- VDEV_TYPE_MISSING, /* name of this vdev type */
- B_TRUE /* leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
deleted file mode 100644
index 8ef524f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/avl.h>
-
-/*
- * These tunables are for performance analysis.
- */
-/*
- * zfs_vdev_max_pending is the maximum number of i/os concurrently
- * pending to each device. zfs_vdev_min_pending is the initial number
- * of i/os pending to each device (before it starts ramping up to
- * max_pending).
- *
- * In this file, vdev_queue_io() passes zfs_vdev_min_pending and
- * vdev_queue_io_done() passes zfs_vdev_max_pending as the pending-tree
- * limit given to vdev_queue_io_to_issue().
- */
-int zfs_vdev_max_pending = 35;
-int zfs_vdev_min_pending = 4;
-
-/*
- * deadline = pri + (LBOLT >> time_shift); vdev_queue_io() computes it as
- * io_priority + (io_timestamp >> zfs_vdev_time_shift).
- */
-int zfs_vdev_time_shift = 6;
-
-/*
- * exponential I/O issue ramp-up rate: the most i/os that
- * vdev_queue_io_done() issues for each i/o that completes
- */
-int zfs_vdev_ramp_rate = 2;
-
-/*
- * i/os will be aggregated into a single large i/o up to
- * zfs_vdev_aggregation_limit bytes long.
- */
-int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
-
-/*
- * Virtual device vector for disk I/O scheduling.
- */
-/*
- * AVL comparator for the deadline tree: order by io_deadline, then by
- * io_offset, and finally by the address of the zio so that distinct
- * zios never compare equal.  Returns -1, 0 or 1.
- */
-int
-vdev_queue_deadline_compare(const void *x1, const void *x2)
-{
-	const zio_t *lhs = x1;
-	const zio_t *rhs = x2;
-
-	if (lhs->io_deadline != rhs->io_deadline)
-		return (lhs->io_deadline < rhs->io_deadline ? -1 : 1);
-
-	if (lhs->io_offset != rhs->io_offset)
-		return (lhs->io_offset < rhs->io_offset ? -1 : 1);
-
-	if (lhs != rhs)
-		return (lhs < rhs ? -1 : 1);
-
-	return (0);
-}
-
-/*
- * AVL comparator for the read, write and pending trees: order by
- * io_offset, then by the address of the zio so that distinct zios never
- * compare equal.  Returns -1, 0 or 1.
- */
-int
-vdev_queue_offset_compare(const void *x1, const void *x2)
-{
-	const zio_t *lhs = x1;
-	const zio_t *rhs = x2;
-
-	if (lhs->io_offset != rhs->io_offset)
-		return (lhs->io_offset < rhs->io_offset ? -1 : 1);
-
-	if (lhs != rhs)
-		return (lhs < rhs ? -1 : 1);
-
-	return (0);
-}
-
-/*
- * Initialize the I/O queue embedded in 'vd': its lock, the deadline
- * tree, and the three offset-ordered trees (read, write, pending).
- * The offset-ordered trees all link zios through io_offset_node.
- */
-void
-vdev_queue_init(vdev_t *vd)
-{
-	vdev_queue_t *queue = &vd->vdev_queue;
-	const size_t deadline_link = offsetof(struct zio, io_deadline_node);
-	const size_t offset_link = offsetof(struct zio, io_offset_node);
-
-	mutex_init(&queue->vq_lock, NULL, MUTEX_DEFAULT, NULL);
-
-	avl_create(&queue->vq_deadline_tree, vdev_queue_deadline_compare,
-	    sizeof (zio_t), deadline_link);
-
-	avl_create(&queue->vq_read_tree, vdev_queue_offset_compare,
-	    sizeof (zio_t), offset_link);
-
-	avl_create(&queue->vq_write_tree, vdev_queue_offset_compare,
-	    sizeof (zio_t), offset_link);
-
-	avl_create(&queue->vq_pending_tree, vdev_queue_offset_compare,
-	    sizeof (zio_t), offset_link);
-}
-
-/*
- * Tear down the I/O queue embedded in 'vd': destroy the four trees
- * created by vdev_queue_init(), then the lock.
- */
-void
-vdev_queue_fini(vdev_t *vd)
-{
-	vdev_queue_t *queue = &vd->vdev_queue;
-
-	avl_destroy(&queue->vq_deadline_tree);
-	avl_destroy(&queue->vq_read_tree);
-	avl_destroy(&queue->vq_write_tree);
-	avl_destroy(&queue->vq_pending_tree);
-
-	mutex_destroy(&queue->vq_lock);
-}
-
-/*
- * Queue 'zio': insert it into the deadline tree and into the
- * offset-ordered tree recorded in zio->io_vdev_tree.
- */
-static void
-vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
-{
-	avl_tree_t *type_tree;
-
-	avl_add(&vq->vq_deadline_tree, zio);
-
-	type_tree = zio->io_vdev_tree;
-	avl_add(type_tree, zio);
-}
-
-/*
- * Dequeue 'zio': remove it from the deadline tree and from the
- * offset-ordered tree recorded in zio->io_vdev_tree.
- */
-static void
-vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
-{
-	avl_tree_t *type_tree;
-
-	avl_remove(&vq->vq_deadline_tree, zio);
-
-	type_tree = zio->io_vdev_tree;
-	avl_remove(type_tree, zio);
-}
-
-/*
- * Completion callback for an aggregated I/O.
- *
- * Walks the delegate list in order; for reads, each delegate receives
- * its slice of the aggregate buffer.  Every delegate inherits the
- * aggregate's error and is advanced to its next stage.  The aggregate
- * buffer is freed at the end.
- */
-static void
-vdev_queue_agg_io_done(zio_t *aio)
-{
-	zio_t *delegate;
-	uint64_t consumed = 0;
-
-	/*
-	 * The list head is re-read on every pass, so a delegate is never
-	 * touched after it has been handed to zio_next_stage().
-	 */
-	for (delegate = aio->io_delegate_list; delegate != NULL;
-	    delegate = aio->io_delegate_list) {
-		if (aio->io_type == ZIO_TYPE_READ) {
-			bcopy((char *)aio->io_data + consumed,
-			    delegate->io_data, delegate->io_size);
-		}
-		consumed += delegate->io_size;
-		aio->io_delegate_list = delegate->io_delegate_next;
-		delegate->io_delegate_next = NULL;
-		delegate->io_error = aio->io_error;
-		zio_next_stage(delegate);
-	}
-	ASSERT3U(consumed, ==, aio->io_size);
-
-	zio_buf_free(aio->io_data, aio->io_size);
-}
-
-/* True when 'nio' starts exactly where 'io' ends. */
-#define	IS_ADJACENT(io, nio) \
-	((nio)->io_offset == (io)->io_offset + (io)->io_size)
-
-typedef void zio_issue_func_t(zio_t *); /* how to issue a zio picked by vdev_queue_io_to_issue(): zio_nowait or zio_next_stage */
-
-/*
- * Pick the next I/O to issue from the queue, aggregating where possible.
- *
- * The caller must hold vq->vq_lock.  Returns NULL (with *funcp NULL)
- * when the pending tree already holds pending_limit I/Os or nothing is
- * queued.  Otherwise the zio with the earliest deadline is chosen and
- * extended in both directions with queued zios from the same tree that
- * are adjacent on disk, as long as the total stays within
- * zfs_vdev_aggregation_limit.
- *
- * If anything was aggregated, a new aggregate zio is created, the
- * delegates are dequeued and bypassed, and *funcp is set to zio_nowait.
- * Otherwise the single zio is dequeued and *funcp is set to
- * zio_next_stage.  In both cases the returned zio has been added to the
- * pending tree.
- */
-static zio_t *
-vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
-    zio_issue_func_t **funcp)
-{
-	zio_t *fio, *lio, *aio, *dio;
-	avl_tree_t *tree;
-	uint64_t size;
-
-	ASSERT(MUTEX_HELD(&vq->vq_lock));
-
-	*funcp = NULL;
-
-	if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit ||
-	    avl_numnodes(&vq->vq_deadline_tree) == 0)
-		return (NULL);
-
-	/* fio and lio bracket the run of adjacent I/Os found so far. */
-	fio = lio = avl_first(&vq->vq_deadline_tree);
-
-	tree = fio->io_vdev_tree;
-	size = fio->io_size;
-
-	/* Grow the run towards lower offsets... */
-	while ((dio = AVL_PREV(tree, fio)) != NULL && IS_ADJACENT(dio, fio) &&
-	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
-		dio->io_delegate_next = fio;
-		fio = dio;
-		size += dio->io_size;
-	}
-
-	/* ...and then towards higher offsets. */
-	while ((dio = AVL_NEXT(tree, lio)) != NULL && IS_ADJACENT(lio, dio) &&
-	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
-		lio->io_delegate_next = dio;
-		lio = dio;
-		size += dio->io_size;
-	}
-
-	if (fio != lio) {
-		char *buf = zio_buf_alloc(size);
-		uint64_t offset = 0;
-		int nagg = 0;
-
-		ASSERT(size <= zfs_vdev_aggregation_limit);
-
-		aio = zio_vdev_child_io(fio, NULL, fio->io_vd,
-		    fio->io_offset, buf, size, fio->io_type,
-		    ZIO_PRIORITY_NOW, ZIO_FLAG_DONT_QUEUE |
-		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
-		    ZIO_FLAG_NOBOOKMARK,
-		    vdev_queue_agg_io_done, NULL);
-
-		aio->io_delegate_list = fio;
-
-		/*
-		 * Dequeue every delegate; for writes, gather its data into
-		 * the aggregate buffer at the matching position.
-		 */
-		for (dio = fio; dio != NULL; dio = dio->io_delegate_next) {
-			ASSERT(dio->io_type == aio->io_type);
-			ASSERT(dio->io_vdev_tree == tree);
-			if (dio->io_type == ZIO_TYPE_WRITE)
-				bcopy(dio->io_data, buf + offset, dio->io_size);
-			offset += dio->io_size;
-			vdev_queue_io_remove(vq, dio);
-			zio_vdev_io_bypass(dio);
-			nagg++;
-		}
-
-		ASSERT(offset == size);
-
-		/* Cast the 64-bit values to match the %ll conversions. */
-		dprintf("%5s T=%llu off=%8llx agg=%3d "
-		    "old=%5llx new=%5llx\n",
-		    zio_type_name[fio->io_type],
-		    (unsigned long long)fio->io_deadline,
-		    (unsigned long long)fio->io_offset, nagg,
-		    (unsigned long long)fio->io_size,
-		    (unsigned long long)size);
-
-		avl_add(&vq->vq_pending_tree, aio);
-
-		*funcp = zio_nowait;
-		return (aio);
-	}
-
-	/* Nothing to aggregate: issue the single zio as it is. */
-	ASSERT(fio->io_vdev_tree == tree);
-	vdev_queue_io_remove(vq, fio);
-
-	avl_add(&vq->vq_pending_tree, fio);
-
-	*funcp = zio_next_stage;
-
-	return (fio);
-}
-
-/*
- * Queue a read or write zio on its vdev and pick what to issue next.
- *
- * A zio that already carries ZIO_FLAG_DONT_QUEUE is returned untouched.
- * Otherwise the zio is queued with a deadline derived from its
- * timestamp and priority, and vdev_queue_io_to_issue() is consulted
- * with the zfs_vdev_min_pending limit.  An aggregate zio is issued here
- * and NULL is returned; any other result (including NULL) is returned
- * to the caller.
- */
-zio_t *
-vdev_queue_io(zio_t *zio)
-{
-	vdev_queue_t *queue = &zio->io_vd->vdev_queue;
-	zio_issue_func_t *issue;
-	zio_t *next;
-
-	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
-
-	if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
-		return (zio);
-
-	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
-
-	zio->io_vdev_tree = (zio->io_type == ZIO_TYPE_READ) ?
-	    &queue->vq_read_tree : &queue->vq_write_tree;
-
-	mutex_enter(&queue->vq_lock);
-
-	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
-	    zio->io_priority;
-
-	vdev_queue_io_add(queue, zio);
-
-	next = vdev_queue_io_to_issue(queue, zfs_vdev_min_pending, &issue);
-
-	mutex_exit(&queue->vq_lock);
-
-	/* Only an aggregate (issued via zio_nowait) is started from here. */
-	if (next != NULL && issue == zio_nowait) {
-		issue(next);
-		next = NULL;
-	}
-
-	return (next);
-}
-
-/*
- * Called when 'zio' completes: drop it from the pending tree and issue
- * up to zfs_vdev_ramp_rate further I/Os, subject to the
- * zfs_vdev_max_pending limit.  The queue lock is dropped around each
- * issue and retaken afterwards.
- */
-void
-vdev_queue_io_done(zio_t *zio)
-{
-	vdev_queue_t *queue = &zio->io_vd->vdev_queue;
-	zio_issue_func_t *issue;
-	zio_t *next;
-	int issued = 0;
-
-	mutex_enter(&queue->vq_lock);
-
-	avl_remove(&queue->vq_pending_tree, zio);
-
-	while (issued < zfs_vdev_ramp_rate) {
-		next = vdev_queue_io_to_issue(queue, zfs_vdev_max_pending,
-		    &issue);
-		if (next == NULL)
-			break;
-
-		mutex_exit(&queue->vq_lock);
-		/* A non-aggregated zio is reissued before it advances. */
-		if (issue == zio_next_stage)
-			zio_vdev_io_reissue(next);
-		issue(next);
-		mutex_enter(&queue->vq_lock);
-
-		issued++;
-	}
-
-	mutex_exit(&queue->vq_lock);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
deleted file mode 100644
index 0c86630..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/fs/zfs.h>
-#include <sys/fm/fs/zfs.h>
-
-/*
- * Virtual device vector for RAID-Z.
- *
- * This vdev supports both single and double parity. For single parity, we
- * use a simple XOR of all the data columns. For double parity, we use both
- * the simple XOR as well as a technique described in "The mathematics of
- * RAID-6" by H. Peter Anvin. This technique defines a Galois field, GF(2^8),
- * over the integers expressible in a single byte. Briefly, the operations on
- * the field are defined as follows:
- *
- * o addition (+) is represented by a bitwise XOR
- * o subtraction (-) is therefore identical to addition: A + B = A - B
- * o multiplication of A by 2 is defined by the following bitwise expression:
- * (A * 2)_7 = A_6
- * (A * 2)_6 = A_5
- * (A * 2)_5 = A_4
- * (A * 2)_4 = A_3 + A_7
- * (A * 2)_3 = A_2 + A_7
- * (A * 2)_2 = A_1 + A_7
- * (A * 2)_1 = A_0
- * (A * 2)_0 = A_7
- *
- * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)).
- *
- * Observe that any number in the field (except for 0) can be expressed as a
- * power of 2 -- a generator for the field. We store a table of the powers of
- * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can
- * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather
- * than field addition). The inverse of a field element A (A^-1) is A^254.
- *
- * The two parity columns, P and Q, over several data columns, D_0, ... D_n-1,
- * can be expressed by field operations:
- *
- * P = D_0 + D_1 + ... + D_n-2 + D_n-1
- * Q = 2^(n-1) * D_0 + 2^(n-2) * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1
- * = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1
- *
- * See the reconstruction code below for how P and Q can be used individually or
- * in concert to recover missing data columns.
- */
-
-typedef struct raidz_col {
- uint64_t rc_devidx; /* child device index for I/O */
- uint64_t rc_offset; /* device offset */
- uint64_t rc_size; /* I/O size */
- void *rc_data; /* I/O data */
- int rc_error; /* I/O error for this device */
- uint8_t rc_tried; /* Did we attempt this I/O column? */
- uint8_t rc_skipped; /* Did we skip this I/O column? */
-} raidz_col_t;
-
-typedef struct raidz_map {
- uint64_t rm_cols; /* Column count */
- uint64_t rm_bigcols; /* Number of oversized columns */
- uint64_t rm_asize; /* Actual total I/O size */
- uint64_t rm_missingdata; /* Count of missing data devices */
- uint64_t rm_missingparity; /* Count of missing parity devices */
- uint64_t rm_firstdatacol; /* First data column/parity count */
- raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
-} raidz_map_t;
-
-#define VDEV_RAIDZ_P 0
-#define VDEV_RAIDZ_Q 1
-
-#define VDEV_RAIDZ_MAXPARITY 2
-
-#define VDEV_RAIDZ_MUL_2(a) (((a) << 1) ^ (((a) & 0x80) ? 0x1d : 0))
-
-/*
- * These two tables represent powers and logs of 2 in the Galois field defined
- * above. These values were computed by repeatedly multiplying by 2 as above.
- */
-static const uint8_t vdev_raidz_pow2[256] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
- 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
- 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
- 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
- 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
- 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
- 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
- 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
- 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
- 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
- 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
- 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
- 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
- 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
- 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
- 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
- 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
- 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
- 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
- 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
- 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
- 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
- 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
- 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
- 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
- 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
- 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
- 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
- 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
- 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
- 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
-};
-static const uint8_t vdev_raidz_log2[256] = {
- 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
- 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
- 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
- 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
- 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
- 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
- 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
- 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
- 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
- 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
- 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
- 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
- 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
- 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
- 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
- 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
- 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
- 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
- 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
- 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
- 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
- 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
- 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
- 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
- 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
- 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
- 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
- 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
- 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
- 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
- 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
- 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
-};
-
-/*
- * Multiply a given number by 2 raised to the given power.
- */
-static uint8_t
-vdev_raidz_exp2(uint_t a, int exp)
-{
- if (a == 0)
- return (0);
-
- ASSERT(exp >= 0);
- ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
-
- exp += vdev_raidz_log2[a];
- if (exp > 255)
- exp -= 255;
-
- return (vdev_raidz_pow2[exp]);
-}
-
-static raidz_map_t *
-vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
- uint64_t nparity)
-{
- raidz_map_t *rm;
- uint64_t b = zio->io_offset >> unit_shift;
- uint64_t s = zio->io_size >> unit_shift;
- uint64_t f = b % dcols;
- uint64_t o = (b / dcols) << unit_shift;
- uint64_t q, r, c, bc, col, acols, coff, devidx;
-
- q = s / (dcols - nparity);
- r = s - q * (dcols - nparity);
- bc = (r == 0 ? 0 : r + nparity);
-
- acols = (q == 0 ? bc : dcols);
-
- rm = kmem_alloc(offsetof(raidz_map_t, rm_col[acols]), KM_SLEEP);
-
- rm->rm_cols = acols;
- rm->rm_bigcols = bc;
- rm->rm_asize = 0;
- rm->rm_missingdata = 0;
- rm->rm_missingparity = 0;
- rm->rm_firstdatacol = nparity;
-
- for (c = 0; c < acols; c++) {
- col = f + c;
- coff = o;
- if (col >= dcols) {
- col -= dcols;
- coff += 1ULL << unit_shift;
- }
- rm->rm_col[c].rc_devidx = col;
- rm->rm_col[c].rc_offset = coff;
- rm->rm_col[c].rc_size = (q + (c < bc)) << unit_shift;
- rm->rm_col[c].rc_data = NULL;
- rm->rm_col[c].rc_error = 0;
- rm->rm_col[c].rc_tried = 0;
- rm->rm_col[c].rc_skipped = 0;
- rm->rm_asize += rm->rm_col[c].rc_size;
- }
-
- rm->rm_asize = roundup(rm->rm_asize, (nparity + 1) << unit_shift);
-
- for (c = 0; c < rm->rm_firstdatacol; c++)
- rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size);
-
- rm->rm_col[c].rc_data = zio->io_data;
-
- for (c = c + 1; c < acols; c++)
- rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data +
- rm->rm_col[c - 1].rc_size;
-
- /*
- * If all data stored spans all columns, there's a danger that parity
- * will always be on the same device and, since parity isn't read
- * during normal operation, that that device's I/O bandwidth won't be
- * used effectively. We therefore switch the parity every 1MB.
- *
- * ... at least that was, ostensibly, the theory. As a practical
- * matter unless we juggle the parity between all devices evenly, we
- * won't see any benefit. Further, occasional writes that aren't a
- * multiple of the LCM of the number of children and the minimum
- * stripe width are sufficient to avoid pessimal behavior.
- * Unfortunately, this decision created an implicit on-disk format
- * requirement that we need to support for all eternity, but only
- * for single-parity RAID-Z.
- */
- ASSERT(rm->rm_cols >= 2);
- ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
-
- if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
- devidx = rm->rm_col[0].rc_devidx;
- o = rm->rm_col[0].rc_offset;
- rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
- rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset;
- rm->rm_col[1].rc_devidx = devidx;
- rm->rm_col[1].rc_offset = o;
- }
-
- zio->io_vsd = rm;
- return (rm);
-}
-
-static void
-vdev_raidz_map_free(zio_t *zio)
-{
- raidz_map_t *rm = zio->io_vsd;
- int c;
-
- for (c = 0; c < rm->rm_firstdatacol; c++)
- zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size);
-
- kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_cols]));
- zio->io_vsd = NULL;
-}
-
-static void
-vdev_raidz_generate_parity_p(raidz_map_t *rm)
-{
- uint64_t *p, *src, pcount, ccount, i;
- int c;
-
- pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
-
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
-
- if (c == rm->rm_firstdatacol) {
- ASSERT(ccount == pcount);
- for (i = 0; i < ccount; i++, p++, src++) {
- *p = *src;
- }
- } else {
- ASSERT(ccount <= pcount);
- for (i = 0; i < ccount; i++, p++, src++) {
- *p ^= *src;
- }
- }
- }
-}
-
-static void
-vdev_raidz_generate_parity_pq(raidz_map_t *rm)
-{
- uint64_t *q, *p, *src, pcount, ccount, mask, i;
- int c;
-
- pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
- ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
- rm->rm_col[VDEV_RAIDZ_Q].rc_size);
-
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
-
- if (c == rm->rm_firstdatacol) {
- ASSERT(ccount == pcount || ccount == 0);
- for (i = 0; i < ccount; i++, p++, q++, src++) {
- *q = *src;
- *p = *src;
- }
- for (; i < pcount; i++, p++, q++, src++) {
- *q = 0;
- *p = 0;
- }
- } else {
- ASSERT(ccount <= pcount);
-
- /*
- * Rather than multiplying each byte individually (as
- * described above), we are able to handle 8 at once
- * by generating a mask based on the high bit in each
- * byte and using that to conditionally XOR in 0x1d.
- */
- for (i = 0; i < ccount; i++, p++, q++, src++) {
- mask = *q & 0x8080808080808080ULL;
- mask = (mask << 1) - (mask >> 7);
- *q = ((*q << 1) & 0xfefefefefefefefeULL) ^
- (mask & 0x1d1d1d1d1d1d1d1dULL);
- *q ^= *src;
- *p ^= *src;
- }
-
- /*
- * Treat short columns as though they are full of 0s.
- */
- for (; i < pcount; i++, q++) {
- mask = *q & 0x8080808080808080ULL;
- mask = (mask << 1) - (mask >> 7);
- *q = ((*q << 1) & 0xfefefefefefefefeULL) ^
- (mask & 0x1d1d1d1d1d1d1d1dULL);
- }
- }
- }
-}
-
-static void
-vdev_raidz_reconstruct_p(raidz_map_t *rm, int x)
-{
- uint64_t *dst, *src, xcount, ccount, count, i;
- int c;
-
- xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
- ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]));
- ASSERT(xcount > 0);
-
- src = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- dst = rm->rm_col[x].rc_data;
- for (i = 0; i < xcount; i++, dst++, src++) {
- *dst = *src;
- }
-
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- dst = rm->rm_col[x].rc_data;
-
- if (c == x)
- continue;
-
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
- count = MIN(ccount, xcount);
-
- for (i = 0; i < count; i++, dst++, src++) {
- *dst ^= *src;
- }
- }
-}
-
-static void
-vdev_raidz_reconstruct_q(raidz_map_t *rm, int x)
-{
- uint64_t *dst, *src, xcount, ccount, count, mask, i;
- uint8_t *b;
- int c, j, exp;
-
- xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
- ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0]));
-
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- dst = rm->rm_col[x].rc_data;
-
- if (c == x)
- ccount = 0;
- else
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
-
- count = MIN(ccount, xcount);
-
- if (c == rm->rm_firstdatacol) {
- for (i = 0; i < count; i++, dst++, src++) {
- *dst = *src;
- }
- for (; i < xcount; i++, dst++) {
- *dst = 0;
- }
-
- } else {
- /*
- * For an explanation of this, see the comment in
- * vdev_raidz_generate_parity_pq() above.
- */
- for (i = 0; i < count; i++, dst++, src++) {
- mask = *dst & 0x8080808080808080ULL;
- mask = (mask << 1) - (mask >> 7);
- *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
- (mask & 0x1d1d1d1d1d1d1d1dULL);
- *dst ^= *src;
- }
-
- for (; i < xcount; i++, dst++) {
- mask = *dst & 0x8080808080808080ULL;
- mask = (mask << 1) - (mask >> 7);
- *dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
- (mask & 0x1d1d1d1d1d1d1d1dULL);
- }
- }
- }
-
- src = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- dst = rm->rm_col[x].rc_data;
- exp = 255 - (rm->rm_cols - 1 - x);
-
- for (i = 0; i < xcount; i++, dst++, src++) {
- *dst ^= *src;
- for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
- *b = vdev_raidz_exp2(*b, exp);
- }
- }
-}
-
-static void
-vdev_raidz_reconstruct_pq(raidz_map_t *rm, int x, int y)
-{
- uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp;
- void *pdata, *qdata;
- uint64_t xsize, ysize, i;
-
- ASSERT(x < y);
- ASSERT(x >= rm->rm_firstdatacol);
- ASSERT(y < rm->rm_cols);
-
- ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size);
-
- /*
- * Move the parity data aside -- we're going to compute parity as
- * though columns x and y were full of zeros -- Pxy and Qxy. We want to
- * reuse the parity generation mechanism without trashing the actual
- * parity so we make those columns appear to be full of zeros by
- * setting their lengths to zero.
- */
- pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- xsize = rm->rm_col[x].rc_size;
- ysize = rm->rm_col[y].rc_size;
-
- rm->rm_col[VDEV_RAIDZ_P].rc_data =
- zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size);
- rm->rm_col[VDEV_RAIDZ_Q].rc_data =
- zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size);
- rm->rm_col[x].rc_size = 0;
- rm->rm_col[y].rc_size = 0;
-
- vdev_raidz_generate_parity_pq(rm);
-
- rm->rm_col[x].rc_size = xsize;
- rm->rm_col[y].rc_size = ysize;
-
- p = pdata;
- q = qdata;
- pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- xd = rm->rm_col[x].rc_data;
- yd = rm->rm_col[y].rc_data;
-
- /*
- * We now have:
- * Pxy = P + D_x + D_y
- * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y
- *
- * We can then solve for D_x:
- * D_x = A * (P + Pxy) + B * (Q + Qxy)
- * where
- * A = 2^(x - y) * (2^(x - y) + 1)^-1
- * B = 2^-(ndevs - 1 - x) * (2^(x - y) + 1)^-1
- *
- * With D_x in hand, we can easily solve for D_y:
- * D_y = P + Pxy + D_x
- */
-
- a = vdev_raidz_pow2[255 + x - y];
- b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)];
- tmp = 255 - vdev_raidz_log2[a ^ 1];
-
- aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)];
- bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)];
-
- for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) {
- *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^
- vdev_raidz_exp2(*q ^ *qxy, bexp);
-
- if (i < ysize)
- *yd = *p ^ *pxy ^ *xd;
- }
-
- zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data,
- rm->rm_col[VDEV_RAIDZ_P].rc_size);
- zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data,
- rm->rm_col[VDEV_RAIDZ_Q].rc_size);
-
- /*
- * Restore the saved parity data.
- */
- rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata;
- rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata;
-}
-
-
-static int
-vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
-{
- vdev_t *cvd;
- uint64_t nparity = vd->vdev_nparity;
- int c, error;
- int lasterror = 0;
- int numerrors = 0;
-
- ASSERT(nparity > 0);
-
- if (nparity > VDEV_RAIDZ_MAXPARITY ||
- vd->vdev_children < nparity + 1) {
- vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- return (EINVAL);
- }
-
- for (c = 0; c < vd->vdev_children; c++) {
- cvd = vd->vdev_child[c];
-
- if ((error = vdev_open(cvd)) != 0) {
- lasterror = error;
- numerrors++;
- continue;
- }
-
- *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
- *ashift = MAX(*ashift, cvd->vdev_ashift);
- }
-
- *asize *= vd->vdev_children;
-
- if (numerrors > nparity) {
- vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
- return (lasterror);
- }
-
- return (0);
-}
-
-static void
-vdev_raidz_close(vdev_t *vd)
-{
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_close(vd->vdev_child[c]);
-}
-
-static uint64_t
-vdev_raidz_asize(vdev_t *vd, uint64_t psize)
-{
- uint64_t asize;
- uint64_t ashift = vd->vdev_top->vdev_ashift;
- uint64_t cols = vd->vdev_children;
- uint64_t nparity = vd->vdev_nparity;
-
- asize = ((psize - 1) >> ashift) + 1;
- asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
- asize = roundup(asize, nparity + 1) << ashift;
-
- return (asize);
-}
-
-static void
-vdev_raidz_child_done(zio_t *zio)
-{
- raidz_col_t *rc = zio->io_private;
-
- rc->rc_error = zio->io_error;
- rc->rc_tried = 1;
- rc->rc_skipped = 0;
-}
-
-static void
-vdev_raidz_repair_done(zio_t *zio)
-{
- ASSERT(zio->io_private == zio->io_parent);
- vdev_raidz_map_free(zio->io_private);
-}
-
-static void
-vdev_raidz_io_start(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_t *tvd = vd->vdev_top;
- vdev_t *cvd;
- blkptr_t *bp = zio->io_bp;
- raidz_map_t *rm;
- raidz_col_t *rc;
- int c;
-
- rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
- vd->vdev_nparity);
-
- ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size));
-
- if (zio->io_type == ZIO_TYPE_WRITE) {
- /*
- * Generate RAID parity in the first virtual columns.
- */
- if (rm->rm_firstdatacol == 1)
- vdev_raidz_generate_parity_p(rm);
- else
- vdev_raidz_generate_parity_pq(rm);
-
- for (c = 0; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_devidx];
- zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
- zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
- vdev_raidz_child_done, rc));
- }
- zio_wait_children_done(zio);
- return;
- }
-
- ASSERT(zio->io_type == ZIO_TYPE_READ);
-
- /*
- * Iterate over the columns in reverse order so that we hit the parity
- * last -- any errors along the way will force us to read the parity
- * data.
- */
- for (c = rm->rm_cols - 1; c >= 0; c--) {
- rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_devidx];
- if (vdev_is_dead(cvd)) {
- if (c >= rm->rm_firstdatacol)
- rm->rm_missingdata++;
- else
- rm->rm_missingparity++;
- rc->rc_error = ENXIO;
- rc->rc_tried = 1; /* don't even try */
- rc->rc_skipped = 1;
- continue;
- }
- if (vdev_dtl_contains(&cvd->vdev_dtl_map, bp->blk_birth, 1)) {
- if (c >= rm->rm_firstdatacol)
- rm->rm_missingdata++;
- else
- rm->rm_missingparity++;
- rc->rc_error = ESTALE;
- rc->rc_skipped = 1;
- continue;
- }
- if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
- (zio->io_flags & ZIO_FLAG_SCRUB)) {
- zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
- zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
- vdev_raidz_child_done, rc));
- }
- }
-
- zio_wait_children_done(zio);
-}
-
-/*
- * Report a checksum error for a child of a RAID-Z device.
- */
-static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc)
-{
- vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
- dprintf_bp(zio->io_bp, "imputed checksum error on %s: ",
- vdev_description(vd));
-
- if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_checksum_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
-
- if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE))
- zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM,
- zio->io_spa, vd, zio, rc->rc_offset, rc->rc_size);
-}
-
-/*
- * Generate the parity from the data columns. If we tried and were able to
- * read the parity without error, verify that the generated parity matches the
- * data we read. If it doesn't, we fire off a checksum error. Return the
- * number of such failures.
- */
-static int
-raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
-{
- void *orig[VDEV_RAIDZ_MAXPARITY];
- int c, ret = 0;
- raidz_col_t *rc;
-
- for (c = 0; c < rm->rm_firstdatacol; c++) {
- rc = &rm->rm_col[c];
- if (!rc->rc_tried || rc->rc_error != 0)
- continue;
- orig[c] = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig[c], rc->rc_size);
- }
-
- if (rm->rm_firstdatacol == 1)
- vdev_raidz_generate_parity_p(rm);
- else
- vdev_raidz_generate_parity_pq(rm);
-
- for (c = 0; c < rm->rm_firstdatacol; c++) {
- rc = &rm->rm_col[c];
- if (!rc->rc_tried || rc->rc_error != 0)
- continue;
- if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) {
- raidz_checksum_error(zio, rc);
- rc->rc_error = ECKSUM;
- ret++;
- }
- zio_buf_free(orig[c], rc->rc_size);
- }
-
- return (ret);
-}
-
-static uint64_t raidz_corrected_p;
-static uint64_t raidz_corrected_q;
-static uint64_t raidz_corrected_pq;
-
-static void
-vdev_raidz_io_done(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_t *cvd;
- raidz_map_t *rm = zio->io_vsd;
- raidz_col_t *rc, *rc1;
- int unexpected_errors = 0;
- int parity_errors = 0;
- int parity_untried = 0;
- int data_errors = 0;
- int n, c, c1;
-
- ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */
-
- zio->io_error = 0;
- zio->io_numerrors = 0;
-
- ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol);
- ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol);
-
- for (c = 0; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
-
- /*
- * We preserve any EIOs because those may be worth retrying;
- * whereas ECKSUM and ENXIO are more likely to be persistent.
- */
- if (rc->rc_error) {
- if (zio->io_error != EIO)
- zio->io_error = rc->rc_error;
-
- if (c < rm->rm_firstdatacol)
- parity_errors++;
- else
- data_errors++;
-
- if (!rc->rc_skipped)
- unexpected_errors++;
-
- zio->io_numerrors++;
- } else if (c < rm->rm_firstdatacol && !rc->rc_tried) {
- parity_untried++;
- }
- }
-
- if (zio->io_type == ZIO_TYPE_WRITE) {
- /*
- * If this is not a failfast write, and we were able to
- * write enough columns to reconstruct the data, good enough.
- */
- /* XXPOLICY */
- if (zio->io_numerrors <= rm->rm_firstdatacol &&
- !(zio->io_flags & ZIO_FLAG_FAILFAST))
- zio->io_error = 0;
-
- vdev_raidz_map_free(zio);
- zio_next_stage(zio);
- return;
- }
-
- ASSERT(zio->io_type == ZIO_TYPE_READ);
- /*
- * There are three potential phases for a read:
- * 1. produce valid data from the columns read
- * 2. read all disks and try again
- * 3. perform combinatorial reconstruction
- *
- * Each phase is progressively both more expensive and less likely to
- * occur. If we encounter more errors than we can repair or all phases
- * fail, we have no choice but to return an error.
- */
-
- /*
- * If the number of errors we saw was correctable -- less than or equal
- * to the number of parity disks read -- attempt to produce data that
- * has a valid checksum. Naturally, this case applies in the absence of
- * any errors.
- */
- if (zio->io_numerrors <= rm->rm_firstdatacol - parity_untried) {
- switch (data_errors) {
- case 0:
- if (zio_checksum_error(zio) == 0) {
- zio->io_error = 0;
-
- /*
- * If we read parity information (unnecessarily
- * as it happens since no reconstruction was
- * needed) regenerate and verify the parity.
- * We also regenerate parity when resilvering
- * so we can write it out to the failed device
- * later.
- */
- if (parity_errors + parity_untried <
- rm->rm_firstdatacol ||
- (zio->io_flags & ZIO_FLAG_RESILVER)) {
- n = raidz_parity_verify(zio, rm);
- unexpected_errors += n;
- ASSERT(parity_errors + n <=
- rm->rm_firstdatacol);
- }
- goto done;
- }
- break;
-
- case 1:
- /*
- * We either attempt to read all the parity columns or
- * none of them. If we didn't try to read parity, we
- * wouldn't be here in the correctable case. There must
- * also have been fewer parity errors than parity
- * columns or, again, we wouldn't be in this code path.
- */
- ASSERT(parity_untried == 0);
- ASSERT(parity_errors < rm->rm_firstdatacol);
-
- /*
- * Find the column that reported the error.
- */
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- if (rc->rc_error != 0)
- break;
- }
- ASSERT(c != rm->rm_cols);
- ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
- rc->rc_error == ESTALE);
-
- if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
- vdev_raidz_reconstruct_p(rm, c);
- } else {
- ASSERT(rm->rm_firstdatacol > 1);
- vdev_raidz_reconstruct_q(rm, c);
- }
-
- if (zio_checksum_error(zio) == 0) {
- zio->io_error = 0;
- if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0)
- atomic_inc_64(&raidz_corrected_p);
- else
- atomic_inc_64(&raidz_corrected_q);
-
- /*
- * If there's more than one parity disk that
- * was successfully read, confirm that the
- * other parity disk produced the correct data.
- * This routine is suboptimal in that it
- * regenerates both the parity we wish to test
- * as well as the parity we just used to
- * perform the reconstruction, but this should
- * be a relatively uncommon case, and can be
- * optimized if it becomes a problem.
- * We also regenerate parity when resilvering
- * so we can write it out to the failed device
- * later.
- */
- if (parity_errors < rm->rm_firstdatacol - 1 ||
- (zio->io_flags & ZIO_FLAG_RESILVER)) {
- n = raidz_parity_verify(zio, rm);
- unexpected_errors += n;
- ASSERT(parity_errors + n <=
- rm->rm_firstdatacol);
- }
-
- goto done;
- }
- break;
-
- case 2:
- /*
- * Two data column errors require double parity.
- */
- ASSERT(rm->rm_firstdatacol == 2);
-
- /*
- * Find the two columns that reported errors.
- */
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- if (rc->rc_error != 0)
- break;
- }
- ASSERT(c != rm->rm_cols);
- ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
- rc->rc_error == ESTALE);
-
- for (c1 = c++; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- if (rc->rc_error != 0)
- break;
- }
- ASSERT(c != rm->rm_cols);
- ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
- rc->rc_error == ESTALE);
-
- vdev_raidz_reconstruct_pq(rm, c1, c);
-
- if (zio_checksum_error(zio) == 0) {
- zio->io_error = 0;
- atomic_inc_64(&raidz_corrected_pq);
-
- goto done;
- }
- break;
-
- default:
- ASSERT(rm->rm_firstdatacol <= 2);
- ASSERT(0);
- }
- }
-
- /*
- * This isn't a typical situation -- either we got a read error or
- * a child silently returned bad data. Read every block so we can
- * try again with as much data and parity as we can track down. If
- * we've already been through once before, all children will be marked
- * as tried so we'll proceed to combinatorial reconstruction.
- */
- unexpected_errors = 1;
- rm->rm_missingdata = 0;
- rm->rm_missingparity = 0;
-
- for (c = 0; c < rm->rm_cols; c++) {
- if (rm->rm_col[c].rc_tried)
- continue;
-
- zio->io_error = 0;
- zio_vdev_io_redone(zio);
- do {
- rc = &rm->rm_col[c];
- if (rc->rc_tried)
- continue;
- zio_nowait(zio_vdev_child_io(zio, NULL,
- vd->vdev_child[rc->rc_devidx],
- rc->rc_offset, rc->rc_data, rc->rc_size,
- zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
- vdev_raidz_child_done, rc));
- } while (++c < rm->rm_cols);
- dprintf("rereading\n");
- zio_wait_children_done(zio);
- return;
- }
-
- /*
- * At this point we've attempted to reconstruct the data given the
- * errors we detected, and we've attempted to read all columns. There
- * must, therefore, be one or more additional problems -- silent errors
- * resulting in invalid data rather than explicit I/O errors resulting
- * in absent data. Before we attempt combinatorial reconstruction make
- * sure we have a chance of coming up with the right answer.
- */
- if (zio->io_numerrors >= rm->rm_firstdatacol) {
- ASSERT(zio->io_error != 0);
- goto done;
- }
-
- if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
- /*
- * Attempt to reconstruct the data from parity P.
- */
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- void *orig;
- rc = &rm->rm_col[c];
-
- orig = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig, rc->rc_size);
- vdev_raidz_reconstruct_p(rm, c);
-
- if (zio_checksum_error(zio) == 0) {
- zio_buf_free(orig, rc->rc_size);
- zio->io_error = 0;
- atomic_inc_64(&raidz_corrected_p);
-
- /*
- * If this child didn't know that it returned
- * bad data, inform it.
- */
- if (rc->rc_tried && rc->rc_error == 0)
- raidz_checksum_error(zio, rc);
- rc->rc_error = ECKSUM;
- goto done;
- }
-
- bcopy(orig, rc->rc_data, rc->rc_size);
- zio_buf_free(orig, rc->rc_size);
- }
- }
-
- if (rm->rm_firstdatacol > 1 && rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
- /*
- * Attempt to reconstruct the data from parity Q.
- */
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- void *orig;
- rc = &rm->rm_col[c];
-
- orig = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig, rc->rc_size);
- vdev_raidz_reconstruct_q(rm, c);
-
- if (zio_checksum_error(zio) == 0) {
- zio_buf_free(orig, rc->rc_size);
- zio->io_error = 0;
- atomic_inc_64(&raidz_corrected_q);
-
- /*
- * If this child didn't know that it returned
- * bad data, inform it.
- */
- if (rc->rc_tried && rc->rc_error == 0)
- raidz_checksum_error(zio, rc);
- rc->rc_error = ECKSUM;
- goto done;
- }
-
- bcopy(orig, rc->rc_data, rc->rc_size);
- zio_buf_free(orig, rc->rc_size);
- }
- }
-
- if (rm->rm_firstdatacol > 1 &&
- rm->rm_col[VDEV_RAIDZ_P].rc_error == 0 &&
- rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
- /*
- * Attempt to reconstruct the data from both P and Q.
- */
- for (c = rm->rm_firstdatacol; c < rm->rm_cols - 1; c++) {
- void *orig, *orig1;
- rc = &rm->rm_col[c];
-
- orig = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig, rc->rc_size);
-
- for (c1 = c + 1; c1 < rm->rm_cols; c1++) {
- rc1 = &rm->rm_col[c1];
-
- orig1 = zio_buf_alloc(rc1->rc_size);
- bcopy(rc1->rc_data, orig1, rc1->rc_size);
-
- vdev_raidz_reconstruct_pq(rm, c, c1);
-
- if (zio_checksum_error(zio) == 0) {
- zio_buf_free(orig, rc->rc_size);
- zio_buf_free(orig1, rc1->rc_size);
- zio->io_error = 0;
- atomic_inc_64(&raidz_corrected_pq);
-
- /*
- * If these children didn't know they
- * returned bad data, inform them.
- */
- if (rc->rc_tried && rc->rc_error == 0)
- raidz_checksum_error(zio, rc);
- if (rc1->rc_tried && rc1->rc_error == 0)
- raidz_checksum_error(zio, rc1);
-
- rc->rc_error = ECKSUM;
- rc1->rc_error = ECKSUM;
-
- goto done;
- }
-
- bcopy(orig1, rc1->rc_data, rc1->rc_size);
- zio_buf_free(orig1, rc1->rc_size);
- }
-
- bcopy(orig, rc->rc_data, rc->rc_size);
- zio_buf_free(orig, rc->rc_size);
- }
- }
-
- /*
- * All combinations failed to checksum. Generate checksum ereports for
- * all children.
- */
- zio->io_error = ECKSUM;
- if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- for (c = 0; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM,
- zio->io_spa, vd->vdev_child[rc->rc_devidx], zio,
- rc->rc_offset, rc->rc_size);
- }
- }
-
-done:
- zio_checksum_verified(zio);
-
- if (zio->io_error == 0 && (spa_mode & FWRITE) &&
- (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) {
- zio_t *rio;
-
- /*
- * Use the good data we have in hand to repair damaged children.
- *
- * We issue all repair I/Os as children of 'rio' to arrange
- * that vdev_raidz_map_free(zio) will be invoked after all
- * repairs complete, but before we advance to the next stage.
- */
- rio = zio_null(zio, zio->io_spa,
- vdev_raidz_repair_done, zio, ZIO_FLAG_CANFAIL);
-
- for (c = 0; c < rm->rm_cols; c++) {
- rc = &rm->rm_col[c];
- cvd = vd->vdev_child[rc->rc_devidx];
-
- if (rc->rc_error == 0)
- continue;
-
- dprintf("%s resilvered %s @ 0x%llx error %d\n",
- vdev_description(vd),
- vdev_description(cvd),
- zio->io_offset, rc->rc_error);
-
- zio_nowait(zio_vdev_child_io(rio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
- ZIO_TYPE_WRITE, zio->io_priority,
- ZIO_FLAG_IO_REPAIR | ZIO_FLAG_DONT_PROPAGATE |
- ZIO_FLAG_CANFAIL, NULL, NULL));
- }
-
- zio_nowait(rio);
- zio_wait_children_done(zio);
- return;
- }
-
- vdev_raidz_map_free(zio);
- zio_next_stage(zio);
-}
-
-static void
-vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
-{
- if (faulted > vd->vdev_nparity)
- vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_NO_REPLICAS);
- else if (degraded + faulted != 0)
- vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
- else
- vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
-}
-
-vdev_ops_t vdev_raidz_ops = {
- vdev_raidz_open,
- vdev_raidz_close,
- vdev_raidz_asize,
- vdev_raidz_io_start,
- vdev_raidz_io_done,
- vdev_raidz_state_change,
- VDEV_TYPE_RAIDZ, /* name of this vdev type */
- B_FALSE /* not a leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c b/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
deleted file mode 100644
index 0e8752c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-#include <sys/fs/zfs.h>
-
-/*
- * Virtual device vector for the pool's root vdev.
- */
-
-/*
- * We should be able to tolerate one failure with absolutely no damage
- * to our metadata. Two failures will take out space maps, a bunch of
- * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy
- * place to live. When we get smarter, we can liberalize this policy.
- * e.g. If we haven't lost two consecutive top-level vdevs, then we are
- * probably fine. Adding bean counters during alloc/free can make this
- * future guesswork more accurate.
- */
-/*
- * Returns non-zero when 'numerrors' exceeds what the root vdev tolerates.
- * As written, any error at all is too many; 'vd' is not consulted.
- */
-/*ARGSUSED*/
-static int
-too_many_errors(vdev_t *vd, int numerrors)
-{
-	if (numerrors <= 0)
-		return (0);
-
-	return (1);
-}
-
-/*
- * Open the root vdev by opening each of its children.
- *
- * Returns EINVAL (with vs_aux set to VDEV_AUX_BAD_LABEL) if there are no
- * children.  If too_many_errors() rejects the number of children that
- * failed to open, vs_aux is set to VDEV_AUX_NO_REPLICAS and the error
- * from the last failing child is returned.  On success *asize and
- * *ashift are both set to 0 and 0 is returned.
- */
-static int
-vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
-{
-	int failed = 0;
-	int last_err = 0;
-	int i;
-
-	if (vd->vdev_children == 0) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
-		return (EINVAL);
-	}
-
-	/* Try every child, even after one has failed. */
-	for (i = 0; i < vd->vdev_children; i++) {
-		int rc = vdev_open(vd->vdev_child[i]);
-
-		if (rc == 0)
-			continue;
-
-		last_err = rc;
-		failed++;
-	}
-
-	if (too_many_errors(vd, failed)) {
-		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
-		return (last_err);
-	}
-
-	*asize = 0;
-	*ashift = 0;
-
-	return (0);
-}
-
-/*
- * Close the root vdev by closing each of its children in index order.
- */
-static void
-vdev_root_close(vdev_t *vd)
-{
-	int i = 0;
-
-	while (i < vd->vdev_children) {
-		vdev_close(vd->vdev_child[i]);
-		i++;
-	}
-}
-
-/*
- * Set the root vdev's state from the 'faulted' and 'degraded' values
- * passed in: unopenable if too_many_errors() rejects 'faulted',
- * degraded if 'degraded' is non-zero, healthy otherwise.
- */
-static void
-vdev_root_state_change(vdev_t *vd, int faulted, int degraded)
-{
-	if (too_many_errors(vd, faulted)) {
-		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
-		    VDEV_AUX_NO_REPLICAS);
-		return;
-	}
-
-	if (degraded != 0) {
-		vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED,
-		    VDEV_AUX_NONE);
-		return;
-	}
-
-	vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
-}
-
-/*
- * Operations vector for the pool's root vdev.
- *
- * The initializer is positional, so the entries must stay in the same
- * order as the members of vdev_ops_t.  The two I/O entry points are
- * left NULL.
- */
-vdev_ops_t vdev_root_ops = {
- vdev_root_open,
- vdev_root_close,
- vdev_default_asize,
- NULL, /* io_start - not applicable to the root */
- NULL, /* io_done - not applicable to the root */
- vdev_root_state_change,
- VDEV_TYPE_ROOT, /* name of this vdev type */
- B_FALSE /* not a leaf vdev */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zap.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zap.c
deleted file mode 100644
index 4246ec0..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zap.c
+++ /dev/null
@@ -1,1071 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-
-/*
- * This file contains the top half of the zfs directory structure
- * implementation. The bottom half is in zap_leaf.c.
- *
- * The zdir is an extendable hash data structure. There is a table of
- * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
- * each a constant size and hold a variable number of directory entries.
- * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
- *
- * The pointer table holds a power-of-2 number of pointers
- * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to
- * by the pointer at index i in the table holds entries whose hash value
- * has a zd_prefix_len-bit prefix equal to i.
- */
-
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/zfs_context.h>
-#include <sys/zap.h>
-#include <sys/refcount.h>
-#include <sys/zap_impl.h>
-#include <sys/zap_leaf.h>
-#include <sys/zfs_znode.h>
-
-/* log2 of a block size: 1 << 14 bytes */
-int fzap_default_block_shift = 14; /* 16k blocksize */
-
-/* Forward declarations: both functions are used before they are defined. */
-static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
-static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
-
-
-/*
- * Byteswap one fat-zap block in place.
- *
- * The first 64-bit word of the block is its type.  A leaf (recognized in
- * either byte order) is handed to zap_leaf_byteswap(); every other block
- * is swapped as a flat array of 64-bit words.
- */
-void
-fzap_byteswap(void *vbuf, size_t size)
-{
-	uint64_t type = *(uint64_t *)vbuf;
-
-	if (type != ZBT_LEAF && type != BSWAP_64(ZBT_LEAF)) {
-		/* it's a ptrtbl block */
-		byteswap_uint64_array(vbuf, size);
-		return;
-	}
-
-	zap_leaf_byteswap(vbuf, size);
-}
-
-/*
- * Switch a zap to the fat-zap layout: clear zap_ismicro, zero and
- * re-initialize the header block, point every embedded pointer-table
- * entry at block 1, and run zap_leaf_init() on block 1.
- *
- * The caller must hold zap_rwlock as writer.
- */
-void
-fzap_upgrade(zap_t *zap, dmu_tx_t *tx)
-{
- dmu_buf_t *db;
- zap_leaf_t *l;
- int i;
- zap_phys_t *zp;
-
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- zap->zap_ismicro = FALSE;
-
- (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
- &zap->zap_f.zap_phys, zap_evict);
-
- mutex_init(&zap->zap_f.zap_num_entries_mtx, NULL, MUTEX_DEFAULT, 0);
- /* block shift = log2 of the header buffer's size */
- zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;
-
- zp = zap->zap_f.zap_phys;
- /*
- * explicitly zero it since it might be coming from an
- * initialized microzap
- */
- bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
- zp->zap_block_type = ZBT_HEADER;
- zp->zap_magic = ZAP_MAGIC;
-
- zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
-
- zp->zap_freeblk = 2; /* block 1 will be the first leaf */
- zp->zap_num_leafs = 1;
- zp->zap_num_entries = 0;
- zp->zap_salt = zap->zap_salt;
-
- /* block 1 will be the first leaf */
- for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
- ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;
-
- /*
- * set up block 1 - the first leaf
- */
- VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
- 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db));
- dmu_buf_will_dirty(db, tx);
-
- /*
- * A temporary in-core zap_leaf_t is built only so zap_leaf_init()
- * can be run on the new block; it is freed right afterwards.
- */
- l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
- l->l_dbuf = db;
- l->l_phys = db->db_data;
-
- zap_leaf_init(l);
-
- kmem_free(l, sizeof (zap_leaf_t));
- dmu_buf_rele(db, FTAG);
-}
-
-/*
- * Try to end up holding zap_rwlock as writer without blocking.
- *
- * Returns 1 if the lock is (now) write-held, 0 if rw_tryupgrade() failed.
- * The zap's header buffer is dirtied in 'tx' only when this call performed
- * the upgrade.
- */
-static int
-zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
-{
-	if (!RW_WRITE_HELD(&zap->zap_rwlock)) {
-		if (!rw_tryupgrade(&zap->zap_rwlock))
-			return (0);
-		dmu_buf_will_dirty(zap->zap_dbuf, tx);
-	}
-
-	return (1);
-}
-
-/*
- * Generic routines for dealing with the pointer & cookie tables.
- */
-
-/*
- * Advance the doubling of an on-disk table by one block.
- *
- * The first call of a grow cycle (zt_nextblk == 0) allocates a new region
- * twice the size of the current table and records it in zt_nextblk.  Each
- * call then copies old block number zt_blks_copied into two new blocks
- * via transfer_func (first half of the entries, then second half) and
- * increments zt_blks_copied.  Once every old block has been copied, the
- * old region is freed and the table descriptor is switched to the new one.
- *
- * The caller must hold zap_rwlock as writer.  Returns 0, or the error from
- * holding the old block.
- */
-static int
-zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
-    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
-    dmu_tx_t *tx)
-{
-	uint64_t b, newblk;
-	dmu_buf_t *db_old, *db_new;
-	int err;
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	int hepb = 1<<(bs-4);
-	/* hepb = half the number of entries in a block */
-
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	ASSERT(tbl->zt_blk != 0);
-	ASSERT(tbl->zt_numblks > 0);
-
-	if (tbl->zt_nextblk != 0) {
-		/* a grow is already in progress; keep filling its region */
-		newblk = tbl->zt_nextblk;
-	} else {
-		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
-		tbl->zt_nextblk = newblk;
-		ASSERT3U(tbl->zt_blks_copied, ==, 0);
-		dmu_prefetch(zap->zap_objset, zap->zap_object,
-		    tbl->zt_blk << bs, tbl->zt_numblks << bs);
-	}
-
-	/*
-	 * Copy the ptrtbl from the old to new location.
-	 */
-
-	b = tbl->zt_blks_copied;
-	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (tbl->zt_blk + b) << bs, FTAG, &db_old);
-	if (err)
-		return (err);
-
-	/* first half of entries in old[b] go to new[2*b+0] */
-	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (newblk + 2*b+0) << bs, FTAG, &db_new));
-	dmu_buf_will_dirty(db_new, tx);
-	transfer_func(db_old->db_data, db_new->db_data, hepb);
-	dmu_buf_rele(db_new, FTAG);
-
-	/* second half of entries in old[b] go to new[2*b+1] */
-	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (newblk + 2*b+1) << bs, FTAG, &db_new));
-	dmu_buf_will_dirty(db_new, tx);
-	transfer_func((uint64_t *)db_old->db_data + hepb,
-	    db_new->db_data, hepb);
-	dmu_buf_rele(db_new, FTAG);
-
-	dmu_buf_rele(db_old, FTAG);
-
-	tbl->zt_blks_copied++;
-
-	dprintf("copied block %llu of %llu\n",
-	    tbl->zt_blks_copied, tbl->zt_numblks);
-
-	if (tbl->zt_blks_copied == tbl->zt_numblks) {
-		/* every old block has been copied: retire the old region */
-		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
-		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);
-
-		tbl->zt_blk = newblk;
-		tbl->zt_numblks *= 2;
-		tbl->zt_shift++;
-		tbl->zt_nextblk = 0;
-		tbl->zt_blks_copied = 0;
-
-		/*
-		 * 1ULL (not 1) so the argument's type matches the %llu
-		 * conversion it is printed with.
-		 */
-		dprintf("finished; numblocks now %llu (%lluk entries)\n",
-		    tbl->zt_numblks, 1ULL<<(tbl->zt_shift-10));
-	}
-
-	return (0);
-}
-
-/*
- * Store 'val' at entry 'idx' of the on-disk table 'tbl'.
- *
- * While a grow is in progress (zt_nextblk != 0) the value is also written
- * to entries 2*idx and 2*idx+1 of the new table.  Returns 0, or the error
- * from holding either block; on error nothing is written.
- */
-static int
-zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
- dmu_tx_t *tx)
-{
- int err;
- uint64_t blk, off;
- int bs = FZAP_BLOCK_SHIFT(zap);
- dmu_buf_t *db;
-
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- ASSERT(tbl->zt_blk != 0);
-
- dprintf("storing %llx at index %llx\n", val, idx);
-
- /* a block holds 1 << (bs-3) eight-byte entries */
- blk = idx >> (bs-3);
- off = idx & ((1<<(bs-3))-1);
-
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- (tbl->zt_blk + blk) << bs, FTAG, &db);
- if (err)
- return (err);
- dmu_buf_will_dirty(db, tx);
-
- if (tbl->zt_nextblk != 0) {
- uint64_t idx2 = idx * 2;
- uint64_t blk2 = idx2 >> (bs-3);
- uint64_t off2 = idx2 & ((1<<(bs-3))-1);
- dmu_buf_t *db2;
-
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- (tbl->zt_nextblk + blk2) << bs, FTAG, &db2);
- if (err) {
- dmu_buf_rele(db, FTAG);
- return (err);
- }
- dmu_buf_will_dirty(db2, tx);
- ((uint64_t *)db2->db_data)[off2] = val;
- ((uint64_t *)db2->db_data)[off2+1] = val;
- dmu_buf_rele(db2, FTAG);
- }
-
- /* the old table is written only after the new-table hold succeeded */
- ((uint64_t *)db->db_data)[off] = val;
- dmu_buf_rele(db, FTAG);
-
- return (0);
-}
-
-/*
- * Read entry 'idx' of the on-disk table 'tbl' into *valp.
- *
- * While a grow is in progress (zt_nextblk != 0) the matching block of the
- * new table is also held, purely to surface i/o errors that a later
- * zap_table_store() would otherwise run into.
- *
- * Returns 0 on success or the error from dmu_buf_hold().  Note that *valp
- * has already been written when the second hold is the one that fails.
- */
-static int
-zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
-{
-	uint64_t blk, off;
-	int err;
-	dmu_buf_t *db;
-	int bs = FZAP_BLOCK_SHIFT(zap);
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	/* a block holds 1 << (bs-3) eight-byte entries */
-	blk = idx >> (bs-3);
-	off = idx & ((1<<(bs-3))-1);
-
-	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (tbl->zt_blk + blk) << bs, FTAG, &db);
-	if (err)
-		return (err);
-	*valp = ((uint64_t *)db->db_data)[off];
-	dmu_buf_rele(db, FTAG);
-
-	if (tbl->zt_nextblk != 0) {
-		/*
-		 * read the nextblk for the sake of i/o error checking,
-		 * so that zap_table_load() will catch errors for
-		 * zap_table_store.
-		 */
-		blk = (idx*2) >> (bs-3);
-
-		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-		    (tbl->zt_nextblk + blk) << bs, FTAG, &db);
-		/*
-		 * Release only what was actually held: after a failed hold
-		 * 'db' still refers to the buffer already released above.
-		 */
-		if (err == 0)
-			dmu_buf_rele(db, FTAG);
-	}
-	return (err);
-}
-
-/*
- * Routines for growing the ptrtbl.
- */
-
-/*
- * Expand 'n' pointer-table entries from 'src' into 2*n entries at 'dst':
- * each source entry is written to two consecutive destination slots.
- */
-static void
-zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n)
-{
-	int k;
-
-	for (k = 0; k < n; k++, dst += 2) {
-		const uint64_t blk = src[k];
-
-		dst[0] = blk;
-		dst[1] = blk;
-	}
-}
-
-/*
- * Grow the zap's pointer table by one step.
- *
- * If the table is still embedded in the header block (zt_numblks == 0),
- * it is moved into a freshly allocated block of its own, which doubles it.
- * Otherwise zap_table_grow() is called, which advances an incremental
- * doubling.
- *
- * Returns 0 on success, ENOSPC once zt_shift has reached ZAP_HASHBITS-2,
- * or an error from dmu_buf_hold()/zap_table_grow().
- */
-static int
-zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
-{
- /* In case things go horribly wrong. */
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= ZAP_HASHBITS-2)
- return (ENOSPC);
-
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
- /*
- * We are outgrowing the "embedded" ptrtbl (the one
- * stored in the header block). Give it its own entire
- * block, which will double the size of the ptrtbl.
- */
- uint64_t newblk;
- dmu_buf_t *db_new;
- int err;
-
- ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
- ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
- ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0);
-
- /*
- * NOTE(review): zap_freeblk has already been advanced here, so
- * a failed hold below returns without the new block being used.
- */
- newblk = zap_allocate_blocks(zap, 1);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new);
- if (err)
- return (err);
- dmu_buf_will_dirty(db_new, tx);
- zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
- db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
- dmu_buf_rele(db_new, FTAG);
-
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk;
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1;
- zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++;
-
- /* the doubled table must exactly fill the new block */
- ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks <<
- (FZAP_BLOCK_SHIFT(zap)-3));
-
- return (0);
- } else {
- return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
- zap_ptrtbl_transfer, tx));
- }
-}
-
-/*
- * Add 'delta' (which may be negative) to the zap's entry count.
- *
- * The header buffer is dirtied in 'tx' first; the counter is then updated
- * while holding zap_num_entries_mtx.
- */
-static void
-zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
-{
- dmu_buf_will_dirty(zap->zap_dbuf, tx);
- mutex_enter(&zap->zap_f.zap_num_entries_mtx);
- /* a negative delta must not take the count below zero */
- ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta);
- zap->zap_f.zap_phys->zap_num_entries += delta;
- mutex_exit(&zap->zap_f.zap_num_entries_mtx);
-}
-
-/*
- * Reserve 'nblocks' consecutive block numbers in the zap object by
- * advancing zap_freeblk.  Returns the first reserved block number.
- * The caller must hold zap_rwlock as writer.
- */
-static uint64_t
-zap_allocate_blocks(zap_t *zap, int nblocks)
-{
-	zap_phys_t *zp = zap->zap_f.zap_phys;
-	uint64_t first;
-
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	first = zp->zap_freeblk;
-	zp->zap_freeblk = first + nblocks;
-
-	return (first);
-}
-
-/*
- * Allocate a new leaf block in the zap and return its in-core
- * zap_leaf_t.
- *
- * The returned leaf has its l_rwlock held as writer and its dbuf held
- * and dirtied in 'tx'; zap_num_leafs is incremented.  The caller must
- * hold zap_rwlock as writer.
- */
-static zap_leaf_t *
-zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
-{
- void *winner;
- zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
-
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- rw_init(&l->l_rwlock, NULL, RW_DEFAULT, 0);
- rw_enter(&l->l_rwlock, RW_WRITER);
- l->l_blkid = zap_allocate_blocks(zap, 1);
- l->l_dbuf = NULL;
- l->l_phys = NULL;
-
- VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
- l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf));
- /* attach the leaf to its dbuf; zap_leaf_pageout() is the callback */
- winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout);
- /* the block was just allocated, so no other user is expected */
- ASSERT(winner == NULL);
- dmu_buf_will_dirty(l->l_dbuf, tx);
-
- zap_leaf_init(l);
-
- zap->zap_f.zap_phys->zap_num_leafs++;
-
- return (l);
-}
-
-/*
- * Report the number of entries in a fat zap through *count.
- * Always returns 0.
- */
-int
-fzap_count(zap_t *zap, uint64_t *count)
-{
-	uint64_t entries;
-
-	ASSERT(!zap->zap_ismicro);
-
-	mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
-	entries = zap->zap_f.zap_phys->zap_num_entries;
-	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
-
-	*count = entries;
-	return (0);
-}
-
-/*
- * Routines for obtaining zap_leaf_t's
- */
-
-/*
- * Release a leaf: drop its l_rwlock, then drop the hold on its dbuf.
- */
-void
-zap_put_leaf(zap_leaf_t *l)
-{
-	dmu_buf_t *db = l->l_dbuf;
-
-	rw_exit(&l->l_rwlock);
-	dmu_buf_rele(db, NULL);
-}
-
-/*
- * Destroy an in-core leaf: tear down its lock and free the structure.
- * 'db' is unused.  Registered with dmu_buf_set_user() and also called
- * directly to discard a leaf that lost the race to attach to its dbuf.
- */
-_NOTE(ARGSUSED(0))
-static void
-zap_leaf_pageout(dmu_buf_t *db, void *vl)
-{
-	zap_leaf_t *leaf = (zap_leaf_t *)vl;
-
-	rw_destroy(&leaf->l_rwlock);
-	kmem_free(leaf, sizeof (*leaf));
-}
-
-/*
- * Build the in-core zap_leaf_t for block 'blkid', whose dbuf 'db' is
- * already held, and attach it to the dbuf with dmu_buf_set_user().
- *
- * If another thread attached a leaf first, the one built here is
- * destroyed and the winner is returned instead.  The returned leaf is
- * not locked.
- */
-static zap_leaf_t *
-zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
-{
- zap_leaf_t *l, *winner;
-
- ASSERT(blkid != 0);
-
- l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
- rw_init(&l->l_rwlock, NULL, RW_DEFAULT, 0);
- /* hold the lock as writer while the leaf is being published */
- rw_enter(&l->l_rwlock, RW_WRITER);
- l->l_blkid = blkid;
- l->l_bs = highbit(db->db_size)-1;
- l->l_dbuf = db;
- l->l_phys = NULL;
-
- winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout);
-
- rw_exit(&l->l_rwlock);
- if (winner != NULL) {
- /* someone else set it first */
- zap_leaf_pageout(NULL, l);
- l = winner;
- }
-
- /*
- * lh_pad1 was previously used for the next leaf in the leaf
- * chain. There should be no chained leafs (as we have removed
- * support for them).
- */
- ASSERT3U(l->l_phys->l_hdr.lh_pad1, ==, 0);
-
- /*
- * There should be more hash entries than there can be
- * chunks to put in the hash table
- */
- ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
-
- /* The chunks should begin at the end of the hash table */
- ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
- &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
-
- /* The chunks should end at the end of the block */
- ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
- (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size);
-
- return (l);
-}
-
-/*
- * Hold and lock the leaf stored in block 'blkid'.
- *
- * On success *lp is the leaf with its dbuf held and its l_rwlock taken in
- * mode 'lt'; for RW_WRITER the dbuf is also dirtied in 'tx'.  Release it
- * with zap_put_leaf().  Returns 0, or the error from dmu_buf_hold().
- */
-static int
-zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
- zap_leaf_t **lp)
-{
- dmu_buf_t *db;
- zap_leaf_t *l;
- int bs = FZAP_BLOCK_SHIFT(zap);
- int err;
-
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- blkid << bs, NULL, &db);
- if (err)
- return (err);
-
- ASSERT3U(db->db_object, ==, zap->zap_object);
- ASSERT3U(db->db_offset, ==, blkid << bs);
- ASSERT3U(db->db_size, ==, 1 << bs);
- ASSERT(blkid != 0);
-
- /* reuse the in-core leaf attached to the dbuf, if there is one */
- l = dmu_buf_get_user(db);
-
- if (l == NULL)
- l = zap_open_leaf(blkid, db);
-
- rw_enter(&l->l_rwlock, lt);
- /*
- * Must lock before dirtying, otherwise l->l_phys could change,
- * causing ASSERT below to fail.
- */
- if (lt == RW_WRITER)
- dmu_buf_will_dirty(db, tx);
- ASSERT3U(l->l_blkid, ==, blkid);
- ASSERT3P(l->l_dbuf, ==, db);
- ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data);
- ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF);
- ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
-
- *lp = l;
- return (0);
-}
-
-/*
- * Look up pointer-table entry 'idx' and return the leaf block number it
- * holds through *valp.  The entry is read from the header block while the
- * table is embedded (zt_numblks == 0), otherwise via zap_table_load().
- * Returns 0 or the error from zap_table_load().
- */
-static int
-zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
-{
-	zap_table_phys_t *tbl = &zap->zap_f.zap_phys->zap_ptrtbl;
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	if (tbl->zt_numblks != 0)
-		return (zap_table_load(zap, tbl, idx, valp));
-
-	ASSERT3U(idx, <, (1ULL << tbl->zt_shift));
-	*valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
-	return (0);
-}
-
-/*
- * Point pointer-table entry 'idx' at leaf block 'blk'.  The entry is
- * written in the header block while the table is embedded (zt_blk == 0),
- * otherwise via zap_table_store().  The caller must hold zap_rwlock as
- * writer.  Returns 0 or the error from zap_table_store().
- */
-static int
-zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
-{
-	zap_table_phys_t *tbl = &zap->zap_f.zap_phys->zap_ptrtbl;
-
-	ASSERT(tx != NULL);
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	if (tbl->zt_blk != 0)
-		return (zap_table_store(zap, tbl, idx, blk, tx));
-
-	ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
-	return (0);
-}
-
-/*
- * Find, hold and lock (in mode 'lt') the leaf responsible for hash 'h'.
- *
- * The hash is turned into a pointer-table index using the table's current
- * zt_shift, the index into a block number, and the block into a leaf.
- * Returns 0 with *lp set, or the first error encountered.
- */
-static int
-zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
-{
- uint64_t idx, blk;
- int err;
-
- ASSERT(zap->zap_dbuf == NULL ||
- zap->zap_f.zap_phys == zap->zap_dbuf->db_data);
- ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC);
- idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
- err = zap_idx_to_blk(zap, idx, &blk);
- if (err != 0)
- return (err);
- err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
-
- /* on success, the leaf's own prefix must match the hash */
- ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) ==
- (*lp)->l_phys->l_hdr.lh_prefix);
- return (err);
-}
-
-/*
- * Split leaf 'l' (held as writer by the caller) to make room for an
- * entry with hash 'hash'.
- *
- * If the zap lock cannot be upgraded in place, or the leaf's prefix is
- * already as long as the pointer table allows, the leaf and the zap lock
- * are dropped, the zap is re-locked as writer, the pointer table is grown
- * as needed and the leaf is looked up again.  On success *lp is the leaf
- * (old or new sibling) that now covers 'hash', held as writer.
- *
- * NOTE(review): the error returns are not uniform about the leaf.  The
- * returns inside the re-lock branch happen after zap_put_leaf(l), while
- * the return from the i/o pre-check loop happens with a leaf still held;
- * *lp is not updated on any of them.  After a re-lock, 'zap' here is a
- * local copy that zap_lockdir() may have changed -- confirm what callers
- * can safely do with their own 'zap' and 'l' after an error.
- */
-static int
-zap_expand_leaf(zap_t *zap, zap_leaf_t *l, uint64_t hash, dmu_tx_t *tx,
- zap_leaf_t **lp)
-{
- zap_leaf_t *nl;
- int prefix_diff, i, err;
- uint64_t sibling;
- int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
-
- ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
- ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
- l->l_phys->l_hdr.lh_prefix);
-
- if (zap_tryupgradedir(zap, tx) == 0 ||
- old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
- /* We failed to upgrade, or need to grow the pointer table */
- objset_t *os = zap->zap_objset;
- uint64_t object = zap->zap_object;
-
- zap_put_leaf(l);
- zap_unlockdir(zap);
- err = zap_lockdir(os, object, tx, RW_WRITER, FALSE, &zap);
- if (err)
- return (err);
- ASSERT(!zap->zap_ismicro);
-
- /* grow until the table is deeper than this leaf's prefix */
- while (old_prefix_len ==
- zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
- err = zap_grow_ptrtbl(zap, tx);
- if (err)
- return (err);
- }
-
- err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
- if (err)
- return (err);
-
- if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) {
- /* it split while our locks were down */
- *lp = l;
- return (0);
- }
- }
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
- ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
- l->l_phys->l_hdr.lh_prefix);
-
- /*
- * 'sibling' is the first pointer-table index of the half of this
- * leaf's range whose next prefix bit is 1; that half spans
- * 1 << prefix_diff consecutive entries.
- */
- prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
- (old_prefix_len + 1);
- sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;
-
- /* check for i/o errors before doing zap_leaf_split */
- for (i = 0; i < (1ULL<<prefix_diff); i++) {
- uint64_t blk;
- err = zap_idx_to_blk(zap, sibling+i, &blk);
- if (err)
- return (err);
- ASSERT3U(blk, ==, l->l_blkid);
- }
-
- nl = zap_create_leaf(zap, tx);
- zap_leaf_split(l, nl);
-
- /* set sibling pointers */
- for (i = 0; i < (1ULL<<prefix_diff); i++) {
- err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx);
- ASSERT3U(err, ==, 0); /* we checked for i/o errors above */
- }
-
- /* keep whichever of the two leaves now covers 'hash' */
- if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) {
- /* we want the sibling */
- zap_put_leaf(l);
- *lp = nl;
- } else {
- zap_put_leaf(nl);
- *lp = l;
- }
-
- return (0);
-}
-
-/*
- * Release leaf 'l' and, if worthwhile, advance the pointer table's growth
- * by one step.
- *
- * A step is taken when a grow is already in progress (zt_nextblk != 0) or
- * when this leaf's prefix is as long as the table allows and its free
- * space is below ZAP_LEAF_LOW_WATER.  This is best effort: failures to
- * re-lock the zap or to grow the table are not reported.
- *
- * NOTE(review): when the lock cannot be upgraded in place the zap is
- * unlocked and re-locked through a local copy of 'zap'; confirm whether
- * callers may keep using their own pointer afterwards, in particular when
- * zap_lockdir() fails.
- */
-static void
-zap_put_leaf_maybe_grow_ptrtbl(zap_t *zap, zap_leaf_t *l, dmu_tx_t *tx)
-{
- int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
- /* evaluated before the leaf is released below */
- int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift &&
- l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);
-
- zap_put_leaf(l);
-
- if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) {
- int err;
-
- /*
- * We are in the middle of growing the pointer table, or
- * this leaf will soon make us grow it.
- */
- if (zap_tryupgradedir(zap, tx) == 0) {
- objset_t *os = zap->zap_objset;
- uint64_t zapobj = zap->zap_object;
-
- zap_unlockdir(zap);
- err = zap_lockdir(os, zapobj, tx,
- RW_WRITER, FALSE, &zap);
- if (err)
- return;
- }
-
- /* could have finished growing while our locks were down */
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift)
- (void) zap_grow_ptrtbl(zap, tx);
- }
-}
-
-
-/*
- * Validate the name and value dimensions of a zap attribute.
- *
- * Returns E2BIG if 'name' is non-NULL and longer than ZAP_MAXNAMELEN,
- * EINVAL if 'integer_size' is not 1, 2, 4 or 8, E2BIG if the value
- * (integer_size * num_integers bytes) exceeds ZAP_MAXVALUELEN, and 0
- * otherwise.
- */
-static int
-fzap_checksize(const char *name, uint64_t integer_size, uint64_t num_integers)
-{
-	if (name && strlen(name) > ZAP_MAXNAMELEN)
-		return (E2BIG);
-
-	/* Only integer sizes supported by C */
-	switch (integer_size) {
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		break;
-	default:
-		return (EINVAL);
-	}
-
-	/*
-	 * Compare by division rather than multiplication: the product
-	 * integer_size * num_integers can wrap around in 64 bits and so
-	 * slip under the limit.  integer_size is known to be non-zero here.
-	 */
-	if (num_integers > ZAP_MAXVALUELEN / integer_size)
-		return (E2BIG);
-
-	return (0);
-}
-
-/*
- * Routines for manipulating attributes.
- */
-/*
- * Look up attribute 'name' in a fat zap and copy its value into 'buf' as
- * 'num_integers' integers of 'integer_size' bytes each.
- *
- * Returns 0 on success, the error from fzap_checksize() for invalid
- * sizes, or whatever zap_deref_leaf(), zap_leaf_lookup() or
- * zap_entry_read() reports.
- */
-int
-fzap_lookup(zap_t *zap, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf)
-{
-	zap_entry_handle_t zeh;
-	zap_leaf_t *leaf;
-	uint64_t hash;
-	int rc;
-
-	rc = fzap_checksize(name, integer_size, num_integers);
-	if (rc != 0)
-		return (rc);
-
-	hash = zap_hash(zap, name);
-	rc = zap_deref_leaf(zap, hash, NULL, RW_READER, &leaf);
-	if (rc != 0)
-		return (rc);
-
-	rc = zap_leaf_lookup(leaf, name, hash, &zeh);
-	if (rc == 0)
-		rc = zap_entry_read(&zeh, integer_size, num_integers, buf);
-
-	zap_put_leaf(leaf);
-	return (rc);
-}
-
-/*
- * Add attribute 'name' to a fat zap with the given value, passing 'cd'
- * through to zap_entry_create().
- *
- * Returns 0 on success, EEXIST if the name is already present, or an
- * error from the leaf lookup, entry creation or leaf expansion.  The
- * sizes must already satisfy fzap_checksize().
- */
-int
-fzap_add_cd(zap_t *zap, const char *name,
- uint64_t integer_size, uint64_t num_integers,
- const void *val, uint32_t cd, dmu_tx_t *tx)
-{
- zap_leaf_t *l;
- uint64_t hash;
- int err;
- zap_entry_handle_t zeh;
-
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- ASSERT(!zap->zap_ismicro);
- ASSERT(fzap_checksize(name, integer_size, num_integers) == 0);
-
- hash = zap_hash(zap, name);
- err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
- if (err != 0)
- return (err);
-retry:
- err = zap_leaf_lookup(l, name, hash, &zeh);
- if (err == 0) {
- err = EEXIST;
- goto out;
- }
- if (err != ENOENT)
- goto out;
-
- err = zap_entry_create(l, name, hash, cd,
- integer_size, num_integers, val, &zeh);
-
- if (err == 0) {
- zap_increment_num_entries(zap, 1, tx);
- } else if (err == EAGAIN) {
- /* the leaf is full: split it and try again on the new leaf */
- /*
- * NOTE(review): if zap_expand_leaf() fails, 'l' may already
- * have been released by it, yet the code below still passes
- * 'l' to zap_put_leaf_maybe_grow_ptrtbl() -- confirm.
- */
- err = zap_expand_leaf(zap, l, hash, tx, &l);
- if (err == 0)
- goto retry;
- }
-
-out:
- zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx);
- return (err);
-}
-
-/*
- * Add attribute 'name' to a fat zap.  Validates the sizes with
- * fzap_checksize() and then defers to fzap_add_cd() with ZAP_MAXCD.
- */
-int
-fzap_add(zap_t *zap, const char *name,
-    uint64_t integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx)
-{
-	int rc = fzap_checksize(name, integer_size, num_integers);
-
-	if (rc == 0) {
-		rc = fzap_add_cd(zap, name, integer_size, num_integers,
-		    val, ZAP_MAXCD, tx);
-	}
-
-	return (rc);
-}
-
-/*
- * Set attribute 'name' in a fat zap to the given value, creating the
- * entry if it does not exist yet.
- *
- * Returns 0 on success, the error from fzap_checksize() for invalid
- * sizes, or an error from the leaf lookup, entry create/update or leaf
- * expansion.
- */
-int
-fzap_update(zap_t *zap, const char *name,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
-{
- zap_leaf_t *l;
- uint64_t hash;
- int err, create;
- zap_entry_handle_t zeh;
-
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- err = fzap_checksize(name, integer_size, num_integers);
- if (err != 0)
- return (err);
-
- hash = zap_hash(zap, name);
- err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
- if (err != 0)
- return (err);
-retry:
- err = zap_leaf_lookup(l, name, hash, &zeh);
- create = (err == ENOENT);
- ASSERT(err == 0 || err == ENOENT);
-
- /* XXX If this leaf is chained, split it if we can. */
-
- if (create) {
- err = zap_entry_create(l, name, hash, ZAP_MAXCD,
- integer_size, num_integers, val, &zeh);
- if (err == 0)
- zap_increment_num_entries(zap, 1, tx);
- } else {
- err = zap_entry_update(&zeh, integer_size, num_integers, val);
- }
-
- if (err == EAGAIN) {
- /* the leaf is full: split it and try again on the new leaf */
- /*
- * NOTE(review): if zap_expand_leaf() fails, 'l' may already
- * have been released by it, yet the code below still passes
- * 'l' to zap_put_leaf_maybe_grow_ptrtbl() -- confirm.
- */
- err = zap_expand_leaf(zap, l, hash, tx, &l);
- if (err == 0)
- goto retry;
- }
-
- zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx);
- return (err);
-}
-
-/*
- * Report the integer size and integer count of attribute 'name'.
- * Either output pointer may be NULL if the caller is not interested.
- * Returns 0 on success, or the error from zap_deref_leaf() or
- * zap_leaf_lookup().
- */
-int
-fzap_length(zap_t *zap, const char *name,
-    uint64_t *integer_size, uint64_t *num_integers)
-{
-	zap_entry_handle_t zeh;
-	zap_leaf_t *leaf;
-	uint64_t hash;
-	int rc;
-
-	hash = zap_hash(zap, name);
-	rc = zap_deref_leaf(zap, hash, NULL, RW_READER, &leaf);
-	if (rc != 0)
-		return (rc);
-
-	rc = zap_leaf_lookup(leaf, name, hash, &zeh);
-	if (rc == 0) {
-		if (integer_size)
-			*integer_size = zeh.zeh_integer_size;
-		if (num_integers)
-			*num_integers = zeh.zeh_num_integers;
-	}
-
-	zap_put_leaf(leaf);
-	return (rc);
-}
-
-/*
- * Remove attribute 'name' from a fat zap and decrement the entry count.
- * Returns 0 on success, or the error from zap_deref_leaf() or
- * zap_leaf_lookup().
- */
-int
-fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx)
-{
-	zap_entry_handle_t zeh;
-	zap_leaf_t *leaf;
-	uint64_t hash;
-	int rc;
-
-	hash = zap_hash(zap, name);
-	rc = zap_deref_leaf(zap, hash, tx, RW_WRITER, &leaf);
-	if (rc != 0)
-		return (rc);
-
-	rc = zap_leaf_lookup(leaf, name, hash, &zeh);
-	if (rc == 0) {
-		zap_entry_remove(&zeh);
-		zap_increment_num_entries(zap, -1, tx);
-	}
-
-	zap_put_leaf(leaf);
-	dprintf("fzap_remove: ds=%p obj=%llu name=%s err=%d\n",
-	    zap->zap_objset, zap->zap_object, name, rc);
-	return (rc);
-}
-
-/*
- * Walk zap object 'zapobj' for the first attribute for which
- * ZFS_DIRENT_OBJ() of the first integer equals 'value', and copy that
- * attribute's name into 'name'.
- *
- * Returns 0 if a match was found, otherwise the error that ended the
- * walk (from zap_cursor_retrieve()).  'name' is filled with strcpy(), so
- * the caller's buffer must be large enough for any attribute name.
- */
-int
-zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name)
-{
-	zap_attribute_t *attr;
-	zap_cursor_t cursor;
-	int rc;
-
-	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
-
-	zap_cursor_init(&cursor, os, zapobj);
-	while ((rc = zap_cursor_retrieve(&cursor, attr)) == 0) {
-		if (ZFS_DIRENT_OBJ(attr->za_first_integer) == value) {
-			(void) strcpy(name, attr->za_name);
-			break;
-		}
-		zap_cursor_advance(&cursor);
-	}
-	zap_cursor_fini(&cursor);
-
-	kmem_free(attr, sizeof (zap_attribute_t));
-	return (rc);
-}
-
-
-/*
- * Routines for iterating over the attributes.
- */
-
-/*
- * Retrieve into 'za' the first entry at or after the cursor position
- * (zc_hash, zc_cd), and move the cursor position to that entry.
- *
- * Returns 0 on success, ENOENT when there are no more entries, or the
- * error from zap_deref_leaf().  Except on a zap_deref_leaf() failure,
- * the cursor keeps its leaf in zc_leaf on return: the leaf's lock is
- * dropped here but zap_put_leaf() is not called for it.
- */
-int
-fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
-{
- int err = ENOENT;
- zap_entry_handle_t zeh;
- zap_leaf_t *l;
-
- /* retrieve the next entry at or after zc_hash/zc_cd */
- /* if no entry, return ENOENT */
-
- /*
- * Drop a cached leaf whose prefix no longer matches zc_hash.  Its
- * lock is taken first only because zap_put_leaf() releases it.
- */
- if (zc->zc_leaf &&
- (ZAP_HASH_IDX(zc->zc_hash,
- zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) !=
- zc->zc_leaf->l_phys->l_hdr.lh_prefix)) {
- rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
- zap_put_leaf(zc->zc_leaf);
- zc->zc_leaf = NULL;
- }
-
-again:
- if (zc->zc_leaf == NULL) {
- err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
- &zc->zc_leaf);
- if (err != 0)
- return (err);
- } else {
- rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
- }
- l = zc->zc_leaf;
-
- err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);
-
- if (err == ENOENT) {
- /*
- * Nothing left in this leaf: advance zc_hash to the first
- * hash value past the range this leaf's prefix covers.
- */
- uint64_t nocare =
- (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1;
- zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
- zc->zc_cd = 0;
- if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) {
- /* single leaf, or the hash wrapped: end of the zap */
- zc->zc_hash = -1ULL;
- } else {
- zap_put_leaf(zc->zc_leaf);
- zc->zc_leaf = NULL;
- goto again;
- }
- }
-
- if (err == 0) {
- zc->zc_hash = zeh.zeh_hash;
- zc->zc_cd = zeh.zeh_cd;
- za->za_integer_length = zeh.zeh_integer_size;
- za->za_num_integers = zeh.zeh_num_integers;
- if (zeh.zeh_num_integers == 0) {
- za->za_first_integer = 0;
- } else {
- err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
- ASSERT(err == 0 || err == EOVERFLOW);
- }
- /* this overwrites any EOVERFLOW from the read above */
- err = zap_entry_read_name(&zeh,
- sizeof (za->za_name), za->za_name);
- ASSERT(err == 0);
- }
- /* unlock only; the hold on the leaf stays with the cursor */
- rw_exit(&zc->zc_leaf->l_rwlock);
- return (err);
-}
-
-
-/*
- * Accumulate leaf statistics into 'zs' for the leaves referenced by the
- * 'len' pointer-table entries in 'tbl'.  Runs of identical consecutive
- * entries are visited once; leaves that cannot be fetched are skipped.
- */
-static void
-zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
-{
-	uint64_t prev = 0;
-	int i;
-
-	/*
-	 * NB: if a leaf has more pointers than an entire ptrtbl block
-	 * can hold, then it'll be accounted for more than once, since
-	 * we won't have lastblk.
-	 */
-	for (i = 0; i < len; i++) {
-		uint64_t blk = tbl[i];
-		zap_leaf_t *leaf;
-
-		if (blk == prev)
-			continue;
-		prev = blk;
-
-		if (zap_get_leaf_byblk(zap, blk, NULL, RW_READER, &leaf) != 0)
-			continue;
-
-		zap_leaf_stats(zap, leaf, zs);
-		zap_put_leaf(leaf);
-	}
-}
-
-/*
- * Fill 'zs' with statistics for a fat zap: header fields, pointer-table
- * fields, and per-leaf statistics gathered by walking the pointer table.
- * Pointer-table blocks that cannot be held are silently skipped.
- */
-void
-fzap_get_stats(zap_t *zap, zap_stats_t *zs)
-{
- int bs = FZAP_BLOCK_SHIFT(zap);
- zs->zs_blocksize = 1ULL << bs;
-
- /*
- * Set zap_phys_t fields
- */
- zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs;
- zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries;
- zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk;
- zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type;
- zs->zs_magic = zap->zap_f.zap_phys->zap_magic;
- zs->zs_salt = zap->zap_f.zap_phys->zap_salt;
-
- /*
- * Set zap_ptrtbl fields
- */
- zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
- zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk;
- zs->zs_ptrtbl_blks_copied =
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied;
- zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk;
- zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
- zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
-
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
- /* the ptrtbl is entirely in the header block. */
- zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
- 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
- } else {
- int b;
-
- dmu_prefetch(zap->zap_objset, zap->zap_object,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs);
-
- /* walk the external table one block at a time */
- for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
- b++) {
- dmu_buf_t *db;
- int err;
-
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
- FTAG, &db);
- if (err == 0) {
- /* a block holds 1 << (bs-3) eight-byte entries */
- zap_stats_ptrtbl(zap, db->db_data,
- 1<<(bs-3), zs);
- dmu_buf_rele(db, FTAG);
- }
- }
- }
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
deleted file mode 100644
index 5dff514..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
+++ /dev/null
@@ -1,741 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * The 512-byte leaf is broken into 32 16-byte chunks.
- * chunk number n means l_chunk[n], even though the header precedes it.
- * the names are stored null-terminated.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/zap.h>
-#include <sys/zap_impl.h>
-#include <sys/zap_leaf.h>
-#include <sys/spa.h>
-#include <sys/dmu.h>
-
-#define CHAIN_END 0xffff /* end of the chunk chain */
-
-/* half the (current) minimum block size */
-#define MAX_ARRAY_BYTES (8<<10)
-
-#define LEAF_HASH(l, h) \
- ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
- ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))
-
-#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
-
-
-static void
-zap_memset(void *a, int c, size_t n)
-{
- char *cp = a;
- char *cpend = cp + n;
-
- while (cp < cpend)
- *cp++ = c;
-}
-
-static void
-stv(int len, void *addr, uint64_t value)
-{
- switch (len) {
- case 1:
- *(uint8_t *)addr = value;
- return;
- case 2:
- *(uint16_t *)addr = value;
- return;
- case 4:
- *(uint32_t *)addr = value;
- return;
- case 8:
- *(uint64_t *)addr = value;
- return;
- }
- ASSERT(!"bad int len");
-}
-
-static uint64_t
-ldv(int len, const void *addr)
-{
- switch (len) {
- case 1:
- return (*(uint8_t *)addr);
- case 2:
- return (*(uint16_t *)addr);
- case 4:
- return (*(uint32_t *)addr);
- case 8:
- return (*(uint64_t *)addr);
- }
- ASSERT(!"bad int len");
- return (0xFEEDFACEDEADBEEFULL);
-}
-
-void
-zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
-{
- int i;
- zap_leaf_t l;
- l.l_bs = highbit(size)-1;
- l.l_phys = buf;
-
- buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
- buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
- buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
- buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
- buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
- buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
- buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);
-
- for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
- buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
-
- for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
- zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
- struct zap_leaf_entry *le;
-
- switch (lc->l_free.lf_type) {
- case ZAP_CHUNK_ENTRY:
- le = &lc->l_entry;
-
- le->le_type = BSWAP_8(le->le_type);
- le->le_int_size = BSWAP_8(le->le_int_size);
- le->le_next = BSWAP_16(le->le_next);
- le->le_name_chunk = BSWAP_16(le->le_name_chunk);
- le->le_name_length = BSWAP_16(le->le_name_length);
- le->le_value_chunk = BSWAP_16(le->le_value_chunk);
- le->le_value_length = BSWAP_16(le->le_value_length);
- le->le_cd = BSWAP_32(le->le_cd);
- le->le_hash = BSWAP_64(le->le_hash);
- break;
- case ZAP_CHUNK_FREE:
- lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
- lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
- break;
- case ZAP_CHUNK_ARRAY:
- lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
- lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
- /* la_array doesn't need swapping */
- break;
- default:
- ASSERT(!"bad leaf type");
- }
- }
-}
-
-void
-zap_leaf_init(zap_leaf_t *l)
-{
- int i;
-
- l->l_bs = highbit(l->l_dbuf->db_size)-1;
- zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
- zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
- for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
- ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
- ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
- }
- ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
- l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
- l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
- l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
-}
-
-/*
- * Routines which manipulate leaf chunks (l_chunk[]).
- */
-
-static uint16_t
-zap_leaf_chunk_alloc(zap_leaf_t *l)
-{
- int chunk;
-
- ASSERT(l->l_phys->l_hdr.lh_nfree > 0);
-
- chunk = l->l_phys->l_hdr.lh_freelist;
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
-
- l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
-
- l->l_phys->l_hdr.lh_nfree--;
-
- return (chunk);
-}
-
-static void
-zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
-{
- struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
- ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
-
- zlf->lf_type = ZAP_CHUNK_FREE;
- zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
- bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
- l->l_phys->l_hdr.lh_freelist = chunk;
-
- l->l_phys->l_hdr.lh_nfree++;
-}
-
-/*
- * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
- */
-
-static uint16_t
-zap_leaf_array_create(zap_leaf_t *l, const char *buf,
- int integer_size, int num_integers)
-{
- uint16_t chunk_head;
- uint16_t *chunkp = &chunk_head;
- int byten = 0;
- uint64_t value;
- int shift = (integer_size-1)*8;
- int len = num_integers;
-
- ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);
-
- while (len > 0) {
- uint16_t chunk = zap_leaf_chunk_alloc(l);
- struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
- int i;
-
- la->la_type = ZAP_CHUNK_ARRAY;
- for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
- if (byten == 0)
- value = ldv(integer_size, buf);
- la->la_array[i] = value >> shift;
- value <<= 8;
- if (++byten == integer_size) {
- byten = 0;
- buf += integer_size;
- if (--len == 0)
- break;
- }
- }
-
- *chunkp = chunk;
- chunkp = &la->la_next;
- }
- *chunkp = CHAIN_END;
-
- return (chunk_head);
-}
-
-static void
-zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
-{
- uint16_t chunk = *chunkp;
-
- *chunkp = CHAIN_END;
-
- while (chunk != CHAIN_END) {
- int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
- ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
- ZAP_CHUNK_ARRAY);
- zap_leaf_chunk_free(l, chunk);
- chunk = nextchunk;
- }
-}
-
-/* array_len and buf_len are in integers, not bytes */
-static void
-zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
- int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
- char *buf)
-{
- int len = MIN(array_len, buf_len);
- int byten = 0;
- uint64_t value = 0;
-
- ASSERT3U(array_int_len, <=, buf_int_len);
-
- /* Fast path for one 8-byte integer */
- if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
- struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
- uint8_t *ip = la->la_array;
- uint64_t *buf64 = (uint64_t *)buf;
-
- *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
- (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
- (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
- (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
- return;
- }
-
- /* Fast path for an array of 1-byte integers (eg. the entry name) */
- if (array_int_len == 1 && buf_int_len == 1 &&
- buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
- while (chunk != CHAIN_END) {
- struct zap_leaf_array *la =
- &ZAP_LEAF_CHUNK(l, chunk).l_array;
- bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES);
- buf += ZAP_LEAF_ARRAY_BYTES;
- chunk = la->la_next;
- }
- return;
- }
-
- while (len > 0) {
- struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
- int i;
-
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
- value = (value << 8) | la->la_array[i];
- byten++;
- if (byten == array_int_len) {
- stv(buf_int_len, buf, value);
- byten = 0;
- len--;
- if (len == 0)
- return;
- buf += buf_int_len;
- }
- }
- chunk = la->la_next;
- }
-}
-
-/*
- * Only to be used on 8-bit arrays.
- * array_len is actual len in bytes (not encoded le_value_length).
- * buf is null-terminated.
- */
-static int
-zap_leaf_array_equal(zap_leaf_t *l, int chunk,
- int array_len, const char *buf)
-{
- int bseen = 0;
-
- while (bseen < array_len) {
- struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
- int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- if (bcmp(la->la_array, buf + bseen, toread))
- break;
- chunk = la->la_next;
- bseen += toread;
- }
- return (bseen == array_len);
-}
-
-/*
- * Routines which manipulate leaf entries.
- */
-
-int
-zap_leaf_lookup(zap_leaf_t *l,
- const char *name, uint64_t h, zap_entry_handle_t *zeh)
-{
- uint16_t *chunkp;
- struct zap_leaf_entry *le;
-
- ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
-
- for (chunkp = LEAF_HASH_ENTPTR(l, h);
- *chunkp != CHAIN_END; chunkp = &le->le_next) {
- uint16_t chunk = *chunkp;
- le = ZAP_LEAF_ENTRY(l, chunk);
-
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- if (le->le_hash != h)
- continue;
-
- if (zap_leaf_array_equal(l, le->le_name_chunk,
- le->le_name_length, name)) {
- zeh->zeh_num_integers = le->le_value_length;
- zeh->zeh_integer_size = le->le_int_size;
- zeh->zeh_cd = le->le_cd;
- zeh->zeh_hash = le->le_hash;
- zeh->zeh_chunkp = chunkp;
- zeh->zeh_leaf = l;
- return (0);
- }
- }
-
- return (ENOENT);
-}
-
-/* Return (h1,cd1 >= h2,cd2) */
-#define HCD_GTEQ(h1, cd1, h2, cd2) \
- ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
-
-int
-zap_leaf_lookup_closest(zap_leaf_t *l,
- uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
-{
- uint16_t chunk;
- uint64_t besth = -1ULL;
- uint32_t bestcd = ZAP_MAXCD;
- uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
- uint16_t lh;
- struct zap_leaf_entry *le;
-
- ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
-
- for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
- for (chunk = l->l_phys->l_hash[lh];
- chunk != CHAIN_END; chunk = le->le_next) {
- le = ZAP_LEAF_ENTRY(l, chunk);
-
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
- HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
- ASSERT3U(bestlh, >=, lh);
- bestlh = lh;
- besth = le->le_hash;
- bestcd = le->le_cd;
-
- zeh->zeh_num_integers = le->le_value_length;
- zeh->zeh_integer_size = le->le_int_size;
- zeh->zeh_cd = le->le_cd;
- zeh->zeh_hash = le->le_hash;
- zeh->zeh_fakechunk = chunk;
- zeh->zeh_chunkp = &zeh->zeh_fakechunk;
- zeh->zeh_leaf = l;
- }
- }
- }
-
- return (bestcd == ZAP_MAXCD ? ENOENT : 0);
-}
-
-int
-zap_entry_read(const zap_entry_handle_t *zeh,
- uint8_t integer_size, uint64_t num_integers, void *buf)
-{
- struct zap_leaf_entry *le =
- ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- if (le->le_int_size > integer_size)
- return (EINVAL);
-
- zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_int_size,
- le->le_value_length, integer_size, num_integers, buf);
-
- if (zeh->zeh_num_integers > num_integers)
- return (EOVERFLOW);
- return (0);
-
-}
-
-int
-zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
-{
- struct zap_leaf_entry *le =
- ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
- le->le_name_length, 1, buflen, buf);
- if (le->le_name_length > buflen)
- return (EOVERFLOW);
- return (0);
-}
-
-int
-zap_entry_update(zap_entry_handle_t *zeh,
- uint8_t integer_size, uint64_t num_integers, const void *buf)
-{
- int delta_chunks;
- zap_leaf_t *l = zeh->zeh_leaf;
- struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
-
- delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
- ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * le->le_int_size);
-
- if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
- return (EAGAIN);
-
- /*
- * We should search other chained leaves (via
- * zap_entry_remove,create?) otherwise returning EAGAIN will
- * just send us into an infinite loop if we have to chain
- * another leaf block, rather than being able to split this
- * block.
- */
-
- zap_leaf_array_free(l, &le->le_value_chunk);
- le->le_value_chunk =
- zap_leaf_array_create(l, buf, integer_size, num_integers);
- le->le_value_length = num_integers;
- le->le_int_size = integer_size;
- return (0);
-}
-
-void
-zap_entry_remove(zap_entry_handle_t *zeh)
-{
- uint16_t entry_chunk;
- struct zap_leaf_entry *le;
- zap_leaf_t *l = zeh->zeh_leaf;
-
- ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
-
- entry_chunk = *zeh->zeh_chunkp;
- le = ZAP_LEAF_ENTRY(l, entry_chunk);
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- zap_leaf_array_free(l, &le->le_name_chunk);
- zap_leaf_array_free(l, &le->le_value_chunk);
-
- *zeh->zeh_chunkp = le->le_next;
- zap_leaf_chunk_free(l, entry_chunk);
-
- l->l_phys->l_hdr.lh_nentries--;
-}
-
-int
-zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
- uint8_t integer_size, uint64_t num_integers, const void *buf,
- zap_entry_handle_t *zeh)
-{
- uint16_t chunk;
- uint16_t *chunkp;
- struct zap_leaf_entry *le;
- uint64_t namelen, valuelen;
- int numchunks;
-
- valuelen = integer_size * num_integers;
- namelen = strlen(name) + 1;
- ASSERT(namelen >= 2);
-
- numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(namelen) +
- ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
- if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
- return (E2BIG);
-
- if (cd == ZAP_MAXCD) {
- for (cd = 0; cd < ZAP_MAXCD; cd++) {
- for (chunk = *LEAF_HASH_ENTPTR(l, h);
- chunk != CHAIN_END; chunk = le->le_next) {
- le = ZAP_LEAF_ENTRY(l, chunk);
- if (le->le_hash == h &&
- le->le_cd == cd) {
- break;
- }
- }
- /* If this cd is not in use, we are good. */
- if (chunk == CHAIN_END)
- break;
- }
- /* If we tried all the cd's, we lose. */
- if (cd == ZAP_MAXCD)
- return (ENOSPC);
- }
-
- if (l->l_phys->l_hdr.lh_nfree < numchunks)
- return (EAGAIN);
-
- /* make the entry */
- chunk = zap_leaf_chunk_alloc(l);
- le = ZAP_LEAF_ENTRY(l, chunk);
- le->le_type = ZAP_CHUNK_ENTRY;
- le->le_name_chunk = zap_leaf_array_create(l, name, 1, namelen);
- le->le_name_length = namelen;
- le->le_value_chunk =
- zap_leaf_array_create(l, buf, integer_size, num_integers);
- le->le_value_length = num_integers;
- le->le_int_size = integer_size;
- le->le_hash = h;
- le->le_cd = cd;
-
- /* link it into the hash chain */
- chunkp = LEAF_HASH_ENTPTR(l, h);
- le->le_next = *chunkp;
- *chunkp = chunk;
-
- l->l_phys->l_hdr.lh_nentries++;
-
- zeh->zeh_leaf = l;
- zeh->zeh_num_integers = num_integers;
- zeh->zeh_integer_size = le->le_int_size;
- zeh->zeh_cd = le->le_cd;
- zeh->zeh_hash = le->le_hash;
- zeh->zeh_chunkp = chunkp;
-
- return (0);
-}
-
-/*
- * Routines for transferring entries between leafs.
- */
-
-static void
-zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
-{
- struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
- uint16_t *ptr = LEAF_HASH_ENTPTR(l, le->le_hash);
- le->le_next = *ptr;
- *ptr = entry;
-}
-
-static uint16_t
-zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
-{
- uint16_t new_chunk;
- uint16_t *nchunkp = &new_chunk;
-
- while (chunk != CHAIN_END) {
- uint16_t nchunk = zap_leaf_chunk_alloc(nl);
- struct zap_leaf_array *nla =
- &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
- struct zap_leaf_array *la =
- &ZAP_LEAF_CHUNK(l, chunk).l_array;
- int nextchunk = la->la_next;
-
- ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
- ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
-
- *nla = *la; /* structure assignment */
-
- zap_leaf_chunk_free(l, chunk);
- chunk = nextchunk;
- *nchunkp = nchunk;
- nchunkp = &nla->la_next;
- }
- *nchunkp = CHAIN_END;
- return (new_chunk);
-}
-
-static void
-zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
-{
- struct zap_leaf_entry *le, *nle;
- uint16_t chunk;
-
- le = ZAP_LEAF_ENTRY(l, entry);
- ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
-
- chunk = zap_leaf_chunk_alloc(nl);
- nle = ZAP_LEAF_ENTRY(nl, chunk);
- *nle = *le; /* structure assignment */
-
- zap_leaf_rehash_entry(nl, chunk);
-
- nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
- nle->le_value_chunk =
- zap_leaf_transfer_array(l, le->le_value_chunk, nl);
-
- zap_leaf_chunk_free(l, entry);
-
- l->l_phys->l_hdr.lh_nentries--;
- nl->l_phys->l_hdr.lh_nentries++;
-}
-
-/*
- * Transfer the entries whose hash prefix ends in 1 to the new leaf.
- */
-void
-zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl)
-{
- int i;
- int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
-
- /* set new prefix and prefix_len */
- l->l_phys->l_hdr.lh_prefix <<= 1;
- l->l_phys->l_hdr.lh_prefix_len++;
- nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
- nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
-
- /* break existing hash chains */
- zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
-
- /*
- * Transfer entries whose hash bit 'bit' is set to nl; rehash
- * the remaining entries
- *
- * NB: We could find entries via the hashtable instead. That
- * would be O(hashents+numents) rather than O(numblks+numents),
- * but this accesses memory more sequentially, and when we're
- * called, the block is usually pretty full.
- */
- for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
- struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
- if (le->le_type != ZAP_CHUNK_ENTRY)
- continue;
-
- if (le->le_hash & (1ULL << bit))
- zap_leaf_transfer_entry(l, i, nl);
- else
- zap_leaf_rehash_entry(l, i);
- }
-}
-
-void
-zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
-{
- int i, n;
-
- n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
- l->l_phys->l_hdr.lh_prefix_len;
- n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
- zs->zs_leafs_with_2n_pointers[n]++;
-
-
- n = l->l_phys->l_hdr.lh_nentries/5;
- n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
- zs->zs_blocks_with_n5_entries[n]++;
-
- n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
- l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
- (1<<FZAP_BLOCK_SHIFT(zap));
- n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
- zs->zs_blocks_n_tenths_full[n]++;
-
- for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
- int nentries = 0;
- int chunk = l->l_phys->l_hash[i];
-
- while (chunk != CHAIN_END) {
- struct zap_leaf_entry *le =
- ZAP_LEAF_ENTRY(l, chunk);
-
- n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
- ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
- le->le_int_size);
- n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
- zs->zs_entries_using_n_chunks[n]++;
-
- chunk = le->le_next;
- nentries++;
- }
-
- n = nentries;
- n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
- zs->zs_buckets_with_n_entries[n]++;
- }
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
deleted file mode 100644
index 9a882a5..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
+++ /dev/null
@@ -1,857 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/zfs_context.h>
-#include <sys/zap.h>
-#include <sys/refcount.h>
-#include <sys/zap_impl.h>
-#include <sys/zap_leaf.h>
-#include <sys/avl.h>
-
-
-static void mzap_upgrade(zap_t *zap, dmu_tx_t *tx);
-
-
-static void
-mzap_byteswap(mzap_phys_t *buf, size_t size)
-{
- int i, max;
- buf->mz_block_type = BSWAP_64(buf->mz_block_type);
- buf->mz_salt = BSWAP_64(buf->mz_salt);
- max = (size / MZAP_ENT_LEN) - 1;
- for (i = 0; i < max; i++) {
- buf->mz_chunk[i].mze_value =
- BSWAP_64(buf->mz_chunk[i].mze_value);
- buf->mz_chunk[i].mze_cd =
- BSWAP_32(buf->mz_chunk[i].mze_cd);
- }
-}
-
-void
-zap_byteswap(void *buf, size_t size)
-{
- uint64_t block_type;
-
- block_type = *(uint64_t *)buf;
-
- if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
- /* ASSERT(magic == ZAP_LEAF_MAGIC); */
- mzap_byteswap(buf, size);
- } else {
- fzap_byteswap(buf, size);
- }
-}
-
-static int
-mze_compare(const void *arg1, const void *arg2)
-{
- const mzap_ent_t *mze1 = arg1;
- const mzap_ent_t *mze2 = arg2;
-
- if (mze1->mze_hash > mze2->mze_hash)
- return (+1);
- if (mze1->mze_hash < mze2->mze_hash)
- return (-1);
- if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd)
- return (+1);
- if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd)
- return (-1);
- return (0);
-}
-
-static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep)
-{
- mzap_ent_t *mze;
-
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- ASSERT(mzep->mze_cd < ZAP_MAXCD);
- ASSERT3U(zap_hash(zap, mzep->mze_name), ==, hash);
-
- mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
- mze->mze_chunkid = chunkid;
- mze->mze_hash = hash;
- mze->mze_phys = *mzep;
- avl_add(&zap->zap_m.zap_avl, mze);
-}
-
-static mzap_ent_t *
-mze_find(zap_t *zap, const char *name, uint64_t hash)
-{
- mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
-
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- ASSERT3U(zap_hash(zap, name), ==, hash);
-
- if (strlen(name) >= sizeof (mze_tofind.mze_phys.mze_name))
- return (NULL);
-
- mze_tofind.mze_hash = hash;
- mze_tofind.mze_phys.mze_cd = 0;
-
- mze = avl_find(avl, &mze_tofind, &idx);
- if (mze == NULL)
- mze = avl_nearest(avl, idx, AVL_AFTER);
- for (; mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
- if (strcmp(name, mze->mze_phys.mze_name) == 0)
- return (mze);
- }
- return (NULL);
-}
-
-static uint32_t
-mze_find_unused_cd(zap_t *zap, uint64_t hash)
-{
- mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
- uint32_t cd;
-
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
- mze_tofind.mze_hash = hash;
- mze_tofind.mze_phys.mze_cd = 0;
-
- cd = 0;
- for (mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
- if (mze->mze_phys.mze_cd != cd)
- break;
- cd++;
- }
-
- return (cd);
-}
-
-static void
-mze_remove(zap_t *zap, mzap_ent_t *mze)
-{
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- avl_remove(&zap->zap_m.zap_avl, mze);
- kmem_free(mze, sizeof (mzap_ent_t));
-}
-
-static void
-mze_destroy(zap_t *zap)
-{
- mzap_ent_t *mze;
- void *avlcookie = NULL;
-
- while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
- kmem_free(mze, sizeof (mzap_ent_t));
- avl_destroy(&zap->zap_m.zap_avl);
-}
-
-static zap_t *
-mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
-{
- zap_t *winner;
- zap_t *zap;
- int i;
-
- ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
-
- zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
- rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, 0);
- rw_enter(&zap->zap_rwlock, RW_WRITER);
- zap->zap_objset = os;
- zap->zap_object = obj;
- zap->zap_dbuf = db;
-
- if (((uint64_t *)db->db_data)[0] != ZBT_MICRO) {
- mutex_init(&zap->zap_f.zap_num_entries_mtx, NULL,
- MUTEX_DEFAULT, 0);
- zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
- } else {
- zap->zap_ismicro = TRUE;
- }
-
- /*
- * Make sure that zap_ismicro is set before we let others see
- * it, because zap_lockdir() checks zap_ismicro without the lock
- * held.
- */
- winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
-
- if (winner != NULL) {
- rw_exit(&zap->zap_rwlock);
- rw_destroy(&zap->zap_rwlock);
- if (!zap->zap_ismicro)
- mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
- kmem_free(zap, sizeof (zap_t));
- return (winner);
- }
-
- if (zap->zap_ismicro) {
- zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
- zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
- avl_create(&zap->zap_m.zap_avl, mze_compare,
- sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
-
- for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
- mzap_ent_phys_t *mze =
- &zap->zap_m.zap_phys->mz_chunk[i];
- if (mze->mze_name[0]) {
- zap->zap_m.zap_num_entries++;
- mze_insert(zap, i,
- zap_hash(zap, mze->mze_name), mze);
- }
- }
- } else {
- zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
-
- ASSERT3U(sizeof (struct zap_leaf_header), ==,
- 2*ZAP_LEAF_CHUNKSIZE);
-
- /*
- * The embedded pointer table should not overlap the
- * other members.
- */
- ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
- &zap->zap_f.zap_phys->zap_salt);
-
- /*
- * The embedded pointer table should end at the end of
- * the block
- */
- ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
- 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
- (uintptr_t)zap->zap_f.zap_phys, ==,
- zap->zap_dbuf->db_size);
- }
- rw_exit(&zap->zap_rwlock);
- return (zap);
-}
-
-int
-zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
- krw_t lti, int fatreader, zap_t **zapp)
-{
- zap_t *zap;
- dmu_buf_t *db;
- krw_t lt;
- int err;
-
- *zapp = NULL;
-
- err = dmu_buf_hold(os, obj, 0, NULL, &db);
- if (err)
- return (err);
-
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(db, &doi);
- ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
- }
-#endif
-
- zap = dmu_buf_get_user(db);
- if (zap == NULL)
- zap = mzap_open(os, obj, db);
-
- /*
- * We're checking zap_ismicro without the lock held, in order to
- * tell what type of lock we want. Once we have some sort of
- * lock, see if it really is the right type. In practice this
- * can only be different if it was upgraded from micro to fat,
- * and micro wanted WRITER but fat only needs READER.
- */
- lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
- rw_enter(&zap->zap_rwlock, lt);
- if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
- /* it was upgraded, now we only need reader */
- ASSERT(lt == RW_WRITER);
- ASSERT(RW_READER ==
- (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
- rw_downgrade(&zap->zap_rwlock);
- lt = RW_READER;
- }
-
- zap->zap_objset = os;
-
- if (lt == RW_WRITER)
- dmu_buf_will_dirty(db, tx);
-
- ASSERT3P(zap->zap_dbuf, ==, db);
-
- ASSERT(!zap->zap_ismicro ||
- zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
- if (zap->zap_ismicro && tx &&
- zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
- uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
- if (newsz > MZAP_MAX_BLKSZ) {
- dprintf("upgrading obj %llu: num_entries=%u\n",
- obj, zap->zap_m.zap_num_entries);
- mzap_upgrade(zap, tx);
- *zapp = zap;
- return (0);
- }
- err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
- ASSERT3U(err, ==, 0);
- zap->zap_m.zap_num_chunks =
- db->db_size / MZAP_ENT_LEN - 1;
- }
-
- *zapp = zap;
- return (0);
-}
-
-void
-zap_unlockdir(zap_t *zap)
-{
- rw_exit(&zap->zap_rwlock);
- dmu_buf_rele(zap->zap_dbuf, NULL);
-}
-
-static void
-mzap_upgrade(zap_t *zap, dmu_tx_t *tx)
-{
- mzap_phys_t *mzp;
- int i, sz, nchunks, err;
-
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- sz = zap->zap_dbuf->db_size;
- mzp = kmem_alloc(sz, KM_SLEEP);
- bcopy(zap->zap_dbuf->db_data, mzp, sz);
- nchunks = zap->zap_m.zap_num_chunks;
-
- err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
- 1ULL << fzap_default_block_shift, 0, tx);
- ASSERT(err == 0);
-
- dprintf("upgrading obj=%llu with %u chunks\n",
- zap->zap_object, nchunks);
- mze_destroy(zap);
-
- fzap_upgrade(zap, tx);
-
- for (i = 0; i < nchunks; i++) {
- int err;
- mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
- if (mze->mze_name[0] == 0)
- continue;
- dprintf("adding %s=%llu\n",
- mze->mze_name, mze->mze_value);
- err = fzap_add_cd(zap,
- mze->mze_name, 8, 1, &mze->mze_value,
- mze->mze_cd, tx);
- ASSERT3U(err, ==, 0);
- }
- kmem_free(mzp, sz);
-}
-
-uint64_t
-zap_hash(zap_t *zap, const char *name)
-{
- const uint8_t *cp;
- uint8_t c;
- uint64_t crc = zap->zap_salt;
-
- ASSERT(crc != 0);
- ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
- for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF];
-
- /*
- * Only use 28 bits, since we need 4 bits in the cookie for the
- * collision differentiator. We MUST use the high bits, since
- * those are the onces that we first pay attention to when
- * chosing the bucket.
- */
- crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
-
- return (crc);
-}
-
-
-static void
-mzap_create_impl(objset_t *os, uint64_t obj, dmu_tx_t *tx)
-{
- dmu_buf_t *db;
- mzap_phys_t *zp;
-
- VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db));
-
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(db, &doi);
- ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
- }
-#endif
-
- dmu_buf_will_dirty(db, tx);
- zp = db->db_data;
- zp->mz_block_type = ZBT_MICRO;
- zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
- ASSERT(zp->mz_salt != 0);
- dmu_buf_rele(db, FTAG);
-}
-
-int
-zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
- int err;
-
- err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
- if (err != 0)
- return (err);
- mzap_create_impl(os, obj, tx);
- return (0);
-}
-
-uint64_t
-zap_create(objset_t *os, dmu_object_type_t ot,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
- uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
-
- mzap_create_impl(os, obj, tx);
- return (obj);
-}
-
-int
-zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
-{
- /*
- * dmu_object_free will free the object number and free the
- * data. Freeing the data will cause our pageout function to be
- * called, which will destroy our data (zap_leaf_t's and zap_t).
- */
-
- return (dmu_object_free(os, zapobj, tx));
-}
-
-_NOTE(ARGSUSED(0))
-void
-zap_evict(dmu_buf_t *db, void *vzap)
-{
- zap_t *zap = vzap;
-
- rw_destroy(&zap->zap_rwlock);
-
- if (zap->zap_ismicro)
- mze_destroy(zap);
- else
- mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
-
- kmem_free(zap, sizeof (zap_t));
-}
-
-int
-zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
-{
- zap_t *zap;
- int err;
-
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap);
- if (err)
- return (err);
- if (!zap->zap_ismicro) {
- err = fzap_count(zap, count);
- } else {
- *count = zap->zap_m.zap_num_entries;
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-/*
- * Routines for maniplulating attributes.
- */
-
-int
-zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
- uint64_t integer_size, uint64_t num_integers, void *buf)
-{
- zap_t *zap;
- int err;
- mzap_ent_t *mze;
-
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap);
- if (err)
- return (err);
- if (!zap->zap_ismicro) {
- err = fzap_lookup(zap, name,
- integer_size, num_integers, buf);
- } else {
- mze = mze_find(zap, name, zap_hash(zap, name));
- if (mze == NULL) {
- err = ENOENT;
- } else {
- if (num_integers < 1)
- err = EOVERFLOW;
- else if (integer_size != 8)
- err = EINVAL;
- else
- *(uint64_t *)buf = mze->mze_phys.mze_value;
- }
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-int
-zap_length(objset_t *os, uint64_t zapobj, const char *name,
- uint64_t *integer_size, uint64_t *num_integers)
-{
- zap_t *zap;
- int err;
- mzap_ent_t *mze;
-
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap);
- if (err)
- return (err);
- if (!zap->zap_ismicro) {
- err = fzap_length(zap, name, integer_size, num_integers);
- } else {
- mze = mze_find(zap, name, zap_hash(zap, name));
- if (mze == NULL) {
- err = ENOENT;
- } else {
- if (integer_size)
- *integer_size = 8;
- if (num_integers)
- *num_integers = 1;
- }
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-static void
-mzap_addent(zap_t *zap, const char *name, uint64_t hash, uint64_t value)
-{
- int i;
- int start = zap->zap_m.zap_alloc_next;
- uint32_t cd;
-
- dprintf("obj=%llu %s=%llu\n", zap->zap_object, name, value);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-#ifdef ZFS_DEBUG
- for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
- mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
- ASSERT(strcmp(name, mze->mze_name) != 0);
- }
-#endif
-
- cd = mze_find_unused_cd(zap, hash);
- /* given the limited size of the microzap, this can't happen */
- ASSERT(cd != ZAP_MAXCD);
-
-again:
- for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
- mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
- if (mze->mze_name[0] == 0) {
- mze->mze_value = value;
- mze->mze_cd = cd;
- (void) strcpy(mze->mze_name, name);
- zap->zap_m.zap_num_entries++;
- zap->zap_m.zap_alloc_next = i+1;
- if (zap->zap_m.zap_alloc_next ==
- zap->zap_m.zap_num_chunks)
- zap->zap_m.zap_alloc_next = 0;
- mze_insert(zap, i, hash, mze);
- return;
- }
- }
- if (start != 0) {
- start = 0;
- goto again;
- }
- ASSERT(!"out of entries!");
-}
-
-int
-zap_add(objset_t *os, uint64_t zapobj, const char *name,
- int integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx)
-{
- zap_t *zap;
- int err;
- mzap_ent_t *mze;
- const uint64_t *intval = val;
- uint64_t hash;
-
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap);
- if (err)
- return (err);
- if (!zap->zap_ismicro) {
- err = fzap_add(zap, name, integer_size, num_integers, val, tx);
- } else if (integer_size != 8 || num_integers != 1 ||
- strlen(name) >= MZAP_NAME_LEN) {
- dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
- zapobj, integer_size, num_integers, name);
- mzap_upgrade(zap, tx);
- err = fzap_add(zap, name, integer_size, num_integers, val, tx);
- } else {
- hash = zap_hash(zap, name);
- mze = mze_find(zap, name, hash);
- if (mze != NULL) {
- err = EEXIST;
- } else {
- mzap_addent(zap, name, hash, *intval);
- }
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-int
-zap_update(objset_t *os, uint64_t zapobj, const char *name,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
-{
- zap_t *zap;
- mzap_ent_t *mze;
- const uint64_t *intval = val;
- uint64_t hash;
- int err;
-
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap);
- if (err)
- return (err);
- ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- if (!zap->zap_ismicro) {
- err = fzap_update(zap, name,
- integer_size, num_integers, val, tx);
- } else if (integer_size != 8 || num_integers != 1 ||
- strlen(name) >= MZAP_NAME_LEN) {
- dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
- zapobj, integer_size, num_integers, name);
- mzap_upgrade(zap, tx);
- err = fzap_update(zap, name,
- integer_size, num_integers, val, tx);
- } else {
- hash = zap_hash(zap, name);
- mze = mze_find(zap, name, hash);
- if (mze != NULL) {
- mze->mze_phys.mze_value = *intval;
- zap->zap_m.zap_phys->mz_chunk
- [mze->mze_chunkid].mze_value = *intval;
- } else {
- mzap_addent(zap, name, hash, *intval);
- }
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-int
-zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
-{
- zap_t *zap;
- int err;
- mzap_ent_t *mze;
-
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, &zap);
- if (err)
- return (err);
- if (!zap->zap_ismicro) {
- err = fzap_remove(zap, name, tx);
- } else {
- mze = mze_find(zap, name, zap_hash(zap, name));
- if (mze == NULL) {
- dprintf("fail: %s\n", name);
- err = ENOENT;
- } else {
- dprintf("success: %s\n", name);
- zap->zap_m.zap_num_entries--;
- bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
- sizeof (mzap_ent_phys_t));
- mze_remove(zap, mze);
- }
- }
- zap_unlockdir(zap);
- return (err);
-}
-
-
-/*
- * Routines for iterating over the attributes.
- */
-
-/*
- * We want to keep the high 32 bits of the cursor zero if we can, so
- * that 32-bit programs can access this. So use a small hash value so
- * we can fit 4 bits of cd into the 32-bit cursor.
- *
- * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ]
- */
-void
-zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
- uint64_t serialized)
-{
- zc->zc_objset = os;
- zc->zc_zap = NULL;
- zc->zc_leaf = NULL;
- zc->zc_zapobj = zapobj;
- if (serialized == -1ULL) {
- zc->zc_hash = -1ULL;
- zc->zc_cd = 0;
- } else {
- zc->zc_hash = serialized << (64-ZAP_HASHBITS);
- zc->zc_cd = serialized >> ZAP_HASHBITS;
- if (zc->zc_cd >= ZAP_MAXCD) /* corrupt serialized */
- zc->zc_cd = 0;
- }
-}
-
-void
-zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
-{
- zap_cursor_init_serialized(zc, os, zapobj, 0);
-}
-
-void
-zap_cursor_fini(zap_cursor_t *zc)
-{
- if (zc->zc_zap) {
- rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
- zap_unlockdir(zc->zc_zap);
- zc->zc_zap = NULL;
- }
- if (zc->zc_leaf) {
- rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
- zap_put_leaf(zc->zc_leaf);
- zc->zc_leaf = NULL;
- }
- zc->zc_objset = NULL;
-}
-
-uint64_t
-zap_cursor_serialize(zap_cursor_t *zc)
-{
- if (zc->zc_hash == -1ULL)
- return (-1ULL);
- ASSERT((zc->zc_hash & (ZAP_MAXCD-1)) == 0);
- ASSERT(zc->zc_cd < ZAP_MAXCD);
- return ((zc->zc_hash >> (64-ZAP_HASHBITS)) |
- ((uint64_t)zc->zc_cd << ZAP_HASHBITS));
-}
-
-int
-zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
-{
- int err;
- avl_index_t idx;
- mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
-
- if (zc->zc_hash == -1ULL)
- return (ENOENT);
-
- if (zc->zc_zap == NULL) {
- err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
- RW_READER, TRUE, &zc->zc_zap);
- if (err)
- return (err);
- } else {
- rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
- }
- if (!zc->zc_zap->zap_ismicro) {
- err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
- } else {
- err = ENOENT;
-
- mze_tofind.mze_hash = zc->zc_hash;
- mze_tofind.mze_phys.mze_cd = zc->zc_cd;
-
- mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
- ASSERT(mze == NULL || 0 == bcmp(&mze->mze_phys,
- &zc->zc_zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
- sizeof (mze->mze_phys)));
- if (mze == NULL) {
- mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
- idx, AVL_AFTER);
- }
- if (mze) {
- za->za_integer_length = 8;
- za->za_num_integers = 1;
- za->za_first_integer = mze->mze_phys.mze_value;
- (void) strcpy(za->za_name, mze->mze_phys.mze_name);
- zc->zc_hash = mze->mze_hash;
- zc->zc_cd = mze->mze_phys.mze_cd;
- err = 0;
- } else {
- zc->zc_hash = -1ULL;
- }
- }
- rw_exit(&zc->zc_zap->zap_rwlock);
- return (err);
-}
-
-void
-zap_cursor_advance(zap_cursor_t *zc)
-{
- if (zc->zc_hash == -1ULL)
- return;
- zc->zc_cd++;
- if (zc->zc_cd >= ZAP_MAXCD) {
- zc->zc_cd = 0;
- zc->zc_hash += 1ULL<<(64-ZAP_HASHBITS);
- if (zc->zc_hash == 0) /* EOF */
- zc->zc_hash = -1ULL;
- }
-}
-
-int
-zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
-{
- int err;
- zap_t *zap;
-
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, &zap);
- if (err)
- return (err);
-
- bzero(zs, sizeof (zap_stats_t));
-
- if (zap->zap_ismicro) {
- zs->zs_blocksize = zap->zap_dbuf->db_size;
- zs->zs_num_entries = zap->zap_m.zap_num_entries;
- zs->zs_num_blocks = 1;
- } else {
- fzap_get_stats(zap, zs);
- }
- zap_unlockdir(zap);
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs.conf b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs.conf
deleted file mode 100644
index 0988190..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs.conf
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# ident "%Z%%M% %I% %E% SMI"
-#
-name="zfs" parent="pseudo";
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
deleted file mode 100644
index dd94618..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
+++ /dev/null
@@ -1,1608 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/resource.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/unistd.h>
-#include <sys/sdt.h>
-#include <sys/fs/zfs.h>
-#include <sys/policy.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/dmu.h>
-#include <sys/zap.h>
-#include <acl/acl_common.h>
-
-#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE
-#define DENY ACE_ACCESS_DENIED_ACE_TYPE
-
-#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP)
-#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
- ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
-#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
- ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
-#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
- ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
-#define WRITE_MASK (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS| \
- ACE_WRITE_ATTRIBUTES|ACE_WRITE_ACL|ACE_WRITE_OWNER)
-
-#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
- ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
-
-#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
- ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
-
-#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
- ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE)
-
-#define SECURE_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER)
-
-#define OGE_PAD 6 /* traditional owner/group/everyone ACES */
-
-static int zfs_ace_can_use(znode_t *zp, ace_t *);
-
-static zfs_acl_t *
-zfs_acl_alloc(int slots)
-{
- zfs_acl_t *aclp;
-
- aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
- if (slots != 0) {
- aclp->z_acl = kmem_alloc(ZFS_ACL_SIZE(slots), KM_SLEEP);
- aclp->z_acl_count = 0;
- aclp->z_state = ACL_DATA_ALLOCED;
- } else {
- aclp->z_state = 0;
- }
- aclp->z_slots = slots;
- return (aclp);
-}
-
-void
-zfs_acl_free(zfs_acl_t *aclp)
-{
- if (aclp->z_state == ACL_DATA_ALLOCED) {
- kmem_free(aclp->z_acl, ZFS_ACL_SIZE(aclp->z_slots));
- }
- kmem_free(aclp, sizeof (zfs_acl_t));
-}
-
-static uint32_t
-zfs_v4_to_unix(uint32_t access_mask)
-{
- uint32_t new_mask = 0;
-
- /*
- * This is used for mapping v4 permissions into permissions
- * that can be passed to secpolicy_vnode_access()
- */
- if (access_mask & (ACE_READ_DATA | ACE_LIST_DIRECTORY |
- ACE_READ_ATTRIBUTES | ACE_READ_ACL))
- new_mask |= S_IROTH;
- if (access_mask & (ACE_WRITE_DATA | ACE_APPEND_DATA |
- ACE_WRITE_ATTRIBUTES | ACE_ADD_FILE | ACE_WRITE_NAMED_ATTRS))
- new_mask |= S_IWOTH;
- if (access_mask & (ACE_EXECUTE | ACE_READ_NAMED_ATTRS))
- new_mask |= S_IXOTH;
-
- return (new_mask);
-}
-
-/*
- * Convert unix access mask to v4 access mask
- */
-static uint32_t
-zfs_unix_to_v4(uint32_t access_mask)
-{
- uint32_t new_mask = 0;
-
- if (access_mask & 01)
- new_mask |= (ACE_EXECUTE);
- if (access_mask & 02) {
- new_mask |= (ACE_WRITE_DATA);
- } if (access_mask & 04) {
- new_mask |= ACE_READ_DATA;
- }
- return (new_mask);
-}
-
-static void
-zfs_set_ace(ace_t *zacep, uint32_t access_mask, int access_type,
- uid_t uid, int entry_type)
-{
- zacep->a_access_mask = access_mask;
- zacep->a_type = access_type;
- zacep->a_who = uid;
- zacep->a_flags = entry_type;
-}
-
-static uint64_t
-zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
-{
- int i;
- int entry_type;
- mode_t mode = (zp->z_phys->zp_mode &
- (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
- mode_t seen = 0;
- ace_t *acep;
-
- for (i = 0, acep = aclp->z_acl;
- i != aclp->z_acl_count; i++, acep++) {
- entry_type = (acep->a_flags & ACE_TYPE_FLAGS);
- if (entry_type == ACE_OWNER) {
- if ((acep->a_access_mask & ACE_READ_DATA) &&
- (!(seen & S_IRUSR))) {
- seen |= S_IRUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IRUSR;
- }
- }
- if ((acep->a_access_mask & ACE_WRITE_DATA) &&
- (!(seen & S_IWUSR))) {
- seen |= S_IWUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IWUSR;
- }
- }
- if ((acep->a_access_mask & ACE_EXECUTE) &&
- (!(seen & S_IXUSR))) {
- seen |= S_IXUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IXUSR;
- }
- }
- } else if (entry_type == OWNING_GROUP) {
- if ((acep->a_access_mask & ACE_READ_DATA) &&
- (!(seen & S_IRGRP))) {
- seen |= S_IRGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IRGRP;
- }
- }
- if ((acep->a_access_mask & ACE_WRITE_DATA) &&
- (!(seen & S_IWGRP))) {
- seen |= S_IWGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IWGRP;
- }
- }
- if ((acep->a_access_mask & ACE_EXECUTE) &&
- (!(seen & S_IXGRP))) {
- seen |= S_IXGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IXGRP;
- }
- }
- } else if (entry_type == ACE_EVERYONE) {
- if ((acep->a_access_mask & ACE_READ_DATA)) {
- if (!(seen & S_IRUSR)) {
- seen |= S_IRUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IRUSR;
- }
- }
- if (!(seen & S_IRGRP)) {
- seen |= S_IRGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IRGRP;
- }
- }
- if (!(seen & S_IROTH)) {
- seen |= S_IROTH;
- if (acep->a_type == ALLOW) {
- mode |= S_IROTH;
- }
- }
- }
- if ((acep->a_access_mask & ACE_WRITE_DATA)) {
- if (!(seen & S_IWUSR)) {
- seen |= S_IWUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IWUSR;
- }
- }
- if (!(seen & S_IWGRP)) {
- seen |= S_IWGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IWGRP;
- }
- }
- if (!(seen & S_IWOTH)) {
- seen |= S_IWOTH;
- if (acep->a_type == ALLOW) {
- mode |= S_IWOTH;
- }
- }
- }
- if ((acep->a_access_mask & ACE_EXECUTE)) {
- if (!(seen & S_IXUSR)) {
- seen |= S_IXUSR;
- if (acep->a_type == ALLOW) {
- mode |= S_IXUSR;
- }
- }
- if (!(seen & S_IXGRP)) {
- seen |= S_IXGRP;
- if (acep->a_type == ALLOW) {
- mode |= S_IXGRP;
- }
- }
- if (!(seen & S_IXOTH)) {
- seen |= S_IXOTH;
- if (acep->a_type == ALLOW) {
- mode |= S_IXOTH;
- }
- }
- }
- }
- }
- return (mode);
-}
-
-static zfs_acl_t *
-zfs_acl_node_read_internal(znode_t *zp)
-{
- zfs_acl_t *aclp;
-
- aclp = zfs_acl_alloc(0);
- aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
- aclp->z_acl = &zp->z_phys->zp_acl.z_ace_data[0];
-
- return (aclp);
-}
-
-/*
- * Read an external acl object.
- */
-static int
-zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp)
-{
- uint64_t extacl = zp->z_phys->zp_acl.z_acl_extern_obj;
- zfs_acl_t *aclp;
- int error;
-
- ASSERT(MUTEX_HELD(&zp->z_acl_lock));
-
- if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
- *aclpp = zfs_acl_node_read_internal(zp);
- return (0);
- }
-
- aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_count);
-
- error = dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
- ZFS_ACL_SIZE(zp->z_phys->zp_acl.z_acl_count), aclp->z_acl);
- if (error != 0) {
- zfs_acl_free(aclp);
- return (error);
- }
-
- aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
-
- *aclpp = aclp;
- return (0);
-}
-
-static boolean_t
-zfs_acl_valid(znode_t *zp, ace_t *uace, int aclcnt, int *inherit)
-{
- ace_t *acep;
- int i;
-
- *inherit = 0;
-
- if (aclcnt > MAX_ACL_ENTRIES || aclcnt <= 0) {
- return (B_FALSE);
- }
-
- for (i = 0, acep = uace; i != aclcnt; i++, acep++) {
-
- /*
- * first check type of entry
- */
-
- switch (acep->a_flags & ACE_TYPE_FLAGS) {
- case ACE_OWNER:
- acep->a_who = -1;
- break;
- case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
- case ACE_IDENTIFIER_GROUP:
- if (acep->a_flags & ACE_GROUP) {
- acep->a_who = -1;
- }
- break;
- case ACE_EVERYONE:
- acep->a_who = -1;
- break;
- }
-
- /*
- * next check inheritance level flags
- */
-
- if (acep->a_type != ALLOW && acep->a_type != DENY)
- return (B_FALSE);
-
- /*
- * Only directories should have inheritance flags.
- */
- if (ZTOV(zp)->v_type != VDIR && (acep->a_flags &
- (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE|
- ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE))) {
- return (B_FALSE);
- }
-
- if (acep->a_flags &
- (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))
- *inherit = 1;
-
- if (acep->a_flags &
- (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
- if ((acep->a_flags & (ACE_FILE_INHERIT_ACE|
- ACE_DIRECTORY_INHERIT_ACE)) == 0) {
- return (B_FALSE);
- }
- }
- }
-
- return (B_TRUE);
-}
-/*
- * common code for setting acl's.
- *
- * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
- * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's
- * already checked the acl and knows whether to inherit.
- */
-int
-zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, dmu_tx_t *tx, int *ihp)
-{
- int inherit = 0;
- int error;
- znode_phys_t *zphys = zp->z_phys;
- zfs_znode_acl_t *zacl = &zphys->zp_acl;
- uint32_t acl_phys_size = ZFS_ACL_SIZE(aclp->z_acl_count);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- uint64_t aoid = zphys->zp_acl.z_acl_extern_obj;
-
- ASSERT(MUTEX_HELD(&zp->z_lock));
- ASSERT(MUTEX_HELD(&zp->z_acl_lock));
-
- if (ihp)
- inherit = *ihp; /* already determined by caller */
- else if (!zfs_acl_valid(zp, aclp->z_acl,
- aclp->z_acl_count, &inherit)) {
- return (EINVAL);
- }
-
- dmu_buf_will_dirty(zp->z_dbuf, tx);
-
- /*
- * Will ACL fit internally?
- */
- if (aclp->z_acl_count > ACE_SLOT_CNT) {
- if (aoid == 0) {
- aoid = dmu_object_alloc(zfsvfs->z_os,
- DMU_OT_ACL, acl_phys_size, DMU_OT_NONE, 0, tx);
- } else {
- (void) dmu_object_set_blocksize(zfsvfs->z_os, aoid,
- acl_phys_size, 0, tx);
- }
- zphys->zp_acl.z_acl_extern_obj = aoid;
- zphys->zp_acl.z_acl_count = aclp->z_acl_count;
- dmu_write(zfsvfs->z_os, aoid, 0,
- acl_phys_size, aclp->z_acl, tx);
- } else {
- /*
- * Migrating back embedded?
- */
- if (zphys->zp_acl.z_acl_extern_obj) {
- error = dmu_object_free(zfsvfs->z_os,
- zp->z_phys->zp_acl.z_acl_extern_obj, tx);
- if (error)
- return (error);
- zphys->zp_acl.z_acl_extern_obj = 0;
- }
- bcopy(aclp->z_acl, zacl->z_ace_data,
- aclp->z_acl_count * sizeof (ace_t));
- zacl->z_acl_count = aclp->z_acl_count;
- }
-
- zp->z_phys->zp_flags &= ~(ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE);
- if (inherit) {
- zp->z_phys->zp_flags |= ZFS_INHERIT_ACE;
- } else if (ace_trivial(zacl->z_ace_data, zacl->z_acl_count) == 0) {
- zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;
- }
-
- zphys->zp_mode = zfs_mode_compute(zp, aclp);
- zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
-
- return (0);
-}
-
-/*
- * Create space for slots_needed ACEs to be append
- * to aclp.
- */
-static void
-zfs_acl_append(zfs_acl_t *aclp, int slots_needed)
-{
- ace_t *newacep;
- ace_t *oldaclp;
- int slot_cnt;
- int slots_left = aclp->z_slots - aclp->z_acl_count;
-
- if (aclp->z_state == ACL_DATA_ALLOCED)
- ASSERT(aclp->z_slots >= aclp->z_acl_count);
- if (slots_left < slots_needed || aclp->z_state != ACL_DATA_ALLOCED) {
- slot_cnt = aclp->z_slots + 1 + (slots_needed - slots_left);
- newacep = kmem_alloc(ZFS_ACL_SIZE(slot_cnt), KM_SLEEP);
- bcopy(aclp->z_acl, newacep,
- ZFS_ACL_SIZE(aclp->z_acl_count));
- oldaclp = aclp->z_acl;
- if (aclp->z_state == ACL_DATA_ALLOCED)
- kmem_free(oldaclp, ZFS_ACL_SIZE(aclp->z_slots));
- aclp->z_acl = newacep;
- aclp->z_slots = slot_cnt;
- aclp->z_state = ACL_DATA_ALLOCED;
- }
-}
-
-/*
- * Remove "slot" ACE from aclp
- */
-static void
-zfs_ace_remove(zfs_acl_t *aclp, int slot)
-{
- if (aclp->z_acl_count > 1) {
- (void) memmove(&aclp->z_acl[slot],
- &aclp->z_acl[slot +1], sizeof (ace_t) *
- (--aclp->z_acl_count - slot));
- } else
- aclp->z_acl_count--;
-}
-
-/*
- * Update access mask for prepended ACE
- *
- * This applies the "groupmask" value for aclmode property.
- */
-static void
-zfs_acl_prepend_fixup(ace_t *acep, ace_t *origacep, mode_t mode, uid_t owner)
-{
-
- int rmask, wmask, xmask;
- int user_ace;
-
- user_ace = (!(acep->a_flags &
- (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
-
- if (user_ace && (acep->a_who == owner)) {
- rmask = S_IRUSR;
- wmask = S_IWUSR;
- xmask = S_IXUSR;
- } else {
- rmask = S_IRGRP;
- wmask = S_IWGRP;
- xmask = S_IXGRP;
- }
-
- if (origacep->a_access_mask & ACE_READ_DATA) {
- if (mode & rmask)
- acep->a_access_mask &= ~ACE_READ_DATA;
- else
- acep->a_access_mask |= ACE_READ_DATA;
- }
-
- if (origacep->a_access_mask & ACE_WRITE_DATA) {
- if (mode & wmask)
- acep->a_access_mask &= ~ACE_WRITE_DATA;
- else
- acep->a_access_mask |= ACE_WRITE_DATA;
- }
-
- if (origacep->a_access_mask & ACE_APPEND_DATA) {
- if (mode & wmask)
- acep->a_access_mask &= ~ACE_APPEND_DATA;
- else
- acep->a_access_mask |= ACE_APPEND_DATA;
- }
-
- if (origacep->a_access_mask & ACE_EXECUTE) {
- if (mode & xmask)
- acep->a_access_mask &= ~ACE_EXECUTE;
- else
- acep->a_access_mask |= ACE_EXECUTE;
- }
-}
-
-/*
- * Apply mode to canonical six ACEs.
- */
-static void
-zfs_acl_fixup_canonical_six(zfs_acl_t *aclp, mode_t mode)
-{
- int cnt;
- ace_t *acep;
-
- cnt = aclp->z_acl_count -1;
- acep = aclp->z_acl;
-
- /*
- * Fixup final ACEs to match the mode
- */
-
- ASSERT(cnt >= 5);
- adjust_ace_pair(&acep[cnt - 1], mode); /* everyone@ */
- adjust_ace_pair(&acep[cnt - 3], (mode & 0070) >> 3); /* group@ */
- adjust_ace_pair(&acep[cnt - 5], (mode & 0700) >> 6); /* owner@ */
-}
-
-
-static int
-zfs_acl_ace_match(ace_t *acep, int allow_deny, int type, int mask)
-{
- return (acep->a_access_mask == mask && acep->a_type == allow_deny &&
- ((acep->a_flags & ACE_TYPE_FLAGS) == type));
-}
-
-/*
- * Can prepended ACE be reused?
- */
-static int
-zfs_reuse_deny(ace_t *acep, int i)
-{
- int okay_masks;
-
- if (i < 1)
- return (B_FALSE);
-
- if (acep[i-1].a_type != DENY)
- return (B_FALSE);
-
- if (acep[i-1].a_flags != (acep[i].a_flags & ACE_IDENTIFIER_GROUP))
- return (B_FALSE);
-
- okay_masks = (acep[i].a_access_mask & OKAY_MASK_BITS);
-
- if (acep[i-1].a_access_mask & ~okay_masks)
- return (B_FALSE);
-
- return (B_TRUE);
-}
-
-/*
- * Create space to prepend an ACE
- */
-static void
-zfs_acl_prepend(zfs_acl_t *aclp, int i)
-{
- ace_t *oldaclp = NULL;
- ace_t *to, *from;
- int slots_left = aclp->z_slots - aclp->z_acl_count;
- int oldslots;
- int need_free = 0;
-
- if (aclp->z_state == ACL_DATA_ALLOCED)
- ASSERT(aclp->z_slots >= aclp->z_acl_count);
-
- if (slots_left == 0 || aclp->z_state != ACL_DATA_ALLOCED) {
-
- to = kmem_alloc(ZFS_ACL_SIZE(aclp->z_acl_count +
- OGE_PAD), KM_SLEEP);
- if (aclp->z_state == ACL_DATA_ALLOCED)
- need_free++;
- from = aclp->z_acl;
- oldaclp = aclp->z_acl;
- (void) memmove(to, from,
- sizeof (ace_t) * aclp->z_acl_count);
- aclp->z_state = ACL_DATA_ALLOCED;
- } else {
- from = aclp->z_acl;
- to = aclp->z_acl;
- }
-
-
- (void) memmove(&to[i + 1], &from[i],
- sizeof (ace_t) * (aclp->z_acl_count - i));
-
- if (oldaclp) {
- aclp->z_acl = to;
- oldslots = aclp->z_slots;
- aclp->z_slots = aclp->z_acl_count + OGE_PAD;
- if (need_free)
- kmem_free(oldaclp, ZFS_ACL_SIZE(oldslots));
- }
-
-}
-
-/*
- * Prepend deny ACE
- */
-static void
-zfs_acl_prepend_deny(znode_t *zp, zfs_acl_t *aclp, int i,
- mode_t mode)
-{
- ace_t *acep;
-
- zfs_acl_prepend(aclp, i);
-
- acep = aclp->z_acl;
- zfs_set_ace(&acep[i], 0, DENY, acep[i + 1].a_who,
- (acep[i + 1].a_flags & ACE_TYPE_FLAGS));
- zfs_acl_prepend_fixup(&acep[i], &acep[i+1], mode, zp->z_phys->zp_uid);
- aclp->z_acl_count++;
-}
-
-/*
- * Split an inherited ACE into inherit_only ACE
- * and original ACE with inheritance flags stripped off.
- */
-static void
-zfs_acl_split_ace(zfs_acl_t *aclp, int i)
-{
- ace_t *acep = aclp->z_acl;
-
- zfs_acl_prepend(aclp, i);
- acep = aclp->z_acl;
- acep[i] = acep[i + 1];
- acep[i].a_flags |= ACE_INHERIT_ONLY_ACE;
- acep[i + 1].a_flags &= ~ALL_INHERIT;
- aclp->z_acl_count++;
-}
-
-/*
- * Are ACES started at index i, the canonical six ACES?
- */
-static int
-zfs_have_canonical_six(zfs_acl_t *aclp, int i)
-{
- ace_t *acep = aclp->z_acl;
-
- if ((zfs_acl_ace_match(&acep[i],
- DENY, ACE_OWNER, 0) &&
- zfs_acl_ace_match(&acep[i + 1], ALLOW, ACE_OWNER,
- OWNER_ALLOW_MASK) && zfs_acl_ace_match(&acep[i + 2],
- DENY, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 3],
- ALLOW, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 4],
- DENY, ACE_EVERYONE, EVERYONE_DENY_MASK) &&
- zfs_acl_ace_match(&acep[i + 5], ALLOW, ACE_EVERYONE,
- EVERYONE_ALLOW_MASK))) {
- return (1);
- } else {
- return (0);
- }
-}
-
-/*
- * Apply step 1g, to group entries
- *
- * Need to deal with corner case where group may have
- * greater permissions than owner. If so then limit
- * group permissions, based on what extra permissions
- * group has.
- */
-static void
-zfs_fixup_group_entries(ace_t *acep, mode_t mode)
-{
- mode_t extramode = (mode >> 3) & 07;
- mode_t ownermode = (mode >> 6);
-
- if (acep[0].a_flags & ACE_IDENTIFIER_GROUP) {
-
- extramode &= ~ownermode;
-
- if (extramode) {
- if (extramode & 04) {
- acep[0].a_access_mask &= ~ACE_READ_DATA;
- acep[1].a_access_mask &= ~ACE_READ_DATA;
- }
- if (extramode & 02) {
- acep[0].a_access_mask &=
- ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
- acep[1].a_access_mask &=
- ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
- }
- if (extramode & 01) {
- acep[0].a_access_mask &= ~ACE_EXECUTE;
- acep[1].a_access_mask &= ~ACE_EXECUTE;
- }
- }
- }
-}
-
-/*
- * Apply the chmod algorithm as described
- * in PSARC/2002/240
- */
-static int
-zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp,
- dmu_tx_t *tx)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- ace_t *acep;
- int i;
- int error;
- int entry_type;
- int reuse_deny;
- int need_canonical_six = 1;
- int inherit = 0;
- int iflags;
-
- ASSERT(MUTEX_HELD(&zp->z_acl_lock));
- ASSERT(MUTEX_HELD(&zp->z_lock));
-
- i = 0;
- while (i < aclp->z_acl_count) {
- acep = aclp->z_acl;
- entry_type = (acep[i].a_flags & ACE_TYPE_FLAGS);
- iflags = (acep[i].a_flags & ALL_INHERIT);
-
- if ((acep[i].a_type != ALLOW && acep[i].a_type != DENY) ||
- (iflags & ACE_INHERIT_ONLY_ACE)) {
- i++;
- if (iflags)
- inherit = 1;
- continue;
- }
-
-
- if (zfsvfs->z_acl_mode == ZFS_ACL_DISCARD) {
- zfs_ace_remove(aclp, i);
- continue;
- }
-
- /*
- * Need to split ace into two?
- */
- if ((iflags & (ACE_FILE_INHERIT_ACE|
- ACE_DIRECTORY_INHERIT_ACE)) &&
- (!(iflags & ACE_INHERIT_ONLY_ACE))) {
- zfs_acl_split_ace(aclp, i);
- i++;
- inherit = 1;
- continue;
- }
-
- if (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
- (entry_type == OWNING_GROUP)) {
- acep[i].a_access_mask &= ~OGE_CLEAR;
- i++;
- continue;
-
- } else {
- if (acep[i].a_type == ALLOW) {
-
- /*
- * Check preceding ACE if any, to see
- * if we need to prepend a DENY ACE.
- * This is only applicable when the acl_mode
- * property == groupmask.
- */
- if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK) {
-
- reuse_deny = zfs_reuse_deny(acep, i);
-
- if (reuse_deny == B_FALSE) {
- zfs_acl_prepend_deny(zp, aclp,
- i, mode);
- i++;
- acep = aclp->z_acl;
- } else {
- zfs_acl_prepend_fixup(
- &acep[i - 1],
- &acep[i], mode,
- zp->z_phys->zp_uid);
- }
- zfs_fixup_group_entries(&acep[i - 1],
- mode);
- }
- }
- i++;
- }
- }
-
- /*
- * Check out last six aces, if we have six.
- */
-
- if (aclp->z_acl_count >= 6) {
- i = aclp->z_acl_count - 6;
-
- if (zfs_have_canonical_six(aclp, i)) {
- need_canonical_six = 0;
- }
- }
-
- if (need_canonical_six) {
-
- zfs_acl_append(aclp, 6);
- i = aclp->z_acl_count;
- acep = aclp->z_acl;
- zfs_set_ace(&acep[i++], 0, DENY, -1, ACE_OWNER);
- zfs_set_ace(&acep[i++], OWNER_ALLOW_MASK, ALLOW, -1, ACE_OWNER);
- zfs_set_ace(&acep[i++], 0, DENY, -1, OWNING_GROUP);
- zfs_set_ace(&acep[i++], 0, ALLOW, -1, OWNING_GROUP);
- zfs_set_ace(&acep[i++], EVERYONE_DENY_MASK,
- DENY, -1, ACE_EVERYONE);
- zfs_set_ace(&acep[i++], EVERYONE_ALLOW_MASK,
- ALLOW, -1, ACE_EVERYONE);
- aclp->z_acl_count += 6;
- }
-
- zfs_acl_fixup_canonical_six(aclp, mode);
-
- zp->z_phys->zp_mode = mode;
- error = zfs_aclset_common(zp, aclp, tx, &inherit);
- return (error);
-}
-
-
-int
-zfs_acl_chmod_setattr(znode_t *zp, uint64_t mode, dmu_tx_t *tx)
-{
- zfs_acl_t *aclp = NULL;
- int error;
-
- ASSERT(MUTEX_HELD(&zp->z_lock));
- mutex_enter(&zp->z_acl_lock);
- error = zfs_acl_node_read(zp, &aclp);
- if (error == 0)
- error = zfs_acl_chmod(zp, mode, aclp, tx);
- mutex_exit(&zp->z_acl_lock);
- if (aclp)
- zfs_acl_free(aclp);
- return (error);
-}
-
-/*
- * strip off write_owner and write_acl
- */
-static void
-zfs_securemode_update(zfsvfs_t *zfsvfs, ace_t *acep)
-{
- if ((zfsvfs->z_acl_inherit == ZFS_ACL_SECURE) &&
- (acep->a_type == ALLOW))
- acep->a_access_mask &= ~SECURE_CLEAR;
-}
-
-/*
- * inherit inheritable ACEs from parent
- */
-static zfs_acl_t *
-zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- ace_t *pacep;
- ace_t *acep;
- int ace_cnt = 0;
- int pace_cnt;
- int i, j;
- zfs_acl_t *aclp = NULL;
-
- i = j = 0;
- pace_cnt = paclp->z_acl_count;
- pacep = paclp->z_acl;
- if (zfsvfs->z_acl_inherit != ZFS_ACL_DISCARD) {
- for (i = 0; i != pace_cnt; i++) {
-
- if (zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW &&
- pacep[i].a_type == ALLOW)
- continue;
-
- if (zfs_ace_can_use(zp, &pacep[i])) {
- ace_cnt++;
- if (!(pacep[i].a_flags &
- ACE_NO_PROPAGATE_INHERIT_ACE))
- ace_cnt++;
- }
- }
- }
-
- aclp = zfs_acl_alloc(ace_cnt + OGE_PAD);
- if (ace_cnt && zfsvfs->z_acl_inherit != ZFS_ACL_DISCARD) {
- acep = aclp->z_acl;
- pacep = paclp->z_acl;
- for (i = 0; i != pace_cnt; i++) {
-
- if (zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW &&
- pacep[i].a_type == ALLOW)
- continue;
-
- if (zfs_ace_can_use(zp, &pacep[i])) {
-
- /*
- * Now create entry for inherited ace
- */
-
- acep[j] = pacep[i];
-
- /*
- * When AUDIT/ALARM a_types are supported
- * they should be inherited here.
- */
-
- if ((pacep[i].a_flags &
- ACE_NO_PROPAGATE_INHERIT_ACE) ||
- (ZTOV(zp)->v_type != VDIR)) {
- acep[j].a_flags &= ~ALL_INHERIT;
- zfs_securemode_update(zfsvfs, &acep[j]);
- j++;
- continue;
- }
-
- ASSERT(ZTOV(zp)->v_type == VDIR);
-
- /*
- * If we are inheriting an ACE targeted for
- * only files, then make sure inherit_only
- * is on for future propagation.
- */
- if ((pacep[i].a_flags & (ACE_FILE_INHERIT_ACE |
- ACE_DIRECTORY_INHERIT_ACE)) !=
- ACE_FILE_INHERIT_ACE) {
- j++;
- acep[j] = acep[j-1];
- acep[j-1].a_flags |=
- ACE_INHERIT_ONLY_ACE;
- acep[j].a_flags &= ~ALL_INHERIT;
- } else {
- acep[j].a_flags |= ACE_INHERIT_ONLY_ACE;
- }
- zfs_securemode_update(zfsvfs, &acep[j]);
- j++;
- }
- }
- }
- aclp->z_acl_count = j;
- ASSERT(aclp->z_slots >= aclp->z_acl_count);
-
- return (aclp);
-}
-
-/*
- * Create file system object initial permissions
- * including inheritable ACEs.
- */
-void
-zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
- vattr_t *vap, dmu_tx_t *tx, cred_t *cr)
-{
- uint64_t mode;
- uid_t uid;
- gid_t gid;
- int error;
- int pull_down;
- zfs_acl_t *aclp, *paclp;
-
- mode = MAKEIMODE(vap->va_type, vap->va_mode);
-
- /*
- * Determine uid and gid.
- */
- if ((flag & (IS_ROOT_NODE | IS_REPLAY)) ||
- ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
- uid = vap->va_uid;
- gid = vap->va_gid;
- } else {
- uid = crgetuid(cr);
- if ((vap->va_mask & AT_GID) &&
- ((vap->va_gid == parent->z_phys->zp_gid) ||
- groupmember(vap->va_gid, cr) ||
- secpolicy_vnode_create_gid(cr) == 0))
- gid = vap->va_gid;
- else
-#ifdef __FreeBSD__
- gid = parent->z_phys->zp_gid;
-#else
- gid = (parent->z_phys->zp_mode & S_ISGID) ?
- parent->z_phys->zp_gid : crgetgid(cr);
-#endif
- }
-
- /*
- * If we're creating a directory, and the parent directory has the
- * set-GID bit set, set in on the new directory.
- * Otherwise, if the user is neither privileged nor a member of the
- * file's new group, clear the file's set-GID bit.
- */
-
- if ((parent->z_phys->zp_mode & S_ISGID) && (vap->va_type == VDIR))
- mode |= S_ISGID;
- else {
- if ((mode & S_ISGID) &&
- secpolicy_vnode_setids_setgids(cr, gid) != 0)
- mode &= ~S_ISGID;
- }
-
- zp->z_phys->zp_uid = uid;
- zp->z_phys->zp_gid = gid;
- zp->z_phys->zp_mode = mode;
-
- mutex_enter(&parent->z_lock);
- pull_down = (parent->z_phys->zp_flags & ZFS_INHERIT_ACE);
- if (pull_down) {
- mutex_enter(&parent->z_acl_lock);
- VERIFY(0 == zfs_acl_node_read(parent, &paclp));
- mutex_exit(&parent->z_acl_lock);
- aclp = zfs_acl_inherit(zp, paclp);
- zfs_acl_free(paclp);
- } else {
- aclp = zfs_acl_alloc(6);
- }
- mutex_exit(&parent->z_lock);
- mutex_enter(&zp->z_lock);
- mutex_enter(&zp->z_acl_lock);
- error = zfs_acl_chmod(zp, mode, aclp, tx);
- mutex_exit(&zp->z_lock);
- mutex_exit(&zp->z_acl_lock);
- ASSERT3U(error, ==, 0);
- zfs_acl_free(aclp);
-}
-
-/*
- * Should ACE be inherited?
- */
-static int
-zfs_ace_can_use(znode_t *zp, ace_t *acep)
-{
- int vtype = ZTOV(zp)->v_type;
-
- int iflags = (acep->a_flags & 0xf);
-
- if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
- return (1);
- else if (iflags & ACE_FILE_INHERIT_ACE)
- return (!((vtype == VDIR) &&
- (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
- return (0);
-}
-
-#ifdef TODO
-/*
- * Retrieve a files ACL
- */
-int
-zfs_getacl(znode_t *zp, vsecattr_t *vsecp, cred_t *cr)
-{
- zfs_acl_t *aclp;
- ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
- int error;
-
- if (error = zfs_zaccess(zp, ACE_READ_ACL, cr)) {
- /*
- * If owner of file then allow reading of the
- * ACL.
- */
- if (crgetuid(cr) != zp->z_phys->zp_uid)
- return (error);
- }
-
- if (mask == 0)
- return (ENOSYS);
-
- mutex_enter(&zp->z_acl_lock);
-
- error = zfs_acl_node_read(zp, &aclp);
- if (error != 0) {
- mutex_exit(&zp->z_acl_lock);
- return (error);
- }
-
-
- if (mask & VSA_ACECNT) {
- vsecp->vsa_aclcnt = aclp->z_acl_count;
- }
-
- if (mask & VSA_ACE) {
- vsecp->vsa_aclentp = kmem_alloc(aclp->z_acl_count *
- sizeof (ace_t), KM_SLEEP);
- bcopy(aclp->z_acl, vsecp->vsa_aclentp,
- aclp->z_acl_count * sizeof (ace_t));
- }
-
- mutex_exit(&zp->z_acl_lock);
-
- zfs_acl_free(aclp);
-
- return (0);
-}
-#endif /* TODO */
-
-#ifdef TODO
-/*
- * Set a files ACL
- */
-int
-zfs_setacl(znode_t *zp, vsecattr_t *vsecp, cred_t *cr)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- ace_t *acep = vsecp->vsa_aclentp;
- int aclcnt = vsecp->vsa_aclcnt;
- ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
- dmu_tx_t *tx;
- int error;
- int inherit;
- zfs_acl_t *aclp;
-
- if (mask == 0)
- return (EINVAL);
-
- if (!zfs_acl_valid(zp, acep, aclcnt, &inherit))
- return (EINVAL);
-top:
- error = zfs_zaccess_v4_perm(zp, ACE_WRITE_ACL, cr);
- if (error == EACCES || error == ACCESS_UNDETERMINED) {
- if ((error = secpolicy_vnode_setdac(cr,
- zp->z_phys->zp_uid)) != 0) {
- return (error);
- }
- } else if (error) {
- return (error == EROFS ? error : EPERM);
- }
-
- mutex_enter(&zp->z_lock);
- mutex_enter(&zp->z_acl_lock);
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
-
- if (zp->z_phys->zp_acl.z_acl_extern_obj) {
- dmu_tx_hold_write(tx, zp->z_phys->zp_acl.z_acl_extern_obj,
- 0, ZFS_ACL_SIZE(aclcnt));
- } else if (aclcnt > ACE_SLOT_CNT) {
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ZFS_ACL_SIZE(aclcnt));
- }
-
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- mutex_exit(&zp->z_acl_lock);
- mutex_exit(&zp->z_lock);
-
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- return (error);
- }
-
- aclp = zfs_acl_alloc(aclcnt);
- bcopy(acep, aclp->z_acl, sizeof (ace_t) * aclcnt);
- aclp->z_acl_count = aclcnt;
- error = zfs_aclset_common(zp, aclp, tx, &inherit);
- ASSERT(error == 0);
-
- zfs_acl_free(aclp);
- zfs_log_acl(zilog, tx, TX_ACL, zp, aclcnt, acep);
- dmu_tx_commit(tx);
-done:
- mutex_exit(&zp->z_acl_lock);
- mutex_exit(&zp->z_lock);
-
- return (error);
-}
-#endif /* TODO */
-
-static int
-zfs_ace_access(ace_t *zacep, int *working_mode)
-{
- if (*working_mode == 0) {
- return (0);
- }
-
- if (zacep->a_access_mask & *working_mode) {
- if (zacep->a_type == ALLOW) {
- *working_mode &=
- ~(*working_mode & zacep->a_access_mask);
- if (*working_mode == 0)
- return (0);
- } else if (zacep->a_type == DENY) {
- return (EACCES);
- }
- }
-
- /*
- * haven't been specifcally denied at this point
- * so return UNDETERMINED.
- */
-
- return (ACCESS_UNDETERMINED);
-}
-
-
-static int
-zfs_zaccess_common(znode_t *zp, int v4_mode, int *working_mode, cred_t *cr)
-{
- zfs_acl_t *aclp;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- ace_t *zacep;
- gid_t gid;
- int cnt;
- int i;
- int error;
- int access_deny = ACCESS_UNDETERMINED;
- uint_t entry_type;
- uid_t uid = crgetuid(cr);
-
- if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */
- *working_mode = 0;
- return (0);
- }
-
- *working_mode = v4_mode;
-
- if ((v4_mode & WRITE_MASK) &&
- (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
- (!IS_DEVVP(ZTOV(zp)))) {
- return (EROFS);
- }
-
- mutex_enter(&zp->z_acl_lock);
-
- error = zfs_acl_node_read(zp, &aclp);
- if (error != 0) {
- mutex_exit(&zp->z_acl_lock);
- return (error);
- }
-
-
- zacep = aclp->z_acl;
- cnt = aclp->z_acl_count;
-
- for (i = 0; i != cnt; i++) {
-
- DTRACE_PROBE2(zfs__access__common,
- ace_t *, &zacep[i], int, *working_mode);
-
- if (zacep[i].a_flags & ACE_INHERIT_ONLY_ACE)
- continue;
-
- entry_type = (zacep[i].a_flags & ACE_TYPE_FLAGS);
- switch (entry_type) {
- case ACE_OWNER:
- if (uid == zp->z_phys->zp_uid) {
- access_deny = zfs_ace_access(&zacep[i],
- working_mode);
- }
- break;
- case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
- case ACE_IDENTIFIER_GROUP:
- /*
- * Owning group gid is in znode not ACL
- */
- if (entry_type == (ACE_IDENTIFIER_GROUP | ACE_GROUP))
- gid = zp->z_phys->zp_gid;
- else
- gid = zacep[i].a_who;
-
- if (groupmember(gid, cr)) {
- access_deny = zfs_ace_access(&zacep[i],
- working_mode);
- }
- break;
- case ACE_EVERYONE:
- access_deny = zfs_ace_access(&zacep[i], working_mode);
- break;
-
- /* USER Entry */
- default:
- if (entry_type == 0) {
- if (uid == zacep[i].a_who) {
- access_deny = zfs_ace_access(&zacep[i],
- working_mode);
- }
- break;
- }
- zfs_acl_free(aclp);
- mutex_exit(&zp->z_acl_lock);
- return (EIO);
- }
-
- if (access_deny != ACCESS_UNDETERMINED)
- break;
- }
-
- mutex_exit(&zp->z_acl_lock);
- zfs_acl_free(aclp);
-
- return (access_deny);
-}
-
-
-/*
- * Determine whether Access should be granted/denied, invoking least
- * priv subsytem when a deny is determined.
- */
-int
-zfs_zaccess(znode_t *zp, int mode, cred_t *cr)
-{
- int working_mode;
- int error;
- int is_attr;
- znode_t *xzp;
- znode_t *check_zp = zp;
-
- is_attr = ((zp->z_phys->zp_flags & ZFS_XATTR) &&
- (ZTOV(zp)->v_type == VDIR));
-
- /*
- * If attribute then validate against base file
- */
- if (is_attr) {
- if ((error = zfs_zget(zp->z_zfsvfs,
- zp->z_phys->zp_parent, &xzp)) != 0) {
- return (error);
- }
- check_zp = xzp;
- /*
- * fixup mode to map to xattr perms
- */
-
- if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
- mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
- mode |= ACE_WRITE_NAMED_ATTRS;
- }
-
- if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
- mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
- mode |= ACE_READ_NAMED_ATTRS;
- }
- }
-
- error = zfs_zaccess_common(check_zp, mode, &working_mode, cr);
-
- if (error == EROFS) {
- if (is_attr)
- VN_RELE(ZTOV(xzp));
- return (error);
- }
-
- if (error || working_mode) {
- working_mode = (zfs_v4_to_unix(working_mode) << 6);
- error = secpolicy_vnode_access(cr, ZTOV(check_zp),
- check_zp->z_phys->zp_uid, working_mode);
- }
-
- if (is_attr)
- VN_RELE(ZTOV(xzp));
-
- return (error);
-}
-
-/*
- * Special zaccess function to check for special nfsv4 perm.
- * doesn't call secpolicy_vnode_access() for failure, since that
- * would probably be the wrong policy function to call.
- * instead its up to the caller to handle that situation.
- */
-
-int
-zfs_zaccess_v4_perm(znode_t *zp, int mode, cred_t *cr)
-{
- int working_mode = 0;
- return (zfs_zaccess_common(zp, mode, &working_mode, cr));
-}
-
-/*
- * Translate tradition unix VREAD/VWRITE/VEXEC mode into
- * native ACL format and call zfs_zaccess()
- */
-int
-zfs_zaccess_rwx(znode_t *zp, mode_t mode, cred_t *cr)
-{
- int v4_mode = zfs_unix_to_v4(mode >> 6);
-
- return (zfs_zaccess(zp, v4_mode, cr));
-}
-
-static int
-zfs_delete_final_check(znode_t *zp, znode_t *dzp, cred_t *cr)
-{
- int error;
-
- error = secpolicy_vnode_access(cr, ZTOV(zp),
- dzp->z_phys->zp_uid, S_IWRITE|S_IEXEC);
-
- if (error == 0)
- error = zfs_sticky_remove_access(dzp, zp, cr);
-
- return (error);
-}
-
-/*
- * Determine whether Access should be granted/deny, without
- * consulting least priv subsystem.
- *
- *
- * The following chart is the recommended NFSv4 enforcement for
- * ability to delete an object.
- *
- * -------------------------------------------------------
- * | Parent Dir | Target Object Permissions |
- * | permissions | |
- * -------------------------------------------------------
- * | | ACL Allows | ACL Denies| Delete |
- * | | Delete | Delete | unspecified|
- * -------------------------------------------------------
- * | ACL Allows | Permit | Permit | Permit |
- * | DELETE_CHILD | |
- * -------------------------------------------------------
- * | ACL Denies | Permit | Deny | Deny |
- * | DELETE_CHILD | | | |
- * -------------------------------------------------------
- * | ACL specifies | | | |
- * | only allow | Permit | Permit | Permit |
- * | write and | | | |
- * | execute | | | |
- * -------------------------------------------------------
- * | ACL denies | | | |
- * | write and | Permit | Deny | Deny |
- * | execute | | | |
- * -------------------------------------------------------
- * ^
- * |
- * No search privilege, can't even look up file?
- *
- */
-int
-zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
-{
- int dzp_working_mode = 0;
- int zp_working_mode = 0;
- int dzp_error, zp_error;
-
- /*
- * Arghh, this check is going to require a couple of questions
- * to be asked. We want specific DELETE permissions to
- * take precedence over WRITE/EXECUTE. We don't
- * want an ACL such as this to mess us up.
- * user:joe:write_data:deny,user:joe:delete:allow
- *
- * However, deny permissions may ultimately be overridden
- * by secpolicy_vnode_access().
- */
-
- dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
- &dzp_working_mode, cr);
- zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, cr);
-
- if (dzp_error == EROFS || zp_error == EROFS)
- return (dzp_error);
-
- /*
- * First check the first row.
- * We only need to see if parent Allows delete_child
- */
- if ((dzp_working_mode & ACE_DELETE_CHILD) == 0)
- return (0);
-
- /*
- * Second row
- * we already have the necessary information in
- * zp_working_mode, zp_error and dzp_error.
- */
-
- if ((zp_working_mode & ACE_DELETE) == 0)
- return (0);
-
- /*
- * Now zp_error should either be EACCES which indicates
- * a "deny" delete entry or ACCESS_UNDETERMINED if the "delete"
- * entry exists on the target.
- *
- * dzp_error should be either EACCES which indicates a "deny"
- * entry for delete_child or ACCESS_UNDETERMINED if no delete_child
- * entry exists. If value is EACCES then we are done
- * and zfs_delete_final_check() will make the final decision
- * regarding to allow the delete.
- */
-
- ASSERT(zp_error != 0 && dzp_error != 0);
- if (dzp_error == EACCES)
- return (zfs_delete_final_check(zp, dzp, cr));
-
- /*
- * Third Row
- * Only need to check for write/execute on parent
- */
-
- dzp_error = zfs_zaccess_common(dzp, ACE_WRITE_DATA|ACE_EXECUTE,
- &dzp_working_mode, cr);
-
- if (dzp_error == EROFS)
- return (dzp_error);
-
- if ((dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE)) == 0)
- return (zfs_sticky_remove_access(dzp, zp, cr));
-
- /*
- * Fourth Row
- */
-
- if (((dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE)) != 0) &&
- ((zp_working_mode & ACE_DELETE) == 0))
- return (zfs_sticky_remove_access(dzp, zp, cr));
-
- return (zfs_delete_final_check(zp, dzp, cr));
-}
-
-int
-zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
- znode_t *tzp, cred_t *cr)
-{
- int add_perm;
- int error;
-
- add_perm = (ZTOV(szp)->v_type == VDIR) ?
- ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
-
- /*
- * Rename permissions are combination of delete permission +
- * add file/subdir permission.
- */
-
- /*
- * first make sure we do the delete portion.
- *
- * If that succeeds then check for add_file/add_subdir permissions
- */
-
- if (error = zfs_zaccess_delete(sdzp, szp, cr))
- return (error);
-
- /*
- * If we have a tzp, see if we can delete it?
- */
- if (tzp) {
- if (error = zfs_zaccess_delete(tdzp, tzp, cr))
- return (error);
- }
-
- /*
- * Now check for add permissions
- */
- error = zfs_zaccess(tdzp, add_perm, cr);
-
- return (error);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c
deleted file mode 100644
index c8450d4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/vfs.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_acl.h>
-
-void
-zfs_ace_byteswap(ace_t *ace, int ace_cnt)
-{
- int i;
-
- for (i = 0; i != ace_cnt; i++, ace++) {
- ace->a_who = BSWAP_32(ace->a_who);
- ace->a_access_mask = BSWAP_32(ace->a_access_mask);
- ace->a_flags = BSWAP_16(ace->a_flags);
- ace->a_type = BSWAP_16(ace->a_type);
- }
-}
-
-/* ARGSUSED */
-void
-zfs_acl_byteswap(void *buf, size_t size)
-{
- int cnt;
-
- /*
- * Arggh, since we don't know how many ACEs are in
- * the array, we have to swap the entire block
- */
-
- cnt = size / sizeof (ace_t);
-
- zfs_ace_byteswap((ace_t *)buf, cnt);
-}
-
-void
-zfs_znode_byteswap(void *buf, size_t size)
-{
- znode_phys_t *zp = buf;
-
- ASSERT(size >= sizeof (znode_phys_t));
-
- zp->zp_crtime[0] = BSWAP_64(zp->zp_crtime[0]);
- zp->zp_crtime[1] = BSWAP_64(zp->zp_crtime[1]);
- zp->zp_atime[0] = BSWAP_64(zp->zp_atime[0]);
- zp->zp_atime[1] = BSWAP_64(zp->zp_atime[1]);
- zp->zp_mtime[0] = BSWAP_64(zp->zp_mtime[0]);
- zp->zp_mtime[1] = BSWAP_64(zp->zp_mtime[1]);
- zp->zp_ctime[0] = BSWAP_64(zp->zp_ctime[0]);
- zp->zp_ctime[1] = BSWAP_64(zp->zp_ctime[1]);
- zp->zp_gen = BSWAP_64(zp->zp_gen);
- zp->zp_mode = BSWAP_64(zp->zp_mode);
- zp->zp_size = BSWAP_64(zp->zp_size);
- zp->zp_parent = BSWAP_64(zp->zp_parent);
- zp->zp_links = BSWAP_64(zp->zp_links);
- zp->zp_xattr = BSWAP_64(zp->zp_xattr);
- zp->zp_rdev = BSWAP_64(zp->zp_rdev);
- zp->zp_flags = BSWAP_64(zp->zp_flags);
- zp->zp_uid = BSWAP_64(zp->zp_uid);
- zp->zp_gid = BSWAP_64(zp->zp_gid);
- zp->zp_pad[0] = BSWAP_64(zp->zp_pad[0]);
- zp->zp_pad[1] = BSWAP_64(zp->zp_pad[1]);
- zp->zp_pad[2] = BSWAP_64(zp->zp_pad[2]);
- zp->zp_pad[3] = BSWAP_64(zp->zp_pad[3]);
-
- zp->zp_acl.z_acl_extern_obj = BSWAP_64(zp->zp_acl.z_acl_extern_obj);
- zp->zp_acl.z_acl_count = BSWAP_32(zp->zp_acl.z_acl_count);
- zp->zp_acl.z_acl_version = BSWAP_16(zp->zp_acl.z_acl_version);
- zp->zp_acl.z_acl_pad = BSWAP_16(zp->zp_acl.z_acl_pad);
- zfs_ace_byteswap(&zp->zp_acl.z_ace_data[0], ACE_SLOT_CNT);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
deleted file mode 100644
index 0c2fb02..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
+++ /dev/null
@@ -1,1119 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ZFS control directory (a.k.a. ".zfs")
- *
- * This directory provides a common location for all ZFS meta-objects.
- * Currently, this is only the 'snapshot' directory, but this may expand in the
- * future. The elements are built using the GFS primitives, as the hierarchy
- * does not actually exist on disk.
- *
- * For 'snapshot', we don't want to have all snapshots always mounted, because
- * this would take up a huge amount of space in /etc/mnttab. We have three
- * types of objects:
- *
- * ctldir ------> snapshotdir -------> snapshot
- * |
- * |
- * V
- * mounted fs
- *
- * The 'snapshot' node contains just enough information to lookup '..' and act
- * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
- * perform an automount of the underlying filesystem and return the
- * corresponding vnode.
- *
- * All mounts are handled automatically by the kernel, but unmounts are
- * (currently) handled from user land. The main reason is that there is no
- * reliable way to auto-unmount the filesystem when it's "no longer in use".
- * When the user unmounts a filesystem, we call zfsctl_unmount(), which
- * unmounts any snapshots within the snapshot directory.
- */
-
-#include <sys/zfs_context.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/namei.h>
-#include <sys/gfs.h>
-#include <sys/stat.h>
-#include <sys/dmu.h>
-#include <sys/mount.h>
-
-typedef struct {
- char *se_name;
- vnode_t *se_root;
- avl_node_t se_node;
-} zfs_snapentry_t;
-
-static int
-snapentry_compare(const void *a, const void *b)
-{
- const zfs_snapentry_t *sa = a;
- const zfs_snapentry_t *sb = b;
- int ret = strcmp(sa->se_name, sb->se_name);
-
- if (ret < 0)
- return (-1);
- else if (ret > 0)
- return (1);
- else
- return (0);
-}
-
-static struct vop_vector zfsctl_ops_root;
-static struct vop_vector zfsctl_ops_snapdir;
-static struct vop_vector zfsctl_ops_snapshot;
-
-static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
-static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
-
-typedef struct zfsctl_node {
- gfs_dir_t zc_gfs_private;
- uint64_t zc_id;
- timestruc_t zc_cmtime; /* ctime and mtime, always the same */
-} zfsctl_node_t;
-
-typedef struct zfsctl_snapdir {
- zfsctl_node_t sd_node;
- kmutex_t sd_lock;
- avl_tree_t sd_snaps;
-} zfsctl_snapdir_t;
-
-/*
- * Root directory elements. We have only a single static entry, 'snapshot'.
- */
-static gfs_dirent_t zfsctl_root_entries[] = {
- { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
- { NULL }
-};
-
-/* include . and .. in the calculation */
-#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
- sizeof (gfs_dirent_t)) + 1)
-
-
-/*
- * Initialize the various GFS pieces we'll need to create and manipulate .zfs
- * directories. This is called from the ZFS init routine, and initializes the
- * vnode ops vectors that we'll be using.
- */
-void
-zfsctl_init(void)
-{
-}
-
-void
-zfsctl_fini(void)
-{
-}
-
-/*
- * Return the inode number associated with the 'snapshot' directory.
- */
-/* ARGSUSED */
-static ino64_t
-zfsctl_root_inode_cb(vnode_t *vp, int index)
-{
- ASSERT(index == 0);
- return (ZFSCTL_INO_SNAPDIR);
-}
-
-/*
- * Create the '.zfs' directory. This directory is cached as part of the VFS
- * structure. This results in a hold on the vfs_t. The code in zfs_umount()
- * therefore checks against a vfs_count of 2 instead of 1. This reference
- * is removed when the ctldir is destroyed in the unmount.
- */
-void
-zfsctl_create(zfsvfs_t *zfsvfs)
-{
- vnode_t *vp, *rvp;
- zfsctl_node_t *zcp;
-
- ASSERT(zfsvfs->z_ctldir == NULL);
-
- vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
- &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
- zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
- zcp = vp->v_data;
- zcp->zc_id = ZFSCTL_INO_ROOT;
-
- VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp, curthread) == 0);
- ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
- VN_URELE(rvp);
-
- /*
- * We're only faking the fact that we have a root of a filesystem for
- * the sake of the GFS interfaces. Undo the flag manipulation it did
- * for us.
- */
- vp->v_vflag &= ~VV_ROOT;
-
- zfsvfs->z_ctldir = vp;
-}
-
-/*
- * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
- * There might still be more references if we were force unmounted, but only
- * new zfs_inactive() calls can occur and they don't reference .zfs
- */
-void
-zfsctl_destroy(zfsvfs_t *zfsvfs)
-{
- VN_RELE(zfsvfs->z_ctldir);
- zfsvfs->z_ctldir = NULL;
-}
-
-/*
- * Given a root znode, retrieve the associated .zfs directory.
- * Add a hold to the vnode and return it.
- */
-vnode_t *
-zfsctl_root(znode_t *zp)
-{
- ASSERT(zfs_has_ctldir(zp));
- VN_HOLD(zp->z_zfsvfs->z_ctldir);
- return (zp->z_zfsvfs->z_ctldir);
-}
-
-/*
- * Common open routine. Disallow any write access.
- */
-/* ARGSUSED */
-static int
-zfsctl_common_open(struct vop_open_args *ap)
-{
- int flags = ap->a_mode;
-
- if (flags & FWRITE)
- return (EACCES);
-
- return (0);
-}
-
-/*
- * Common close routine. Nothing to do here.
- */
-/* ARGSUSED */
-static int
-zfsctl_common_close(struct vop_close_args *ap)
-{
- return (0);
-}
-
-/*
- * Common access routine. Disallow writes.
- */
-/* ARGSUSED */
-static int
-zfsctl_common_access(ap)
- struct vop_access_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- int mode = ap->a_mode;
-
- if (mode & VWRITE)
- return (EACCES);
-
- return (0);
-}
-
-/*
- * Common getattr function. Fill in basic information.
- */
-static void
-zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
-{
- zfsctl_node_t *zcp = vp->v_data;
- timestruc_t now;
-
- vap->va_uid = 0;
- vap->va_gid = 0;
- vap->va_rdev = 0;
- /*
- * We are a purly virtual object, so we have no
- * blocksize or allocated blocks.
- */
- vap->va_blksize = 0;
- vap->va_nblocks = 0;
- vap->va_seq = 0;
- vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
- vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
- S_IROTH | S_IXOTH;
- vap->va_type = VDIR;
- /*
- * We live in the now (for atime).
- */
- gethrestime(&now);
- vap->va_atime = now;
- vap->va_mtime = vap->va_ctime = vap->va_birthtime = zcp->zc_cmtime;
- /* FreeBSD: Reset chflags(2) flags. */
- vap->va_flags = 0;
-}
-
-static int
-zfsctl_common_fid(ap)
- struct vop_fid_args /* {
- struct vnode *a_vp;
- struct fid *a_fid;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- fid_t *fidp = (void *)ap->a_fid;
- zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
- zfsctl_node_t *zcp = vp->v_data;
- uint64_t object = zcp->zc_id;
- zfid_short_t *zfid;
- int i;
-
- ZFS_ENTER(zfsvfs);
-
- fidp->fid_len = SHORT_FID_LEN;
-
- zfid = (zfid_short_t *)fidp;
-
- zfid->zf_len = SHORT_FID_LEN;
-
- for (i = 0; i < sizeof (zfid->zf_object); i++)
- zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
-
- /* .zfs znodes always have a generation number of 0 */
- for (i = 0; i < sizeof (zfid->zf_gen); i++)
- zfid->zf_gen[i] = 0;
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-static int
-zfsctl_common_reclaim(ap)
- struct vop_reclaim_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
-
- /*
- * Destroy the vm object and flush associated pages.
- */
- vnode_destroy_vobject(vp);
- VI_LOCK(vp);
- vp->v_data = NULL;
- VI_UNLOCK(vp);
- return (0);
-}
-
-/*
- * .zfs inode namespace
- *
- * We need to generate unique inode numbers for all files and directories
- * within the .zfs pseudo-filesystem. We use the following scheme:
- *
- * ENTRY ZFSCTL_INODE
- * .zfs 1
- * .zfs/snapshot 2
- * .zfs/snapshot/<snap> objectid(snap)
- */
-
-#define ZFSCTL_INO_SNAP(id) (id)
-
-/*
- * Get root directory attributes.
- */
-/* ARGSUSED */
-static int
-zfsctl_root_getattr(ap)
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- struct vnode *vp = ap->a_vp;
- struct vattr *vap = ap->a_vap;
- zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
-
- ZFS_ENTER(zfsvfs);
- vap->va_nodeid = ZFSCTL_INO_ROOT;
- vap->va_nlink = vap->va_size = NROOT_ENTRIES;
-
- zfsctl_common_getattr(vp, vap);
- ZFS_EXIT(zfsvfs);
-
- return (0);
-}
-
-/*
- * Special case the handling of "..".
- */
-/* ARGSUSED */
-int
-zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
- int flags, vnode_t *rdir, cred_t *cr)
-{
- zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
- int err;
-
- ZFS_ENTER(zfsvfs);
-
- if (strcmp(nm, "..") == 0) {
- err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp, curthread);
- if (err == 0)
- VOP_UNLOCK(*vpp, 0);
- } else {
- err = gfs_dir_lookup(dvp, nm, vpp);
- }
-
- ZFS_EXIT(zfsvfs);
-
- return (err);
-}
-
-/*
- * Special case the handling of "..".
- */
-/* ARGSUSED */
-int
-zfsctl_root_lookup_vop(ap)
- struct vop_lookup_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- vnode_t *dvp = ap->a_dvp;
- vnode_t **vpp = ap->a_vpp;
- cred_t *cr = ap->a_cnp->cn_cred;
- int flags = ap->a_cnp->cn_flags;
- int nameiop = ap->a_cnp->cn_nameiop;
- char nm[NAME_MAX + 1];
- int err;
-
- if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
- return (EOPNOTSUPP);
-
- ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
- strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
-
- err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr);
- if (err == 0 && (nm[0] != '.' || nm[1] != '\0'))
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
-
- return (err);
-}
-
-static struct vop_vector zfsctl_ops_root = {
- .vop_default = &default_vnodeops,
- .vop_open = zfsctl_common_open,
- .vop_close = zfsctl_common_close,
- .vop_ioctl = VOP_EINVAL,
- .vop_getattr = zfsctl_root_getattr,
- .vop_access = zfsctl_common_access,
- .vop_readdir = gfs_vop_readdir,
- .vop_lookup = zfsctl_root_lookup_vop,
- .vop_inactive = gfs_vop_inactive,
- .vop_reclaim = zfsctl_common_reclaim,
- .vop_fid = zfsctl_common_fid,
-};
-
-static int
-zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
-{
- objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
-
- dmu_objset_name(os, zname);
- if (strlen(zname) + 1 + strlen(name) >= len)
- return (ENAMETOOLONG);
- (void) strcat(zname, "@");
- (void) strcat(zname, name);
- return (0);
-}
-
-static int
-zfsctl_unmount_snap(vnode_t *dvp, const char *name, int force, cred_t *cr)
-{
- zfsctl_snapdir_t *sdp = dvp->v_data;
- zfs_snapentry_t search, *sep;
- struct vop_inactive_args ap;
- avl_index_t where;
- int err;
-
- ASSERT(MUTEX_HELD(&sdp->sd_lock));
-
- search.se_name = (char *)name;
- if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL)
- return (ENOENT);
-
- ASSERT(vn_ismntpt(sep->se_root));
-
- /* this will be dropped by dounmount() */
- if ((err = vn_vfswlock(sep->se_root)) != 0)
- return (err);
-
- err = dounmount(vn_mountedvfs(sep->se_root), force, curthread);
- if (err)
- return (err);
- ASSERT(sep->se_root->v_count == 1);
- ap.a_vp = sep->se_root;
- gfs_vop_inactive(&ap);
-
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- kmem_free(sep, sizeof (zfs_snapentry_t));
-
- return (0);
-}
-
-#if 0
-static void
-zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
-{
- avl_index_t where;
- vfs_t *vfsp;
- refstr_t *pathref;
- char newpath[MAXNAMELEN];
- char *tail;
-
- ASSERT(MUTEX_HELD(&sdp->sd_lock));
- ASSERT(sep != NULL);
-
- vfsp = vn_mountedvfs(sep->se_root);
- ASSERT(vfsp != NULL);
-
- vfs_lock_wait(vfsp);
-
- /*
- * Change the name in the AVL tree.
- */
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
- (void) strcpy(sep->se_name, nm);
- VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
- avl_insert(&sdp->sd_snaps, sep, where);
-
- /*
- * Change the current mountpoint info:
- * - update the tail of the mntpoint path
- * - update the tail of the resource path
- */
- pathref = vfs_getmntpoint(vfsp);
- (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
- VERIFY((tail = strrchr(newpath, '/')) != NULL);
- *(tail+1) = '\0';
- ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
- (void) strcat(newpath, nm);
- refstr_rele(pathref);
- vfs_setmntpoint(vfsp, newpath);
-
- pathref = vfs_getresource(vfsp);
- (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
- VERIFY((tail = strrchr(newpath, '@')) != NULL);
- *(tail+1) = '\0';
- ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
- (void) strcat(newpath, nm);
- refstr_rele(pathref);
- vfs_setresource(vfsp, newpath);
-
- vfs_unlock(vfsp);
-}
-#endif
-
-#if 0
-static int
-zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
- cred_t *cr)
-{
- zfsctl_snapdir_t *sdp = sdvp->v_data;
- zfs_snapentry_t search, *sep;
- avl_index_t where;
- char from[MAXNAMELEN], to[MAXNAMELEN];
- int err;
-
- err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
- if (err)
- return (err);
- err = zfs_secpolicy_write(from, cr);
- if (err)
- return (err);
-
- /*
- * Cannot move snapshots out of the snapdir.
- */
- if (sdvp != tdvp)
- return (EINVAL);
-
- if (strcmp(snm, tnm) == 0)
- return (0);
-
- err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
- if (err)
- return (err);
-
- mutex_enter(&sdp->sd_lock);
-
- search.se_name = (char *)snm;
- if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
- mutex_exit(&sdp->sd_lock);
- return (ENOENT);
- }
-
- err = dmu_objset_rename(from, to, B_FALSE);
- if (err == 0)
- zfsctl_rename_snap(sdp, sep, tnm);
-
- mutex_exit(&sdp->sd_lock);
-
- return (err);
-}
-#endif
-
-#if 0
-/* ARGSUSED */
-static int
-zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr)
-{
- zfsctl_snapdir_t *sdp = dvp->v_data;
- char snapname[MAXNAMELEN];
- int err;
-
- err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
- if (err)
- return (err);
- err = zfs_secpolicy_write(snapname, cr);
- if (err)
- return (err);
-
- mutex_enter(&sdp->sd_lock);
-
- err = zfsctl_unmount_snap(dvp, name, 0, cr);
- if (err) {
- mutex_exit(&sdp->sd_lock);
- return (err);
- }
-
- err = dmu_objset_destroy(snapname);
-
- mutex_exit(&sdp->sd_lock);
-
- return (err);
-}
-#endif
-
-/*
- * Lookup entry point for the 'snapshot' directory. Try to open the
- * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
- * Perform a mount of the associated dataset on top of the vnode.
- */
-/* ARGSUSED */
-int
-zfsctl_snapdir_lookup(ap)
- struct vop_lookup_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- vnode_t *dvp = ap->a_dvp;
- vnode_t **vpp = ap->a_vpp;
- char nm[NAME_MAX + 1];
- zfsctl_snapdir_t *sdp = dvp->v_data;
- objset_t *snap;
- char snapname[MAXNAMELEN];
- char *mountpoint;
- zfs_snapentry_t *sep, search;
- size_t mountpoint_len;
- avl_index_t where;
- zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
- int err;
-
- ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
- strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
-
- ASSERT(dvp->v_type == VDIR);
-
- if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
- return (0);
-
- *vpp = NULL;
-
- /*
- * If we get a recursive call, that means we got called
- * from the domount() code while it was trying to look up the
- * spec (which looks like a local path for zfs). We need to
- * add some flag to domount() to tell it not to do this lookup.
- */
- if (MUTEX_HELD(&sdp->sd_lock))
- return (ENOENT);
-
- ZFS_ENTER(zfsvfs);
-
- mutex_enter(&sdp->sd_lock);
- search.se_name = (char *)nm;
- if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
- *vpp = sep->se_root;
- VN_HOLD(*vpp);
- if ((*vpp)->v_mountedhere == NULL) {
- /*
- * The snapshot was unmounted behind our backs,
- * try to remount it.
- */
- goto domount;
- }
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- mutex_exit(&sdp->sd_lock);
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- /*
- * The requested snapshot is not currently mounted, look it up.
- */
- err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
- if (err) {
- mutex_exit(&sdp->sd_lock);
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- if (dmu_objset_open(snapname, DMU_OST_ZFS,
- DS_MODE_STANDARD | DS_MODE_READONLY, &snap) != 0) {
- mutex_exit(&sdp->sd_lock);
- ZFS_EXIT(zfsvfs);
- return (ENOENT);
- }
-
- sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
- sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
- (void) strcpy(sep->se_name, nm);
- *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
- VN_HOLD(*vpp);
- avl_insert(&sdp->sd_snaps, sep, where);
-
- dmu_objset_close(snap);
-domount:
- mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
- strlen("/.zfs/snapshot/") + strlen(nm) + 1;
- mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
- (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
- dvp->v_vfsp->mnt_stat.f_mntonname, nm);
- err = domount(curthread, *vpp, "zfs", mountpoint, snapname, 0);
- kmem_free(mountpoint, mountpoint_len);
- /* FreeBSD: This line was moved from below to avoid a lock recursion. */
- if (err == 0)
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- mutex_exit(&sdp->sd_lock);
-
- /*
- * If we had an error, drop our hold on the vnode and
- * zfsctl_snapshot_inactive() will clean up.
- */
- if (err) {
- VN_RELE(*vpp);
- *vpp = NULL;
- }
- return (err);
-}
-
-/* ARGSUSED */
-static int
-zfsctl_snapdir_readdir_cb(vnode_t *vp, struct dirent64 *dp, int *eofp,
- offset_t *offp, offset_t *nextp, void *data)
-{
- zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
- char snapname[MAXNAMELEN];
- uint64_t id, cookie;
-
- ZFS_ENTER(zfsvfs);
-
- cookie = *offp;
- if (dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
- &cookie) == ENOENT) {
- *eofp = 1;
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- (void) strcpy(dp->d_name, snapname);
- dp->d_ino = ZFSCTL_INO_SNAP(id);
- *nextp = cookie;
-
- ZFS_EXIT(zfsvfs);
-
- return (0);
-}
-
-vnode_t *
-zfsctl_mknode_snapdir(vnode_t *pvp)
-{
- vnode_t *vp;
- zfsctl_snapdir_t *sdp;
-
- vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, pvp->v_vfsp,
- &zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
- zfsctl_snapdir_readdir_cb, NULL);
- sdp = vp->v_data;
- sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
- sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
- mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
- avl_create(&sdp->sd_snaps, snapentry_compare,
- sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
- return (vp);
-}
-
-/* ARGSUSED */
-static int
-zfsctl_snapdir_getattr(ap)
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- struct vnode *vp = ap->a_vp;
- struct vattr *vap = ap->a_vap;
- zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
- zfsctl_snapdir_t *sdp = vp->v_data;
-
- ZFS_ENTER(zfsvfs);
- zfsctl_common_getattr(vp, vap);
- vap->va_nodeid = gfs_file_inode(vp);
- vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
- ZFS_EXIT(zfsvfs);
-
- return (0);
-}
-
-/* ARGSUSED */
-static int
-zfsctl_snapdir_inactive(ap)
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- zfsctl_snapdir_t *sdp = vp->v_data;
- void *private;
-
- private = gfs_dir_inactive(vp);
- if (private != NULL) {
- ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
- mutex_destroy(&sdp->sd_lock);
- avl_destroy(&sdp->sd_snaps);
- kmem_free(private, sizeof (zfsctl_snapdir_t));
- }
- return (0);
-}
-
-static struct vop_vector zfsctl_ops_snapdir = {
- .vop_default = &default_vnodeops,
- .vop_open = zfsctl_common_open,
- .vop_close = zfsctl_common_close,
- .vop_ioctl = VOP_EINVAL,
- .vop_getattr = zfsctl_snapdir_getattr,
- .vop_access = zfsctl_common_access,
- .vop_readdir = gfs_vop_readdir,
- .vop_lookup = zfsctl_snapdir_lookup,
- .vop_inactive = zfsctl_snapdir_inactive,
- .vop_reclaim = zfsctl_common_reclaim,
- .vop_fid = zfsctl_common_fid,
-};
-
-static vnode_t *
-zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
-{
- vnode_t *vp;
- zfsctl_node_t *zcp;
-
- vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
- &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
- zcp = vp->v_data;
- zcp->zc_id = objset;
-
- return (vp);
-}
-
-static int
-zfsctl_snapshot_inactive(ap)
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- struct vop_inactive_args iap;
- zfsctl_snapdir_t *sdp;
- zfs_snapentry_t *sep, *next;
- int locked;
- vnode_t *dvp;
-
- VERIFY(gfs_dir_lookup(vp, "..", &dvp) == 0);
- sdp = dvp->v_data;
- VOP_UNLOCK(dvp, 0);
-
- if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
- mutex_enter(&sdp->sd_lock);
-
- if (vp->v_count > 1) {
- if (!locked)
- mutex_exit(&sdp->sd_lock);
- return (0);
- }
- ASSERT(!vn_ismntpt(vp));
-
- sep = avl_first(&sdp->sd_snaps);
- while (sep != NULL) {
- next = AVL_NEXT(&sdp->sd_snaps, sep);
-
- if (sep->se_root == vp) {
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- kmem_free(sep, sizeof (zfs_snapentry_t));
- break;
- }
- sep = next;
- }
- ASSERT(sep != NULL);
-
- if (!locked)
- mutex_exit(&sdp->sd_lock);
- VN_RELE(dvp);
-
- /*
- * Dispose of the vnode for the snapshot mount point.
- * This is safe to do because once this entry has been removed
- * from the AVL tree, it can't be found again, so cannot become
- * "active". If we lookup the same name again we will end up
- * creating a new vnode.
- */
- iap.a_vp = vp;
- return (gfs_vop_inactive(&iap));
-}
-
-static int
-zfsctl_traverse_begin(vnode_t **vpp, int lktype, kthread_t *td)
-{
-
- VN_HOLD(*vpp);
- /* Snapshot should be already mounted, but just in case. */
- if (vn_mountedvfs(*vpp) == NULL)
- return (ENOENT);
- return (traverse(vpp, lktype));
-}
-
-static void
-zfsctl_traverse_end(vnode_t *vp, int err)
-{
-
- if (err == 0)
- vput(vp);
- else
- VN_RELE(vp);
-}
-
-static int
-zfsctl_snapshot_getattr(ap)
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- int err;
-
- err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY, ap->a_td);
- if (err == 0)
- err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred, ap->a_td);
- zfsctl_traverse_end(vp, err);
- return (err);
-}
-
-static int
-zfsctl_snapshot_fid(ap)
- struct vop_fid_args /* {
- struct vnode *a_vp;
- struct fid *a_fid;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- int err;
-
- err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY, curthread);
- if (err == 0)
- err = VOP_VPTOFH(vp, (void *)ap->a_fid);
- zfsctl_traverse_end(vp, err);
- return (err);
-}
-
-/*
- * These VP's should never see the light of day. They should always
- * be covered.
- */
-static struct vop_vector zfsctl_ops_snapshot = {
- .vop_default = &default_vnodeops,
- .vop_inactive = zfsctl_snapshot_inactive,
- .vop_reclaim = zfsctl_common_reclaim,
- .vop_getattr = zfsctl_snapshot_getattr,
- .vop_fid = zfsctl_snapshot_fid,
-};
-
-int
-zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- vnode_t *dvp, *vp;
- zfsctl_snapdir_t *sdp;
- zfsctl_node_t *zcp;
- zfs_snapentry_t *sep;
- int error;
-
- ASSERT(zfsvfs->z_ctldir != NULL);
- error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
- NULL, 0, NULL, kcred);
- if (error != 0)
- return (error);
- sdp = dvp->v_data;
-
- mutex_enter(&sdp->sd_lock);
- sep = avl_first(&sdp->sd_snaps);
- while (sep != NULL) {
- vp = sep->se_root;
- zcp = vp->v_data;
- if (zcp->zc_id == objsetid)
- break;
-
- sep = AVL_NEXT(&sdp->sd_snaps, sep);
- }
-
- if (sep != NULL) {
- VN_HOLD(vp);
- error = traverse(&vp, LK_SHARED | LK_RETRY);
- if (error == 0) {
- if (vp == sep->se_root)
- error = EINVAL;
- else
- *zfsvfsp = VTOZ(vp)->z_zfsvfs;
- }
- mutex_exit(&sdp->sd_lock);
- if (error == 0)
- VN_URELE(vp);
- else
- VN_RELE(vp);
- } else {
- error = EINVAL;
- mutex_exit(&sdp->sd_lock);
- }
-
- VN_RELE(dvp);
-
- return (error);
-}
-
-/*
- * Unmount any snapshots for the given filesystem. This is called from
- * zfs_umount() - if we have a ctldir, then go through and unmount all the
- * snapshots.
- */
-int
-zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
-{
- struct vop_inactive_args ap;
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- vnode_t *dvp, *svp;
- zfsctl_snapdir_t *sdp;
- zfs_snapentry_t *sep, *next;
- int error;
-
- ASSERT(zfsvfs->z_ctldir != NULL);
- error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
- NULL, 0, NULL, cr);
- if (error != 0)
- return (error);
- sdp = dvp->v_data;
-
- mutex_enter(&sdp->sd_lock);
-
- sep = avl_first(&sdp->sd_snaps);
- while (sep != NULL) {
- svp = sep->se_root;
- next = AVL_NEXT(&sdp->sd_snaps, sep);
-
- /*
- * If this snapshot is not mounted, then it must
- * have just been unmounted by somebody else, and
- * will be cleaned up by zfsctl_snapdir_inactive().
- */
- if (vn_ismntpt(svp)) {
- if ((error = vn_vfswlock(svp)) != 0)
- goto out;
-
- /*
- * Increase usecount, so dounmount() won't vrele() it
- * to 0 and call zfsctl_snapdir_inactive().
- */
- VN_HOLD(svp);
- vfsp = vn_mountedvfs(svp);
- mtx_lock(&Giant);
- error = dounmount(vfsp, fflags, curthread);
- mtx_unlock(&Giant);
- if (error != 0) {
- VN_RELE(svp);
- goto out;
- }
-
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- kmem_free(sep, sizeof (zfs_snapentry_t));
-
- /*
- * We can't use VN_RELE(), as that will try to
- * invoke zfsctl_snapdir_inactive(), and that
- * would lead to an attempt to re-grab the sd_lock.
- */
- ASSERT3U(svp->v_count, ==, 1);
- ap.a_vp = svp;
- gfs_vop_inactive(&ap);
- }
- sep = next;
- }
-out:
- mutex_exit(&sdp->sd_lock);
- VN_RELE(dvp);
-
- return (error);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
deleted file mode 100644
index f233b8f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
+++ /dev/null
@@ -1,797 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/resource.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/kmem.h>
-#include <sys/uio.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/stat.h>
-#include <sys/unistd.h>
-#include <sys/random.h>
-#include <sys/policy.h>
-#include <sys/kcondvar.h>
-#include <sys/callb.h>
-#include <sys/smp.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/fs/zfs.h>
-#include <sys/zap.h>
-#include <sys/dmu.h>
-#include <sys/atomic.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/dnlc.h>
-
-/*
- * Lock a directory entry. A dirlock on <dzp, name> protects that name
- * in dzp's directory zap object. As long as you hold a dirlock, you can
- * assume two things: (1) dzp cannot be reaped, and (2) no other thread
- * can change the zap entry for (i.e. link or unlink) this name.
- *
- * Input arguments:
- * dzp - znode for directory
- * name - name of entry to lock
- * flag - ZNEW: if the entry already exists, fail with EEXIST.
- * ZEXISTS: if the entry does not exist, fail with ENOENT.
- * ZSHARED: allow concurrent access with other ZSHARED callers.
- * ZXATTR: we want dzp's xattr directory
- *
- * Output arguments:
- * zpp - pointer to the znode for the entry (NULL if there isn't one)
- * dlpp - pointer to the dirlock for this entry (NULL on error)
- *
- * Return value: 0 on success or errno on failure.
- *
- * NOTE: Always checks for, and rejects, '.' and '..'.
- */
-int
-zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
- int flag)
-{
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zfs_dirlock_t *dl;
- uint64_t zoid;
- int error;
- vnode_t *vp;
-
- *zpp = NULL;
- *dlpp = NULL;
-
- /*
- * Verify that we are not trying to lock '.', '..', or '.zfs'
- */
- if (name[0] == '.' &&
- (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
- zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
- return (EEXIST);
-
- /*
- * Wait until there are no locks on this name.
- */
- rw_enter(&dzp->z_name_lock, RW_READER);
- mutex_enter(&dzp->z_lock);
- for (;;) {
- if (dzp->z_unlinked) {
- mutex_exit(&dzp->z_lock);
- rw_exit(&dzp->z_name_lock);
- return (ENOENT);
- }
- for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next)
- if (strcmp(name, dl->dl_name) == 0)
- break;
- if (dl == NULL) {
- /*
- * Allocate a new dirlock and add it to the list.
- */
- dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
- cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
- dl->dl_name = name;
- dl->dl_sharecnt = 0;
- dl->dl_namesize = 0;
- dl->dl_dzp = dzp;
- dl->dl_next = dzp->z_dirlocks;
- dzp->z_dirlocks = dl;
- break;
- }
- if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
- break;
- cv_wait(&dl->dl_cv, &dzp->z_lock);
- }
-
- if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
- /*
- * We're the second shared reference to dl. Make a copy of
- * dl_name in case the first thread goes away before we do.
- * Note that we initialize the new name before storing its
- * pointer into dl_name, because the first thread may load
- * dl->dl_name at any time. He'll either see the old value,
- * which is his, or the new shared copy; either is OK.
- */
- dl->dl_namesize = strlen(dl->dl_name) + 1;
- name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
- bcopy(dl->dl_name, name, dl->dl_namesize);
- dl->dl_name = name;
- }
-
- mutex_exit(&dzp->z_lock);
-
- /*
- * We have a dirlock on the name. (Note that it is the dirlock,
- * not the dzp's z_lock, that protects the name in the zap object.)
- * See if there's an object by this name; if so, put a hold on it.
- */
- if (flag & ZXATTR) {
- zoid = dzp->z_phys->zp_xattr;
- error = (zoid == 0 ? ENOENT : 0);
- } else {
- vp = dnlc_lookup(ZTOV(dzp), name);
- if (vp == DNLC_NO_VNODE) {
- VN_RELE(vp);
- error = ENOENT;
- } else if (vp) {
- if (flag & ZNEW) {
- zfs_dirent_unlock(dl);
- VN_RELE(vp);
- return (EEXIST);
- }
- *dlpp = dl;
- *zpp = VTOZ(vp);
- return (0);
- } else {
- error = zap_lookup(zfsvfs->z_os, dzp->z_id, name,
- 8, 1, &zoid);
- zoid = ZFS_DIRENT_OBJ(zoid);
- if (error == ENOENT)
- dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
- }
- }
- if (error) {
- if (error != ENOENT || (flag & ZEXISTS)) {
- zfs_dirent_unlock(dl);
- return (error);
- }
- } else {
- if (flag & ZNEW) {
- zfs_dirent_unlock(dl);
- return (EEXIST);
- }
- error = zfs_zget(zfsvfs, zoid, zpp);
- if (error) {
- zfs_dirent_unlock(dl);
- return (error);
- }
- if (!(flag & ZXATTR))
- dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
- }
-
- *dlpp = dl;
-
- return (0);
-}
-
-/*
- * Unlock this directory entry and wake anyone who was waiting for it.
- */
-void
-zfs_dirent_unlock(zfs_dirlock_t *dl)
-{
- znode_t *dzp = dl->dl_dzp;
- zfs_dirlock_t **prev_dl, *cur_dl;
-
- mutex_enter(&dzp->z_lock);
- rw_exit(&dzp->z_name_lock);
- if (dl->dl_sharecnt > 1) {
- dl->dl_sharecnt--;
- mutex_exit(&dzp->z_lock);
- return;
- }
- prev_dl = &dzp->z_dirlocks;
- while ((cur_dl = *prev_dl) != dl)
- prev_dl = &cur_dl->dl_next;
- *prev_dl = dl->dl_next;
- cv_broadcast(&dl->dl_cv);
- mutex_exit(&dzp->z_lock);
-
- if (dl->dl_namesize != 0)
- kmem_free(dl->dl_name, dl->dl_namesize);
- cv_destroy(&dl->dl_cv);
- kmem_free(dl, sizeof (*dl));
-}
-
-/*
- * Look up an entry in a directory.
- *
- * NOTE: '.' and '..' are handled as special cases because
- * no directory entries are actually stored for them. If this is
- * the root of a filesystem, then '.zfs' is also treated as a
- * special pseudo-directory.
- */
-int
-zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp)
-{
- zfs_dirlock_t *dl;
- znode_t *zp;
- int error = 0;
-
- if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
- *vpp = ZTOV(dzp);
- VN_HOLD(*vpp);
- } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- /*
- * If we are a snapshot mounted under .zfs, return
- * the vp for the snapshot directory.
- */
- if (dzp->z_phys->zp_parent == dzp->z_id &&
- zfsvfs->z_parent != zfsvfs) {
- error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
- "snapshot", vpp, NULL, 0, NULL, kcred);
- return (error);
- }
- rw_enter(&dzp->z_parent_lock, RW_READER);
- error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp);
- if (error == 0)
- *vpp = ZTOV(zp);
- rw_exit(&dzp->z_parent_lock);
- } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
- *vpp = zfsctl_root(dzp);
- } else {
- error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS | ZSHARED);
- if (error == 0) {
- *vpp = ZTOV(zp);
- zfs_dirent_unlock(dl);
- dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
- }
- }
-
- return (error);
-}
-
-static char *
-zfs_unlinked_hexname(char namebuf[17], uint64_t x)
-{
- char *name = &namebuf[16];
- const char digits[16] = "0123456789abcdef";
-
- *name = '\0';
- do {
- *--name = digits[x & 0xf];
- x >>= 4;
- } while (x != 0);
-
- return (name);
-}
-
-/*
- * unlinked Set (formerly known as the "delete queue") Error Handling
- *
- * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
- * don't specify the name of the entry that we will be manipulating. We
- * also fib and say that we won't be adding any new entries to the
- * unlinked set, even though we might (this is to lower the minimum file
- * size that can be deleted in a full filesystem). So on the small
- * chance that the nlink list is using a fat zap (ie. has more than
- * 2000 entries), we *may* not pre-read a block that's needed.
- * Therefore it is remotely possible for some of the assertions
- * regarding the unlinked set below to fail due to i/o error. On a
- * nondebug system, this will result in the space being leaked.
- */
-void
-zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- char obj_name[17];
- int error;
-
- ASSERT(zp->z_unlinked);
- ASSERT3U(zp->z_phys->zp_links, ==, 0);
-
- error = zap_add(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
- zfs_unlinked_hexname(obj_name, zp->z_id), 8, 1, &zp->z_id, tx);
- ASSERT3U(error, ==, 0);
-}
-
-/*
- * Clean up any znodes that had no links when we either crashed or
- * (force) umounted the file system.
- */
-void
-zfs_unlinked_drain(zfsvfs_t *zfsvfs)
-{
- zap_cursor_t zc;
- zap_attribute_t zap;
- dmu_object_info_t doi;
- znode_t *zp;
- int error;
-
- /*
- * Interate over the contents of the unlinked set.
- */
- for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
- zap_cursor_retrieve(&zc, &zap) == 0;
- zap_cursor_advance(&zc)) {
-
- /*
- * See what kind of object we have in list
- */
-
- error = dmu_object_info(zfsvfs->z_os,
- zap.za_first_integer, &doi);
- if (error != 0)
- continue;
-
- ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
- (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
- /*
- * We need to re-mark these list entries for deletion,
- * so we pull them back into core and set zp->z_unlinked.
- */
- error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
-
- /*
- * We may pick up znodes that are already marked for deletion.
- * This could happen during the purge of an extended attribute
- * directory. All we need to do is skip over them, since they
- * are already in the system marked z_unlinked.
- */
- if (error != 0)
- continue;
-
- zp->z_unlinked = B_TRUE;
- VN_RELE(ZTOV(zp));
- }
- zap_cursor_fini(&zc);
-}
-
-/*
- * Delete the entire contents of a directory. Return a count
- * of the number of entries that could not be deleted.
- *
- * NOTE: this function assumes that the directory is inactive,
- * so there is no need to lock its entries before deletion.
- * Also, it assumes the directory contents is *only* regular
- * files.
- */
-static int
-zfs_purgedir(znode_t *dzp)
-{
- zap_cursor_t zc;
- zap_attribute_t zap;
- znode_t *xzp;
- dmu_tx_t *tx;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zfs_dirlock_t dl;
- int skipped = 0;
- int error;
-
- for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
- (error = zap_cursor_retrieve(&zc, &zap)) == 0;
- zap_cursor_advance(&zc)) {
- error = zfs_zget(zfsvfs,
- ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
- ASSERT3U(error, ==, 0);
-
- ASSERT((ZTOV(xzp)->v_type == VREG) ||
- (ZTOV(xzp)->v_type == VLNK));
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, dzp->z_id);
- dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
- dmu_tx_hold_bonus(tx, xzp->z_id);
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- VN_RELE(ZTOV(xzp));
- skipped += 1;
- continue;
- }
- bzero(&dl, sizeof (dl));
- dl.dl_dzp = dzp;
- dl.dl_name = zap.za_name;
-
- error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
- ASSERT3U(error, ==, 0);
- dmu_tx_commit(tx);
-
- VN_RELE(ZTOV(xzp));
- }
- zap_cursor_fini(&zc);
- ASSERT(error == ENOENT);
- return (skipped);
-}
-
-void
-zfs_rmnode(znode_t *zp)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- objset_t *os = zfsvfs->z_os;
- znode_t *xzp = NULL;
- char obj_name[17];
- dmu_tx_t *tx;
- uint64_t acl_obj;
- int error;
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);
-
- ASSERT(zp->z_phys->zp_links == 0);
-
- /*
- * If this is an attribute directory, purge its contents.
- */
- if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
- (zp->z_phys->zp_flags & ZFS_XATTR)) {
- if (zfs_purgedir(zp) != 0) {
- /*
- * Not enough space to delete some xattrs.
- * Leave it on the unlinked set.
- */
- VFS_UNLOCK_GIANT(vfslocked);
- return;
- }
- }
-
- /*
- * If the file has extended attributes, we're going to unlink
- * the xattr dir.
- */
- if (zp->z_phys->zp_xattr) {
- error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
- ASSERT(error == 0);
- }
-
- acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
-
- /*
- * Set up the transaction.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- if (xzp) {
- dmu_tx_hold_bonus(tx, xzp->z_id);
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
- }
- if (acl_obj)
- dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- /*
- * Not enough space to delete the file. Leave it in the
- * unlinked set, leaking it until the fs is remounted (at
- * which point we'll call zfs_unlinked_drain() to process it).
- */
- dmu_tx_abort(tx);
- VFS_UNLOCK_GIANT(vfslocked);
- return;
- }
-
- if (xzp) {
- dmu_buf_will_dirty(xzp->z_dbuf, tx);
- mutex_enter(&xzp->z_lock);
- xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
- xzp->z_phys->zp_links = 0; /* no more links to it */
- mutex_exit(&xzp->z_lock);
- zfs_unlinked_add(xzp, tx);
- }
-
- /* Remove this znode from the unlinked set */
- error = zap_remove(os, zfsvfs->z_unlinkedobj,
- zfs_unlinked_hexname(obj_name, zp->z_id), tx);
- ASSERT3U(error, ==, 0);
-
- zfs_znode_delete(zp, tx);
-
- dmu_tx_commit(tx);
-
- if (xzp)
- VN_RELE(ZTOV(xzp));
- VFS_UNLOCK_GIANT(vfslocked);
-}
-
-/*
- * Link zp into dl. Can only fail if zp has been unlinked.
- */
-int
-zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
-{
- znode_t *dzp = dl->dl_dzp;
- vnode_t *vp = ZTOV(zp);
- uint64_t value;
- int zp_is_dir = (vp->v_type == VDIR);
- int error;
-
- dmu_buf_will_dirty(zp->z_dbuf, tx);
- mutex_enter(&zp->z_lock);
-
- if (!(flag & ZRENAMING)) {
- if (zp->z_unlinked) { /* no new links to unlinked zp */
- ASSERT(!(flag & (ZNEW | ZEXISTS)));
- mutex_exit(&zp->z_lock);
- return (ENOENT);
- }
- zp->z_phys->zp_links++;
- }
- zp->z_phys->zp_parent = dzp->z_id; /* dzp is now zp's parent */
-
- if (!(flag & ZNEW))
- zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
- mutex_exit(&zp->z_lock);
-
- dmu_buf_will_dirty(dzp->z_dbuf, tx);
- mutex_enter(&dzp->z_lock);
- dzp->z_phys->zp_size++; /* one dirent added */
- dzp->z_phys->zp_links += zp_is_dir; /* ".." link from zp */
- zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
- mutex_exit(&dzp->z_lock);
-
- /*
- * MacOS X will fill in the 4-bit object type here.
- */
- value = ZFS_DIRENT_MAKE(IFTODT(zp->z_phys->zp_mode), zp->z_id);
- error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
- 8, 1, &value, tx);
- ASSERT(error == 0);
-
- dnlc_update(ZTOV(dzp), dl->dl_name, vp);
-
- return (0);
-}
-
-/*
- * Unlink zp from dl, and mark zp for deletion if this was the last link.
- * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
- * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
- * If it's non-NULL, we use it to indicate whether the znode needs deletion,
- * and it's the caller's job to do it.
- */
-int
-zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
- boolean_t *unlinkedp)
-{
- znode_t *dzp = dl->dl_dzp;
- vnode_t *vp = ZTOV(zp);
- int zp_is_dir = (vp->v_type == VDIR);
- boolean_t unlinked = B_FALSE;
- int error;
-
- dnlc_remove(ZTOV(dzp), dl->dl_name);
-
- if (!(flag & ZRENAMING)) {
- dmu_buf_will_dirty(zp->z_dbuf, tx);
-
- if (vn_vfswlock(vp)) /* prevent new mounts on zp */
- return (EBUSY);
-
- if (vn_ismntpt(vp)) { /* don't remove mount point */
- vn_vfsunlock(vp);
- return (EBUSY);
- }
-
- mutex_enter(&zp->z_lock);
- if (zp_is_dir && !zfs_dirempty(zp)) { /* dir not empty */
- mutex_exit(&zp->z_lock);
- vn_vfsunlock(vp);
- return (ENOTEMPTY);
- }
- if (zp->z_phys->zp_links <= zp_is_dir) {
- zfs_panic_recover("zfs: link count on vnode %p is %u, "
- "should be at least %u", zp->z_vnode,
- (int)zp->z_phys->zp_links,
- zp_is_dir + 1);
- zp->z_phys->zp_links = zp_is_dir + 1;
- }
- if (--zp->z_phys->zp_links == zp_is_dir) {
- zp->z_unlinked = B_TRUE;
- zp->z_phys->zp_links = 0;
- unlinked = B_TRUE;
- } else {
- zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
- }
- mutex_exit(&zp->z_lock);
- vn_vfsunlock(vp);
- }
-
- dmu_buf_will_dirty(dzp->z_dbuf, tx);
- mutex_enter(&dzp->z_lock);
- dzp->z_phys->zp_size--; /* one dirent removed */
- dzp->z_phys->zp_links -= zp_is_dir; /* ".." link from zp */
- zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
- mutex_exit(&dzp->z_lock);
-
- error = zap_remove(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, tx);
- ASSERT(error == 0);
-
- if (unlinkedp != NULL)
- *unlinkedp = unlinked;
- else if (unlinked)
- zfs_unlinked_add(zp, tx);
-
- return (0);
-}
-
-/*
- * Indicate whether the directory is empty. Works with or without z_lock
- * held, but can only be consider a hint in the latter case. Returns true
- * if only "." and ".." remain and there's no work in progress.
- */
-boolean_t
-zfs_dirempty(znode_t *dzp)
-{
- return (dzp->z_phys->zp_size == 2 && dzp->z_dirlocks == 0);
-}
-
-int
-zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- znode_t *xzp;
- dmu_tx_t *tx;
- uint64_t xoid;
- int error;
-
- *xvpp = NULL;
-
- if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, cr))
- return (error);
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- return (error);
- }
- zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0);
- ASSERT(xzp->z_id == xoid);
- ASSERT(xzp->z_phys->zp_parent == zp->z_id);
- dmu_buf_will_dirty(zp->z_dbuf, tx);
- zp->z_phys->zp_xattr = xoid;
-
- (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "");
- dmu_tx_commit(tx);
-
- *xvpp = ZTOV(xzp);
-
- return (0);
-}
-
-/*
- * Return a znode for the extended attribute directory for zp.
- * ** If the directory does not already exist, it is created **
- *
- * IN: zp - znode to obtain attribute directory from
- * cr - credentials of caller
- * flags - flags from the VOP_LOOKUP call
- *
- * OUT: xzpp - pointer to extended attribute znode
- *
- * RETURN: 0 on success
- * error number on failure
- */
-int
-zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- znode_t *xzp;
- zfs_dirlock_t *dl;
- vattr_t va;
- int error;
-top:
- error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR);
- if (error)
- return (error);
-
- if (xzp != NULL) {
- *xvpp = ZTOV(xzp);
- zfs_dirent_unlock(dl);
- return (0);
- }
-
- ASSERT(zp->z_phys->zp_xattr == 0);
-
-#ifdef TODO
- if (!(flags & CREATE_XATTR_DIR)) {
- zfs_dirent_unlock(dl);
- return (ENOENT);
- }
-#endif
-
- if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
- zfs_dirent_unlock(dl);
- return (EROFS);
- }
-
- /*
- * The ability to 'create' files in an attribute
- * directory comes from the write_xattr permission on the base file.
- *
- * The ability to 'search' an attribute directory requires
- * read_xattr permission on the base file.
- *
- * Once in a directory the ability to read/write attributes
- * is controlled by the permissions on the attribute file.
- */
- va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
- va.va_type = VDIR;
- va.va_mode = S_IFDIR | S_ISVTX | 0777;
- va.va_uid = (uid_t)zp->z_phys->zp_uid;
- va.va_gid = (gid_t)zp->z_phys->zp_gid;
-
- error = zfs_make_xattrdir(zp, &va, xvpp, cr);
- zfs_dirent_unlock(dl);
-
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- /* NB: we already did dmu_tx_wait() if necessary */
- goto top;
- }
-
- return (error);
-}
-
-/*
- * Decide whether it is okay to remove within a sticky directory.
- *
- * In sticky directories, write access is not sufficient;
- * you can remove entries from a directory only if:
- *
- * you own the directory,
- * you own the entry,
- * the entry is a plain file and you have write access,
- * or you are privileged (checked in secpolicy...).
- *
- * The function returns 0 if remove access is granted.
- */
-int
-zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
-{
- uid_t uid;
-
- if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL) /* ZIL replay */
- return (0);
-
- if ((zdp->z_phys->zp_mode & S_ISVTX) == 0 ||
- (uid = crgetuid(cr)) == zdp->z_phys->zp_uid ||
- uid == zp->z_phys->zp_uid ||
- (ZTOV(zp)->v_type == VREG &&
- zfs_zaccess(zp, ACE_WRITE_DATA, cr) == 0))
- return (0);
- else
- return (secpolicy_vnode_remove(cr));
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
deleted file mode 100644
index e2385a0..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio.h>
-
-#include <sys/fm/fs/zfs.h>
-#include <sys/fm/protocol.h>
-#include <sys/fm/util.h>
-
-#ifdef _KERNEL
-/* Including sys/bus.h is just too hard, so I declare what I need here. */
-extern void devctl_notify(const char *__system, const char *__subsystem,
- const char *__type, const char *__data);
-#endif
-
-/*
- * This general routine is responsible for generating all the different ZFS
- * ereports. The payload is dependent on the class, and which arguments are
- * supplied to the function:
- *
- * EREPORT POOL VDEV IO
- * block X X X
- * data X X
- * device X X
- * pool X
- *
- * If we are in a loading state, all errors are chained together by the same
- * SPA-wide ENA.
- *
- * For isolated I/O requests, we get the ENA from the zio_t. The propagation
- * gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want
- * to chain together all ereports associated with a logical piece of data. For
- * read I/Os, there are basically three 'types' of I/O, which form a roughly
- * layered diagram:
- *
- * +---------------+
- * | Aggregate I/O | No associated logical data or device
- * +---------------+
- * |
- * V
- * +---------------+ Reads associated with a piece of logical data.
- * | Read I/O | This includes reads on behalf of RAID-Z,
- * +---------------+ mirrors, gang blocks, retries, etc.
- * |
- * V
- * +---------------+ Reads associated with a particular device, but
- * | Physical I/O | no logical data. Issued as part of vdev caching
- * +---------------+ and I/O aggregation.
- *
- * Note that 'physical I/O' here is not the same terminology as used in the rest
- * of ZIO. Typically, 'physical I/O' simply means that there is no attached
- * blockpointer. But I/O with no associated block pointer can still be related
- * to a logical piece of data (i.e. RAID-Z requests).
- *
- * Purely physical I/O always have unique ENAs. They are not related to a
- * particular piece of logical data, and therefore cannot be chained together.
- * We still generate an ereport, but the DE doesn't correlate it with any
- * logical piece of data. When such an I/O fails, the delegated I/O requests
- * will issue a retry, which will trigger the 'real' ereport with the correct
- * ENA.
- *
- * We keep track of the ENA for a ZIO chain through the 'io_logical' member.
- * When a new logical I/O is issued, we set this to point to itself. Child I/Os
- * then inherit this pointer, so that when it is first set subsequent failures
- * will use the same ENA. If a physical I/O is issued (by passing the
- * ZIO_FLAG_NOBOOKMARK flag), then this pointer is reset, guaranteeing that a
- * unique ENA will be generated. For an aggregate I/O, this pointer is set to
- * NULL, and no ereport will be generated (since it doesn't actually correspond
- * to any particular device or piece of data).
- */
-void
-zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
- uint64_t stateoroffset, uint64_t size)
-{
-#ifdef _KERNEL
- char buf[1024];
- struct sbuf sb;
- struct timespec ts;
-
- /*
- * If we are doing a spa_tryimport(), ignore errors.
- */
- if (spa->spa_load_state == SPA_LOAD_TRYIMPORT)
- return;
-
- /*
- * If we are in the middle of opening a pool, and the previous attempt
- * failed, don't bother logging any new ereports - we're just going to
- * get the same diagnosis anyway.
- */
- if (spa->spa_load_state != SPA_LOAD_NONE &&
- spa->spa_last_open_failed)
- return;
-
- /*
- * Ignore any errors from I/Os that we are going to retry anyway - we
- * only generate errors from the final failure.
- */
- if (zio && zio_should_retry(zio))
- return;
-
- /*
- * If this is not a read or write zio, ignore the error. This can occur
- * if the DKIOCFLUSHWRITECACHE ioctl fails.
- */
- if (zio && zio->io_type != ZIO_TYPE_READ &&
- zio->io_type != ZIO_TYPE_WRITE)
- return;
-
- nanotime(&ts);
-
- sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
- sbuf_printf(&sb, "time=%ju.%ld", (uintmax_t)ts.tv_sec, ts.tv_nsec);
-
- /*
- * Serialize ereport generation
- */
- mutex_enter(&spa->spa_errlist_lock);
-
-#if 0
- /*
- * Determine the ENA to use for this event. If we are in a loading
- * state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use
- * a root zio-wide ENA. Otherwise, simply use a unique ENA.
- */
- if (spa->spa_load_state != SPA_LOAD_NONE) {
-#if 0
- if (spa->spa_ena == 0)
- spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1);
-#endif
- ena = spa->spa_ena;
- } else if (zio != NULL && zio->io_logical != NULL) {
-#if 0
- if (zio->io_logical->io_ena == 0)
- zio->io_logical->io_ena =
- fm_ena_generate(0, FM_ENA_FMT1);
-#endif
- ena = zio->io_logical->io_ena;
- } else {
-#if 0
- ena = fm_ena_generate(0, FM_ENA_FMT1);
-#else
- ena = 0;
-#endif
- }
-#endif
-
- /*
- * Construct the full class, detector, and other standard FMA fields.
- */
- sbuf_printf(&sb, " ereport_version=%u", FM_EREPORT_VERSION);
- sbuf_printf(&sb, " class=%s.%s", ZFS_ERROR_CLASS, subclass);
-
- sbuf_printf(&sb, " zfs_scheme_version=%u", FM_ZFS_SCHEME_VERSION);
-
- /*
- * Construct the per-ereport payload, depending on which parameters are
- * passed in.
- */
-
- /*
- * Generic payload members common to all ereports.
- *
- * The direct reference to spa_name is used rather than spa_name()
- * because of the asynchronous nature of the zio pipeline. spa_name()
- * asserts that the config lock is held in some form. This is always
- * the case in I/O context, but because the check for RW_WRITER compares
- * against 'curthread', we may be in an asynchronous context and blow
- * this assert. Rather than loosen this assert, we acknowledge that all
- * contexts in which this function is called (pool open, I/O) are safe,
- * and dereference the name directly.
- */
- sbuf_printf(&sb, " %s=%s", FM_EREPORT_PAYLOAD_ZFS_POOL, spa->spa_name);
- sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
- spa_guid(spa));
- sbuf_printf(&sb, " %s=%u", FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT,
- spa->spa_load_state);
-
- if (vd != NULL) {
- vdev_t *pvd = vd->vdev_parent;
-
- sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
- vd->vdev_guid);
- sbuf_printf(&sb, " %s=%s", FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
- vd->vdev_ops->vdev_op_type);
- if (vd->vdev_path)
- sbuf_printf(&sb, " %s=%s",
- FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, vd->vdev_path);
- if (vd->vdev_devid)
- sbuf_printf(&sb, " %s=%s",
- FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, vd->vdev_devid);
-
- if (pvd != NULL) {
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, pvd->vdev_guid);
- sbuf_printf(&sb, " %s=%s",
- FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE,
- pvd->vdev_ops->vdev_op_type);
- if (pvd->vdev_path)
- sbuf_printf(&sb, " %s=%s",
- FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH,
- pvd->vdev_path);
- if (pvd->vdev_devid)
- sbuf_printf(&sb, " %s=%s",
- FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
- pvd->vdev_devid);
- }
- }
-
- if (zio != NULL) {
- /*
- * Payload common to all I/Os.
- */
- sbuf_printf(&sb, " %s=%u", FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
- zio->io_error);
-
- /*
- * If the 'size' parameter is non-zero, it indicates this is a
- * RAID-Z or other I/O where the physical offset and length are
- * provided for us, instead of within the zio_t.
- */
- if (vd != NULL) {
- if (size) {
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
- stateoroffset);
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, size);
- } else {
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
- zio->io_offset);
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
- zio->io_size);
- }
- }
-
- /*
- * Payload for I/Os with corresponding logical information.
- */
- if (zio->io_logical != NULL) {
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
- zio->io_logical->io_bookmark.zb_object);
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
- zio->io_logical->io_bookmark.zb_level);
- sbuf_printf(&sb, " %s=%ju",
- FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
- zio->io_logical->io_bookmark.zb_blkid);
- }
- } else if (vd != NULL) {
- /*
- * If we have a vdev but no zio, this is a device fault, and the
- * 'stateoroffset' parameter indicates the previous state of the
- * vdev.
- */
- sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
- stateoroffset);
- }
- mutex_exit(&spa->spa_errlist_lock);
-
- sbuf_finish(&sb);
- ZFS_LOG(1, "%s", sbuf_data(&sb));
- devctl_notify("ZFS", spa->spa_name, subclass, sbuf_data(&sb));
- if (sbuf_overflowed(&sb))
- printf("ZFS WARNING: sbuf overflowed\n");
- sbuf_delete(&sb);
-#endif
-}
-
-/*
- * The 'resource.fs.zfs.ok' event is an internal signal that the associated
- * resource (pool or disk) has been identified by ZFS as healthy. This will
- * then trigger the DE to close the associated case, if any.
- */
-void
-zfs_post_ok(spa_t *spa, vdev_t *vd)
-{
-#ifdef _KERNEL
- char buf[1024];
- char class[64];
- struct sbuf sb;
- struct timespec ts;
-
- nanotime(&ts);
-
- sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
- sbuf_printf(&sb, "time=%ju.%ld", (uintmax_t)ts.tv_sec, ts.tv_nsec);
-
- snprintf(class, sizeof(class), "%s.%s.%s", FM_RSRC_RESOURCE,
- ZFS_ERROR_CLASS, FM_RESOURCE_OK);
- sbuf_printf(&sb, " %s=%hhu", FM_VERSION, FM_RSRC_VERSION);
- sbuf_printf(&sb, " %s=%s", FM_CLASS, class);
- sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
- spa_guid(spa));
- if (vd)
- sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
- vd->vdev_guid);
- sbuf_finish(&sb);
- devctl_notify("ZFS", spa->spa_name, class, sbuf_data(&sb));
- if (sbuf_overflowed(&sb))
- printf("ZFS WARNING: sbuf overflowed\n");
- sbuf_delete(&sb);
-#endif
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
deleted file mode 100644
index c9424be..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ /dev/null
@@ -1,1826 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-#include <sys/buf.h>
-#include <sys/file.h>
-#include <sys/kmem.h>
-#include <sys/conf.h>
-#include <sys/cmn_err.h>
-#include <sys/stat.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zap.h>
-#include <sys/spa.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev.h>
-#include <sys/vdev_impl.h>
-#include <sys/dmu.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_prop.h>
-#include <sys/sunddi.h>
-#include <sys/policy.h>
-#include <sys/zone.h>
-#include <sys/nvpair.h>
-#include <sys/mount.h>
-#include <sys/taskqueue.h>
-#include <sys/sdt.h>
-#include <sys/varargs.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zvol.h>
-
-#include "zfs_namecheck.h"
-#include "zfs_prop.h"
-
-CTASSERT(sizeof(zfs_cmd_t) <= PAGE_SIZE);
-
-static struct cdev *zfsdev;
-
-extern void zfs_init(void);
-extern void zfs_fini(void);
-
-typedef int zfs_ioc_func_t(zfs_cmd_t *);
-typedef int zfs_secpolicy_func_t(const char *, cred_t *);
-
-typedef struct zfs_ioc_vec {
- zfs_ioc_func_t *zvec_func;
- zfs_secpolicy_func_t *zvec_secpolicy;
- enum {
- no_name,
- pool_name,
- dataset_name
- } zvec_namecheck;
-} zfs_ioc_vec_t;
-
-/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
-void
-__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
-{
- const char *newfile;
- char buf[256];
- va_list adx;
-
- /*
- * Get rid of annoying "../common/" prefix to filename.
- */
- newfile = strrchr(file, '/');
- if (newfile != NULL) {
- newfile = newfile + 1; /* Get rid of leading / */
- } else {
- newfile = file;
- }
-
- va_start(adx, fmt);
- (void) vsnprintf(buf, sizeof (buf), fmt, adx);
- va_end(adx);
-
- /*
- * To get this data, use the zfs-dprintf probe as so:
- * dtrace -q -n 'zfs-dprintf \
- * /stringof(arg0) == "dbuf.c"/ \
- * {printf("%s: %s", stringof(arg1), stringof(arg3))}'
- * arg0 = file name
- * arg1 = function name
- * arg2 = line number
- * arg3 = message
- */
- DTRACE_PROBE4(zfs__dprintf,
- char *, newfile, char *, func, int, line, char *, buf);
-}
-
-/*
- * Policy for top-level read operations (list pools). Requires no privileges,
- * and can be used in the local zone, as there is no associated dataset.
- */
-/* ARGSUSED */
-static int
-zfs_secpolicy_none(const char *unused1, cred_t *cr)
-{
- return (0);
-}
-
-/*
- * Policy for dataset read operations (list children, get statistics). Requires
- * no privileges, but must be visible in the local zone.
- */
-/* ARGSUSED */
-static int
-zfs_secpolicy_read(const char *dataset, cred_t *cr)
-{
- if (INGLOBALZONE(curproc) ||
- zone_dataset_visible(dataset, NULL))
- return (0);
-
- return (ENOENT);
-}
-
-static int
-zfs_dozonecheck(const char *dataset, cred_t *cr)
-{
- uint64_t zoned;
- int writable = 1;
-
- /*
- * The dataset must be visible by this zone -- check this first
- * so they don't see EPERM on something they shouldn't know about.
- */
- if (!INGLOBALZONE(curproc) &&
- !zone_dataset_visible(dataset, &writable))
- return (ENOENT);
-
- if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL))
- return (ENOENT);
-
- if (INGLOBALZONE(curproc)) {
- /*
- * If the fs is zoned, only root can access it from the
- * global zone.
- */
- if (secpolicy_zfs(cr) && zoned)
- return (EPERM);
- } else {
- /*
- * If we are in a local zone, the 'zoned' property must be set.
- */
- if (!zoned)
- return (EPERM);
-
- /* must be writable by this zone */
- if (!writable)
- return (EPERM);
- }
- return (0);
-}
-
-/*
- * Policy for dataset write operations (create children, set properties, etc).
- * Requires SYS_MOUNT privilege, and must be writable in the local zone.
- */
-int
-zfs_secpolicy_write(const char *dataset, cred_t *cr)
-{
- int error;
-
- if (error = zfs_dozonecheck(dataset, cr))
- return (error);
-
- return (secpolicy_zfs(cr));
-}
-
-/*
- * Policy for operations that want to write a dataset's parent:
- * create, destroy, snapshot, clone, restore.
- */
-static int
-zfs_secpolicy_parent(const char *dataset, cred_t *cr)
-{
- char parentname[MAXNAMELEN];
- char *cp;
-
- /*
- * Remove the @bla or /bla from the end of the name to get the parent.
- */
- (void) strncpy(parentname, dataset, sizeof (parentname));
- cp = strrchr(parentname, '@');
- if (cp != NULL) {
- cp[0] = '\0';
- } else {
- cp = strrchr(parentname, '/');
- if (cp == NULL)
- return (ENOENT);
- cp[0] = '\0';
-
- }
-
- return (zfs_secpolicy_write(parentname, cr));
-}
-
-/*
- * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
- * SYS_CONFIG privilege, which is not available in a local zone.
- */
-/* ARGSUSED */
-static int
-zfs_secpolicy_config(const char *unused, cred_t *cr)
-{
- if (secpolicy_sys_config(cr, B_FALSE) != 0)
- return (EPERM);
-
- return (0);
-}
-
-/*
- * Policy for fault injection. Requires all privileges.
- */
-/* ARGSUSED */
-static int
-zfs_secpolicy_inject(const char *unused, cred_t *cr)
-{
- return (secpolicy_zinject(cr));
-}
-
-/*
- * Policy for dataset backup operations (sendbackup).
- * Requires SYS_MOUNT privilege, and must be writable in the local zone.
- */
-static int
-zfs_secpolicy_operator(const char *dataset, cred_t *cr)
-{
- int writable = 1;
-
- if (!INGLOBALZONE(curproc) && !zone_dataset_visible(dataset, &writable))
- return (ENOENT);
- if (secpolicy_zfs(cr) != 0 && !groupmember(GID_OPERATOR, cr))
- return (EPERM);
- return (0);
-}
-
-/*
- * Returns the nvlist as specified by the user in the zfs_cmd_t.
- */
-static int
-get_nvlist(zfs_cmd_t *zc, nvlist_t **nvp)
-{
- char *packed;
- size_t size;
- int error;
- nvlist_t *config = NULL;
-
- /*
- * Read in and unpack the user-supplied nvlist.
- */
- if ((size = zc->zc_nvlist_src_size) == 0)
- return (EINVAL);
-
- packed = kmem_alloc(size, KM_SLEEP);
-
- if ((error = xcopyin((void *)(uintptr_t)zc->zc_nvlist_src, packed,
- size)) != 0) {
- kmem_free(packed, size);
- return (error);
- }
-
- if ((error = nvlist_unpack(packed, size, &config, 0)) != 0) {
- kmem_free(packed, size);
- return (error);
- }
-
- kmem_free(packed, size);
-
- *nvp = config;
- return (0);
-}
-
-static int
-put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
-{
- char *packed = NULL;
- size_t size;
- int error;
-
- VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
-
- if (size > zc->zc_nvlist_dst_size) {
- /*
- * Solaris returns ENOMEM here, because even if an error is
- * returned from an ioctl(2), new zc_nvlist_dst_size will be
- * passed to the userland. This is not the case for FreeBSD.
- * We need to return 0, so the kernel will copy the
- * zc_nvlist_dst_size back and the userland can discover that a
- * bigger buffer is needed.
- */
- error = 0;
- } else {
- VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
- KM_SLEEP) == 0);
- error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
- size);
- kmem_free(packed, size);
- }
-
- zc->zc_nvlist_dst_size = size;
- return (error);
-}
-
-static int
-zfs_ioc_pool_create(zfs_cmd_t *zc)
-{
- int error;
- nvlist_t *config;
-
- if ((error = get_nvlist(zc, &config)) != 0)
- return (error);
-
- error = spa_create(zc->zc_name, config, zc->zc_value[0] == '\0' ?
- NULL : zc->zc_value);
-
- nvlist_free(config);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_destroy(zfs_cmd_t *zc)
-{
- return (spa_destroy(zc->zc_name));
-}
-
-static int
-zfs_ioc_pool_import(zfs_cmd_t *zc)
-{
- int error;
- nvlist_t *config;
- uint64_t guid;
-
- if ((error = get_nvlist(zc, &config)) != 0)
- return (error);
-
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
- guid != zc->zc_guid)
- error = EINVAL;
- else
- error = spa_import(zc->zc_name, config,
- zc->zc_value[0] == '\0' ? NULL : zc->zc_value);
-
- nvlist_free(config);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_export(zfs_cmd_t *zc)
-{
- return (spa_export(zc->zc_name, NULL));
-}
-
-static int
-zfs_ioc_pool_configs(zfs_cmd_t *zc)
-{
- nvlist_t *configs;
- int error;
-
- if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
- return (EEXIST);
-
- error = put_nvlist(zc, configs);
-
- nvlist_free(configs);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_stats(zfs_cmd_t *zc)
-{
- nvlist_t *config;
- int error;
- int ret = 0;
-
- error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
- sizeof (zc->zc_value));
-
- if (config != NULL) {
- ret = put_nvlist(zc, config);
- nvlist_free(config);
-
- /*
- * The config may be present even if 'error' is non-zero.
- * In this case we return success, and preserve the real errno
- * in 'zc_cookie'.
- */
- zc->zc_cookie = error;
- } else {
- ret = error;
- }
-
- return (ret);
-}
-
-/*
- * Try to import the given pool, returning pool stats as appropriate so that
- * user land knows which devices are available and overall pool health.
- */
-static int
-zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
-{
- nvlist_t *tryconfig, *config;
- int error;
-
- if ((error = get_nvlist(zc, &tryconfig)) != 0)
- return (error);
-
- config = spa_tryimport(tryconfig);
-
- nvlist_free(tryconfig);
-
- if (config == NULL)
- return (EINVAL);
-
- error = put_nvlist(zc, config);
- nvlist_free(config);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_scrub(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- error = spa_scrub(spa, zc->zc_cookie, B_FALSE);
-
- spa_close(spa, FTAG);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_freeze(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error == 0) {
- spa_freeze(spa);
- spa_close(spa, FTAG);
- }
- return (error);
-}
-
-static int
-zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- spa_upgrade(spa);
-
- spa_close(spa, FTAG);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_get_history(zfs_cmd_t *zc)
-{
- spa_t *spa;
- char *hist_buf;
- uint64_t size;
- int error;
-
- if ((size = zc->zc_history_len) == 0)
- return (EINVAL);
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- if (spa_version(spa) < ZFS_VERSION_ZPOOL_HISTORY) {
- spa_close(spa, FTAG);
- return (ENOTSUP);
- }
-
- hist_buf = kmem_alloc(size, KM_SLEEP);
- if ((error = spa_history_get(spa, &zc->zc_history_offset,
- &zc->zc_history_len, hist_buf)) == 0) {
- error = xcopyout(hist_buf, (char *)(uintptr_t)zc->zc_history,
- zc->zc_history_len);
- }
-
- spa_close(spa, FTAG);
- kmem_free(hist_buf, size);
- return (error);
-}
-
-static int
-zfs_ioc_pool_log_history(zfs_cmd_t *zc)
-{
- spa_t *spa;
- char *history_str = NULL;
- size_t size;
- int error;
-
- size = zc->zc_history_len;
- if (size == 0 || size > HIS_MAX_RECORD_LEN)
- return (EINVAL);
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- if (spa_version(spa) < ZFS_VERSION_ZPOOL_HISTORY) {
- spa_close(spa, FTAG);
- return (ENOTSUP);
- }
-
- /* add one for the NULL delimiter */
- size++;
- history_str = kmem_alloc(size, KM_SLEEP);
- if ((error = xcopyin((void *)(uintptr_t)zc->zc_history, history_str,
- size)) != 0) {
- spa_close(spa, FTAG);
- kmem_free(history_str, size);
- return (error);
- }
- history_str[size - 1] = '\0';
-
- error = spa_history_log(spa, history_str, zc->zc_history_offset);
-
- spa_close(spa, FTAG);
- kmem_free(history_str, size);
-
- return (error);
-}
-
-static int
-zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
-{
- int error;
-
- if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
- return (error);
-
- return (0);
-}
-
-static int
-zfs_ioc_obj_to_path(zfs_cmd_t *zc)
-{
- objset_t *osp;
- int error;
-
- if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
- DS_MODE_NONE | DS_MODE_READONLY, &osp)) != 0)
- return (error);
-
- error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
- sizeof (zc->zc_value));
- dmu_objset_close(osp);
-
- return (error);
-}
-
-static int
-zfs_ioc_vdev_add(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
- nvlist_t *config;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error != 0)
- return (error);
-
- /*
- * A root pool with concatenated devices is not supported.
- * Thus, can not add a device to a root pool with one device.
- */
- if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0) {
- spa_close(spa, FTAG);
- return (EDOM);
- }
-
- if ((error = get_nvlist(zc, &config)) == 0) {
- error = spa_vdev_add(spa, config);
- nvlist_free(config);
- }
-
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_remove(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error != 0)
- return (error);
- error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_online(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
- error = vdev_online(spa, zc->zc_guid);
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_offline(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int istmp = zc->zc_cookie;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
- error = vdev_offline(spa, zc->zc_guid, istmp);
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_attach(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int replacing = zc->zc_cookie;
- nvlist_t *config;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- if ((error = get_nvlist(zc, &config)) == 0) {
- error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
- nvlist_free(config);
- }
-
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_detach(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
-
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
-{
- spa_t *spa;
- char *path = zc->zc_value;
- uint64_t guid = zc->zc_guid;
- int error;
-
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error != 0)
- return (error);
-
- error = spa_vdev_setpath(spa, guid, path);
- spa_close(spa, FTAG);
- return (error);
-}
-
-static int
-zfs_ioc_objset_stats(zfs_cmd_t *zc)
-{
- objset_t *os = NULL;
- int error;
- nvlist_t *nv;
-
-retry:
- error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- if (error != 0) {
- /*
- * This is ugly: dmu_objset_open() can return EBUSY if
- * the objset is held exclusively. Fortunately this hold is
- * only for a short while, so we retry here.
- * This avoids user code having to handle EBUSY,
- * for example for a "zfs list".
- */
- if (error == EBUSY) {
- delay(1);
- goto retry;
- }
- return (error);
- }
-
- dmu_objset_fast_stat(os, &zc->zc_objset_stats);
-
- if (zc->zc_nvlist_dst != 0 &&
- (error = dsl_prop_get_all(os, &nv)) == 0) {
- dmu_objset_stats(os, nv);
- /*
- * NB: zvol_get_stats() will read the objset contents,
- * which we aren't supposed to do with a
- * DS_MODE_STANDARD open, because it could be
- * inconsistent. So this is a bit of a workaround...
- */
- if (!zc->zc_objset_stats.dds_inconsistent &&
- dmu_objset_type(os) == DMU_OST_ZVOL)
- VERIFY(zvol_get_stats(os, nv) == 0);
- error = put_nvlist(zc, nv);
- nvlist_free(nv);
- }
-
- spa_altroot(dmu_objset_spa(os), zc->zc_value, sizeof (zc->zc_value));
-
- dmu_objset_close(os);
- if (error == ENOMEM)
- error = 0;
- return (error);
-}
-
-static int
-zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
-{
- objset_t *os;
- int error;
- char *p;
-
-retry:
- error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- if (error != 0) {
- /*
- * This is ugly: dmu_objset_open() can return EBUSY if
- * the objset is held exclusively. Fortunately this hold is
- * only for a short while, so we retry here.
- * This avoids user code having to handle EBUSY,
- * for example for a "zfs list".
- */
- if (error == EBUSY) {
- delay(1);
- goto retry;
- }
- if (error == ENOENT)
- error = ESRCH;
- return (error);
- }
-
- p = strrchr(zc->zc_name, '/');
- if (p == NULL || p[1] != '\0')
- (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
- p = zc->zc_name + strlen(zc->zc_name);
-
- do {
- error = dmu_dir_list_next(os,
- sizeof (zc->zc_name) - (p - zc->zc_name), p,
- NULL, &zc->zc_cookie);
- if (error == ENOENT)
- error = ESRCH;
- } while (error == 0 && !INGLOBALZONE(curproc) &&
- !zone_dataset_visible(zc->zc_name, NULL));
-
- /*
- * If it's a hidden dataset (ie. with a '$' in its name), don't
- * try to get stats for it. Userland will skip over it.
- */
- if (error == 0 && strchr(zc->zc_name, '$') == NULL)
- error = zfs_ioc_objset_stats(zc); /* fill in the stats */
-
- dmu_objset_close(os);
- return (error);
-}
-
-static int
-zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
-{
- objset_t *os;
- int error;
-
-retry:
- error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- if (error != 0) {
- /*
- * This is ugly: dmu_objset_open() can return EBUSY if
- * the objset is held exclusively. Fortunately this hold is
- * only for a short while, so we retry here.
- * This avoids user code having to handle EBUSY,
- * for example for a "zfs list".
- */
- if (error == EBUSY) {
- delay(1);
- goto retry;
- }
- if (error == ENOENT)
- error = ESRCH;
- return (error);
- }
-
- /*
- * A dataset name of maximum length cannot have any snapshots,
- * so exit immediately.
- */
- if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
- dmu_objset_close(os);
- return (ESRCH);
- }
-
- error = dmu_snapshot_list_next(os,
- sizeof (zc->zc_name) - strlen(zc->zc_name),
- zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie);
- if (error == ENOENT)
- error = ESRCH;
-
- if (error == 0)
- error = zfs_ioc_objset_stats(zc); /* fill in the stats */
-
- dmu_objset_close(os);
- return (error);
-}
-
-static int
-zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl)
-{
- nvpair_t *elem;
- int error;
- const char *propname;
- zfs_prop_t prop;
- uint64_t intval;
- char *strval;
- char buf[MAXNAMELEN];
- const char *p;
- spa_t *spa;
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
- propname = nvpair_name(elem);
-
- if ((prop = zfs_name_to_prop(propname)) ==
- ZFS_PROP_INVAL) {
- /*
- * If this is a user-defined property, it must be a
- * string, and there is no further validation to do.
- */
- if (!zfs_prop_user(propname) ||
- nvpair_type(elem) != DATA_TYPE_STRING)
- return (EINVAL);
-
- VERIFY(nvpair_value_string(elem, &strval) == 0);
- error = dsl_prop_set(name, propname, 1,
- strlen(strval) + 1, strval);
- if (error == 0)
- continue;
- else
- return (error);
- }
-
- /*
- * Check permissions for special properties.
- */
- switch (prop) {
- case ZFS_PROP_ZONED:
- /*
- * Disallow setting of 'zoned' from within a local zone.
- */
- if (!INGLOBALZONE(curproc))
- return (EPERM);
- break;
-
- case ZFS_PROP_QUOTA:
- if (error = zfs_dozonecheck(name, cr))
- return (error);
-
- if (!INGLOBALZONE(curproc)) {
- uint64_t zoned;
- char setpoint[MAXNAMELEN];
- int dslen;
- /*
- * Unprivileged users are allowed to modify the
- * quota on things *under* (ie. contained by)
- * the thing they own.
- */
- if (dsl_prop_get_integer(name, "jailed", &zoned,
- setpoint))
- return (EPERM);
- if (!zoned) /* this shouldn't happen */
- return (EPERM);
- dslen = strlen(name);
- if (dslen <= strlen(setpoint))
- return (EPERM);
- }
- break;
-
- case ZFS_PROP_COMPRESSION:
- /*
- * If the user specified gzip compression, make sure
- * the SPA supports it. We ignore any errors here since
- * we'll catch them later.
- */
- if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
- nvpair_value_uint64(elem, &intval) == 0 &&
- intval >= ZIO_COMPRESS_GZIP_1 &&
- intval <= ZIO_COMPRESS_GZIP_9) {
- if ((p = strchr(name, '/')) == NULL) {
- p = name;
- } else {
- bcopy(name, buf, p - name);
- buf[p - name] = '\0';
- p = buf;
- }
-
- if (spa_open(p, &spa, FTAG) == 0) {
- if (spa_version(spa) <
- ZFS_VERSION_GZIP_COMPRESSION) {
- spa_close(spa, FTAG);
- return (ENOTSUP);
- }
-
- spa_close(spa, FTAG);
- }
- }
- break;
- }
-
- switch (prop) {
- case ZFS_PROP_QUOTA:
- if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
- (error = dsl_dir_set_quota(name,
- intval)) != 0)
- return (error);
- break;
-
- case ZFS_PROP_RESERVATION:
- if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
- (error = dsl_dir_set_reservation(name,
- intval)) != 0)
- return (error);
- break;
-
- case ZFS_PROP_VOLSIZE:
- if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
- (error = zvol_set_volsize(name, dev,
- intval)) != 0)
- return (error);
- break;
-
- case ZFS_PROP_VOLBLOCKSIZE:
- if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
- (error = zvol_set_volblocksize(name,
- intval)) != 0)
- return (error);
- break;
-
- default:
- if (nvpair_type(elem) == DATA_TYPE_STRING) {
- if (zfs_prop_get_type(prop) !=
- prop_type_string)
- return (EINVAL);
- VERIFY(nvpair_value_string(elem, &strval) == 0);
- if ((error = dsl_prop_set(name,
- nvpair_name(elem), 1, strlen(strval) + 1,
- strval)) != 0)
- return (error);
- } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
- const char *unused;
-
- VERIFY(nvpair_value_uint64(elem, &intval) == 0);
-
- switch (zfs_prop_get_type(prop)) {
- case prop_type_number:
- break;
- case prop_type_boolean:
- if (intval > 1)
- return (EINVAL);
- break;
- case prop_type_string:
- return (EINVAL);
- case prop_type_index:
- if (zfs_prop_index_to_string(prop,
- intval, &unused) != 0)
- return (EINVAL);
- break;
- default:
- cmn_err(CE_PANIC, "unknown property "
- "type");
- break;
- }
-
- if ((error = dsl_prop_set(name, propname,
- 8, 1, &intval)) != 0)
- return (error);
- } else {
- return (EINVAL);
- }
- break;
- }
- }
-
- return (0);
-}
-
-static int
-zfs_ioc_set_prop(zfs_cmd_t *zc)
-{
- nvlist_t *nvl;
- int error;
- zfs_prop_t prop;
-
- /*
- * If zc_value is set, then this is an attempt to inherit a value.
- * Otherwise, zc_nvlist refers to a list of properties to set.
- */
- if (zc->zc_value[0] != '\0') {
- if (!zfs_prop_user(zc->zc_value) &&
- ((prop = zfs_name_to_prop(zc->zc_value)) ==
- ZFS_PROP_INVAL ||
- !zfs_prop_inheritable(prop)))
- return (EINVAL);
-
- return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
- }
-
- if ((error = get_nvlist(zc, &nvl)) != 0)
- return (error);
-
- error = zfs_set_prop_nvlist(zc->zc_name, zc->zc_dev,
- (cred_t *)(uintptr_t)zc->zc_cred, nvl);
- nvlist_free(nvl);
- return (error);
-}
-
-static int
-zfs_ioc_pool_set_props(zfs_cmd_t *zc)
-{
- nvlist_t *nvl;
- int error, reset_bootfs = 0;
- uint64_t objnum;
- zpool_prop_t prop;
- nvpair_t *elem;
- char *propname, *strval;
- spa_t *spa;
- vdev_t *rvdev;
- char *vdev_type;
- objset_t *os;
-
- if ((error = get_nvlist(zc, &nvl)) != 0)
- return (error);
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
- nvlist_free(nvl);
- return (error);
- }
-
- if (spa_version(spa) < ZFS_VERSION_BOOTFS) {
- nvlist_free(nvl);
- spa_close(spa, FTAG);
- return (ENOTSUP);
- }
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
-
- propname = nvpair_name(elem);
-
- if ((prop = zpool_name_to_prop(propname)) ==
- ZFS_PROP_INVAL) {
- nvlist_free(nvl);
- spa_close(spa, FTAG);
- return (EINVAL);
- }
-
- switch (prop) {
- case ZFS_PROP_BOOTFS:
- /*
- * A bootable filesystem can not be on a RAIDZ pool
- * nor a striped pool with more than 1 device.
- */
- rvdev = spa->spa_root_vdev;
- vdev_type =
- rvdev->vdev_child[0]->vdev_ops->vdev_op_type;
- if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
- (strcmp(vdev_type, VDEV_TYPE_MIRROR) != 0 &&
- rvdev->vdev_children > 1)) {
- error = ENOTSUP;
- break;
- }
-
- reset_bootfs = 1;
-
- VERIFY(nvpair_value_string(elem, &strval) == 0);
- if (strval == NULL || strval[0] == '\0') {
- objnum =
- zfs_prop_default_numeric(ZFS_PROP_BOOTFS);
- break;
- }
-
- if (error = dmu_objset_open(strval, DMU_OST_ZFS,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os))
- break;
- objnum = dmu_objset_id(os);
- dmu_objset_close(os);
- break;
-
- default:
- error = EINVAL;
- }
-
- if (error)
- break;
- }
- if (error == 0) {
- if (reset_bootfs) {
- VERIFY(nvlist_remove(nvl,
- zpool_prop_to_name(ZFS_PROP_BOOTFS),
- DATA_TYPE_STRING) == 0);
- VERIFY(nvlist_add_uint64(nvl,
- zpool_prop_to_name(ZFS_PROP_BOOTFS), objnum) == 0);
- }
- error = spa_set_props(spa, nvl);
- }
-
- nvlist_free(nvl);
- spa_close(spa, FTAG);
-
- return (error);
-}
-
-static int
-zfs_ioc_pool_get_props(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
- nvlist_t *nvp = NULL;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- error = spa_get_props(spa, &nvp);
-
- if (error == 0 && zc->zc_nvlist_dst != 0)
- error = put_nvlist(zc, nvp);
- else
- error = EFAULT;
-
- spa_close(spa, FTAG);
-
- if (nvp)
- nvlist_free(nvp);
- return (error);
-}
-
-static int
-zfs_ioc_create_minor(zfs_cmd_t *zc)
-{
- return (zvol_create_minor(zc->zc_name, zc->zc_dev));
-}
-
-static int
-zfs_ioc_remove_minor(zfs_cmd_t *zc)
-{
- return (zvol_remove_minor(zc->zc_name));
-}
-
-/*
- * Search the vfs list for a specified resource. Returns a pointer to it
- * or NULL if no suitable entry is found. The caller of this routine
- * is responsible for releasing the returned vfs pointer.
- */
-static vfs_t *
-zfs_get_vfs(const char *resource)
-{
- vfs_t *vfsp;
-
- mtx_lock(&mountlist_mtx);
- TAILQ_FOREACH(vfsp, &mountlist, mnt_list) {
- if (strcmp(vfsp->mnt_stat.f_mntfromname, resource) == 0) {
- VFS_HOLD(vfsp);
- break;
- }
- }
- mtx_unlock(&mountlist_mtx);
- return (vfsp);
-}
-
-static void
-zfs_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
-{
- zfs_create_data_t *zc = arg;
-
- zfs_create_fs(os, (cred_t *)(uintptr_t)zc->zc_cred, tx);
-}
-
-static int
-zfs_ioc_create(zfs_cmd_t *zc)
-{
- objset_t *clone;
- int error = 0;
- zfs_create_data_t cbdata = { 0 };
- void (*cbfunc)(objset_t *os, void *arg, dmu_tx_t *tx);
- dmu_objset_type_t type = zc->zc_objset_type;
-
- switch (type) {
-
- case DMU_OST_ZFS:
- cbfunc = zfs_create_cb;
- break;
-
- case DMU_OST_ZVOL:
- cbfunc = zvol_create_cb;
- break;
-
- default:
- cbfunc = NULL;
- }
- if (strchr(zc->zc_name, '@'))
- return (EINVAL);
-
- if (zc->zc_nvlist_src != 0 &&
- (error = get_nvlist(zc, &cbdata.zc_props)) != 0)
- return (error);
-
- cbdata.zc_cred = (cred_t *)(uintptr_t)zc->zc_cred;
- cbdata.zc_dev = (dev_t)zc->zc_dev;
-
- if (zc->zc_value[0] != '\0') {
- /*
- * We're creating a clone of an existing snapshot.
- */
- zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
- if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
- nvlist_free(cbdata.zc_props);
- return (EINVAL);
- }
-
- error = dmu_objset_open(zc->zc_value, type,
- DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
- if (error) {
- nvlist_free(cbdata.zc_props);
- return (error);
- }
- error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL);
- dmu_objset_close(clone);
- } else {
- if (cbfunc == NULL) {
- nvlist_free(cbdata.zc_props);
- return (EINVAL);
- }
-
- if (type == DMU_OST_ZVOL) {
- uint64_t volsize, volblocksize;
-
- if (cbdata.zc_props == NULL ||
- nvlist_lookup_uint64(cbdata.zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- &volsize) != 0) {
- nvlist_free(cbdata.zc_props);
- return (EINVAL);
- }
-
- if ((error = nvlist_lookup_uint64(cbdata.zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- &volblocksize)) != 0 && error != ENOENT) {
- nvlist_free(cbdata.zc_props);
- return (EINVAL);
- }
-
- if (error != 0)
- volblocksize = zfs_prop_default_numeric(
- ZFS_PROP_VOLBLOCKSIZE);
-
- if ((error = zvol_check_volblocksize(
- volblocksize)) != 0 ||
- (error = zvol_check_volsize(volsize,
- volblocksize)) != 0) {
- nvlist_free(cbdata.zc_props);
- return (error);
- }
- }
-
- error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc,
- &cbdata);
- }
-
- /*
- * It would be nice to do this atomically.
- */
- if (error == 0) {
- if ((error = zfs_set_prop_nvlist(zc->zc_name,
- zc->zc_dev, (cred_t *)(uintptr_t)zc->zc_cred,
- cbdata.zc_props)) != 0)
- (void) dmu_objset_destroy(zc->zc_name);
- }
-
- nvlist_free(cbdata.zc_props);
- return (error);
-}
-
-static int
-zfs_ioc_snapshot(zfs_cmd_t *zc)
-{
- if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
- return (dmu_objset_snapshot(zc->zc_name,
- zc->zc_value, zc->zc_cookie));
-}
-
-int
-zfs_unmount_snap(char *name, void *arg)
-{
- char *snapname = arg;
- char *cp;
- vfs_t *vfsp = NULL;
-
- /*
- * Snapshots (which are under .zfs control) must be unmounted
- * before they can be destroyed.
- */
-
- if (snapname) {
- (void) strcat(name, "@");
- (void) strcat(name, snapname);
- vfsp = zfs_get_vfs(name);
- cp = strchr(name, '@');
- *cp = '\0';
- } else if (strchr(name, '@')) {
- vfsp = zfs_get_vfs(name);
- }
-
- if (vfsp) {
- /*
- * Always force the unmount for snapshots.
- */
- int flag = MS_FORCE;
- int err;
-
- if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
- VFS_RELE(vfsp);
- return (err);
- }
- VFS_RELE(vfsp);
- mtx_lock(&Giant); /* dounmount() */
- dounmount(vfsp, flag, curthread);
- mtx_unlock(&Giant); /* dounmount() */
- }
- return (0);
-}
-
-static int
-zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
-{
- int err;
-
- if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
- err = dmu_objset_find(zc->zc_name,
- zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
- if (err)
- return (err);
- return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
-}
-
-static int
-zfs_ioc_destroy(zfs_cmd_t *zc)
-{
- if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
- int err = zfs_unmount_snap(zc->zc_name, NULL);
- if (err)
- return (err);
- }
-
- return (dmu_objset_destroy(zc->zc_name));
-}
-
-static int
-zfs_ioc_rollback(zfs_cmd_t *zc)
-{
- return (dmu_objset_rollback(zc->zc_name));
-}
-
-static int
-zfs_ioc_rename(zfs_cmd_t *zc)
-{
- int recursive = zc->zc_cookie & 1;
-
- zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
- if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
-
- /*
- * Unmount snapshot unless we're doing a recursive rename,
- * in which case the dataset code figures out which snapshots
- * to unmount.
- */
- if (!recursive && strchr(zc->zc_name, '@') != NULL &&
- zc->zc_objset_type == DMU_OST_ZFS) {
- int err = zfs_unmount_snap(zc->zc_name, NULL);
- if (err)
- return (err);
- }
-
- return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
-}
-
-static int
-zfs_ioc_recvbackup(zfs_cmd_t *zc)
-{
- kthread_t *td = curthread;
- struct file *fp;
- int error;
- offset_t new_off;
-
- if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
- strchr(zc->zc_value, '@') == NULL)
- return (EINVAL);
-
- error = fget_read(td, zc->zc_cookie, &fp);
- if (error)
- return (error);
-
- error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record,
- &zc->zc_cookie, (boolean_t)zc->zc_guid, fp,
- fp->f_offset);
-
- new_off = fp->f_offset + zc->zc_cookie;
- fp->f_offset = new_off;
-
- fdrop(fp, td);
- return (error);
-}
-
-static int
-zfs_ioc_sendbackup(zfs_cmd_t *zc)
-{
- kthread_t *td = curthread;
- struct file *fp;
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
- int error, fd;
-
- error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap);
- if (error)
- return (error);
-
- if (zc->zc_value[0] != '\0') {
- char buf[MAXPATHLEN];
- char *cp;
-
- (void) strncpy(buf, zc->zc_name, sizeof (buf));
- cp = strchr(buf, '@');
- if (cp)
- *(cp+1) = 0;
- (void) strlcat(buf, zc->zc_value, sizeof (buf));
- error = dmu_objset_open(buf, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap);
- if (error) {
- dmu_objset_close(tosnap);
- return (error);
- }
- }
-
- fd = zc->zc_cookie;
- error = fget_write(td, fd, &fp);
- if (error) {
- dmu_objset_close(tosnap);
- if (fromsnap)
- dmu_objset_close(fromsnap);
- return (error);
- }
-
- error = dmu_sendbackup(tosnap, fromsnap, fp);
-
- fdrop(fp, td);
- if (fromsnap)
- dmu_objset_close(fromsnap);
- dmu_objset_close(tosnap);
- return (error);
-}
-
-static int
-zfs_ioc_inject_fault(zfs_cmd_t *zc)
-{
- int id, error;
-
- error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
- &zc->zc_inject_record);
-
- if (error == 0)
- zc->zc_guid = (uint64_t)id;
-
- return (error);
-}
-
-static int
-zfs_ioc_clear_fault(zfs_cmd_t *zc)
-{
- return (zio_clear_fault((int)zc->zc_guid));
-}
-
-static int
-zfs_ioc_inject_list_next(zfs_cmd_t *zc)
-{
- int id = (int)zc->zc_guid;
- int error;
-
- error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
- &zc->zc_inject_record);
-
- zc->zc_guid = id;
-
- return (error);
-}
-
-static int
-zfs_ioc_error_log(zfs_cmd_t *zc)
-{
- spa_t *spa;
- int error;
- size_t count = (size_t)zc->zc_nvlist_dst_size;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
- &count);
- if (error == 0)
- zc->zc_nvlist_dst_size = count;
- else
- zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
-
- spa_close(spa, FTAG);
-
- return (error);
-}
-
-static int
-zfs_ioc_clear(zfs_cmd_t *zc)
-{
- spa_t *spa;
- vdev_t *vd;
- int error;
-
- if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
- return (error);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- if (zc->zc_guid == 0) {
- vd = NULL;
- } else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) {
- spa_config_exit(spa, FTAG);
- spa_close(spa, FTAG);
- return (ENODEV);
- }
-
- vdev_clear(spa, vd);
-
- spa_config_exit(spa, FTAG);
-
- spa_close(spa, FTAG);
-
- return (0);
-}
-
-static int
-zfs_ioc_promote(zfs_cmd_t *zc)
-{
- char *cp;
-
- /*
- * We don't need to unmount *all* the origin fs's snapshots, but
- * it's easier.
- */
- cp = strchr(zc->zc_value, '@');
- if (cp)
- *cp = '\0';
- (void) dmu_objset_find(zc->zc_value,
- zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
- return (dsl_dataset_promote(zc->zc_name));
-}
-
-static int
-zfs_ioc_jail(zfs_cmd_t *zc)
-{
-
- return (zone_dataset_attach((cred_t *)(uintptr_t)zc->zc_cred,
- zc->zc_name, (int)zc->zc_jailid));
-}
-
-static int
-zfs_ioc_unjail(zfs_cmd_t *zc)
-{
-
- return (zone_dataset_detach((cred_t *)(uintptr_t)zc->zc_cred,
- zc->zc_name, (int)zc->zc_jailid));
-}
-
-static zfs_ioc_vec_t zfs_ioc_vec[] = {
- { zfs_ioc_pool_create, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_destroy, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_import, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_export, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_configs, zfs_secpolicy_none, no_name },
- { zfs_ioc_pool_stats, zfs_secpolicy_read, pool_name },
- { zfs_ioc_pool_tryimport, zfs_secpolicy_config, no_name },
- { zfs_ioc_pool_scrub, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_freeze, zfs_secpolicy_config, no_name },
- { zfs_ioc_pool_upgrade, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_get_history, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_log_history, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_add, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_remove, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_online, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_offline, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_attach, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_detach, zfs_secpolicy_config, pool_name },
- { zfs_ioc_vdev_setpath, zfs_secpolicy_config, pool_name },
- { zfs_ioc_objset_stats, zfs_secpolicy_read, dataset_name },
- { zfs_ioc_dataset_list_next, zfs_secpolicy_read, dataset_name },
- { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, dataset_name },
- { zfs_ioc_set_prop, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_create_minor, zfs_secpolicy_config, dataset_name },
- { zfs_ioc_remove_minor, zfs_secpolicy_config, dataset_name },
- { zfs_ioc_create, zfs_secpolicy_parent, dataset_name },
- { zfs_ioc_destroy, zfs_secpolicy_parent, dataset_name },
- { zfs_ioc_rollback, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_rename, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_recvbackup, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_sendbackup, zfs_secpolicy_operator, dataset_name },
- { zfs_ioc_inject_fault, zfs_secpolicy_inject, no_name },
- { zfs_ioc_clear_fault, zfs_secpolicy_inject, no_name },
- { zfs_ioc_inject_list_next, zfs_secpolicy_inject, no_name },
- { zfs_ioc_error_log, zfs_secpolicy_inject, pool_name },
- { zfs_ioc_clear, zfs_secpolicy_config, pool_name },
- { zfs_ioc_promote, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_destroy_snaps, zfs_secpolicy_write, dataset_name },
- { zfs_ioc_snapshot, zfs_secpolicy_operator, dataset_name },
- { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, pool_name },
- { zfs_ioc_obj_to_path, zfs_secpolicy_config, no_name },
- { zfs_ioc_pool_set_props, zfs_secpolicy_config, pool_name },
- { zfs_ioc_pool_get_props, zfs_secpolicy_read, pool_name },
- { zfs_ioc_jail, zfs_secpolicy_config, dataset_name },
- { zfs_ioc_unjail, zfs_secpolicy_config, dataset_name }
-};
-
-static int
-zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
- struct thread *td)
-{
- zfs_cmd_t *zc = (void *)addr;
- uint_t vec;
- int error;
-
- vec = ZFS_IOC(cmd);
-
- if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
- return (EINVAL);
-
- zc->zc_cred = (uintptr_t)td->td_ucred;
- zc->zc_dev = (uintptr_t)dev;
- error = zfs_ioc_vec[vec].zvec_secpolicy(zc->zc_name, td->td_ucred);
-
- /*
- * Ensure that all pool/dataset names are valid before we pass down to
- * the lower layers.
- */
- if (error == 0) {
- zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
- switch (zfs_ioc_vec[vec].zvec_namecheck) {
- case pool_name:
- if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
- error = EINVAL;
- break;
-
- case dataset_name:
- if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
- error = EINVAL;
- break;
-
- case no_name:
- break;
- }
- }
-
- if (error == 0)
- error = zfs_ioc_vec[vec].zvec_func(zc);
-
- return (error);
-}
-
-/*
- * OK, so this is a little weird.
- *
- * /dev/zfs is the control node, i.e. minor 0.
- * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
- *
- * /dev/zfs has basically nothing to do except serve up ioctls,
- * so most of the standard driver entry points are in zvol.c.
- */
-static struct cdevsw zfs_cdevsw = {
- .d_version = D_VERSION,
- .d_ioctl = zfsdev_ioctl,
- .d_name = ZFS_DEV_NAME
-};
-
-static void
-zfsdev_init(void)
-{
- zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0660,
- ZFS_DEV_NAME);
-}
-
-static void
-zfsdev_fini(void)
-{
- if (zfsdev != NULL)
- destroy_dev(zfsdev);
-}
-
-static struct task zfs_start_task;
-static struct root_hold_token *zfs_root_token;
-
-static void
-zfs_start(void *context __unused, int pending __unused)
-{
-
- zfsdev_init();
- spa_init(FREAD | FWRITE);
- zfs_init();
- zvol_init();
- printf("ZFS storage pool version " ZFS_VERSION_STRING "\n");
- root_mount_rel(zfs_root_token);
-}
-
-static int
-zfs_modevent(module_t mod, int type, void *unused __unused)
-{
- int error;
-
- error = EOPNOTSUPP;
- switch (type) {
- case MOD_LOAD:
- zfs_root_token = root_mount_hold("ZFS");
- printf("WARNING: ZFS is considered to be an experimental "
- "feature in FreeBSD.\n");
- TASK_INIT(&zfs_start_task, 0, zfs_start, NULL);
- taskqueue_enqueue(taskqueue_thread, &zfs_start_task);
- error = 0;
- break;
- case MOD_UNLOAD:
- if (spa_busy() || zfs_busy() || zvol_busy() ||
- zio_injection_enabled) {
- error = EBUSY;
- break;
- }
- zvol_fini();
- zfs_fini();
- spa_fini();
- zfsdev_fini();
- error = 0;
- break;
- }
- return (error);
-}
-
-static moduledata_t zfs_mod = {
- "zfsctrl",
- zfs_modevent,
- 0
-};
-DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
deleted file mode 100644
index dde9ec1..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/cmn_err.h>
-#include <sys/kmem.h>
-#include <sys/file.h>
-#include <sys/vfs.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_dir.h>
-#include <sys/zil.h>
-#include <sys/byteorder.h>
-#include <sys/policy.h>
-#include <sys/stat.h>
-#include <sys/acl.h>
-#include <sys/dmu.h>
-#include <sys/spa.h>
-
-/*
- * All the functions in this file are used to construct the log entries
- * to record transactions. They allocate * a intent log transaction
- * structure (itx_t) and save within it all the information necessary to
- * possibly replay the transaction. The itx is then assigned a sequence
- * number and inserted in the in-memory list anchored in the zilog.
- */
-
-/*
- * zfs_log_create() is used to handle TX_CREATE, TX_MKDIR and TX_MKXATTR
- * transactions.
- */
-void
-zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name)
-{
- itx_t *itx;
- uint64_t seq;
- lr_create_t *lr;
- size_t namesize = strlen(name) + 1;
-
- if (zilog == NULL)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
- lr = (lr_create_t *)&itx->itx_lr;
- lr->lr_doid = dzp->z_id;
- lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- lr->lr_uid = zp->z_phys->zp_uid;
- lr->lr_gid = zp->z_phys->zp_gid;
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
- lr->lr_rdev = zp->z_phys->zp_rdev;
- bcopy(name, (char *)(lr + 1), namesize);
-
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions.
- */
-void
-zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, char *name)
-{
- itx_t *itx;
- uint64_t seq;
- lr_remove_t *lr;
- size_t namesize = strlen(name) + 1;
-
- if (zilog == NULL)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
- lr = (lr_remove_t *)&itx->itx_lr;
- lr->lr_doid = dzp->z_id;
- bcopy(name, (char *)(lr + 1), namesize);
-
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_link() handles TX_LINK transactions.
- */
-void
-zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name)
-{
- itx_t *itx;
- uint64_t seq;
- lr_link_t *lr;
- size_t namesize = strlen(name) + 1;
-
- if (zilog == NULL)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
- lr = (lr_link_t *)&itx->itx_lr;
- lr->lr_doid = dzp->z_id;
- lr->lr_link_obj = zp->z_id;
- bcopy(name, (char *)(lr + 1), namesize);
-
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_symlink() handles TX_SYMLINK transactions.
- */
-void
-zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *dzp, znode_t *zp, char *name, char *link)
-{
- itx_t *itx;
- uint64_t seq;
- lr_create_t *lr;
- size_t namesize = strlen(name) + 1;
- size_t linksize = strlen(link) + 1;
-
- if (zilog == NULL)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
- lr = (lr_create_t *)&itx->itx_lr;
- lr->lr_doid = dzp->z_id;
- lr->lr_foid = zp->z_id;
- lr->lr_mode = zp->z_phys->zp_mode;
- lr->lr_uid = zp->z_phys->zp_uid;
- lr->lr_gid = zp->z_phys->zp_gid;
- lr->lr_gen = zp->z_phys->zp_gen;
- lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
- lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
- bcopy(name, (char *)(lr + 1), namesize);
- bcopy(link, (char *)(lr + 1) + namesize, linksize);
-
- seq = zil_itx_assign(zilog, itx, tx);
- dzp->z_last_itx = seq;
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_rename() handles TX_RENAME transactions.
- */
-void
-zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
-{
- itx_t *itx;
- uint64_t seq;
- lr_rename_t *lr;
- size_t snamesize = strlen(sname) + 1;
- size_t dnamesize = strlen(dname) + 1;
-
- if (zilog == NULL)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
- lr = (lr_rename_t *)&itx->itx_lr;
- lr->lr_sdoid = sdzp->z_id;
- lr->lr_tdoid = tdzp->z_id;
- bcopy(sname, (char *)(lr + 1), snamesize);
- bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
-
- seq = zil_itx_assign(zilog, itx, tx);
- sdzp->z_last_itx = seq;
- tdzp->z_last_itx = seq;
- szp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_write() handles TX_WRITE transactions.
- */
-ssize_t zfs_immediate_write_sz = 32768;
-
-void
-zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, offset_t off, ssize_t len, int ioflag)
-{
- itx_t *itx;
- uint64_t seq;
- lr_write_t *lr;
- itx_wr_state_t write_state;
- int err;
-
- if (zilog == NULL || zp->z_unlinked)
- return;
-
- /*
- * Writes are handled in three different ways:
- *
- * WR_INDIRECT:
- * If the write is greater than zfs_immediate_write_sz then
- * later *if* we need to log the write then dmu_sync() is used
- * to immediately write the block and it's block pointer is put
- * in the log record.
- * WR_COPIED:
- * If we know we'll immediately be committing the
- * transaction (FDSYNC (O_DSYNC)), the we allocate a larger
- * log record here for the data and copy the data in.
- * WR_NEED_COPY:
- * Otherwise we don't allocate a buffer, and *if* we need to
- * flush the write later then a buffer is allocated and
- * we retrieve the data using the dmu.
- */
- if (len > zfs_immediate_write_sz)
- write_state = WR_INDIRECT;
- else if (ioflag & FDSYNC)
- write_state = WR_COPIED;
- else
- write_state = WR_NEED_COPY;
-
- itx = zil_itx_create(txtype, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
- lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED) {
- err = dmu_read(zp->z_zfsvfs->z_os, zp->z_id, off, len, lr + 1);
- if (err) {
- kmem_free(itx, offsetof(itx_t, itx_lr) +
- itx->itx_lr.lrc_reclen);
- itx = zil_itx_create(txtype, sizeof (*lr));
- lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
- }
- }
-
- itx->itx_wr_state = write_state;
- lr->lr_foid = zp->z_id;
- lr->lr_offset = off;
- lr->lr_length = len;
- lr->lr_blkoff = 0;
- BP_ZERO(&lr->lr_blkptr);
-
- itx->itx_private = zp->z_zfsvfs;
-
- itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_truncate() handles TX_TRUNCATE transactions.
- */
-void
-zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, uint64_t off, uint64_t len)
-{
- itx_t *itx;
- uint64_t seq;
- lr_truncate_t *lr;
-
- if (zilog == NULL || zp->z_unlinked)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr));
- lr = (lr_truncate_t *)&itx->itx_lr;
- lr->lr_foid = zp->z_id;
- lr->lr_offset = off;
- lr->lr_length = len;
-
- itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_setattr() handles TX_SETATTR transactions.
- */
-void
-zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, vattr_t *vap, uint_t mask_applied)
-{
- itx_t *itx;
- uint64_t seq;
- lr_setattr_t *lr;
-
- if (zilog == NULL || zp->z_unlinked)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr));
- lr = (lr_setattr_t *)&itx->itx_lr;
- lr->lr_foid = zp->z_id;
- lr->lr_mask = (uint64_t)mask_applied;
- lr->lr_mode = (uint64_t)vap->va_mode;
- lr->lr_uid = (uint64_t)vap->va_uid;
- lr->lr_gid = (uint64_t)vap->va_gid;
- lr->lr_size = (uint64_t)vap->va_size;
- ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime);
- ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime);
-
- itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
-}
-
-/*
- * zfs_log_acl() handles TX_ACL transactions.
- */
-void
-zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, int aclcnt, ace_t *z_ace)
-{
- itx_t *itx;
- uint64_t seq;
- lr_acl_t *lr;
-
- if (zilog == NULL || zp->z_unlinked)
- return;
-
- itx = zil_itx_create(txtype, sizeof (*lr) + aclcnt * sizeof (ace_t));
- lr = (lr_acl_t *)&itx->itx_lr;
- lr->lr_foid = zp->z_id;
- lr->lr_aclcnt = (uint64_t)aclcnt;
- bcopy(z_ace, (ace_t *)(lr + 1), aclcnt * sizeof (ace_t));
-
- itx->itx_sync = (zp->z_sync_cnt != 0);
- seq = zil_itx_assign(zilog, itx, tx);
- zp->z_last_itx = seq;
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c
deleted file mode 100644
index 2be3093..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/cmn_err.h>
-#include <sys/kmem.h>
-#include <sys/file.h>
-#include <sys/fcntl.h>
-#include <sys/vfs.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/spa.h>
-#include <sys/zil.h>
-#include <sys/byteorder.h>
-#include <sys/stat.h>
-#include <sys/acl.h>
-#include <sys/atomic.h>
-#include <sys/cred.h>
-#include <sys/namei.h>
-
-/*
- * Functions to replay ZFS intent log (ZIL) records
- * The functions are called through a function vector (zfs_replay_vector)
- * which is indexed by the transaction type.
- */
-
-static void
-zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
- uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
-{
- VATTR_NULL(vap);
- vap->va_mask = (uint_t)mask;
- vap->va_type = IFTOVT(mode);
- vap->va_mode = mode & MODEMASK;
- vap->va_uid = (uid_t)uid;
- vap->va_gid = (gid_t)gid;
- vap->va_rdev = zfs_cmpldev(rdev);
- vap->va_nodeid = nodeid;
-}
-
-/* ARGSUSED */
-static int
-zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
-{
- return (ENOTSUP);
-}
-
-static int
-zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
-{
- char *name = (char *)(lr + 1); /* name follows lr_create_t */
- char *link; /* symlink content follows name */
- znode_t *dzp;
- vnode_t *vp = NULL;
- vattr_t va;
- struct componentname cn;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
- return (error);
-
- zfs_init_vattr(&va, AT_TYPE | AT_MODE | AT_UID | AT_GID,
- lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
-
- /*
- * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
- * eventually end up in zfs_mknode(), which assigns the object's
- * creation time and generation number. The generic VOP_CREATE()
- * doesn't have either concept, so we smuggle the values inside
- * the vattr's otherwise unused va_ctime and va_nblocks fields.
- */
- ZFS_TIME_DECODE(&va.va_ctime, lr->lr_crtime);
- va.va_nblocks = lr->lr_gen;
-
- cn.cn_nameptr = name;
- cn.cn_cred = kcred;
- cn.cn_thread = curthread;
- cn.cn_flags = SAVENAME;
-
- vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
- switch ((int)lr->lr_common.lrc_txtype) {
- case TX_CREATE:
- error = VOP_CREATE(ZTOV(dzp), &vp, &cn, &va);
- break;
- case TX_MKDIR:
- error = VOP_MKDIR(ZTOV(dzp), &vp, &cn, &va);
- break;
- case TX_MKXATTR:
- error = zfs_make_xattrdir(dzp, &va, &vp, kcred);
- break;
- case TX_SYMLINK:
- link = name + strlen(name) + 1;
- error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &va, link);
- break;
- default:
- error = ENOTSUP;
- }
- VOP_UNLOCK(ZTOV(dzp), 0);
-
- if (error == 0 && vp != NULL) {
- VOP_UNLOCK(vp, 0);
- VN_RELE(vp);
- }
-
- VN_RELE(ZTOV(dzp));
-
- return (error);
-}
-
-static int
-zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
-{
- char *name = (char *)(lr + 1); /* name follows lr_remove_t */
- znode_t *dzp;
- struct componentname cn;
- vnode_t *vp;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
- return (error);
-
- bzero(&cn, sizeof(cn));
- cn.cn_nameptr = name;
- cn.cn_namelen = strlen(name);
- cn.cn_nameiop = DELETE;
- cn.cn_flags = ISLASTCN | SAVENAME;
- cn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
- cn.cn_cred = kcred;
- cn.cn_thread = curthread;
- vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
- error = VOP_LOOKUP(ZTOV(dzp), &vp, &cn);
- if (error != 0) {
- VOP_UNLOCK(ZTOV(dzp), 0);
- goto fail;
- }
-
- switch ((int)lr->lr_common.lrc_txtype) {
- case TX_REMOVE:
- error = VOP_REMOVE(ZTOV(dzp), vp, &cn);
- break;
- case TX_RMDIR:
- error = VOP_RMDIR(ZTOV(dzp), vp, &cn);
- break;
- default:
- error = ENOTSUP;
- }
- vput(vp);
- VOP_UNLOCK(ZTOV(dzp), 0);
-fail:
- VN_RELE(ZTOV(dzp));
-
- return (error);
-}
-
-static int
-zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
-{
- char *name = (char *)(lr + 1); /* name follows lr_link_t */
- znode_t *dzp, *zp;
- struct componentname cn;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
- return (error);
-
- if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
- VN_RELE(ZTOV(dzp));
- return (error);
- }
-
- cn.cn_nameptr = name;
- cn.cn_cred = kcred;
- cn.cn_thread = curthread;
- cn.cn_flags = SAVENAME;
-
- vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
- vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
- error = VOP_LINK(ZTOV(dzp), ZTOV(zp), &cn);
- VOP_UNLOCK(ZTOV(zp), 0);
- VOP_UNLOCK(ZTOV(dzp), 0);
-
- VN_RELE(ZTOV(zp));
- VN_RELE(ZTOV(dzp));
-
- return (error);
-}
-
-static int
-zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
-{
- char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
- char *tname = sname + strlen(sname) + 1;
- znode_t *sdzp, *tdzp;
- struct componentname scn, tcn;
- vnode_t *svp, *tvp;
- kthread_t *td = curthread;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
- return (error);
-
- if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
- VN_RELE(ZTOV(sdzp));
- return (error);
- }
-
- svp = tvp = NULL;
-
- bzero(&scn, sizeof(scn));
- scn.cn_nameptr = sname;
- scn.cn_namelen = strlen(sname);
- scn.cn_nameiop = DELETE;
- scn.cn_flags = ISLASTCN | SAVENAME;
- scn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
- scn.cn_cred = kcred;
- scn.cn_thread = td;
- vn_lock(ZTOV(sdzp), LK_EXCLUSIVE | LK_RETRY);
- error = VOP_LOOKUP(ZTOV(sdzp), &svp, &scn);
- VOP_UNLOCK(ZTOV(sdzp), 0);
- if (error != 0)
- goto fail;
- VOP_UNLOCK(svp, 0);
-
- bzero(&tcn, sizeof(tcn));
- tcn.cn_nameptr = tname;
- tcn.cn_namelen = strlen(tname);
- tcn.cn_nameiop = RENAME;
- tcn.cn_flags = ISLASTCN | SAVENAME;
- tcn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
- tcn.cn_cred = kcred;
- tcn.cn_thread = td;
- vn_lock(ZTOV(tdzp), LK_EXCLUSIVE | LK_RETRY);
- error = VOP_LOOKUP(ZTOV(tdzp), &tvp, &tcn);
- if (error == EJUSTRETURN)
- tvp = NULL;
- else if (error != 0) {
- VOP_UNLOCK(ZTOV(tdzp), 0);
- goto fail;
- }
-
- error = VOP_RENAME(ZTOV(sdzp), svp, &scn, ZTOV(tdzp), tvp, &tcn);
- return (error);
-fail:
- if (svp != NULL)
- vrele(svp);
- if (tvp != NULL)
- vrele(tvp);
- VN_RELE(ZTOV(tdzp));
- VN_RELE(ZTOV(sdzp));
-
- return (error);
-}
-
-static int
-zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
-{
- char *data = (char *)(lr + 1); /* data follows lr_write_t */
- znode_t *zp;
- int error;
- ssize_t resid;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log writes out of order, it's possible the
- * file has been removed. In this case just drop the write
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
- return (error);
- }
-
- error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length,
- lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
-
- VN_RELE(ZTOV(zp));
-
- return (error);
-}
-
-static int
-zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
-{
-
- ZFS_LOG(0, "Unexpected code path, report to pjd@FreeBSD.org");
- return (EOPNOTSUPP);
-}
-
-static int
-zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
-{
- znode_t *zp;
- vattr_t va;
- vnode_t *vp;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log setattrs out of order, it's possible the
- * file has been removed. In this case just drop the setattr
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
- return (error);
- }
-
- zfs_init_vattr(&va, lr->lr_mask, lr->lr_mode,
- lr->lr_uid, lr->lr_gid, 0, lr->lr_foid);
-
- va.va_size = lr->lr_size;
- ZFS_TIME_DECODE(&va.va_atime, lr->lr_atime);
- ZFS_TIME_DECODE(&va.va_mtime, lr->lr_mtime);
-
- vp = ZTOV(zp);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- error = VOP_SETATTR(vp, &va, kcred, curthread);
- VOP_UNLOCK(vp, 0);
- VN_RELE(vp);
-
- return (error);
-}
-
-static int
-zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
-{
- ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
-#ifdef TODO
- vsecattr_t vsa;
-#endif
- znode_t *zp;
- int error;
-
- if (byteswap) {
- byteswap_uint64_array(lr, sizeof (*lr));
- zfs_ace_byteswap(ace, lr->lr_aclcnt);
- }
-
- if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
- /*
- * As we can log acls out of order, it's possible the
- * file has been removed. In this case just drop the acl
- * and return success.
- */
- if (error == ENOENT)
- error = 0;
- return (error);
- }
-
-#ifdef TODO
- bzero(&vsa, sizeof (vsa));
- vsa.vsa_mask = VSA_ACE | VSA_ACECNT;
- vsa.vsa_aclcnt = lr->lr_aclcnt;
- vsa.vsa_aclentp = ace;
-
- error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred);
-#else
- error = EOPNOTSUPP;
-#endif
-
- VN_RELE(ZTOV(zp));
-
- return (error);
-}
-
-/*
- * Callback vectors for replaying records
- */
-zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
- zfs_replay_error, /* 0 no such transaction type */
- zfs_replay_create, /* TX_CREATE */
- zfs_replay_create, /* TX_MKDIR */
- zfs_replay_create, /* TX_MKXATTR */
- zfs_replay_create, /* TX_SYMLINK */
- zfs_replay_remove, /* TX_REMOVE */
- zfs_replay_remove, /* TX_RMDIR */
- zfs_replay_link, /* TX_LINK */
- zfs_replay_rename, /* TX_RENAME */
- zfs_replay_write, /* TX_WRITE */
- zfs_replay_truncate, /* TX_TRUNCATE */
- zfs_replay_setattr, /* TX_SETATTR */
- zfs_replay_acl, /* TX_ACL */
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
deleted file mode 100644
index 07ec0f6..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This file contains the code to implement file range locking in
- * ZFS, although there isn't much specific to ZFS (all that comes to mind
- * support for growing the blocksize).
- *
- * Interface
- * ---------
- * Defined in zfs_rlock.h but essentially:
- * rl = zfs_range_lock(zp, off, len, lock_type);
- * zfs_range_unlock(rl);
- * zfs_range_reduce(rl, off, len);
- *
- * AVL tree
- * --------
- * An AVL tree is used to maintain the state of the existing ranges
- * that are locked for exclusive (writer) or shared (reader) use.
- * The starting range offset is used for searching and sorting the tree.
- *
- * Common case
- * -----------
- * The (hopefully) usual case is of no overlaps or contention for
- * locks. On entry to zfs_lock_range() a rl_t is allocated; the tree
- * searched that finds no overlap, and *this* rl_t is placed in the tree.
- *
- * Overlaps/Reference counting/Proxy locks
- * ---------------------------------------
- * The avl code only allows one node at a particular offset. Also it's very
- * inefficient to search through all previous entries looking for overlaps
- * (because the very 1st in the ordered list might be at offset 0 but
- * cover the whole file).
- * So this implementation uses reference counts and proxy range locks.
- * Firstly, only reader locks use reference counts and proxy locks,
- * because writer locks are exclusive.
- * When a reader lock overlaps with another then a proxy lock is created
- * for that range and replaces the original lock. If the overlap
- * is exact then the reference count of the proxy is simply incremented.
- * Otherwise, the proxy lock is split into smaller lock ranges and
- * new proxy locks created for non overlapping ranges.
- * The reference counts are adjusted accordingly.
- * Meanwhile, the orginal lock is kept around (this is the callers handle)
- * and its offset and length are used when releasing the lock.
- *
- * Thread coordination
- * -------------------
- * In order to make wakeups efficient and to ensure multiple continuous
- * readers on a range don't starve a writer for the same range lock,
- * two condition variables are allocated in each rl_t.
- * If a writer (or reader) can't get a range it initialises the writer
- * (or reader) cv; sets a flag saying there's a writer (or reader) waiting;
- * and waits on that cv. When a thread unlocks that range it wakes up all
- * writers then all readers before destroying the lock.
- *
- * Append mode writes
- * ------------------
- * Append mode writes need to lock a range at the end of a file.
- * The offset of the end of the file is determined under the
- * range locking mutex, and the lock type converted from RL_APPEND to
- * RL_WRITER and the range locked.
- *
- * Grow block handling
- * -------------------
- * ZFS supports multiple block sizes currently upto 128K. The smallest
- * block size is used for the file which is grown as needed. During this
- * growth all other writers and readers must be excluded.
- * So if the block size needs to be grown then the whole file is
- * exclusively locked, then later the caller will reduce the lock
- * range to just the range to be written using zfs_reduce_range.
- */
-
-#include <sys/zfs_rlock.h>
-
-/*
- * Check if a write lock can be grabbed, or wait and recheck until available.
- */
-static void
-zfs_range_lock_writer(znode_t *zp, rl_t *new)
-{
- avl_tree_t *tree = &zp->z_range_avl;
- rl_t *rl;
- avl_index_t where;
- uint64_t end_size;
- uint64_t off = new->r_off;
- uint64_t len = new->r_len;
-
- for (;;) {
- /*
- * Range locking is also used by zvol and uses a
- * dummied up znode. However, for zvol, we don't need to
- * append or grow blocksize, and besides we don't have
- * a z_phys or z_zfsvfs - so skip that processing.
- *
- * Yes, this is ugly, and would be solved by not handling
- * grow or append in range lock code. If that was done then
- * we could make the range locking code generically available
- * to other non-zfs consumers.
- */
- if (zp->z_vnode) { /* caller is ZPL */
- /*
- * If in append mode pick up the current end of file.
- * This is done under z_range_lock to avoid races.
- */
- if (new->r_type == RL_APPEND)
- new->r_off = zp->z_phys->zp_size;
-
- /*
- * If we need to grow the block size then grab the whole
- * file range. This is also done under z_range_lock to
- * avoid races.
- */
- end_size = MAX(zp->z_phys->zp_size, new->r_off + len);
- if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
- zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
- new->r_off = 0;
- new->r_len = UINT64_MAX;
- }
- }
-
- /*
- * First check for the usual case of no locks
- */
- if (avl_numnodes(tree) == 0) {
- new->r_type = RL_WRITER; /* convert to writer */
- avl_add(tree, new);
- return;
- }
-
- /*
- * Look for any locks in the range.
- */
- rl = avl_find(tree, new, &where);
- if (rl)
- goto wait; /* already locked at same offset */
-
- rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
- if (rl && (rl->r_off < new->r_off + new->r_len))
- goto wait;
-
- rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
- if (rl && rl->r_off + rl->r_len > new->r_off)
- goto wait;
-
- new->r_type = RL_WRITER; /* convert possible RL_APPEND */
- avl_insert(tree, new, where);
- return;
-wait:
- if (!rl->r_write_wanted) {
- cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
- rl->r_write_wanted = B_TRUE;
- }
- cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
-
- /* reset to original */
- new->r_off = off;
- new->r_len = len;
- }
-}
-
-/*
- * If this is an original (non-proxy) lock then replace it by
- * a proxy and return the proxy.
- */
-static rl_t *
-zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
-{
- rl_t *proxy;
-
- if (rl->r_proxy)
- return (rl); /* already a proxy */
-
- ASSERT3U(rl->r_cnt, ==, 1);
- ASSERT(rl->r_write_wanted == B_FALSE);
- ASSERT(rl->r_read_wanted == B_FALSE);
- avl_remove(tree, rl);
- rl->r_cnt = 0;
-
- /* create a proxy range lock */
- proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
- proxy->r_off = rl->r_off;
- proxy->r_len = rl->r_len;
- proxy->r_cnt = 1;
- proxy->r_type = RL_READER;
- proxy->r_proxy = B_TRUE;
- proxy->r_write_wanted = B_FALSE;
- proxy->r_read_wanted = B_FALSE;
- avl_add(tree, proxy);
-
- return (proxy);
-}
-
-/*
- * Split the range lock at the supplied offset
- * returning the *front* proxy.
- */
-static rl_t *
-zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
-{
- rl_t *front, *rear;
-
- ASSERT3U(rl->r_len, >, 1);
- ASSERT3U(off, >, rl->r_off);
- ASSERT3U(off, <, rl->r_off + rl->r_len);
- ASSERT(rl->r_write_wanted == B_FALSE);
- ASSERT(rl->r_read_wanted == B_FALSE);
-
- /* create the rear proxy range lock */
- rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
- rear->r_off = off;
- rear->r_len = rl->r_off + rl->r_len - off;
- rear->r_cnt = rl->r_cnt;
- rear->r_type = RL_READER;
- rear->r_proxy = B_TRUE;
- rear->r_write_wanted = B_FALSE;
- rear->r_read_wanted = B_FALSE;
-
- front = zfs_range_proxify(tree, rl);
- front->r_len = off - rl->r_off;
-
- avl_insert_here(tree, rear, front, AVL_AFTER);
- return (front);
-}
-
-/*
- * Create and add a new proxy range lock for the supplied range.
- */
-static void
-zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
-{
- rl_t *rl;
-
- ASSERT(len);
- rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
- rl->r_off = off;
- rl->r_len = len;
- rl->r_cnt = 1;
- rl->r_type = RL_READER;
- rl->r_proxy = B_TRUE;
- rl->r_write_wanted = B_FALSE;
- rl->r_read_wanted = B_FALSE;
- avl_add(tree, rl);
-}
-
-static void
-zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
-{
- rl_t *next;
- uint64_t off = new->r_off;
- uint64_t len = new->r_len;
-
- /*
- * prev arrives either:
- * - pointing to an entry at the same offset
- * - pointing to the entry with the closest previous offset whose
- * range may overlap with the new range
- * - null, if there were no ranges starting before the new one
- */
- if (prev) {
- if (prev->r_off + prev->r_len <= off) {
- prev = NULL;
- } else if (prev->r_off != off) {
- /*
- * convert to proxy if needed then
- * split this entry and bump ref count
- */
- prev = zfs_range_split(tree, prev, off);
- prev = AVL_NEXT(tree, prev); /* move to rear range */
- }
- }
- ASSERT((prev == NULL) || (prev->r_off == off));
-
- if (prev)
- next = prev;
- else
- next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
-
- if (next == NULL || off + len <= next->r_off) {
- /* no overlaps, use the original new rl_t in the tree */
- avl_insert(tree, new, where);
- return;
- }
-
- if (off < next->r_off) {
- /* Add a proxy for initial range before the overlap */
- zfs_range_new_proxy(tree, off, next->r_off - off);
- }
-
- new->r_cnt = 0; /* will use proxies in tree */
- /*
- * We now search forward through the ranges, until we go past the end
- * of the new range. For each entry we make it a proxy if it
- * isn't already, then bump its reference count. If there's any
- * gaps between the ranges then we create a new proxy range.
- */
- for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
- if (off + len <= next->r_off)
- break;
- if (prev && prev->r_off + prev->r_len < next->r_off) {
- /* there's a gap */
- ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
- zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
- next->r_off - (prev->r_off + prev->r_len));
- }
- if (off + len == next->r_off + next->r_len) {
- /* exact overlap with end */
- next = zfs_range_proxify(tree, next);
- next->r_cnt++;
- return;
- }
- if (off + len < next->r_off + next->r_len) {
- /* new range ends in the middle of this block */
- next = zfs_range_split(tree, next, off + len);
- next->r_cnt++;
- return;
- }
- ASSERT3U(off + len, >, next->r_off + next->r_len);
- next = zfs_range_proxify(tree, next);
- next->r_cnt++;
- }
-
- /* Add the remaining end range. */
- zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
- (off + len) - (prev->r_off + prev->r_len));
-}
-
-/*
- * Check if a reader lock can be grabbed, or wait and recheck until available.
- */
-static void
-zfs_range_lock_reader(znode_t *zp, rl_t *new)
-{
- avl_tree_t *tree = &zp->z_range_avl;
- rl_t *prev, *next;
- avl_index_t where;
- uint64_t off = new->r_off;
- uint64_t len = new->r_len;
-
- /*
- * Look for any writer locks in the range.
- */
-retry:
- prev = avl_find(tree, new, &where);
- if (prev == NULL)
- prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
-
- /*
- * Check the previous range for a writer lock overlap.
- */
- if (prev && (off < prev->r_off + prev->r_len)) {
- if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
- if (!prev->r_read_wanted) {
- cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
- prev->r_read_wanted = B_TRUE;
- }
- cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
- goto retry;
- }
- if (off + len < prev->r_off + prev->r_len)
- goto got_lock;
- }
-
- /*
- * Search through the following ranges to see if there's
- * write lock any overlap.
- */
- if (prev)
- next = AVL_NEXT(tree, prev);
- else
- next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
- for (; next; next = AVL_NEXT(tree, next)) {
- if (off + len <= next->r_off)
- goto got_lock;
- if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
- if (!next->r_read_wanted) {
- cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
- next->r_read_wanted = B_TRUE;
- }
- cv_wait(&next->r_rd_cv, &zp->z_range_lock);
- goto retry;
- }
- if (off + len <= next->r_off + next->r_len)
- goto got_lock;
- }
-
-got_lock:
- /*
- * Add the read lock, which may involve splitting existing
- * locks and bumping ref counts (r_cnt).
- */
- zfs_range_add_reader(tree, new, prev, where);
-}
-
-/*
- * Lock a range (offset, length) as either shared (RL_READER)
- * or exclusive (RL_WRITER). Returns the range lock structure
- * for later unlocking or reduce range (if entire file
- * previously locked as RL_WRITER).
- */
-rl_t *
-zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
-{
- rl_t *new;
-
- ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
-
- new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
- new->r_zp = zp;
- new->r_off = off;
- new->r_len = len;
- new->r_cnt = 1; /* assume it's going to be in the tree */
- new->r_type = type;
- new->r_proxy = B_FALSE;
- new->r_write_wanted = B_FALSE;
- new->r_read_wanted = B_FALSE;
-
- mutex_enter(&zp->z_range_lock);
- if (type == RL_READER) {
- /*
- * First check for the usual case of no locks
- */
- if (avl_numnodes(&zp->z_range_avl) == 0)
- avl_add(&zp->z_range_avl, new);
- else
- zfs_range_lock_reader(zp, new);
- } else
- zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
- mutex_exit(&zp->z_range_lock);
- return (new);
-}
-
-/*
- * Unlock a reader lock
- */
-static void
-zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
-{
- avl_tree_t *tree = &zp->z_range_avl;
- rl_t *rl, *next;
- uint64_t len;
-
- /*
- * The common case is when the remove entry is in the tree
- * (cnt == 1) meaning there's been no other reader locks overlapping
- * with this one. Otherwise the remove entry will have been
- * removed from the tree and replaced by proxies (one or
- * more ranges mapping to the entire range).
- */
- if (remove->r_cnt == 1) {
- avl_remove(tree, remove);
- if (remove->r_write_wanted)
- cv_broadcast(&remove->r_wr_cv);
- if (remove->r_read_wanted)
- cv_broadcast(&remove->r_rd_cv);
- } else {
- ASSERT3U(remove->r_cnt, ==, 0);
- ASSERT3U(remove->r_write_wanted, ==, 0);
- ASSERT3U(remove->r_read_wanted, ==, 0);
- /*
- * Find start proxy representing this reader lock,
- * then decrement ref count on all proxies
- * that make up this range, freeing them as needed.
- */
- rl = avl_find(tree, remove, NULL);
- ASSERT(rl);
- ASSERT(rl->r_cnt);
- ASSERT(rl->r_type == RL_READER);
- for (len = remove->r_len; len != 0; rl = next) {
- len -= rl->r_len;
- if (len) {
- next = AVL_NEXT(tree, rl);
- ASSERT(next);
- ASSERT(rl->r_off + rl->r_len == next->r_off);
- ASSERT(next->r_cnt);
- ASSERT(next->r_type == RL_READER);
- }
- rl->r_cnt--;
- if (rl->r_cnt == 0) {
- avl_remove(tree, rl);
- if (rl->r_write_wanted)
- cv_broadcast(&rl->r_wr_cv);
- if (rl->r_read_wanted)
- cv_broadcast(&rl->r_rd_cv);
- kmem_free(rl, sizeof (rl_t));
- }
- }
- }
- kmem_free(remove, sizeof (rl_t));
-}
-
-/*
- * Unlock range and destroy range lock structure.
- */
-void
-zfs_range_unlock(rl_t *rl)
-{
- znode_t *zp = rl->r_zp;
-
- ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
- ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
- ASSERT(!rl->r_proxy);
-
- mutex_enter(&zp->z_range_lock);
- if (rl->r_type == RL_WRITER) {
- /* writer locks can't be shared or split */
- avl_remove(&zp->z_range_avl, rl);
- mutex_exit(&zp->z_range_lock);
- if (rl->r_write_wanted) {
- cv_broadcast(&rl->r_wr_cv);
- cv_destroy(&rl->r_wr_cv);
- }
- if (rl->r_read_wanted) {
- cv_broadcast(&rl->r_rd_cv);
- cv_destroy(&rl->r_rd_cv);
- }
- kmem_free(rl, sizeof (rl_t));
- } else {
- /*
- * lock may be shared, let zfs_range_unlock_reader()
- * release the lock and free the rl_t
- */
- zfs_range_unlock_reader(zp, rl);
- mutex_exit(&zp->z_range_lock);
- }
-}
-
-/*
- * Reduce range locked as RL_WRITER from whole file to specified range.
- * Asserts the whole file is exclusivly locked and so there's only one
- * entry in the tree.
- */
-void
-zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
-{
- znode_t *zp = rl->r_zp;
-
- /* Ensure there are no other locks */
- ASSERT(avl_numnodes(&zp->z_range_avl) == 1);
- ASSERT(rl->r_off == 0);
- ASSERT(rl->r_type == RL_WRITER);
- ASSERT(!rl->r_proxy);
- ASSERT3U(rl->r_len, ==, UINT64_MAX);
- ASSERT3U(rl->r_cnt, ==, 1);
-
- mutex_enter(&zp->z_range_lock);
- rl->r_off = off;
- rl->r_len = len;
- mutex_exit(&zp->z_range_lock);
- if (rl->r_write_wanted)
- cv_broadcast(&rl->r_wr_cv);
- if (rl->r_read_wanted)
- cv_broadcast(&rl->r_rd_cv);
-}
-
-/*
- * AVL comparison function used to order range locks
- * Locks are ordered on the start offset of the range.
- */
-int
-zfs_range_compare(const void *arg1, const void *arg2)
-{
- const rl_t *rl1 = arg1;
- const rl_t *rl2 = arg2;
-
- if (rl1->r_off > rl2->r_off)
- return (1);
- if (rl1->r_off < rl2->r_off)
- return (-1);
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
deleted file mode 100644
index 28f3293..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/sysmacros.h>
-#include <sys/kmem.h>
-#include <sys/acl.h>
-#include <sys/vnode.h>
-#include <sys/vfs.h>
-#include <sys/mntent.h>
-#include <sys/mount.h>
-#include <sys/cmn_err.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_dir.h>
-#include <sys/zil.h>
-#include <sys/fs/zfs.h>
-#include <sys/dmu.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_dataset.h>
-#include <sys/spa.h>
-#include <sys/zap.h>
-#include <sys/varargs.h>
-#include <sys/policy.h>
-#include <sys/atomic.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/sunddi.h>
-#include <sys/dnlc.h>
-
-struct mtx zfs_debug_mtx;
-MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
-SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
-int zfs_debug_level = 0;
-TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
- "Debug level");
-
-static int zfs_mount(vfs_t *vfsp, kthread_t *td);
-static int zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td);
-static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td);
-static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td);
-static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
-static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td);
-static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp);
-static void zfs_objset_close(zfsvfs_t *zfsvfs);
-static void zfs_freevfs(vfs_t *vfsp);
-
-static struct vfsops zfs_vfsops = {
- .vfs_mount = zfs_mount,
- .vfs_unmount = zfs_umount,
- .vfs_root = zfs_root,
- .vfs_statfs = zfs_statfs,
- .vfs_vget = zfs_vget,
- .vfs_sync = zfs_sync,
- .vfs_fhtovp = zfs_fhtovp,
-};
-
-VFS_SET(zfs_vfsops, zfs, VFCF_JAIL);
-
-/*
- * We need to keep a count of active fs's.
- * This is necessary to prevent our module
- * from being unloaded after a umount -f
- */
-static uint32_t zfs_active_fs_count = 0;
-
-/*ARGSUSED*/
-static int
-zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td)
-{
-
- /*
- * Data integrity is job one. We don't want a compromised kernel
- * writing to the storage pool, so we never sync during panic.
- */
- if (panicstr)
- return (0);
-
- if (vfsp != NULL) {
- /*
- * Sync a specific filesystem.
- */
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- int error;
-
- error = vfs_stdsync(vfsp, waitfor, td);
- if (error != 0)
- return (error);
-
- ZFS_ENTER(zfsvfs);
- if (zfsvfs->z_log != NULL)
- zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
- else
- txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
- ZFS_EXIT(zfsvfs);
- } else {
- /*
- * Sync all ZFS filesystems. This is what happens when you
- * run sync(1M). Unlike other filesystems, ZFS honors the
- * request by waiting for all pools to commit all dirty data.
- */
- spa_sync_allpools();
- }
-
- return (0);
-}
-
-static void
-atime_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval == TRUE) {
- zfsvfs->z_atime = TRUE;
- zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
- } else {
- zfsvfs->z_atime = FALSE;
- zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
- }
-}
-
-static void
-xattr_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval == TRUE) {
- /* XXX locking on vfs_flag? */
-#ifdef TODO
- zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
-#endif
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
- } else {
- /* XXX locking on vfs_flag? */
-#ifdef TODO
- zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
-#endif
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
- }
-}
-
-static void
-blksz_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval < SPA_MINBLOCKSIZE ||
- newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
- newval = SPA_MAXBLOCKSIZE;
-
- zfsvfs->z_max_blksz = newval;
- zfsvfs->z_vfs->vfs_bsize = newval;
-}
-
-static void
-readonly_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval) {
- /* XXX locking on vfs_flag? */
- zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
- } else {
- /* XXX locking on vfs_flag? */
- zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
- }
-}
-
-static void
-setuid_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval == FALSE) {
- zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
- } else {
- zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
- }
-}
-
-static void
-exec_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- if (newval == FALSE) {
- zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
- } else {
- zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
- vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
- vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
- }
-}
-
-static void
-snapdir_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_show_ctldir = newval;
-}
-
-static void
-acl_mode_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_mode = newval;
-}
-
-static void
-acl_inherit_changed_cb(void *arg, uint64_t newval)
-{
- zfsvfs_t *zfsvfs = arg;
-
- zfsvfs->z_acl_inherit = newval;
-}
-
-static int
-zfs_refresh_properties(vfs_t *vfsp)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
-
- /*
- * Remount operations default to "rw" unless "ro" is explicitly
- * specified.
- */
- if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
- readonly_changed_cb(zfsvfs, B_TRUE);
- } else {
- if (!dmu_objset_is_snapshot(zfsvfs->z_os))
- readonly_changed_cb(zfsvfs, B_FALSE);
- else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
- return (EROFS);
- }
-
- if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
- setuid_changed_cb(zfsvfs, B_FALSE);
- } else {
- if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
- setuid_changed_cb(zfsvfs, B_FALSE);
- else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
- setuid_changed_cb(zfsvfs, B_TRUE);
- }
-
- if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
- exec_changed_cb(zfsvfs, B_FALSE);
- else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
- exec_changed_cb(zfsvfs, B_TRUE);
-
- if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
- atime_changed_cb(zfsvfs, B_TRUE);
- else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
- atime_changed_cb(zfsvfs, B_FALSE);
-
- if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
- xattr_changed_cb(zfsvfs, B_TRUE);
- else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL))
- xattr_changed_cb(zfsvfs, B_FALSE);
-
- return (0);
-}
-
-static int
-zfs_register_callbacks(vfs_t *vfsp)
-{
- struct dsl_dataset *ds = NULL;
- objset_t *os = NULL;
- zfsvfs_t *zfsvfs = NULL;
- int readonly, do_readonly = FALSE;
- int setuid, do_setuid = FALSE;
- int exec, do_exec = FALSE;
- int xattr, do_xattr = FALSE;
- int error = 0;
-
- ASSERT(vfsp);
- zfsvfs = vfsp->vfs_data;
- ASSERT(zfsvfs);
- os = zfsvfs->z_os;
-
- /*
- * The act of registering our callbacks will destroy any mount
- * options we may have. In order to enable temporary overrides
- * of mount options, we stash away the current values and
- * restore them after we register the callbacks.
- */
- if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
- readonly = B_TRUE;
- do_readonly = B_TRUE;
- } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
- readonly = B_FALSE;
- do_readonly = B_TRUE;
- }
- if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
- setuid = B_FALSE;
- do_setuid = B_TRUE;
- } else {
- if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
- setuid = B_FALSE;
- do_setuid = B_TRUE;
- } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
- setuid = B_TRUE;
- do_setuid = B_TRUE;
- }
- }
- if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
- exec = B_FALSE;
- do_exec = B_TRUE;
- } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
- exec = B_TRUE;
- do_exec = B_TRUE;
- }
- if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
- xattr = B_FALSE;
- do_xattr = B_TRUE;
- } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
- xattr = B_TRUE;
- do_xattr = B_TRUE;
- }
-
- /*
- * Register property callbacks.
- *
- * It would probably be fine to just check for i/o error from
- * the first prop_register(), but I guess I like to go
- * overboard...
- */
- ds = dmu_objset_ds(os);
- error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "xattr", xattr_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "recordsize", blksz_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "readonly", readonly_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "setuid", setuid_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "exec", exec_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "snapdir", snapdir_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
- error = error ? error : dsl_prop_register(ds,
- "aclinherit", acl_inherit_changed_cb, zfsvfs);
- if (error)
- goto unregister;
-
- /*
- * Invoke our callbacks to restore temporary mount options.
- */
- if (do_readonly)
- readonly_changed_cb(zfsvfs, readonly);
- if (do_setuid)
- setuid_changed_cb(zfsvfs, setuid);
- if (do_exec)
- exec_changed_cb(zfsvfs, exec);
- if (do_xattr)
- xattr_changed_cb(zfsvfs, xattr);
-
- return (0);
-
-unregister:
- /*
- * We may attempt to unregister some callbacks that are not
- * registered, but this is OK; it will simply return ENOMSG,
- * which we will ignore.
- */
- (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
- zfsvfs);
- return (error);
-
-}
-
-static int
-zfs_domount(vfs_t *vfsp, char *osname, kthread_t *td)
-{
- cred_t *cr = td->td_ucred;
- uint64_t recordsize, readonly;
- int error = 0;
- int mode;
- zfsvfs_t *zfsvfs;
- znode_t *zp = NULL;
-
- ASSERT(vfsp);
- ASSERT(osname);
-
- /*
- * Initialize the zfs-specific filesystem structure.
- * Should probably make this a kmem cache, shuffle fields,
- * and just bzero up to z_hold_mtx[].
- */
- zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
- zfsvfs->z_vfs = vfsp;
- zfsvfs->z_parent = zfsvfs;
- zfsvfs->z_assign = TXG_NOWAIT;
- zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
- zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
-
- mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
- offsetof(znode_t, z_link_node));
- rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL);
-
- if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
- NULL))
- goto out;
- zfsvfs->z_vfs->vfs_bsize = recordsize;
-
- vfsp->vfs_data = zfsvfs;
- vfsp->mnt_flag |= MNT_LOCAL;
- vfsp->mnt_kern_flag |= MNTK_MPSAFE;
- vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
-
- if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
- goto out;
-
- if (readonly)
- mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
- else
- mode = DS_MODE_PRIMARY;
-
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
- if (error == EROFS) {
- mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
- error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
- &zfsvfs->z_os);
- }
-
- if (error)
- goto out;
-
- if (error = zfs_init_fs(zfsvfs, &zp, cr))
- goto out;
-
- if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
- uint64_t xattr;
-
- ASSERT(mode & DS_MODE_READONLY);
- atime_changed_cb(zfsvfs, B_FALSE);
- readonly_changed_cb(zfsvfs, B_TRUE);
- if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL))
- goto out;
- xattr_changed_cb(zfsvfs, xattr);
- zfsvfs->z_issnap = B_TRUE;
- } else {
- error = zfs_register_callbacks(vfsp);
- if (error)
- goto out;
-
- zfs_unlinked_drain(zfsvfs);
-
- /*
- * Parse and replay the intent log.
- */
- zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
- zfs_replay_vector);
-
- if (!zil_disable)
- zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
- }
-
- vfs_mountedfrom(vfsp, osname);
-
- if (!zfsvfs->z_issnap)
- zfsctl_create(zfsvfs);
-out:
- if (error) {
- if (zfsvfs->z_os)
- dmu_objset_close(zfsvfs->z_os);
- rw_destroy(&zfsvfs->z_um_lock);
- mutex_destroy(&zfsvfs->z_znodes_lock);
- kmem_free(zfsvfs, sizeof (zfsvfs_t));
- } else {
- atomic_add_32(&zfs_active_fs_count, 1);
- }
-
- return (error);
-
-}
-
-void
-zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
-{
- objset_t *os = zfsvfs->z_os;
- struct dsl_dataset *ds;
-
- /*
- * Unregister properties.
- */
- if (!dmu_objset_is_snapshot(os)) {
- ds = dmu_objset_ds(os);
- VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
- zfsvfs) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "aclinherit",
- acl_inherit_changed_cb, zfsvfs) == 0);
- }
-}
-
-/*ARGSUSED*/
-static int
-zfs_mount(vfs_t *vfsp, kthread_t *td)
-{
- char *from;
- int error;
-
- /*
- * When doing a remount, we simply refresh our temporary properties
- * according to those options set in the current VFS options.
- */
- if (vfsp->vfs_flag & MS_REMOUNT)
- return (zfs_refresh_properties(vfsp));
-
- if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&from, NULL))
- return (EINVAL);
-
- DROP_GIANT();
- error = zfs_domount(vfsp, from, td);
- PICKUP_GIANT();
- return (error);
-}
-
-static int
-zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- uint64_t refdbytes, availbytes, usedobjs, availobjs;
-
- statp->f_version = STATFS_VERSION;
-
- ZFS_ENTER(zfsvfs);
-
- dmu_objset_space(zfsvfs->z_os,
- &refdbytes, &availbytes, &usedobjs, &availobjs);
-
- /*
- * The underlying storage pool actually uses multiple block sizes.
- * We report the fragsize as the smallest block size we support,
- * and we report our blocksize as the filesystem's maximum blocksize.
- */
- statp->f_bsize = zfsvfs->z_vfs->vfs_bsize;
- statp->f_iosize = zfsvfs->z_vfs->vfs_bsize;
-
- /*
- * The following report "total" blocks of various kinds in the
- * file system, but reported in terms of f_frsize - the
- * "fragment" size.
- */
-
- statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize;
- statp->f_bfree = availbytes / statp->f_bsize;
- statp->f_bavail = statp->f_bfree; /* no root reservation */
-
- /*
- * statvfs() should really be called statufs(), because it assumes
- * static metadata. ZFS doesn't preallocate files, so the best
- * we can do is report the max that could possibly fit in f_files,
- * and that minus the number actually used in f_ffree.
- * For f_ffree, report the smaller of the number of object available
- * and the number of blocks (each object will take at least a block).
- */
- statp->f_ffree = MIN(availobjs, statp->f_bfree);
- statp->f_files = statp->f_ffree + usedobjs;
-
- /*
- * We're a zfs filesystem.
- */
- (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
-
- strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
- sizeof(statp->f_mntfromname));
- strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
- sizeof(statp->f_mntonname));
-
- statp->f_namemax = ZFS_MAXNAMELEN;
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-static int
-zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- znode_t *rootzp;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
- error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
- if (error == 0) {
- *vpp = ZTOV(rootzp);
- error = vn_lock(*vpp, flags);
- (*vpp)->v_vflag |= VV_ROOT;
- }
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*ARGSUSED*/
-static int
-zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- cred_t *cr = td->td_ucred;
- int ret;
-
- if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0)
- return (ret);
-
- (void) dnlc_purge_vfsp(vfsp, 0);
-
- /*
- * Unmount any snapshots mounted under .zfs before unmounting the
- * dataset itself.
- */
- if (zfsvfs->z_ctldir != NULL) {
- if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
- return (ret);
- ret = vflush(vfsp, 0, 0, td);
- ASSERT(ret == EBUSY);
- if (!(fflag & MS_FORCE)) {
- if (zfsvfs->z_ctldir->v_count > 1)
- return (EBUSY);
- ASSERT(zfsvfs->z_ctldir->v_count == 1);
- }
- zfsctl_destroy(zfsvfs);
- ASSERT(zfsvfs->z_ctldir == NULL);
- }
-
- /*
- * Flush all the files.
- */
- ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
- if (ret != 0) {
- if (!zfsvfs->z_issnap) {
- zfsctl_create(zfsvfs);
- ASSERT(zfsvfs->z_ctldir != NULL);
- }
- return (ret);
- }
-
- if (fflag & MS_FORCE) {
- MNT_ILOCK(vfsp);
- vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
- MNT_IUNLOCK(vfsp);
- zfsvfs->z_unmounted1 = B_TRUE;
-
- /*
- * Wait for all zfs threads to leave zfs.
- * Grabbing a rwlock as reader in all vops and
- * as writer here doesn't work because it too easy to get
- * multiple reader enters as zfs can re-enter itself.
- * This can lead to deadlock if there is an intervening
- * rw_enter as writer.
- * So a file system threads ref count (z_op_cnt) is used.
- * A polling loop on z_op_cnt may seem inefficient, but
- * - this saves all threads on exit from having to grab a
- * mutex in order to cv_signal
- * - only occurs on forced unmount in the rare case when
- * there are outstanding threads within the file system.
- */
- while (zfsvfs->z_op_cnt) {
- delay(1);
- }
- }
-
- zfs_objset_close(zfsvfs);
- VFS_RELE(vfsp);
- zfs_freevfs(vfsp);
-
- return (0);
-}
-
-static int
-zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- znode_t *zp;
- int err;
-
- ZFS_ENTER(zfsvfs);
- err = zfs_zget(zfsvfs, ino, &zp);
- if (err == 0 && zp->z_unlinked) {
- VN_RELE(ZTOV(zp));
- err = EINVAL;
- }
- if (err != 0)
- *vpp = NULL;
- else {
- *vpp = ZTOV(zp);
- vn_lock(*vpp, flags);
- }
- ZFS_EXIT(zfsvfs);
- return (err);
-}
-
-static int
-zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
-{
- kthread_t *td = curthread;
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- znode_t *zp;
- uint64_t object = 0;
- uint64_t fid_gen = 0;
- uint64_t gen_mask;
- uint64_t zp_gen;
- int i, err;
-
- *vpp = NULL;
-
- ZFS_ENTER(zfsvfs);
-
- if (fidp->fid_len == LONG_FID_LEN) {
- zfid_long_t *zlfid = (zfid_long_t *)fidp;
- uint64_t objsetid = 0;
- uint64_t setgen = 0;
-
- for (i = 0; i < sizeof (zlfid->zf_setid); i++)
- objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
-
- for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
- setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
-
- ZFS_EXIT(zfsvfs);
-
- err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
- if (err)
- return (EINVAL);
- ZFS_ENTER(zfsvfs);
- }
-
- if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
- zfid_short_t *zfid = (zfid_short_t *)fidp;
-
- for (i = 0; i < sizeof (zfid->zf_object); i++)
- object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
-
- for (i = 0; i < sizeof (zfid->zf_gen); i++)
- fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
- } else {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /* A zero fid_gen means we are in the .zfs control directories */
- if (fid_gen == 0 &&
- (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
- *vpp = zfsvfs->z_ctldir;
- ASSERT(*vpp != NULL);
- if (object == ZFSCTL_INO_SNAPDIR) {
- VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
- 0, NULL, NULL) == 0);
- } else {
- VN_HOLD(*vpp);
- }
- ZFS_EXIT(zfsvfs);
- /* XXX: LK_RETRY? */
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- return (0);
- }
-
- gen_mask = -1ULL >> (64 - 8 * i);
-
- dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
- if (err = zfs_zget(zfsvfs, object, &zp)) {
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- zp_gen = zp->z_phys->zp_gen & gen_mask;
- if (zp_gen == 0)
- zp_gen = 1;
- if (zp->z_unlinked || zp_gen != fid_gen) {
- dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
- VN_RELE(ZTOV(zp));
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- *vpp = ZTOV(zp);
- /* XXX: LK_RETRY? */
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- vnode_create_vobject(*vpp, zp->z_phys->zp_size, td);
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-static void
-zfs_objset_close(zfsvfs_t *zfsvfs)
-{
- znode_t *zp, *nextzp;
- objset_t *os = zfsvfs->z_os;
-
- /*
- * For forced unmount, at this point all vops except zfs_inactive
- * are erroring EIO. We need to now suspend zfs_inactive threads
- * while we are freeing dbufs before switching zfs_inactive
- * to use behaviour without a objset.
- */
- rw_enter(&zfsvfs->z_um_lock, RW_WRITER);
-
- /*
- * Release all holds on dbufs
- * Note, although we have stopped all other vop threads and
- * zfs_inactive(), the dmu can callback via znode_pageout_func()
- * which can zfs_znode_free() the znode.
- * So we lock z_all_znodes; search the list for a held
- * dbuf; drop the lock (we know zp can't disappear if we hold
- * a dbuf lock; then regrab the lock and restart.
- */
- mutex_enter(&zfsvfs->z_znodes_lock);
- for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
- nextzp = list_next(&zfsvfs->z_all_znodes, zp);
- if (zp->z_dbuf_held) {
- /* dbufs should only be held when force unmounting */
- zp->z_dbuf_held = 0;
- mutex_exit(&zfsvfs->z_znodes_lock);
- dmu_buf_rele(zp->z_dbuf, NULL);
- /* Start again */
- mutex_enter(&zfsvfs->z_znodes_lock);
- nextzp = list_head(&zfsvfs->z_all_znodes);
- }
- }
- mutex_exit(&zfsvfs->z_znodes_lock);
-
- /*
- * Unregister properties.
- */
- if (!dmu_objset_is_snapshot(os))
- zfs_unregister_callbacks(zfsvfs);
-
- /*
- * Switch zfs_inactive to behaviour without an objset.
- * It just tosses cached pages and frees the znode & vnode.
- * Then re-enable zfs_inactive threads in that new behaviour.
- */
- zfsvfs->z_unmounted2 = B_TRUE;
- rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */
-
- /*
- * Close the zil. Can't close the zil while zfs_inactive
- * threads are blocked as zil_close can call zfs_inactive.
- */
- if (zfsvfs->z_log) {
- zil_close(zfsvfs->z_log);
- zfsvfs->z_log = NULL;
- }
-
- /*
- * Evict all dbufs so that cached znodes will be freed
- */
- if (dmu_objset_evict_dbufs(os, 1)) {
- txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
- (void) dmu_objset_evict_dbufs(os, 0);
- }
-
- /*
- * Finally close the objset
- */
- dmu_objset_close(os);
-}
-
-static void
-zfs_freevfs(vfs_t *vfsp)
-{
- zfsvfs_t *zfsvfs = vfsp->vfs_data;
- int i;
-
- for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
- mutex_destroy(&zfsvfs->z_hold_mtx[i]);
- rw_destroy(&zfsvfs->z_um_lock);
- mutex_destroy(&zfsvfs->z_znodes_lock);
- kmem_free(zfsvfs, sizeof (zfsvfs_t));
-
- atomic_add_32(&zfs_active_fs_count, -1);
-}
-
-#ifdef __i386__
-static int desiredvnodes_backup;
-#endif
-
-static void
-zfs_vnodes_adjust(void)
-{
-#ifdef __i386__
- int val;
-
- desiredvnodes_backup = desiredvnodes;
-
- /*
- * We calculate newdesiredvnodes the same way it is done in
- * vntblinit(). If it is equal to desiredvnodes, it means that
- * it wasn't tuned by the administrator and we can tune it down.
- */
- val = min(maxproc + cnt.v_page_count / 4, 2 * vm_kmem_size /
- (5 * (sizeof(struct vm_object) + sizeof(struct vnode))));
- if (desiredvnodes == val)
- desiredvnodes = (3 * desiredvnodes) / 4;
-#endif
-}
-
-static void
-zfs_vnodes_adjust_back(void)
-{
-
-#ifdef __i386__
- desiredvnodes = desiredvnodes_backup;
-#endif
-}
-
-void
-zfs_init(void)
-{
-
- printf("ZFS filesystem version " ZFS_VERSION_STRING "\n");
-
- /*
- * Initialize .zfs directory structures
- */
- zfsctl_init();
-
- /*
- * Initialize znode cache, vnode ops, etc...
- */
- zfs_znode_init();
-
- /*
- * Reduce number of vnodes. Originally number of vnodes is calculated
- * with UFS inode in mind. We reduce it here, because it's too big for
- * ZFS/i386.
- */
- zfs_vnodes_adjust();
-}
-
-void
-zfs_fini(void)
-{
- zfsctl_fini();
- zfs_znode_fini();
- zfs_vnodes_adjust_back();
-}
-
-int
-zfs_busy(void)
-{
- return (zfs_active_fs_count != 0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
deleted file mode 100644
index 088103a..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ /dev/null
@@ -1,3623 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Portions Copyright 2007 Jeremy Teo */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/resource.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/kmem.h>
-#include <sys/taskq.h>
-#include <sys/uio.h>
-#include <sys/atomic.h>
-#include <sys/namei.h>
-#include <sys/mman.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/unistd.h>
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/fs/zfs.h>
-#include <sys/dmu.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/dbuf.h>
-#include <sys/zap.h>
-#include <sys/dirent.h>
-#include <sys/policy.h>
-#include <sys/sunddi.h>
-#include <sys/filio.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/dnlc.h>
-#include <sys/zfs_rlock.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/sf_buf.h>
-#include <sys/sched.h>
-
-/*
- * Programming rules.
- *
- * Each vnode op performs some logical unit of work. To do this, the ZPL must
- * properly lock its in-core state, create a DMU transaction, do the work,
- * record this work in the intent log (ZIL), commit the DMU transaction,
- * and wait for the intent log to commit if it is a synchronous operation.
- * Moreover, the vnode ops must work in both normal and log replay context.
- * The ordering of events is important to avoid deadlocks and references
- * to freed memory. The example below illustrates the following Big Rules:
- *
- * (1) A check must be made in each zfs thread for a mounted file system.
- * This is done avoiding races using ZFS_ENTER(zfsvfs).
- * A ZFS_EXIT(zfsvfs) is needed before all returns.
- *
- * (2) VN_RELE() should always be the last thing except for zil_commit()
- * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
- * First, if it's the last reference, the vnode/znode
- * can be freed, so the zp may point to freed memory. Second, the last
- * reference will call zfs_zinactive(), which may induce a lot of work --
- * pushing cached pages (which acquires range locks) and syncing out
- * cached atime changes. Third, zfs_zinactive() may require a new tx,
- * which could deadlock the system if you were already holding one.
- *
- * (3) All range locks must be grabbed before calling dmu_tx_assign(),
- * as they can span dmu_tx_assign() calls.
- *
- * (4) Always pass zfsvfs->z_assign as the second argument to dmu_tx_assign().
- * In normal operation, this will be TXG_NOWAIT. During ZIL replay,
- * it will be a specific txg. Either way, dmu_tx_assign() never blocks.
- * This is critical because we don't want to block while holding locks.
- * Note, in particular, that if a lock is sometimes acquired before
- * the tx assigns, and sometimes after (e.g. z_lock), then failing to
- * use a non-blocking assign can deadlock the system. The scenario:
- *
- * Thread A has grabbed a lock before calling dmu_tx_assign().
- * Thread B is in an already-assigned tx, and blocks for this lock.
- * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
- * forever, because the previous txg can't quiesce until B's tx commits.
- *
- * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
- * then drop all locks, call dmu_tx_wait(), and try again.
- *
- * (5) If the operation succeeded, generate the intent log entry for it
- * before dropping locks. This ensures that the ordering of events
- * in the intent log matches the order in which they actually occurred.
- *
- * (6) At the end of each vnode op, the DMU tx must always commit,
- * regardless of whether there were any errors.
- *
- * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid)
- * to ensure that synchronous semantics are provided when necessary.
- *
- * In general, this is how things should be ordered in each vnode op:
- *
- * ZFS_ENTER(zfsvfs); // exit if unmounted
- * top:
- * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD())
- * rw_enter(...); // grab any other locks you need
- * tx = dmu_tx_create(...); // get DMU tx
- * dmu_tx_hold_*(); // hold each object you might modify
- * error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign
- * if (error) {
- * rw_exit(...); // drop locks
- * zfs_dirent_unlock(dl); // unlock directory entry
- * VN_RELE(...); // release held vnodes
- * if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- * dmu_tx_wait(tx);
- * dmu_tx_abort(tx);
- * goto top;
- * }
- * dmu_tx_abort(tx); // abort DMU tx
- * ZFS_EXIT(zfsvfs); // finished in zfs
- * return (error); // really out of space
- * }
- * error = do_real_work(); // do whatever this VOP does
- * if (error == 0)
- * zfs_log_*(...); // on success, make ZIL entry
- * dmu_tx_commit(tx); // commit DMU tx -- error or not
- * rw_exit(...); // drop locks
- * zfs_dirent_unlock(dl); // unlock directory entry
- * VN_RELE(...); // release held vnodes
- * zil_commit(zilog, seq, foid); // synchronous when necessary
- * ZFS_EXIT(zfsvfs); // finished in zfs
- * return (error); // done, report error
- */
-/* ARGSUSED */
-static int
-zfs_open(vnode_t **vpp, int flag, cred_t *cr)
-{
- znode_t *zp = VTOZ(*vpp);
-
- /* Keep a count of the synchronous opens in the znode */
- if (flag & (FSYNC | FDSYNC))
- atomic_inc_32(&zp->z_sync_cnt);
- return (0);
-}
-
-/* ARGSUSED */
-static int
-zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
-
- /* Decrement the synchronous opens in the znode */
- if (flag & (FSYNC | FDSYNC))
- atomic_dec_32(&zp->z_sync_cnt);
-
- /*
- * Clean up any locks held by this process on the vp.
- */
- cleanlocks(vp, ddi_get_pid(), 0);
- cleanshares(vp, ddi_get_pid());
-
- return (0);
-}
-
-/*
- * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
- * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
- */
-static int
-zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
-{
- znode_t *zp = VTOZ(vp);
- uint64_t noff = (uint64_t)*off; /* new offset */
- uint64_t file_sz;
- int error;
- boolean_t hole;
-
- file_sz = zp->z_phys->zp_size;
- if (noff >= file_sz) {
- return (ENXIO);
- }
-
- if (cmd == _FIO_SEEK_HOLE)
- hole = B_TRUE;
- else
- hole = B_FALSE;
-
- error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
-
- /* end of file? */
- if ((error == ESRCH) || (noff > file_sz)) {
- /*
- * Handle the virtual hole at the end of file.
- */
- if (hole) {
- *off = file_sz;
- return (0);
- }
- return (ENXIO);
- }
-
- if (noff < *off)
- return (error);
- *off = noff;
- return (error);
-}
-
-/* ARGSUSED */
-static int
-zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
- int *rvalp)
-{
- offset_t off;
- int error;
- zfsvfs_t *zfsvfs;
-
- switch (com) {
- case _FIOFFS:
- return (0);
-
- /*
- * The following two ioctls are used by bfu. We fake them out
- * (return success), which is necessary to avoid bfu errors.
- */
- case _FIOGDIO:
- case _FIOSDIO:
- return (0);
-
- case _FIO_SEEK_DATA:
- case _FIO_SEEK_HOLE:
- if (ddi_copyin((void *)data, &off, sizeof (off), flag))
- return (EFAULT);
-
- zfsvfs = VTOZ(vp)->z_zfsvfs;
- ZFS_ENTER(zfsvfs);
-
- /* offset parameter is in/out */
- error = zfs_holey(vp, com, &off);
- ZFS_EXIT(zfsvfs);
- if (error)
- return (error);
- if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
- return (EFAULT);
- return (0);
- }
- return (ENOTTY);
-}
-
-/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Write: If we find a memory mapped page, we write to *both*
- * the page and the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- * the file is memory mapped.
- */
-static int
-mappedwrite(vnode_t *vp, int nbytes, uio_t *uio, dmu_tx_t *tx)
-{
- znode_t *zp = VTOZ(vp);
- objset_t *os = zp->z_zfsvfs->z_os;
- vm_object_t obj;
- vm_page_t m;
- struct sf_buf *sf;
- int64_t start, off;
- int len = nbytes;
- int error = 0;
- uint64_t dirbytes;
-
- ASSERT(vp->v_mount != NULL);
- obj = vp->v_object;
- ASSERT(obj != NULL);
-
- start = uio->uio_loffset;
- off = start & PAGEOFFSET;
- dirbytes = 0;
- VM_OBJECT_LOCK(obj);
- for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
- uint64_t bytes = MIN(PAGESIZE - off, len);
- uint64_t fsize;
-
-again:
- if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
- vm_page_is_valid(m, (vm_offset_t)off, bytes)) {
- uint64_t woff;
- caddr_t va;
-
- if (vm_page_sleep_if_busy(m, FALSE, "zfsmwb"))
- goto again;
- fsize = obj->un_pager.vnp.vnp_size;
- vm_page_busy(m);
- vm_page_lock_queues();
- vm_page_undirty(m);
- vm_page_unlock_queues();
- VM_OBJECT_UNLOCK(obj);
- if (dirbytes > 0) {
- error = dmu_write_uio(os, zp->z_id, uio,
- dirbytes, tx);
- dirbytes = 0;
- }
- if (error == 0) {
- sched_pin();
- sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
- va = (caddr_t)sf_buf_kva(sf);
- woff = uio->uio_loffset - off;
- error = uiomove(va + off, bytes, UIO_WRITE, uio);
- /*
- * The uiomove() above could have been partially
- * successful, that's why we call dmu_write()
- * below unconditionally. The page was marked
- * non-dirty above and we would lose the changes
- * without doing so. If the uiomove() failed
- * entirely, well, we just write what we got
- * before one more time.
- */
- dmu_write(os, zp->z_id, woff,
- MIN(PAGESIZE, fsize - woff), va, tx);
- sf_buf_free(sf);
- sched_unpin();
- }
- VM_OBJECT_LOCK(obj);
- vm_page_wakeup(m);
- } else {
- if (__predict_false(obj->cache != NULL)) {
- vm_page_cache_free(obj, OFF_TO_IDX(start),
- OFF_TO_IDX(start) + 1);
- }
- dirbytes += bytes;
- }
- len -= bytes;
- off = 0;
- if (error)
- break;
- }
- VM_OBJECT_UNLOCK(obj);
- if (error == 0 && dirbytes > 0)
- error = dmu_write_uio(os, zp->z_id, uio, dirbytes, tx);
- return (error);
-}
-
-/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Read: We "read" preferentially from memory mapped pages,
- * else we fall back to the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- * the file is memory mapped.
- */
-static int
-mappedread(vnode_t *vp, int nbytes, uio_t *uio)
-{
- znode_t *zp = VTOZ(vp);
- objset_t *os = zp->z_zfsvfs->z_os;
- vm_object_t obj;
- vm_page_t m;
- struct sf_buf *sf;
- int64_t start, off;
- caddr_t va;
- int len = nbytes;
- int error = 0;
- uint64_t dirbytes;
-
- ASSERT(vp->v_mount != NULL);
- obj = vp->v_object;
- ASSERT(obj != NULL);
-
- start = uio->uio_loffset;
- off = start & PAGEOFFSET;
- dirbytes = 0;
- VM_OBJECT_LOCK(obj);
- for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
- uint64_t bytes = MIN(PAGESIZE - off, len);
-
-again:
- if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
- vm_page_is_valid(m, (vm_offset_t)off, bytes)) {
- if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb"))
- goto again;
- vm_page_busy(m);
- VM_OBJECT_UNLOCK(obj);
- if (dirbytes > 0) {
- error = dmu_read_uio(os, zp->z_id, uio,
- dirbytes);
- dirbytes = 0;
- }
- if (error == 0) {
- sched_pin();
- sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
- va = (caddr_t)sf_buf_kva(sf);
- error = uiomove(va + off, bytes, UIO_READ, uio);
- sf_buf_free(sf);
- sched_unpin();
- }
- VM_OBJECT_LOCK(obj);
- vm_page_wakeup(m);
- } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
- /*
- * The code below is here to make sendfile(2) work
- * correctly with ZFS. As pointed out by ups@
- * sendfile(2) should be changed to use VOP_GETPAGES(),
- * but that would pessimize performance of sendfile/UFS, that's
- * why I handle this special case in ZFS code.
- */
- if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb"))
- goto again;
- vm_page_busy(m);
- VM_OBJECT_UNLOCK(obj);
- if (dirbytes > 0) {
- error = dmu_read_uio(os, zp->z_id, uio,
- dirbytes);
- dirbytes = 0;
- }
- if (error == 0) {
- sched_pin();
- sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
- va = (caddr_t)sf_buf_kva(sf);
- error = dmu_read(os, zp->z_id, start + off,
- bytes, (void *)(va + off));
- sf_buf_free(sf);
- sched_unpin();
- }
- VM_OBJECT_LOCK(obj);
- vm_page_wakeup(m);
- if (error == 0)
- uio->uio_resid -= bytes;
- } else {
- dirbytes += bytes;
- }
- len -= bytes;
- off = 0;
- if (error)
- break;
- }
- VM_OBJECT_UNLOCK(obj);
- if (error == 0 && dirbytes > 0)
- error = dmu_read_uio(os, zp->z_id, uio, dirbytes);
- return (error);
-}
-
-offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
-
-/*
- * Read bytes from specified file into supplied buffer.
- *
- * IN: vp - vnode of file to be read from.
- * uio - structure supplying read location, range info,
- * and return buffer.
- * ioflag - SYNC flags; used to provide FRSYNC semantics.
- * cr - credentials of caller.
- *
- * OUT: uio - updated offset and range, buffer filled.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Side Effects:
- * vp - atime updated if byte count > 0
- */
-/* ARGSUSED */
-static int
-zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- objset_t *os = zfsvfs->z_os;
- ssize_t n, nbytes;
- int error;
- rl_t *rl;
-
- ZFS_ENTER(zfsvfs);
-
- /*
- * Validate file offset
- */
- if (uio->uio_loffset < (offset_t)0) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * Fasttrack empty reads
- */
- if (uio->uio_resid == 0) {
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- /*
- * Check for mandatory locks
- */
- if (MANDMODE((mode_t)zp->z_phys->zp_mode)) {
- if (error = chklock(vp, FREAD,
- uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- }
-
- /*
- * If we're in FRSYNC mode, sync out this znode before reading it.
- */
- if (ioflag & FRSYNC)
- zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);
-
- /*
- * Lock the range against changes.
- */
- rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
-
- /*
- * If we are reading past end-of-file we can skip
- * to the end; but we might still need to set atime.
- */
- if (uio->uio_loffset >= zp->z_phys->zp_size) {
- error = 0;
- goto out;
- }
-
- ASSERT(uio->uio_loffset < zp->z_phys->zp_size);
- n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset);
-
- while (n > 0) {
- nbytes = MIN(n, zfs_read_chunk_size -
- P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
-
- if (vn_has_cached_data(vp))
- error = mappedread(vp, nbytes, uio);
- else
- error = dmu_read_uio(os, zp->z_id, uio, nbytes);
- if (error)
- break;
-
- n -= nbytes;
- }
-
-out:
- zfs_range_unlock(rl);
-
- ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Fault in the pages of the first n bytes specified by the uio structure.
- * 1 byte in each page is touched and the uio struct is unmodified.
- * Any error will exit this routine as this is only a best
- * attempt to get the pages resident. This is a copy of ufs_trans_touch().
- */
-static void
-zfs_prefault_write(ssize_t n, struct uio *uio)
-{
- struct iovec *iov;
- ulong_t cnt, incr;
- caddr_t p;
-
- if (uio->uio_segflg != UIO_USERSPACE)
- return;
-
- iov = uio->uio_iov;
-
- while (n) {
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0) {
- /* empty iov entry */
- iov++;
- continue;
- }
- n -= cnt;
- /*
- * touch each page in this segment.
- */
- p = iov->iov_base;
- while (cnt) {
- if (fubyte(p) == -1)
- return;
- incr = MIN(cnt, PAGESIZE);
- p += incr;
- cnt -= incr;
- }
- /*
- * touch the last byte in case it straddles a page.
- */
- p--;
- if (fubyte(p) == -1)
- return;
- iov++;
- }
-}
-
-/*
- * Write the bytes to a file.
- *
- * IN: vp - vnode of file to be written to.
- * uio - structure supplying write location, range info,
- * and data buffer.
- * ioflag - IO_APPEND flag set if in append mode.
- * cr - credentials of caller.
- *
- * OUT: uio - updated offset and range.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * vp - ctime|mtime updated if byte count > 0
- */
-/* ARGSUSED */
-static int
-zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
-{
- znode_t *zp = VTOZ(vp);
- rlim64_t limit = MAXOFFSET_T;
- ssize_t start_resid = uio->uio_resid;
- ssize_t tx_bytes;
- uint64_t end_size;
- dmu_tx_t *tx;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- offset_t woff;
- ssize_t n, nbytes;
- rl_t *rl;
- int max_blksz = zfsvfs->z_max_blksz;
- int error;
-
- /*
- * Fasttrack empty write
- */
- n = start_resid;
- if (n == 0)
- return (0);
-
- if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
- limit = MAXOFFSET_T;
-
- ZFS_ENTER(zfsvfs);
-
- /*
- * Pre-fault the pages to ensure slow (eg NFS) pages
- * don't hold up txg.
- */
- zfs_prefault_write(n, uio);
-
- /*
- * If in append mode, set the io offset pointer to eof.
- */
- if (ioflag & IO_APPEND) {
- /*
- * Range lock for a file append:
- * The value for the start of range will be determined by
- * zfs_range_lock() (to guarantee append semantics).
- * If this write will cause the block size to increase,
- * zfs_range_lock() will lock the entire file, so we must
- * later reduce the range after we grow the block size.
- */
- rl = zfs_range_lock(zp, 0, n, RL_APPEND);
- if (rl->r_len == UINT64_MAX) {
- /* overlocked, zp_size can't change */
- woff = uio->uio_loffset = zp->z_phys->zp_size;
- } else {
- woff = uio->uio_loffset = rl->r_off;
- }
- } else {
- woff = uio->uio_loffset;
- /*
- * Validate file offset
- */
- if (woff < 0) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * If we need to grow the block size then zfs_range_lock()
- * will lock a wider range than we request here.
- * Later after growing the block size we reduce the range.
- */
- rl = zfs_range_lock(zp, woff, n, RL_WRITER);
- }
-
- if (woff >= limit) {
- zfs_range_unlock(rl);
- ZFS_EXIT(zfsvfs);
- return (EFBIG);
- }
-
- if ((woff + n) > limit || woff > (limit - n))
- n = limit - woff;
-
- /*
- * Check for mandatory locks
- */
- if (MANDMODE((mode_t)zp->z_phys->zp_mode) &&
- (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
- zfs_range_unlock(rl);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- end_size = MAX(zp->z_phys->zp_size, woff + n);
-
- /*
- * Write the file in reasonable size chunks. Each chunk is written
- * in a separate transaction; this keeps the intent log records small
- * and allows us to do more fine-grained space accounting.
- */
- while (n > 0) {
- /*
- * Start a transaction.
- */
- woff = uio->uio_loffset;
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
- dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- continue;
- }
- dmu_tx_abort(tx);
- break;
- }
-
- /*
- * If zfs_range_lock() over-locked we grow the blocksize
- * and then reduce the lock range. This will only happen
- * on the first iteration since zfs_range_reduce() will
- * shrink down r_len to the appropriate size.
- */
- if (rl->r_len == UINT64_MAX) {
- uint64_t new_blksz;
-
- if (zp->z_blksz > max_blksz) {
- ASSERT(!ISP2(zp->z_blksz));
- new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
- } else {
- new_blksz = MIN(end_size, max_blksz);
- }
- zfs_grow_blocksize(zp, new_blksz, tx);
- zfs_range_reduce(rl, woff, n);
- }
-
- /*
- * XXX - should we really limit each write to z_max_blksz?
- * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
- */
- nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
-
- if (woff + nbytes > zp->z_phys->zp_size)
- vnode_pager_setsize(vp, woff + nbytes);
-
- rw_enter(&zp->z_map_lock, RW_READER);
-
- tx_bytes = uio->uio_resid;
- if (vn_has_cached_data(vp)) {
- rw_exit(&zp->z_map_lock);
- error = mappedwrite(vp, nbytes, uio, tx);
- } else {
- error = dmu_write_uio(zfsvfs->z_os, zp->z_id,
- uio, nbytes, tx);
- rw_exit(&zp->z_map_lock);
- }
- tx_bytes -= uio->uio_resid;
-
- /*
- * If we made no progress, we're done. If we made even
- * partial progress, update the znode and ZIL accordingly.
- */
- if (tx_bytes == 0) {
- dmu_tx_commit(tx);
- ASSERT(error != 0);
- break;
- }
-
- /*
- * Clear Set-UID/Set-GID bits on successful write if not
- * privileged and at least one of the execute bits is set.
- *
- * It would be nice to do this after all writes have
- * been done, but that would still expose the ISUID/ISGID
- * to another app after the partial write is committed.
- */
- mutex_enter(&zp->z_acl_lock);
- if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) |
- (S_IXUSR >> 6))) != 0 &&
- (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 &&
- secpolicy_vnode_setid_retain(cr,
- (zp->z_phys->zp_mode & S_ISUID) != 0 &&
- zp->z_phys->zp_uid == 0) != 0) {
- zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID);
- }
- mutex_exit(&zp->z_acl_lock);
-
- /*
- * Update time stamp. NOTE: This marks the bonus buffer as
- * dirty, so we don't have to do it again for zp_size.
- */
- zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
-
- /*
- * Update the file size (zp_size) if it has changed;
- * account for possible concurrent updates.
- */
- while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset)
- (void) atomic_cas_64(&zp->z_phys->zp_size, end_size,
- uio->uio_loffset);
- zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
- dmu_tx_commit(tx);
-
- if (error != 0)
- break;
- ASSERT(tx_bytes == nbytes);
- n -= nbytes;
- }
-
- zfs_range_unlock(rl);
-
- /*
- * If we're in replay mode, or we made no progress, return error.
- * Otherwise, it's at least a partial write, so it's successful.
- */
- if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- if (ioflag & (FSYNC | FDSYNC))
- zil_commit(zilog, zp->z_last_itx, zp->z_id);
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-void
-zfs_get_done(dmu_buf_t *db, void *vzgd)
-{
- zgd_t *zgd = (zgd_t *)vzgd;
- rl_t *rl = zgd->zgd_rl;
- vnode_t *vp = ZTOV(rl->r_zp);
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_vfsp);
- dmu_buf_rele(db, vzgd);
- zfs_range_unlock(rl);
- VN_RELE(vp);
- zil_add_vdev(zgd->zgd_zilog, DVA_GET_VDEV(BP_IDENTITY(zgd->zgd_bp)));
- kmem_free(zgd, sizeof (zgd_t));
- VFS_UNLOCK_GIANT(vfslocked);
-}
-
-/*
- * Get data to generate a TX_WRITE intent log record.
- */
-int
-zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
-{
- zfsvfs_t *zfsvfs = arg;
- objset_t *os = zfsvfs->z_os;
- znode_t *zp;
- uint64_t off = lr->lr_offset;
- dmu_buf_t *db;
- rl_t *rl;
- zgd_t *zgd;
- int dlen = lr->lr_length; /* length of user data */
- int error = 0;
-
- ASSERT(zio);
- ASSERT(dlen != 0);
-
- /*
- * Nothing to do if the file has been removed
- */
- if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
- return (ENOENT);
- if (zp->z_unlinked) {
- VN_RELE(ZTOV(zp));
- return (ENOENT);
- }
-
- /*
- * Write records come in two flavors: immediate and indirect.
- * For small writes it's cheaper to store the data with the
- * log record (immediate); for large writes it's cheaper to
- * sync the data and get a pointer to it (indirect) so that
- * we don't have to write the data twice.
- */
- if (buf != NULL) { /* immediate write */
- rl = zfs_range_lock(zp, off, dlen, RL_READER);
- /* test for truncation needs to be done while range locked */
- if (off >= zp->z_phys->zp_size) {
- error = ENOENT;
- goto out;
- }
- VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf));
- } else { /* indirect write */
- uint64_t boff; /* block starting offset */
-
- /*
- * Have to lock the whole block to ensure when it's
- * written out and its checksum is being calculated
- * that no one can change the data. We need to re-check
- * blocksize after we get the lock in case it's changed!
- */
- for (;;) {
- if (ISP2(zp->z_blksz)) {
- boff = P2ALIGN_TYPED(off, zp->z_blksz,
- uint64_t);
- } else {
- boff = 0;
- }
- dlen = zp->z_blksz;
- rl = zfs_range_lock(zp, boff, dlen, RL_READER);
- if (zp->z_blksz == dlen)
- break;
- zfs_range_unlock(rl);
- }
- /* test for truncation needs to be done while range locked */
- if (off >= zp->z_phys->zp_size) {
- error = ENOENT;
- goto out;
- }
- zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
- zgd->zgd_rl = rl;
- zgd->zgd_zilog = zfsvfs->z_log;
- zgd->zgd_bp = &lr->lr_blkptr;
- VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db));
- ASSERT(boff == db->db_offset);
- lr->lr_blkoff = off - boff;
- error = dmu_sync(zio, db, &lr->lr_blkptr,
- lr->lr_common.lrc_txg, zfs_get_done, zgd);
- ASSERT(error == EEXIST || lr->lr_length <= zp->z_blksz);
- if (error == 0) {
- zil_add_vdev(zfsvfs->z_log,
- DVA_GET_VDEV(BP_IDENTITY(&lr->lr_blkptr)));
- }
- /*
- * If we get EINPROGRESS, then we need to wait for a
- * write IO initiated by dmu_sync() to complete before
- * we can release this dbuf. We will finish everything
- * up in the zfs_get_done() callback.
- */
- if (error == EINPROGRESS)
- return (0);
- dmu_buf_rele(db, zgd);
- kmem_free(zgd, sizeof (zgd_t));
- }
-out:
- zfs_range_unlock(rl);
- VN_RELE(ZTOV(zp));
- return (error);
-}
-
-/*ARGSUSED*/
-static int
-zfs_access(vnode_t *vp, int mode, int flags, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int error;
-
- ZFS_ENTER(zfsvfs);
- error = zfs_zaccess_rwx(zp, mode, cr);
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Lookup an entry in a directory, or an extended attribute directory.
- * If it exists, return a held vnode reference for it.
- *
- * IN: dvp - vnode of directory to search.
- * nm - name of entry to lookup.
- * pnp - full pathname to lookup [UNUSED].
- * flags - LOOKUP_XATTR set if looking for an attribute.
- * rdir - root directory vnode [UNUSED].
- * cr - credentials of caller.
- *
- * OUT: vpp - vnode of located entry, NULL if not found.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * NA
- */
-/* ARGSUSED */
-static int
-zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
- int nameiop, cred_t *cr, kthread_t *td)
-{
-
- znode_t *zdp = VTOZ(dvp);
- zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
- *vpp = NULL;
-
-#ifdef TODO
- if (flags & LOOKUP_XATTR) {
- /*
- * If the xattr property is off, refuse the lookup request.
- */
- if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * We don't allow recursive attributes..
- * Maybe someday we will.
- */
- if (zdp->z_phys->zp_flags & ZFS_XATTR) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- /*
- * Do we have permission to get into attribute directory?
- */
-
- if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, cr)) {
- VN_RELE(*vpp);
- }
-
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-#endif /* TODO */
-
- if (dvp->v_type != VDIR) {
- ZFS_EXIT(zfsvfs);
- return (ENOTDIR);
- }
-
- /*
- * Check accessibility of directory.
- */
-
- if (error = zfs_zaccess(zdp, ACE_EXECUTE, cr)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- if ((error = zfs_dirlook(zdp, nm, vpp)) == 0) {
-
- /*
- * Convert device special files
- */
- if (IS_DEVVP(*vpp)) {
- vnode_t *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL)
- error = ENOSYS;
- else
- *vpp = svp;
- }
- }
-
- ZFS_EXIT(zfsvfs);
-
- /* Translate errors and add SAVENAME when needed. */
- if (cnp->cn_flags & ISLASTCN) {
- switch (nameiop) {
- case CREATE:
- case RENAME:
- if (error == ENOENT) {
- error = EJUSTRETURN;
- cnp->cn_flags |= SAVENAME;
- break;
- }
- /* FALLTHROUGH */
- case DELETE:
- if (error == 0)
- cnp->cn_flags |= SAVENAME;
- break;
- }
- }
- if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
- int ltype = 0;
-
- if (cnp->cn_flags & ISDOTDOT) {
- ltype = VOP_ISLOCKED(dvp);
- VOP_UNLOCK(dvp, 0);
- }
- error = vn_lock(*vpp, cnp->cn_lkflags);
- if (cnp->cn_flags & ISDOTDOT)
- vn_lock(dvp, ltype | LK_RETRY);
- if (error != 0) {
- VN_RELE(*vpp);
- *vpp = NULL;
- return (error);
- }
- }
-
-#ifdef FREEBSD_NAMECACHE
- /*
- * Insert name into cache (as non-existent) if appropriate.
- */
- if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
- cache_enter(dvp, *vpp, cnp);
- /*
- * Insert name into cache if appropriate.
- */
- if (error == 0 && (cnp->cn_flags & MAKEENTRY)) {
- if (!(cnp->cn_flags & ISLASTCN) ||
- (nameiop != DELETE && nameiop != RENAME)) {
- cache_enter(dvp, *vpp, cnp);
- }
- }
-#endif
-
- return (error);
-}
-
-/*
- * Attempt to create a new entry in a directory. If the entry
- * already exists, truncate the file if permissible, else return
- * an error. Return the vp of the created or trunc'd file.
- *
- * IN: dvp - vnode of directory to put new file entry in.
- * name - name of new file entry.
- * vap - attributes of new file.
- * excl - flag indicating exclusive or non-exclusive mode.
- * mode - mode to open file with.
- * cr - credentials of caller.
- * flag - large file flag [UNUSED].
- *
- * OUT: vpp - vnode of created or trunc'd entry.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * dvp - ctime|mtime updated if new entry created
- * vp - ctime|mtime always, atime if new
- */
-/* ARGSUSED */
-static int
-zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
- vnode_t **vpp, cred_t *cr)
-{ /*
- * Summary of the body below: look up `name` in directory dvp under a
- * dirent lock. If no znode is found, a new object is created and linked
- * inside one DMU transaction. If one is found, it is validated (EEXIST
- * when excl == EXCL, EISDIR for a directory with S_IWRITE requested,
- * then an access check) and truncated when vap asks for size 0 on a
- * regular file. On success *vpp is returned after vn_lock(); a device
- * vnode is replaced by the result of specvp().
- */
- znode_t *zp, *dzp = VTOZ(dvp);
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- objset_t *os = zfsvfs->z_os;
- zfs_dirlock_t *dl;
- dmu_tx_t *tx;
- int error;
- uint64_t zoid;
-
- ZFS_ENTER(zfsvfs);
-
-top: /* retry point after ERESTART from dmu_tx_assign() or zfs_freesp() */
- *vpp = NULL;
-
- if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
- vap->va_mode &= ~VSVTX; /* policy check returned nonzero: drop VSVTX */
-
- if (*name == '\0') {
- /*
- * Null component name refers to the directory itself.
- */
- VN_HOLD(dvp);
- zp = dzp;
- dl = NULL; /* no dirent lock is taken on this path */
- error = 0;
- } else {
- /*
- * possible VN_HOLD(zp): zfs_dirent_lock() fills in zp, which is
- * tested against NULL below to choose between create and open.
- */
- if (error = zfs_dirent_lock(&dl, dzp, name, &zp, 0)) {
- if (strcmp(name, "..") == 0)
- error = EISDIR; /* any lock failure on ".." is reported as EISDIR */
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- }
-
- zoid = zp ? zp->z_id : -1ULL; /* existing object id, or all-ones when creating */
-
- if (zp == NULL) {
- /*
- * Create a new file object and update the directory
- * to reference it.
- */
- if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) {
- goto out;
- }
-
- /*
- * We only support the creation of regular files in
- * extended attribute directories.
- */
- if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
- (vap->va_type != VREG)) {
- error = EINVAL;
- goto out;
- }
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_bonus(tx, dzp->z_id);
- dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
- 0, SPA_MAXBLOCKSIZE);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- zfs_dirent_unlock(dl); /* released before waiting or returning */
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0);
- ASSERT(zp->z_id == zoid);
- (void) zfs_link_create(dl, zp, tx, ZNEW); /* return value explicitly discarded */
- zfs_log_create(zilog, tx, TX_CREATE, dzp, zp, name);
- dmu_tx_commit(tx);
- } else {
- /*
- * A directory entry already exists for this name.
- */
- /*
- * Can't truncate an existing file if in exclusive mode.
- */
- if (excl == EXCL) {
- error = EEXIST;
- goto out;
- }
- /*
- * Can't open a directory for writing.
- */
- if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
- error = EISDIR;
- goto out;
- }
- /*
- * Verify requested access to file.
- */
- if (mode && (error = zfs_zaccess_rwx(zp, mode, cr))) {
- goto out;
- }
-
- mutex_enter(&dzp->z_lock);
- dzp->z_seq++; /* bumped under z_lock even though nothing was created */
- mutex_exit(&dzp->z_lock);
-
- /*
- * Truncate regular files if requested.
- */
- if ((ZTOV(zp)->v_type == VREG) &&
- (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
- error = zfs_freesp(zp, 0, 0, mode, TRUE);
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
- /* NB: we already did dmu_tx_wait() */
- zfs_dirent_unlock(dl);
- VN_RELE(ZTOV(zp));
- goto top;
- }
- }
- }
-out:
-
- if (error == 0) {
- *vpp = ZTOV(zp); /* assigned again in the success branch below */
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- }
-
- if (dl)
- zfs_dirent_unlock(dl);
-
- if (error) {
- if (zp)
- VN_RELE(ZTOV(zp)); /* failure: give back the reference on the znode */
- } else {
- *vpp = ZTOV(zp);
- /*
- * If vnode is for a device return a specfs vnode instead.
- * NOTE(review): *vpp was vn_lock()ed above; confirm what lock
- * state is expected once it is VN_RELE()d and replaced here.
- */
- if (IS_DEVVP(*vpp)) {
- struct vnode *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL) {
- error = ENOSYS; /* *vpp becomes NULL on the next statement */
- }
- *vpp = svp;
- }
- }
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Remove an entry from a directory.
- *
- * IN: dvp - vnode of directory to remove entry from.
- * name - name of entry to remove.
- * cr - credentials of caller.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * dvp - ctime|mtime
- * vp - ctime (if nlink > 0)
- *
- * Notes:
- * Directories are rejected with EPERM; rmdir must be used for them.
- * In this version may_delete_now is forced to FALSE and the delete_now
- * computation sits behind "if (0 && unlinked)", so delete_now stays
- * FALSE: an entry whose last link went away is always passed to
- * zfs_unlinked_add() and the "delete_now" branch is never taken.
- * When dmu_tx_assign() returns ERESTART and z_assign is TXG_NOWAIT,
- * the dirent lock and the vnode reference are dropped, dmu_tx_wait()
- * is called, and the whole operation restarts at "top".
- */
-static int
-zfs_remove(vnode_t *dvp, char *name, cred_t *cr)
-{
- znode_t *zp, *dzp = VTOZ(dvp);
- znode_t *xzp = NULL;
- vnode_t *vp;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- uint64_t acl_obj, xattr_obj;
- zfs_dirlock_t *dl;
- dmu_tx_t *tx;
- boolean_t may_delete_now, delete_now = FALSE;
- boolean_t unlinked;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
-top:
- /*
- * Attempt to lock directory; fail if entry doesn't exist.
- */
- if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- vp = ZTOV(zp);
-
- if (error = zfs_zaccess_delete(dzp, zp, cr)) {
- goto out;
- }
-
- /*
- * Need to use rmdir for removing directories.
- */
- if (vp->v_type == VDIR) {
- error = EPERM;
- goto out;
- }
-
- vnevent_remove(vp);
-
- dnlc_remove(dvp, name);
-
- may_delete_now = FALSE; /* immediate deletion is never attempted here */
-
- /*
- * We may delete the znode now, or we may put it in the unlinked set;
- * it depends on whether we're the last link, and on whether there are
- * other holds on the vnode. So we dmu_tx_hold() the right things to
- * allow for either case.
- */
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
- dmu_tx_hold_bonus(tx, zp->z_id);
- if (may_delete_now)
- dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
-
- /* are there any extended attributes? */
- if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
- /* XXX - do we need this if we are deleting? */
- dmu_tx_hold_bonus(tx, xattr_obj);
- }
-
- /* are there any additional acls */
- if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
- may_delete_now)
- dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
-
- /* charge as an update -- would be nice not to charge at all */
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- zfs_dirent_unlock(dl);
- VN_RELE(vp); /* both are re-acquired by zfs_dirent_lock() on retry */
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- /*
- * Remove the directory entry.
- */
- error = zfs_link_destroy(dl, zp, tx, 0, &unlinked);
-
- if (error) {
- dmu_tx_commit(tx); /* the assigned tx is committed even on failure */
- goto out;
- }
-
- if (0 && unlinked) { /* constant-false guard: this block never runs */
- VI_LOCK(vp);
- delete_now = may_delete_now &&
- vp->v_count == 1 && !vn_has_cached_data(vp) &&
- zp->z_phys->zp_xattr == xattr_obj &&
- zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
- VI_UNLOCK(vp);
- }
-
- if (delete_now) {
- if (zp->z_phys->zp_xattr) {
- error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
- ASSERT3U(error, ==, 0);
- ASSERT3U(xzp->z_phys->zp_links, ==, 2);
- dmu_buf_will_dirty(xzp->z_dbuf, tx);
- mutex_enter(&xzp->z_lock);
- xzp->z_unlinked = 1;
- xzp->z_phys->zp_links = 0;
- mutex_exit(&xzp->z_lock);
- zfs_unlinked_add(xzp, tx);
- zp->z_phys->zp_xattr = 0; /* probably unnecessary */
- }
- mutex_enter(&zp->z_lock);
- VI_LOCK(vp);
- vp->v_count--;
- ASSERT3U(vp->v_count, ==, 0);
- VI_UNLOCK(vp);
- mutex_exit(&zp->z_lock);
- zfs_znode_delete(zp, tx);
- VFS_RELE(zfsvfs->z_vfs);
- } else if (unlinked) {
- zfs_unlinked_add(zp, tx);
- }
-
- zfs_log_remove(zilog, tx, TX_REMOVE, dzp, name);
-
- dmu_tx_commit(tx);
-out:
- zfs_dirent_unlock(dl);
-
- if (!delete_now) {
- VN_RELE(vp);
- } else if (xzp) {
- /* this rele delayed to prevent nesting transactions */
- VN_RELE(ZTOV(xzp));
- }
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Create a new directory and insert it into dvp using the name
- * provided. Return a pointer to the inserted directory.
- *
- * IN: dvp - vnode of directory to add subdir to.
- * dirname - name of new directory.
- * vap - attributes of new directory.
- * cr - credentials of caller.
- *
- * OUT: vpp - vnode of created directory.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * dvp - ctime|mtime updated
- * vp - ctime|mtime|atime updated
- *
- * Notes:
- * Returns EINVAL when dvp is an extended attribute directory
- * (ZFS_XATTR set). vap->va_type is asserted to be VDIR.
- * On success *vpp has been passed through
- * vn_lock(LK_EXCLUSIVE | LK_RETRY) before the dirent lock is dropped.
- * When dmu_tx_assign() returns ERESTART and z_assign is TXG_NOWAIT,
- * the dirent lock is dropped, dmu_tx_wait() is called, and the
- * operation restarts at "top" (which also resets *vpp to NULL).
- */
-static int
-zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr)
-{
- znode_t *zp, *dzp = VTOZ(dvp);
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- zfs_dirlock_t *dl;
- uint64_t zoid = 0;
- dmu_tx_t *tx;
- int error;
-
- ASSERT(vap->va_type == VDIR);
-
- ZFS_ENTER(zfsvfs);
-
- if (dzp->z_phys->zp_flags & ZFS_XATTR) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-top:
- *vpp = NULL;
-
- /*
- * First make sure the new directory doesn't exist.
- */
- if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, ZNEW)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, cr)) {
- zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- /*
- * Add a new entry to the directory.
- */
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
- if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
- 0, SPA_MAXBLOCKSIZE);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- /*
- * Create new node.
- */
- zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0);
-
- /*
- * Now put new name in parent dir.
- */
- (void) zfs_link_create(dl, zp, tx, ZNEW); /* return value explicitly discarded */
-
- *vpp = ZTOV(zp);
-
- zfs_log_create(zilog, tx, TX_MKDIR, dzp, zp, dirname);
- dmu_tx_commit(tx);
-
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
-
- zfs_dirent_unlock(dl);
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-/*
- * Remove a directory subdir entry. If the current working
- * directory is the same as the subdir to be removed, the
- * remove will fail.
- *
- * IN: dvp - vnode of directory to remove from.
- * name - name of directory to be removed.
- * cwd - vnode of current working directory.
- * cr - credentials of caller.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * dvp - ctime|mtime updated
- *
- * Notes:
- * Returns ENOTDIR when the entry is not a directory and EINVAL when
- * it is the caller's cwd.
- * Lock order in this function: dirent lock, then zp->z_name_lock,
- * then zp->z_parent_lock (both as RW_WRITER); they are released in
- * the reverse order.
- * When dmu_tx_assign() returns ERESTART and z_assign is TXG_NOWAIT,
- * all locks and the vnode reference are dropped, dmu_tx_wait() is
- * called, and the operation restarts at "top".
- */
-static int
-zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr)
-{
- znode_t *dzp = VTOZ(dvp);
- znode_t *zp;
- vnode_t *vp;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- zfs_dirlock_t *dl;
- dmu_tx_t *tx;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
-top:
- zp = NULL;
-
- /*
- * Attempt to lock directory; fail if entry doesn't exist.
- */
- if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZEXISTS)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- vp = ZTOV(zp);
-
- if (error = zfs_zaccess_delete(dzp, zp, cr)) {
- goto out;
- }
-
- if (vp->v_type != VDIR) {
- error = ENOTDIR;
- goto out;
- }
-
- if (vp == cwd) {
- error = EINVAL;
- goto out;
- }
-
- vnevent_rmdir(vp);
-
- /*
- * Grab a lock on the directory to make sure that no one is
- * trying to add (or lookup) entries while we are removing it.
- */
- rw_enter(&zp->z_name_lock, RW_WRITER);
-
- /*
- * Grab a lock on the parent pointer to make sure we play well
- * with the treewalk and directory rename code.
- */
- rw_enter(&zp->z_parent_lock, RW_WRITER);
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
- dmu_tx_hold_bonus(tx, zp->z_id);
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- rw_exit(&zp->z_parent_lock);
- rw_exit(&zp->z_name_lock);
- zfs_dirent_unlock(dl);
- VN_RELE(vp);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
-#ifdef FREEBSD_NAMECACHE
- cache_purge(dvp);
-#endif
-
- error = zfs_link_destroy(dl, zp, tx, 0, NULL);
-
- if (error == 0)
- zfs_log_remove(zilog, tx, TX_RMDIR, dzp, name); /* logged only when the unlink succeeded */
-
- dmu_tx_commit(tx); /* committed whether or not zfs_link_destroy() failed */
-
- rw_exit(&zp->z_parent_lock);
- rw_exit(&zp->z_name_lock);
-#ifdef FREEBSD_NAMECACHE
- cache_purge(vp);
-#endif
-out:
- zfs_dirent_unlock(dl);
-
- VN_RELE(vp);
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Read as many directory entries as will fit into the provided
- * buffer from the given directory cursor position (specified in
- * the uio structure).
- *
- * IN: vp - vnode of directory to read.
- * uio - structure supplying read location, range info,
- * and return buffer.
- * cr - credentials of caller.
- * ncookies - if non-NULL, a cookie array is allocated and filled.
- *
- * OUT: uio - updated offset and range, buffer filled.
- * eofp - set to true if end-of-file detected.
- * ncookies - number of cookies actually stored.
- * cookies - array allocated here with malloc(..., M_TEMP, M_WAITOK);
- * each element is the offset following the corresponding
- * returned entry. Freed here and set to NULL on error.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * vp - atime updated
- *
- * Note that the low 4 bits of the cookie returned by zap is always zero.
- * This allows us to use the low range for "special" directory entries:
- * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
- * we use the offset 2 for the '.zfs' directory.
- *
- * Entries are staged in a kmem_alloc()ed bounce buffer and copied out
- * with uiomove() unless the uio is UIO_SYSSPACE with a single iovec, in
- * which case they are written straight into the caller's iovec.
- *
- * NOTE(review): the two early returns (zero iov_len, unlinked directory)
- * happen before *cookies / *ncookies are written; presumably callers
- * must not rely on them in those cases -- confirm against callers.
- */
-/* ARGSUSED */
-static int
-zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_long **cookies)
-{
- znode_t *zp = VTOZ(vp);
- iovec_t *iovp;
- dirent64_t *odp;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- objset_t *os;
- caddr_t outbuf;
- size_t bufsize;
- zap_cursor_t zc;
- zap_attribute_t zap;
- uint_t bytes_wanted;
- uint64_t offset; /* must be unsigned; checks for < 1 */
- int local_eof;
- int outcount;
- int error;
- uint8_t prefetch;
- uint8_t type;
- int ncooks; /* only assigned when ncookies != NULL */
- u_long *cooks = NULL;
-
- ZFS_ENTER(zfsvfs);
-
- /*
- * If we are not given an eof variable,
- * use a local one.
- */
- if (eofp == NULL)
- eofp = &local_eof;
-
- /*
- * Check for valid iov_len.
- */
- if (uio->uio_iov->iov_len <= 0) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * Quit if directory has been removed (posix)
- */
- if ((*eofp = zp->z_unlinked) != 0) {
- ZFS_EXIT(zfsvfs);
- return (0);
- }
-
- error = 0;
- os = zfsvfs->z_os;
- offset = uio->uio_loffset;
- prefetch = zp->z_zn_prefetch;
-
- /*
- * Initialize the iterator cursor.
- */
- if (offset <= 3) {
- /*
- * Start iteration from the beginning of the directory.
- */
- zap_cursor_init(&zc, os, zp->z_id);
- } else {
- /*
- * The offset is a serialized cursor.
- */
- zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
- }
-
- /*
- * Get space to change directory entries into fs independent format.
- */
- iovp = uio->uio_iov;
- bytes_wanted = iovp->iov_len;
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
- bufsize = bytes_wanted;
- outbuf = kmem_alloc(bufsize, KM_SLEEP); /* freed at "update" under the same condition */
- odp = (struct dirent64 *)outbuf;
- } else {
- bufsize = bytes_wanted;
- odp = (struct dirent64 *)iovp->iov_base; /* outbuf stays unset on this path */
- }
-
- if (ncookies != NULL) {
- /*
- * Minimum entry size is dirent size and 1 byte for a file name.
- * Dividing the residual by that size gives an upper bound on
- * the number of entries, and therefore cookies, that can fit.
- */
- ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
- cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK);
- *cookies = cooks;
- *ncookies = ncooks;
- }
-
- /*
- * Transform to file-system independent format
- */
- outcount = 0;
- while (outcount < bytes_wanted) {
- ino64_t objnum;
- ushort_t reclen;
-
- /*
- * Special case `.', `..', and `.zfs'.
- */
- if (offset == 0) {
- (void) strcpy(zap.za_name, ".");
- objnum = zp->z_id;
- type = DT_DIR;
- } else if (offset == 1) {
- (void) strcpy(zap.za_name, "..");
- objnum = zp->z_phys->zp_parent;
- type = DT_DIR;
- } else if (offset == 2 && zfs_show_ctldir(zp)) {
- (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
- objnum = ZFSCTL_INO_ROOT;
- type = DT_DIR;
- } else {
- /*
- * Grab next entry.
- * ENOENT from the cursor sets *eofp and ends the loop
- * normally; any other error skips the copy-out.
- */
- if (error = zap_cursor_retrieve(&zc, &zap)) {
- if ((*eofp = (error == ENOENT)) != 0)
- break;
- else
- goto update;
- }
-
- if (zap.za_integer_length != 8 ||
- zap.za_num_integers != 1) {
- cmn_err(CE_WARN, "zap_readdir: bad directory "
- "entry, obj = %lld, offset = %lld\n",
- (u_longlong_t)zp->z_id,
- (u_longlong_t)offset);
- error = ENXIO;
- goto update;
- }
-
- objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
- /*
- * MacOS X can extract the object type here such as:
- * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
- */
- type = ZFS_DIRENT_TYPE(zap.za_first_integer);
- }
- reclen = DIRENT64_RECLEN(strlen(zap.za_name));
-
- /*
- * Will this entry fit in the buffer?
- */
- if (outcount + reclen > bufsize) {
- /*
- * Did we manage to fit anything in the buffer?
- */
- if (!outcount) {
- error = EINVAL;
- goto update;
- }
- break;
- }
- /*
- * Add this entry:
- */
- odp->d_ino = objnum;
- odp->d_reclen = reclen;
- odp->d_namlen = strlen(zap.za_name);
- (void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
- odp->d_type = type;
- outcount += reclen;
- odp = (dirent64_t *)((intptr_t)odp + reclen);
-
- ASSERT(outcount <= bufsize);
-
- /* Prefetch znode */
- if (prefetch)
- dmu_prefetch(os, objnum, 0, 0);
-
- /*
- * Move to the next entry, fill in the previous offset.
- * Offsets 0..2 are the synthetic entries and simply count up;
- * beyond them the offset is the serialized zap cursor.
- */
- if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
- zap_cursor_advance(&zc);
- offset = zap_cursor_serialize(&zc);
- } else {
- offset += 1;
- }
-
- if (cooks != NULL) {
- *cooks++ = offset; /* cookie is the offset after this entry */
- ncooks--;
- KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
- }
- }
- zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
-
- /* Subtract unused cookies */
- if (ncookies != NULL)
- *ncookies -= ncooks;
-
- if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
- iovp->iov_base += outcount;
- iovp->iov_len -= outcount;
- uio->uio_resid -= outcount;
- } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
- /*
- * Reset the pointer.
- */
- offset = uio->uio_loffset;
- }
-
-update:
- zap_cursor_fini(&zc);
- if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
- kmem_free(outbuf, bufsize);
-
- if (error == ENOENT)
- error = 0;
-
- ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
-
- uio->uio_loffset = offset;
- ZFS_EXIT(zfsvfs);
- if (error != 0 && cookies != NULL) { /* NOTE(review): keyed on cookies, while the allocation is keyed on ncookies -- confirm callers pass both or neither */
- free(*cookies, M_TEMP);
- *cookies = NULL;
- *ncookies = 0;
- }
- return (error);
-}
-
-static int
-zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr)
-{ /*
- * Hands this file's object id and last recorded intent-log
- * transaction (z_last_itx) to zil_commit() on the filesystem's log.
- * syncflag and cr are not used; the function always returns 0.
- */
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-
- ZFS_ENTER(zfsvfs);
- zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-/*
- * Get the requested file attributes and place them in the provided
- * vattr structure.
- *
- * IN: vp - vnode of file.
- * vap - va_mask identifies requested attributes.
- * flags - [UNUSED]
- * cr - credentials of caller.
- *
- * OUT: vap - attribute values.
- *
- * RETURN: 0 if success
- * error code from zfs_zaccess() if the ACE_READ_ATTRIBUTES check
- * fails (only performed for a non-trivial ACL and a non-owner)
- *
- * Note that most of vap is filled in before the access check runs, so
- * on the error return vap has already been partially written.
- */
-/* ARGSUSED */
-static int
-zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- znode_phys_t *pzp = zp->z_phys;
- uint32_t blksize;
- u_longlong_t nblocks;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
- /*
- * Return all attributes. It's cheaper to provide the answer
- * than to determine whether we were asked the question.
- */
- mutex_enter(&zp->z_lock);
-
- vap->va_type = IFTOVT(pzp->zp_mode);
- vap->va_mode = pzp->zp_mode & ~S_IFMT; /* permission bits only, file-type bits masked off */
- vap->va_uid = zp->z_phys->zp_uid;
- vap->va_gid = zp->z_phys->zp_gid;
- vap->va_nodeid = zp->z_id;
- vap->va_nlink = MIN(pzp->zp_links, UINT32_MAX); /* nlink_t limit! */
- vap->va_size = pzp->zp_size;
- vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
- vap->va_rdev = zfs_cmpldev(pzp->zp_rdev);
- vap->va_seq = zp->z_seq;
- vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
-
- ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime);
- ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime);
- ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime);
- ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime);
-
- /*
- * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
- * Also, if we are the owner don't bother, since owner should
- * always be allowed to read basic attributes of file.
- */
- if (!(zp->z_phys->zp_flags & ZFS_ACL_TRIVIAL) &&
- (zp->z_phys->zp_uid != crgetuid(cr))) {
- if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, cr)) {
- mutex_exit(&zp->z_lock);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- }
-
- mutex_exit(&zp->z_lock);
-
- dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks);
- vap->va_blksize = blksize;
- vap->va_bytes = nblocks << 9; /* nblocks * 512 */
-
- if (zp->z_blksz == 0) {
- /*
- * Block size hasn't been set; suggest maximal I/O transfers.
- */
- vap->va_blksize = zfsvfs->z_max_blksz;
- }
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-/*
- * Set the file attributes to the values contained in the
- * vattr structure.
- *
- * IN: vp - vnode of file to be modified.
- * vap - new attribute values.
- * flags - ATTR_UTIME set if non-default time values provided.
- * cr - credentials of caller.
- * ct - caller context; not referenced in this function.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * vp - ctime updated, mtime updated if size changed.
- *
- * Notes:
- * Returns 0 immediately when va_mask is 0, EINVAL when AT_NOSET is
- * set or AT_SIZE is applied to something other than VREG/VFIFO,
- * EISDIR when AT_SIZE is applied to a directory, and EROFS on a
- * read-only filesystem.
- * A size change is carried out first, via zfs_freesp(), before the
- * transaction that updates the remaining attributes is created.
- * When the file has an extended attribute directory (zp_xattr != 0),
- * uid/gid changes are applied to that znode as well.
- * When dmu_tx_assign() returns ERESTART and z_assign is TXG_NOWAIT,
- * the function waits and restarts at "top", which repeats the
- * permission checks and the zfs_freesp() call.
- */
-/* ARGSUSED */
-static int
-zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
- caller_context_t *ct)
-{
- struct znode *zp = VTOZ(vp);
- znode_phys_t *pzp = zp->z_phys;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- dmu_tx_t *tx;
- vattr_t oldva;
- uint_t mask = vap->va_mask;
- uint_t saved_mask;
- int trim_mask = 0; /* attribute bits already authorized by ACL checks */
- uint64_t new_mode;
- znode_t *attrzp;
- int need_policy = FALSE; /* nonzero: defer to secpolicy_vnode_setattr() */
- int err;
-
- if (mask == 0)
- return (0);
-
- if (mask & AT_NOSET)
- return (EINVAL);
-
- if (mask & AT_SIZE && vp->v_type == VDIR)
- return (EISDIR);
-
- if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO)
- return (EINVAL);
-
- ZFS_ENTER(zfsvfs);
-
-top:
- attrzp = NULL;
-
- if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
- ZFS_EXIT(zfsvfs);
- return (EROFS);
- }
-
- /*
- * First validate permissions
- */
-
- if (mask & AT_SIZE) {
- err = zfs_zaccess(zp, ACE_WRITE_DATA, cr);
- if (err) {
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- /*
- * XXX - Note, we are not providing any open
- * mode flags here (like FNDELAY), so we may
- * block if there are locks present... this
- * should be addressed in openat().
- */
- do {
- err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
- /* NB: we already did dmu_tx_wait() if necessary */
- } while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
- if (err) {
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- }
-
- if (mask & (AT_ATIME|AT_MTIME))
- need_policy = zfs_zaccess_v4_perm(zp, ACE_WRITE_ATTRIBUTES, cr); /* nonzero result is used directly as the flag */
-
- if (mask & (AT_UID|AT_GID)) {
- int idmask = (mask & (AT_UID|AT_GID));
- int take_owner;
- int take_group;
-
- /*
- * NOTE: even if a new mode is being set,
- * we may clear S_ISUID/S_ISGID bits.
- */
-
- if (!(mask & AT_MODE))
- vap->va_mode = pzp->zp_mode;
-
- /*
- * Take ownership or chgrp to group we are a member of
- */
-
- take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
- take_group = (mask & AT_GID) && groupmember(vap->va_gid, cr);
-
- /*
- * If both AT_UID and AT_GID are set then take_owner and
- * take_group must both be set in order to allow taking
- * ownership.
- *
- * Otherwise, send the check through secpolicy_vnode_setattr()
- *
- */
-
- if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
- ((idmask == AT_UID) && take_owner) ||
- ((idmask == AT_GID) && take_group)) {
- if (zfs_zaccess_v4_perm(zp, ACE_WRITE_OWNER, cr) == 0) {
- /*
- * Remove setuid/setgid for non-privileged users
- */
- secpolicy_setid_clear(vap, cr);
- trim_mask = (mask & (AT_UID|AT_GID));
- } else {
- need_policy = TRUE;
- }
- } else {
- need_policy = TRUE;
- }
- }
-
- mutex_enter(&zp->z_lock);
- oldva.va_mode = pzp->zp_mode; /* only mode/uid/gid of oldva are filled in */
- oldva.va_uid = zp->z_phys->zp_uid;
- oldva.va_gid = zp->z_phys->zp_gid;
- mutex_exit(&zp->z_lock);
-
- if (mask & AT_MODE) {
- if (zfs_zaccess_v4_perm(zp, ACE_WRITE_ACL, cr) == 0) {
- err = secpolicy_setid_setsticky_clear(vp, vap,
- &oldva, cr);
- if (err) {
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- trim_mask |= AT_MODE;
- } else {
- need_policy = TRUE;
- }
- }
-
- if (need_policy) {
- /*
- * If trim_mask is set then take ownership
- * has been granted or write_acl is present and user
- * has the ability to modify mode. In that case remove
- * UID|GID and or MODE from mask so that
- * secpolicy_vnode_setattr() doesn't revoke it.
- *
- * NOTE(review): on the error return just below, va_mask is
- * left trimmed (saved_mask is not restored) -- confirm that
- * callers do not reuse vap after a failure.
- */
-
- if (trim_mask) {
- saved_mask = vap->va_mask;
- vap->va_mask &= ~trim_mask;
-
- }
- err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
- (int (*)(void *, int, cred_t *))zfs_zaccess_rwx, zp);
- if (err) {
- ZFS_EXIT(zfsvfs);
- return (err);
- }
-
- if (trim_mask)
- vap->va_mask |= saved_mask;
- }
-
- /*
- * secpolicy_vnode_setattr, or take ownership may have
- * changed va_mask
- */
- mask = vap->va_mask;
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
-
- if (mask & AT_MODE) {
- uint64_t pmode = pzp->zp_mode;
-
- new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); /* keep the on-disk file type, take the new permission bits */
-
- if (zp->z_phys->zp_acl.z_acl_extern_obj)
- dmu_tx_hold_write(tx,
- pzp->zp_acl.z_acl_extern_obj, 0, SPA_MAXBLOCKSIZE);
- else
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
- 0, ZFS_ACL_SIZE(MAX_ACL_SIZE));
- }
-
- if ((mask & (AT_UID | AT_GID)) && zp->z_phys->zp_xattr != 0) {
- err = zfs_zget(zp->z_zfsvfs, zp->z_phys->zp_xattr, &attrzp);
- if (err) {
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (err);
- }
- dmu_tx_hold_bonus(tx, attrzp->z_id);
- }
-
- err = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (err) {
- if (attrzp)
- VN_RELE(ZTOV(attrzp)); /* re-fetched after "top" if still needed */
- if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (err);
- }
-
- dmu_buf_will_dirty(zp->z_dbuf, tx);
-
- /*
- * Set each attribute requested.
- * We group settings according to the locks they need to acquire.
- *
- * Note: you cannot set ctime directly, although it will be
- * updated as a side-effect of calling this function.
- */
-
- mutex_enter(&zp->z_lock);
-
- if (mask & AT_MODE) {
- err = zfs_acl_chmod_setattr(zp, new_mode, tx);
- ASSERT3U(err, ==, 0);
- }
-
- if (attrzp)
- mutex_enter(&attrzp->z_lock); /* taken while zp->z_lock is already held */
-
- if (mask & AT_UID) {
- zp->z_phys->zp_uid = (uint64_t)vap->va_uid;
- if (attrzp) {
- attrzp->z_phys->zp_uid = (uint64_t)vap->va_uid;
- }
- }
-
- if (mask & AT_GID) {
- zp->z_phys->zp_gid = (uint64_t)vap->va_gid;
- if (attrzp)
- attrzp->z_phys->zp_gid = (uint64_t)vap->va_gid;
- }
-
- if (attrzp)
- mutex_exit(&attrzp->z_lock);
-
- if (mask & AT_ATIME)
- ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
-
- if (mask & AT_MTIME)
- ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
-
- if (mask & AT_SIZE)
- zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
- else if (mask != 0)
- zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
-
- if (mask != 0)
- zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask);
-
- mutex_exit(&zp->z_lock);
-
- if (attrzp)
- VN_RELE(ZTOV(attrzp));
-
- dmu_tx_commit(tx);
-
- ZFS_EXIT(zfsvfs);
- return (err);
-}
-
-typedef struct zfs_zlock { /* one node of the list built by zfs_rename_lock() */
- krwlock_t *zl_rwlock; /* lock we acquired */
- znode_t *zl_znode; /* znode we held (NULL when no znode was looked up for this node) */
- struct zfs_zlock *zl_next; /* next in list */
-} zfs_zlock_t;
-
-/*
- * Drop locks and release vnodes that were held by zfs_rename_lock().
- *
- * IN/OUT: zlpp - head of the zfs_zlock_t list. Each node is popped from
- * the head; its znode reference (if any) is released
- * before its rwlock is exited, and the node is freed.
- * *zlpp is NULL on return.
- */
-static void
-zfs_rename_unlock(zfs_zlock_t **zlpp)
-{
- zfs_zlock_t *zl;
-
- while ((zl = *zlpp) != NULL) {
- if (zl->zl_znode != NULL)
- VN_RELE(ZTOV(zl->zl_znode));
- rw_exit(zl->zl_rwlock);
- *zlpp = zl->zl_next; /* advance the head before freeing the node */
- kmem_free(zl, sizeof (*zl));
- }
-}
-
-/*
- * Search back through the directory tree, using the ".." entries.
- * Lock each directory in the chain to prevent concurrent renames.
- * Fail any attempt to move a directory into one of its own descendants.
- * XXX - z_parent_lock can overlap with map or grow locks
- *
- * IN: szp - znode being renamed (its z_parent_lock is write-locked).
- * tdzp - target directory; the walk towards the root starts here.
- * sdzp - source directory; the walk stops when it is reached.
- *
- * IN/OUT: zlpp - list head onto which one zfs_zlock_t is pushed for every
- * lock taken. Locks already pushed stay on the list when an
- * error is returned, so the caller must still pass the list
- * to zfs_rename_unlock().
- *
- * RETURN: 0 if the walk reached the root or sdzp
- * EINVAL if szp was found among tdzp's ancestors (or is tdzp)
- * error code from zfs_zget() if an ancestor could not be fetched
- */
-static int
-zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
-{
- zfs_zlock_t *zl;
- znode_t *zp = tdzp;
- uint64_t rootid = zp->z_zfsvfs->z_root;
- uint64_t *oidp = &zp->z_id;
- krwlock_t *rwlp = &szp->z_parent_lock;
- krw_t rw = RW_WRITER;
-
- /*
- * First pass write-locks szp and compares to zp->z_id.
- * Later passes read-lock zp and compare to zp->z_parent.
- */
- do {
- if (!rw_tryenter(rwlp, rw)) {
- /*
- * Another thread is renaming in this path.
- * Note that if we are a WRITER, we don't have any
- * parent_locks held yet.
- */
- if (rw == RW_READER && zp->z_id > szp->z_id) {
- /*
- * Drop our locks and restart
- * NOTE(review): this unlocks starting from the
- * local zl, which appears to equal the head
- * last stored in *zlpp -- confirm.
- */
- zfs_rename_unlock(&zl);
- *zlpp = NULL;
- zp = tdzp;
- oidp = &zp->z_id;
- rwlp = &szp->z_parent_lock;
- rw = RW_WRITER;
- continue;
- } else {
- /*
- * Wait for other thread to drop its locks
- */
- rw_enter(rwlp, rw);
- }
- }
-
- zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
- zl->zl_rwlock = rwlp;
- zl->zl_znode = NULL;
- zl->zl_next = *zlpp;
- *zlpp = zl; /* record the lock before any early return below */
-
- if (*oidp == szp->z_id) /* We're a descendant of szp */
- return (EINVAL);
-
- if (*oidp == rootid) /* We've hit the top */
- return (0);
-
- if (rw == RW_READER) { /* i.e. not the first pass */
- int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp);
- if (error)
- return (error);
- zl->zl_znode = zp; /* reference released later by zfs_rename_unlock() */
- }
- oidp = &zp->z_phys->zp_parent;
- rwlp = &zp->z_parent_lock;
- rw = RW_READER;
-
- } while (zp->z_id != sdzp->z_id);
-
- return (0);
-}
-
-/*
- * Move an entry from the provided source directory to the target
- * directory. Change the entry name as indicated.
- *
- * IN: sdvp - Source directory containing the "old entry".
- * snm - Old entry name.
- * tdvp - Target directory to contain the "new entry".
- * tnm - New entry name.
- * cr - credentials of caller.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * sdvp,tdvp - ctime|mtime updated
- *
- * Notes:
- * Returns EXDEV when the two directories are on different
- * filesystems and EINVAL when exactly one of them is an extended
- * attribute directory, or when the source name is "." or ".." or the
- * target name is ".." and the corresponding dirent lock fails.
- * Renaming a name onto itself in the same directory, or onto another
- * name for the same object, returns 0 without changing anything.
- * When the source is a directory, zfs_rename_lock() is used to reject
- * a move into one of its own descendants.
- * When dmu_tx_assign() returns ERESTART and z_assign is TXG_NOWAIT,
- * every lock and reference is dropped, dmu_tx_wait() is called, and
- * the operation restarts at "top".
- */
-static int
-zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr)
-{
- znode_t *tdzp, *szp, *tzp;
- znode_t *sdzp = VTOZ(sdvp);
- zfsvfs_t *zfsvfs = sdzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- vnode_t *realvp;
- zfs_dirlock_t *sdl, *tdl;
- dmu_tx_t *tx;
- zfs_zlock_t *zl;
- int cmp, serr, terr, error;
-
- ZFS_ENTER(zfsvfs);
-
- /*
- * Make sure we have the real vp for the target directory.
- */
- if (VOP_REALVP(tdvp, &realvp) == 0)
- tdvp = realvp;
-
- if (tdvp->v_vfsp != sdvp->v_vfsp) {
- ZFS_EXIT(zfsvfs);
- return (EXDEV);
- }
-
- tdzp = VTOZ(tdvp);
-top:
- szp = NULL;
- tzp = NULL;
- zl = NULL;
-
- /*
- * This is to prevent the creation of links into attribute space
- * by renaming a linked file into/outof an attribute directory.
- * See the comment in zfs_link() for why this is considered bad.
- */
- if ((tdzp->z_phys->zp_flags & ZFS_XATTR) !=
- (sdzp->z_phys->zp_flags & ZFS_XATTR)) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * Lock source and target directory entries. To prevent deadlock,
- * a lock ordering must be defined. We lock the directory with
- * the smallest object id first, or if it's a tie, the one with
- * the lexically first name.
- */
- if (sdzp->z_id < tdzp->z_id) {
- cmp = -1;
- } else if (sdzp->z_id > tdzp->z_id) {
- cmp = 1;
- } else {
- cmp = strcmp(snm, tnm);
- if (cmp == 0) {
- /*
- * POSIX: "If the old argument and the new argument
- * both refer to links to the same existing file,
- * the rename() function shall return successfully
- * and perform no other action."
- */
- ZFS_EXIT(zfsvfs);
- return (0);
- }
- }
- if (cmp < 0) {
- serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS);
- terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, 0);
- } else {
- terr = zfs_dirent_lock(&tdl, tdzp, tnm, &tzp, 0);
- serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ZEXISTS);
- }
-
- if (serr) {
- /*
- * Source entry invalid or not there.
- */
- if (!terr) {
- zfs_dirent_unlock(tdl);
- if (tzp)
- VN_RELE(ZTOV(tzp));
- }
- if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
- serr = EINVAL;
- ZFS_EXIT(zfsvfs);
- return (serr);
- }
- if (terr) {
- zfs_dirent_unlock(sdl);
- VN_RELE(ZTOV(szp));
- if (strcmp(tnm, "..") == 0)
- terr = EINVAL;
- ZFS_EXIT(zfsvfs);
- return (terr);
- }
-
- /*
- * Must have write access at the source to remove the old entry
- * and write access at the target to create the new entry.
- * Note that if target and source are the same, this can be
- * done in a single check.
- */
-
- if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
- goto out;
-
- if (ZTOV(szp)->v_type == VDIR) {
- /*
- * Check to make sure rename is valid.
- * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
- */
- if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
- goto out; /* locks already pushed on zl are dropped at "out" */
- }
-
- /*
- * Does target exist?
- */
- if (tzp) {
- /*
- * Source and target must be the same type.
- */
- if (ZTOV(szp)->v_type == VDIR) {
- if (ZTOV(tzp)->v_type != VDIR) {
- error = ENOTDIR;
- goto out;
- }
- } else {
- if (ZTOV(tzp)->v_type == VDIR) {
- error = EISDIR;
- goto out;
- }
- }
- /*
- * POSIX dictates that when the source and target
- * entries refer to the same file object, rename
- * must do nothing and exit without error.
- */
- if (szp->z_id == tzp->z_id) {
- error = 0;
- goto out;
- }
- }
-
- vnevent_rename_src(ZTOV(szp));
- if (tzp)
- vnevent_rename_dest(ZTOV(tzp));
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, szp->z_id); /* nlink changes */
- dmu_tx_hold_bonus(tx, sdzp->z_id); /* nlink changes */
- dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
- dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
- if (sdzp != tdzp)
- dmu_tx_hold_bonus(tx, tdzp->z_id); /* nlink changes */
- if (tzp)
- dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */
- dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- if (zl != NULL)
- zfs_rename_unlock(&zl);
- zfs_dirent_unlock(sdl);
- zfs_dirent_unlock(tdl);
- VN_RELE(ZTOV(szp));
- if (tzp)
- VN_RELE(ZTOV(tzp));
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- if (tzp) /* Attempt to remove the existing target */
- error = zfs_link_destroy(tdl, tzp, tx, 0, NULL);
-
- if (error == 0) { /* still 0 from dmu_tx_assign() when there was no target */
- error = zfs_link_create(tdl, szp, tx, ZRENAMING);
- if (error == 0) {
- error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
- ASSERT(error == 0);
- zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
- sdl->dl_name, tdzp, tdl->dl_name, szp);
- }
-#ifdef FREEBSD_NAMECACHE
- if (error == 0) {
- cache_purge(sdvp);
- cache_purge(tdvp);
- }
-#endif
- }
-
- dmu_tx_commit(tx); /* committed whether or not the link updates failed */
-out:
- if (zl != NULL)
- zfs_rename_unlock(&zl);
-
- zfs_dirent_unlock(sdl);
- zfs_dirent_unlock(tdl);
-
- VN_RELE(ZTOV(szp));
- if (tzp)
- VN_RELE(ZTOV(tzp));
-
- ZFS_EXIT(zfsvfs);
-
- return (error);
-}
-
-/*
- * Insert the indicated symbolic reference entry into the directory.
- *
- * IN: dvp - Directory to contain new symbolic link.
- * link - Name for new symlink entry.
- * vap - Attributes of new entry.
- * target - Target path of new symlink.
- * cr - credentials of caller.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * dvp - ctime|mtime updated
- */
-static int
-zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, cred_t *cr, kthread_t *td)
-{
- znode_t *zp, *dzp = VTOZ(dvp);
- zfs_dirlock_t *dl;
- dmu_tx_t *tx;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- uint64_t zoid;
- int len = strlen(link);
- int error;
-
- ASSERT(vap->va_type == VLNK);
-
- ZFS_ENTER(zfsvfs);
-top:
- if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- if (len > MAXPATHLEN) {
- ZFS_EXIT(zfsvfs);
- return (ENAMETOOLONG);
- }
-
- /*
- * Attempt to lock directory; fail if entry already exists.
- */
- if (error = zfs_dirent_lock(&dl, dzp, name, &zp, ZNEW)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
- dmu_tx_hold_bonus(tx, dzp->z_id);
- dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- dmu_buf_will_dirty(dzp->z_dbuf, tx);
-
- /*
- * Create a new object for the symlink.
- * Put the link content into bonus buffer if it will fit;
- * otherwise, store it just like any other file data.
- */
- zoid = 0;
- if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
- zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, len);
- if (len != 0)
- bcopy(link, zp->z_phys + 1, len);
- } else {
- dmu_buf_t *dbp;
-
- zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0);
-
- /*
- * Nothing can access the znode yet so no locking needed
- * for growing the znode's blocksize.
- */
- zfs_grow_blocksize(zp, len, tx);
-
- VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, zoid, 0, FTAG, &dbp));
- dmu_buf_will_dirty(dbp, tx);
-
- ASSERT3U(len, <=, dbp->db_size);
- bcopy(link, dbp->db_data, len);
- dmu_buf_rele(dbp, FTAG);
- }
- zp->z_phys->zp_size = len;
-
- /*
- * Insert the new object into the directory.
- */
- (void) zfs_link_create(dl, zp, tx, ZNEW);
-out:
- if (error == 0) {
- zfs_log_symlink(zilog, tx, TX_SYMLINK, dzp, zp, name, link);
- *vpp = ZTOV(zp);
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- }
-
- dmu_tx_commit(tx);
-
- zfs_dirent_unlock(dl);
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Return, in the buffer contained in the provided uio structure,
- * the symbolic path referred to by vp.
- *
- * IN: vp - vnode of symbolic link.
- * uoip - structure to contain the link path.
- * cr - credentials of caller.
- *
- * OUT: uio - structure to contain the link path.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * vp - atime updated
- */
-/* ARGSUSED */
-static int
-zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- size_t bufsz;
- int error;
-
- ZFS_ENTER(zfsvfs);
-
- bufsz = (size_t)zp->z_phys->zp_size;
- if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) {
- error = uiomove(zp->z_phys + 1,
- MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
- } else {
- dmu_buf_t *dbp;
- error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp);
- if (error) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- error = uiomove(dbp->db_data,
- MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
- dmu_buf_rele(dbp, FTAG);
- }
-
- ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-/*
- * Insert a new entry into directory tdvp referencing svp.
- *
- * IN: tdvp - Directory to contain new entry.
- * svp - vnode of new entry.
- * name - name of new entry.
- * cr - credentials of caller.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * Timestamps:
- * tdvp - ctime|mtime updated
- * svp - ctime updated
- */
-/* ARGSUSED */
-static int
-zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr)
-{
- znode_t *dzp = VTOZ(tdvp);
- znode_t *tzp, *szp;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- zfs_dirlock_t *dl;
- dmu_tx_t *tx;
- vnode_t *realvp;
- int error;
-
- ASSERT(tdvp->v_type == VDIR);
-
- ZFS_ENTER(zfsvfs);
-
- if (VOP_REALVP(svp, &realvp) == 0)
- svp = realvp;
-
- if (svp->v_vfsp != tdvp->v_vfsp) {
- ZFS_EXIT(zfsvfs);
- return (EXDEV);
- }
-
- szp = VTOZ(svp);
-top:
- /*
- * We do not support links between attributes and non-attributes
- * because of the potential security risk of creating links
- * into "normal" file space in order to circumvent restrictions
- * imposed in attribute space.
- */
- if ((szp->z_phys->zp_flags & ZFS_XATTR) !=
- (dzp->z_phys->zp_flags & ZFS_XATTR)) {
- ZFS_EXIT(zfsvfs);
- return (EINVAL);
- }
-
- /*
- * POSIX dictates that we return EPERM here.
- * Better choices include ENOTSUP or EISDIR.
- */
- if (svp->v_type == VDIR) {
- ZFS_EXIT(zfsvfs);
- return (EPERM);
- }
-
- if ((uid_t)szp->z_phys->zp_uid != crgetuid(cr) &&
- secpolicy_basic_link(cr) != 0) {
- ZFS_EXIT(zfsvfs);
- return (EPERM);
- }
-
- if (error = zfs_zaccess(dzp, ACE_ADD_FILE, cr)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- /*
- * Attempt to lock directory; fail if entry already exists.
- */
- if (error = zfs_dirent_lock(&dl, dzp, name, &tzp, ZNEW)) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, szp->z_id);
- dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- zfs_dirent_unlock(dl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
- dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- error = zfs_link_create(dl, szp, tx, 0);
-
- if (error == 0)
- zfs_log_link(zilog, tx, TX_LINK, dzp, szp, name);
-
- dmu_tx_commit(tx);
-
- zfs_dirent_unlock(dl);
-
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-
-void
-zfs_inactive(vnode_t *vp, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int error;
-
- rw_enter(&zfsvfs->z_um_lock, RW_READER);
- if (zfsvfs->z_unmounted2) {
- ASSERT(zp->z_dbuf_held == 0);
-
- mutex_enter(&zp->z_lock);
- VI_LOCK(vp);
- vp->v_count = 0; /* count arrives as 1 */
- VI_UNLOCK(vp);
- if (zp->z_dbuf == NULL) {
- mutex_exit(&zp->z_lock);
- zfs_znode_free(zp);
- } else {
- mutex_exit(&zp->z_lock);
- }
- rw_exit(&zfsvfs->z_um_lock);
- VFS_RELE(zfsvfs->z_vfs);
- return;
- }
-
- if (zp->z_atime_dirty && zp->z_unlinked == 0) {
- dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
-
- dmu_tx_hold_bonus(tx, zp->z_id);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- dmu_buf_will_dirty(zp->z_dbuf, tx);
- mutex_enter(&zp->z_lock);
- zp->z_atime_dirty = 0;
- mutex_exit(&zp->z_lock);
- dmu_tx_commit(tx);
- }
- }
-
- zfs_zinactive(zp);
- rw_exit(&zfsvfs->z_um_lock);
-}
-
-CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
-CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));
-
-static int
-zfs_fid(vnode_t *vp, fid_t *fidp)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- uint32_t gen = (uint32_t)zp->z_phys->zp_gen;
- uint64_t object = zp->z_id;
- zfid_short_t *zfid;
- int size, i;
-
- ZFS_ENTER(zfsvfs);
-
- size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
- fidp->fid_len = size;
-
- zfid = (zfid_short_t *)fidp;
-
- zfid->zf_len = size;
-
- for (i = 0; i < sizeof (zfid->zf_object); i++)
- zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
-
- /* Must have a non-zero generation number to distinguish from .zfs */
- if (gen == 0)
- gen = 1;
- for (i = 0; i < sizeof (zfid->zf_gen); i++)
- zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
-
- if (size == LONG_FID_LEN) {
- uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
- zfid_long_t *zlfid;
-
- zlfid = (zfid_long_t *)fidp;
-
- for (i = 0; i < sizeof (zlfid->zf_setid); i++)
- zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
-
- /* XXX - this should be the generation number for the objset */
- for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
- zlfid->zf_setgen[i] = 0;
- }
-
- ZFS_EXIT(zfsvfs);
- return (0);
-}
-
-static int
-zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
-{
- znode_t *zp, *xzp;
- zfsvfs_t *zfsvfs;
- zfs_dirlock_t *dl;
- int error;
-
- switch (cmd) {
- case _PC_LINK_MAX:
- *valp = INT_MAX;
- return (0);
-
- case _PC_FILESIZEBITS:
- *valp = 64;
- return (0);
-
-#if 0
- case _PC_XATTR_EXISTS:
- zp = VTOZ(vp);
- zfsvfs = zp->z_zfsvfs;
- ZFS_ENTER(zfsvfs);
- *valp = 0;
- error = zfs_dirent_lock(&dl, zp, "", &xzp,
- ZXATTR | ZEXISTS | ZSHARED);
- if (error == 0) {
- zfs_dirent_unlock(dl);
- if (!zfs_dirempty(xzp))
- *valp = 1;
- VN_RELE(ZTOV(xzp));
- } else if (error == ENOENT) {
- /*
- * If there aren't extended attributes, it's the
- * same as having zero of them.
- */
- error = 0;
- }
- ZFS_EXIT(zfsvfs);
- return (error);
-#endif
-
- case _PC_ACL_EXTENDED:
- *valp = 0; /* TODO */
- return (0);
-
- case _PC_MIN_HOLE_SIZE:
- *valp = (int)SPA_MINBLOCKSIZE;
- return (0);
-
- default:
- return (EOPNOTSUPP);
- }
-}
-
-#ifdef TODO
-/*ARGSUSED*/
-static int
-zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int error;
-
- ZFS_ENTER(zfsvfs);
- error = zfs_getacl(zp, vsecp, cr);
- ZFS_EXIT(zfsvfs);
-
- return (error);
-}
-#endif /* TODO */
-
-#ifdef TODO
-/*ARGSUSED*/
-static int
-zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr)
-{
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int error;
-
- ZFS_ENTER(zfsvfs);
- error = zfs_setacl(zp, vsecp, cr);
- ZFS_EXIT(zfsvfs);
- return (error);
-}
-#endif /* TODO */
-
-static int
-zfs_freebsd_open(ap)
- struct vop_open_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- znode_t *zp = VTOZ(vp);
- int error;
-
- error = zfs_open(&vp, ap->a_mode, ap->a_cred);
- if (error == 0)
- vnode_create_vobject(vp, zp->z_phys->zp_size, ap->a_td);
- return (error);
-}
-
-static int
-zfs_freebsd_close(ap)
- struct vop_close_args /* {
- struct vnode *a_vp;
- int a_fflag;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
-
- return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred));
-}
-
-static int
-zfs_freebsd_ioctl(ap)
- struct vop_ioctl_args /* {
- struct vnode *a_vp;
- u_long a_command;
- caddr_t a_data;
- int a_fflag;
- struct ucred *cred;
- struct thread *td;
- } */ *ap;
-{
-
- return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
- ap->a_fflag, ap->a_cred, NULL));
-}
-
-static int
-zfs_freebsd_read(ap)
- struct vop_read_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
-{
-
- return (zfs_read(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL));
-}
-
-static int
-zfs_freebsd_write(ap)
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
-{
-
- return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL));
-}
-
-static int
-zfs_freebsd_access(ap)
- struct vop_access_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
-
- return (zfs_access(ap->a_vp, ap->a_mode, 0, ap->a_cred));
-}
-
-static int
-zfs_freebsd_lookup(ap)
- struct vop_lookup_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- struct componentname *cnp = ap->a_cnp;
- char nm[NAME_MAX + 1];
-
- ASSERT(cnp->cn_namelen < sizeof(nm));
- strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));
-
- return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
- cnp->cn_cred, cnp->cn_thread));
-}
-
-static int
-zfs_freebsd_create(ap)
- struct vop_create_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- struct vattr *a_vap;
- } */ *ap;
-{
- struct componentname *cnp = ap->a_cnp;
- vattr_t *vap = ap->a_vap;
- int mode;
-
- ASSERT(cnp->cn_flags & SAVENAME);
-
- vattr_init_mask(vap);
- mode = vap->va_mode & ALLPERMS;
-
- return (zfs_create(ap->a_dvp, cnp->cn_nameptr, vap, !EXCL, mode,
- ap->a_vpp, cnp->cn_cred));
-}
-
-static int
-zfs_freebsd_remove(ap)
- struct vop_remove_args /* {
- struct vnode *a_dvp;
- struct vnode *a_vp;
- struct componentname *a_cnp;
- } */ *ap;
-{
-
- ASSERT(ap->a_cnp->cn_flags & SAVENAME);
-
- return (zfs_remove(ap->a_dvp, ap->a_cnp->cn_nameptr,
- ap->a_cnp->cn_cred));
-}
-
-static int
-zfs_freebsd_mkdir(ap)
- struct vop_mkdir_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- struct vattr *a_vap;
- } */ *ap;
-{
- vattr_t *vap = ap->a_vap;
-
- ASSERT(ap->a_cnp->cn_flags & SAVENAME);
-
- vattr_init_mask(vap);
-
- return (zfs_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
- ap->a_cnp->cn_cred));
-}
-
-static int
-zfs_freebsd_rmdir(ap)
- struct vop_rmdir_args /* {
- struct vnode *a_dvp;
- struct vnode *a_vp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- struct componentname *cnp = ap->a_cnp;
-
- ASSERT(cnp->cn_flags & SAVENAME);
-
- return (zfs_rmdir(ap->a_dvp, cnp->cn_nameptr, NULL, cnp->cn_cred));
-}
-
-static int
-zfs_freebsd_readdir(ap)
- struct vop_readdir_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- struct ucred *a_cred;
- int *a_eofflag;
- int *a_ncookies;
- u_long **a_cookies;
- } */ *ap;
-{
-
- return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
- ap->a_ncookies, ap->a_cookies));
-}
-
-static int
-zfs_freebsd_fsync(ap)
- struct vop_fsync_args /* {
- struct vnode *a_vp;
- int a_waitfor;
- struct thread *a_td;
- } */ *ap;
-{
-
- vop_stdfsync(ap);
- return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred));
-}
-
-static int
-zfs_freebsd_getattr(ap)
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
-
- return (zfs_getattr(ap->a_vp, ap->a_vap, 0, ap->a_cred));
-}
-
-static int
-zfs_freebsd_setattr(ap)
- struct vop_setattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- struct thread *a_td;
- } */ *ap;
-{
- vattr_t *vap = ap->a_vap;
-
- /* No support for FreeBSD's chflags(2). */
- if (vap->va_flags != VNOVAL)
- return (EOPNOTSUPP);
-
- vattr_init_mask(vap);
- vap->va_mask &= ~AT_NOSET;
-
- return (zfs_setattr(ap->a_vp, vap, 0, ap->a_cred, NULL));
-}
-
-static int
-zfs_freebsd_rename(ap)
- struct vop_rename_args /* {
- struct vnode *a_fdvp;
- struct vnode *a_fvp;
- struct componentname *a_fcnp;
- struct vnode *a_tdvp;
- struct vnode *a_tvp;
- struct componentname *a_tcnp;
- } */ *ap;
-{
- vnode_t *fdvp = ap->a_fdvp;
- vnode_t *fvp = ap->a_fvp;
- vnode_t *tdvp = ap->a_tdvp;
- vnode_t *tvp = ap->a_tvp;
- int error;
-
- ASSERT(ap->a_fcnp->cn_flags & SAVENAME);
- ASSERT(ap->a_tcnp->cn_flags & SAVENAME);
-
- error = zfs_rename(fdvp, ap->a_fcnp->cn_nameptr, tdvp,
- ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred);
-
- if (tdvp == tvp)
- VN_RELE(tdvp);
- else
- VN_URELE(tdvp);
- if (tvp)
- VN_URELE(tvp);
- VN_RELE(fdvp);
- VN_RELE(fvp);
-
- return (error);
-}
-
-static int
-zfs_freebsd_symlink(ap)
- struct vop_symlink_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- struct vattr *a_vap;
- char *a_target;
- } */ *ap;
-{
- struct componentname *cnp = ap->a_cnp;
- vattr_t *vap = ap->a_vap;
-
- ASSERT(cnp->cn_flags & SAVENAME);
-
- vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
- vattr_init_mask(vap);
-
- return (zfs_symlink(ap->a_dvp, ap->a_vpp, cnp->cn_nameptr, vap,
- ap->a_target, cnp->cn_cred, cnp->cn_thread));
-}
-
-static int
-zfs_freebsd_readlink(ap)
- struct vop_readlink_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- struct ucred *a_cred;
- } */ *ap;
-{
-
- return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred));
-}
-
-static int
-zfs_freebsd_link(ap)
- struct vop_link_args /* {
- struct vnode *a_tdvp;
- struct vnode *a_vp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- struct componentname *cnp = ap->a_cnp;
-
- ASSERT(cnp->cn_flags & SAVENAME);
-
- return (zfs_link(ap->a_tdvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
-}
-
-static int
-zfs_freebsd_inactive(ap)
- struct vop_inactive_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
-
- zfs_inactive(vp, ap->a_td->td_ucred);
- return (0);
-}
-
-static int
-zfs_freebsd_reclaim(ap)
- struct vop_reclaim_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- znode_t *zp = VTOZ(vp);
- zfsvfs_t *zfsvfs;
- int rele = 1;
-
- ASSERT(zp != NULL);
-
- /*
- * Destroy the vm object and flush associated pages.
- */
- vnode_destroy_vobject(vp);
-
- mutex_enter(&zp->z_lock);
- ASSERT(zp->z_phys);
- ASSERT(zp->z_dbuf_held);
- zfsvfs = zp->z_zfsvfs;
- if (!zp->z_unlinked) {
- zp->z_dbuf_held = 0;
- ZTOV(zp) = NULL;
- mutex_exit(&zp->z_lock);
- dmu_buf_rele(zp->z_dbuf, NULL);
- } else {
- mutex_exit(&zp->z_lock);
- }
- VI_LOCK(vp);
- if (vp->v_count > 0)
- rele = 0;
- vp->v_data = NULL;
- ASSERT(vp->v_holdcnt >= 1);
- VI_UNLOCK(vp);
- if (!zp->z_unlinked && rele)
- VFS_RELE(zfsvfs->z_vfs);
- return (0);
-}
-
-static int
-zfs_freebsd_fid(ap)
- struct vop_fid_args /* {
- struct vnode *a_vp;
- struct fid *a_fid;
- } */ *ap;
-{
-
- return (zfs_fid(ap->a_vp, (void *)ap->a_fid));
-}
-
-static int
-zfs_freebsd_pathconf(ap)
- struct vop_pathconf_args /* {
- struct vnode *a_vp;
- int a_name;
- register_t *a_retval;
- } */ *ap;
-{
- ulong_t val;
- int error;
-
- error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred);
- if (error == 0)
- *ap->a_retval = val;
- else if (error == EOPNOTSUPP)
- error = vop_stdpathconf(ap);
- return (error);
-}
-
-/*
- * Advisory record locking support
- */
-static int
-zfs_freebsd_advlock(ap)
- struct vop_advlock_args /* {
- struct vnode *a_vp;
- caddr_t a_id;
- int a_op;
- struct flock *a_fl;
- int a_flags;
- } */ *ap;
-{
- znode_t *zp = VTOZ(ap->a_vp);
-
- return (lf_advlock(ap, &(zp->z_lockf), zp->z_phys->zp_size));
-}
-
-/*
- * Advisory record locking support
- */
-static int
-zfs_freebsd_advlockasync(ap)
- struct vop_advlockasync_args /* {
- struct vnode *a_vp;
- caddr_t a_id;
- int a_op;
- struct flock *a_fl;
- int a_flags;
- struct task *a_task;
- } */ *ap;
-{
- znode_t *zp = VTOZ(ap->a_vp);
-
- return (lf_advlockasync(ap, &(zp->z_lockf), zp->z_phys->zp_size));
-}
-
-struct vop_vector zfs_vnodeops;
-struct vop_vector zfs_fifoops;
-
-struct vop_vector zfs_vnodeops = {
- .vop_default = &default_vnodeops,
- .vop_inactive = zfs_freebsd_inactive,
- .vop_reclaim = zfs_freebsd_reclaim,
- .vop_access = zfs_freebsd_access,
-#ifdef FREEBSD_NAMECACHE
- .vop_lookup = vfs_cache_lookup,
- .vop_cachedlookup = zfs_freebsd_lookup,
-#else
- .vop_lookup = zfs_freebsd_lookup,
-#endif
- .vop_getattr = zfs_freebsd_getattr,
- .vop_setattr = zfs_freebsd_setattr,
- .vop_create = zfs_freebsd_create,
- .vop_mknod = zfs_freebsd_create,
- .vop_mkdir = zfs_freebsd_mkdir,
- .vop_readdir = zfs_freebsd_readdir,
- .vop_fsync = zfs_freebsd_fsync,
- .vop_open = zfs_freebsd_open,
- .vop_close = zfs_freebsd_close,
- .vop_rmdir = zfs_freebsd_rmdir,
- .vop_ioctl = zfs_freebsd_ioctl,
- .vop_link = zfs_freebsd_link,
- .vop_symlink = zfs_freebsd_symlink,
- .vop_readlink = zfs_freebsd_readlink,
- .vop_read = zfs_freebsd_read,
- .vop_write = zfs_freebsd_write,
- .vop_remove = zfs_freebsd_remove,
- .vop_rename = zfs_freebsd_rename,
- .vop_advlock = zfs_freebsd_advlock,
- .vop_advlockasync = zfs_freebsd_advlockasync,
- .vop_pathconf = zfs_freebsd_pathconf,
- .vop_bmap = VOP_EOPNOTSUPP,
- .vop_fid = zfs_freebsd_fid,
-};
-
-struct vop_vector zfs_fifoops = {
- .vop_default = &fifo_specops,
- .vop_fsync = VOP_PANIC,
- .vop_access = zfs_freebsd_access,
- .vop_getattr = zfs_freebsd_getattr,
- .vop_inactive = zfs_freebsd_inactive,
- .vop_read = VOP_PANIC,
- .vop_reclaim = zfs_freebsd_reclaim,
- .vop_setattr = zfs_freebsd_setattr,
- .vop_write = VOP_PANIC,
- .vop_fid = zfs_freebsd_fid,
-};
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
deleted file mode 100644
index 46e501c..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ /dev/null
@@ -1,1072 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Portions Copyright 2007 Jeremy Teo */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef _KERNEL
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <sys/sysmacros.h>
-#include <sys/resource.h>
-#include <sys/mntent.h>
-#include <sys/vfs.h>
-#include <sys/vnode.h>
-#include <sys/file.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/unistd.h>
-#include <sys/atomic.h>
-#include <sys/zfs_dir.h>
-#include <sys/zfs_acl.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_rlock.h>
-#include <sys/fs/zfs.h>
-#endif /* _KERNEL */
-
-#include <sys/dmu.h>
-#include <sys/refcount.h>
-#include <sys/stat.h>
-#include <sys/zap.h>
-#include <sys/zfs_znode.h>
-#include <sys/refcount.h>
-
-/* Used by fstat(1). */
-SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof(znode_t),
- "sizeof(znode_t)");
-
-/*
- * Functions needed for userland (ie: libzpool) are not put under
- * #ifdef_KERNEL; the rest of the functions have dependencies
- * (such as VFS logic) that will not compile easily in userland.
- */
-#ifdef _KERNEL
-struct kmem_cache *znode_cache = NULL;
-
-/*ARGSUSED*/
-static void
-znode_pageout_func(dmu_buf_t *dbuf, void *user_ptr)
-{
- znode_t *zp = user_ptr;
- vnode_t *vp;
-
- mutex_enter(&zp->z_lock);
- vp = ZTOV(zp);
- if (vp == NULL) {
- mutex_exit(&zp->z_lock);
- zfs_znode_free(zp);
- } else if (vp->v_count == 0) {
- ZTOV(zp) = NULL;
- vhold(vp);
- mutex_exit(&zp->z_lock);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- vrecycle(vp, curthread);
- VOP_UNLOCK(vp, 0);
- vdrop(vp);
- zfs_znode_free(zp);
- } else {
- /* signal force unmount that this znode can be freed */
- zp->z_dbuf = NULL;
- mutex_exit(&zp->z_lock);
- }
-}
-
-extern struct vop_vector zfs_vnodeops;
-extern struct vop_vector zfs_fifoops;
-
-/*
- * XXX: We cannot use this function as a cache constructor, because
- * there is one global cache for all file systems and we need
- * to pass vfsp here, which is not possible, because argument
- * 'cdrarg' is defined at kmem_cache_create() time.
- */
-static int
-zfs_znode_cache_constructor(void *buf, void *cdrarg, int kmflags)
-{
- znode_t *zp = buf;
- vnode_t *vp;
- vfs_t *vfsp = cdrarg;
- int error;
-
- if (cdrarg != NULL) {
- error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp);
- ASSERT(error == 0);
- zp->z_vnode = vp;
- vp->v_data = (caddr_t)zp;
- VN_LOCK_AREC(vp);
- VN_LOCK_ASHARE(vp);
- } else {
- zp->z_vnode = NULL;
- }
- mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
- rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL);
- rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
- rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
- mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
-
- mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
- avl_create(&zp->z_range_avl, zfs_range_compare,
- sizeof (rl_t), offsetof(rl_t, r_node));
-
- zp->z_dbuf_held = 0;
- zp->z_dirlocks = 0;
- zp->z_lockf = NULL;
- return (0);
-}
-
-/*ARGSUSED*/
-static void
-zfs_znode_cache_destructor(void *buf, void *cdarg)
-{
- znode_t *zp = buf;
-
- ASSERT(zp->z_dirlocks == 0);
- mutex_destroy(&zp->z_lock);
- rw_destroy(&zp->z_map_lock);
- rw_destroy(&zp->z_parent_lock);
- rw_destroy(&zp->z_name_lock);
- mutex_destroy(&zp->z_acl_lock);
- mutex_destroy(&zp->z_range_lock);
- avl_destroy(&zp->z_range_avl);
-
- ASSERT(zp->z_dbuf_held == 0);
-}
-
-void
-zfs_znode_init(void)
-{
- /*
- * Initialize zcache
- */
- ASSERT(znode_cache == NULL);
- znode_cache = kmem_cache_create("zfs_znode_cache",
- sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL,
- zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
-}
-
-void
-zfs_znode_fini(void)
-{
- /*
- * Cleanup zcache
- */
- if (znode_cache)
- kmem_cache_destroy(znode_cache);
- znode_cache = NULL;
-}
-
-/*
- * zfs_init_fs - Initialize the zfsvfs struct and the file system
- * incore "master" object. Verify version compatibility.
- */
-int
-zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
-{
- objset_t *os = zfsvfs->z_os;
- uint64_t version = ZPL_VERSION;
- int i, error;
- dmu_object_info_t doi;
- uint64_t fsid_guid;
-
- *zpp = NULL;
-
- /*
- * XXX - hack to auto-create the pool root filesystem at
- * the first attempted mount.
- */
- if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
- dmu_tx_t *tx = dmu_tx_create(os);
-
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
- error = dmu_tx_assign(tx, TXG_WAIT);
- ASSERT3U(error, ==, 0);
- zfs_create_fs(os, cr, tx);
- dmu_tx_commit(tx);
- }
-
- error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_OBJ, 8, 1,
- &version);
- if (error) {
- return (error);
- } else if (version != ZPL_VERSION) {
- (void) printf("Mismatched versions: File system "
- "is version %lld on-disk format, which is "
- "incompatible with this software version %lld!",
- (u_longlong_t)version, ZPL_VERSION);
- return (ENOTSUP);
- }
-
- /*
- * The fsid is 64 bits, composed of an 8-bit fs type, which
- * separates our fsid from any other filesystem types, and a
- * 56-bit objset unique ID. The objset unique ID is unique to
- * all objsets open on this system, provided by unique_create().
- * The 8-bit fs type must be put in the low bits of fsid[1]
- * because that's where other Solaris filesystems put it.
- */
- fsid_guid = dmu_objset_fsid_guid(os);
- ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
- zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid;
- zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
- zfsvfs->z_vfs->mnt_vfc->vfc_typenum & 0xFF;
-
- error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
- &zfsvfs->z_root);
- if (error)
- return (error);
- ASSERT(zfsvfs->z_root != 0);
-
- /*
- * Create the per mount vop tables.
- */
-
- /*
- * Initialize zget mutex's
- */
- for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
- mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
-
- error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp);
- if (error)
- return (error);
- ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root);
-
- error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
- &zfsvfs->z_unlinkedobj);
- if (error)
- return (error);
-
- return (0);
-}
-
-/*
- * define a couple of values we need available
- * for both 64 and 32 bit environments.
- */
-#ifndef NBITSMINOR64
-#define NBITSMINOR64 32
-#endif
-#ifndef MAXMAJ64
-#define MAXMAJ64 0xffffffffUL
-#endif
-#ifndef MAXMIN64
-#define MAXMIN64 0xffffffffUL
-#endif
-#ifndef major
-#define major(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number */
-#endif
-#ifndef minor
-#define minor(x) ((int)((x)&0xffff00ff)) /* minor number */
-#endif
-
-/*
- * Create special expldev for ZFS private use.
- * Can't use standard expldev since it doesn't do
- * what we want. The standard expldev() takes a
- * dev32_t in LP64 and expands it to a long dev_t.
- * We need an interface that takes a dev32_t in ILP32
- * and expands it to a long dev_t.
- */
-static uint64_t
-zfs_expldev(dev_t dev)
-{
- return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
-}
-/*
- * Special cmpldev for ZFS private use.
- * Can't use standard cmpldev since it takes
- * a long dev_t and compresses it to dev32_t in
- * LP64. We need to do a compaction of a long dev_t
- * to a dev32_t in ILP32.
- */
-dev_t
-zfs_cmpldev(uint64_t dev)
-{
- return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
-}
-
-/*
- * Construct a new znode/vnode and intialize.
- *
- * This does not do a call to dmu_set_user() that is
- * up to the caller to do, in case you don't want to
- * return the znode
- */
-static znode_t *
-zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz)
-{
- znode_t *zp;
- vnode_t *vp;
- int error;
-
- zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
- zfs_znode_cache_constructor(zp, zfsvfs->z_vfs, 0);
-
- ASSERT(zp->z_dirlocks == NULL);
-
- zp->z_phys = db->db_data;
- zp->z_zfsvfs = zfsvfs;
- zp->z_unlinked = 0;
- zp->z_atime_dirty = 0;
- zp->z_dbuf_held = 0;
- zp->z_mapcnt = 0;
- zp->z_last_itx = 0;
- zp->z_dbuf = db;
- zp->z_id = obj_num;
- zp->z_blksz = blksz;
- zp->z_seq = 0x7A4653;
- zp->z_sync_cnt = 0;
-
- mutex_enter(&zfsvfs->z_znodes_lock);
- list_insert_tail(&zfsvfs->z_all_znodes, zp);
- mutex_exit(&zfsvfs->z_znodes_lock);
-
- vp = ZTOV(zp);
- if (vp == NULL)
- return (zp);
-
- error = insmntque(vp, zfsvfs->z_vfs);
- KASSERT(error == 0, ("insmntque() failed: error %d", error));
-
- vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
- switch (vp->v_type) {
- case VDIR:
- zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
- break;
- case VFIFO:
- vp->v_op = &zfs_fifoops;
- break;
- }
-
- return (zp);
-}
-
-static void
-zfs_znode_dmu_init(znode_t *zp)
-{
- znode_t *nzp;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- dmu_buf_t *db = zp->z_dbuf;
-
- mutex_enter(&zp->z_lock);
-
- nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_pageout_func);
-
- /*
- * there should be no
- * concurrent zgets on this object.
- */
- ASSERT3P(nzp, ==, NULL);
-
- /*
- * Slap on VROOT if we are the root znode
- */
- if (zp->z_id == zfsvfs->z_root) {
- ZTOV(zp)->v_flag |= VROOT;
- }
-
- ASSERT(zp->z_dbuf_held == 0);
- zp->z_dbuf_held = 1;
- VFS_HOLD(zfsvfs->z_vfs);
- mutex_exit(&zp->z_lock);
-}
-
-/*
- * Create a new DMU object to hold a zfs znode.
- *
- * IN: dzp - parent directory for new znode
- * vap - file attributes for new znode
- * tx - dmu transaction id for zap operations
- * cr - credentials of caller
- * flag - flags:
- * IS_ROOT_NODE - new object will be root
- * IS_XATTR - new object is an attribute
- * IS_REPLAY - intent log replay
- * bonuslen - extra bytes requested for the bonus buffer, on top
- * of sizeof (znode_phys_t)
- *
- * OUT: oid - ID of created object
- * zpp - if non-NULL, receives the new in-core znode; if NULL,
- * the in-core znode is freed again before returning
- *
- * During ZIL replay (zfsvfs->z_assign >= TXG_INITIAL) the object number,
- * creation time and generation are taken from vap instead of being
- * generated here.
- */
-void
-zfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr,
- uint_t flag, znode_t **zpp, int bonuslen)
-{
- dmu_buf_t *dbp;
- znode_phys_t *pzp;
- znode_t *zp;
- zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
- timestruc_t now;
- uint64_t gen;
- int err;
-
- ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
-
- if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */
- *oid = vap->va_nodeid;
- flag |= IS_REPLAY;
- now = vap->va_ctime; /* see zfs_replay_create() */
- gen = vap->va_nblocks; /* ditto */
- } else {
- *oid = 0;
- gethrestime(&now);
- gen = dmu_tx_get_txg(tx);
- }
-
- /*
- * Create a new DMU object.
- */
- /*
- * There's currently no mechanism for pre-reading the blocks that will
- * be needed to allocate a new object, so we accept the small chance
- * that there will be an i/o error and we will fail one of the
- * assertions below.
- */
- /*
- * Directories are backed by a ZAP object, everything else by a plain
- * file object. On replay the recorded object number is claimed;
- * otherwise a fresh one is allocated.
- */
- if (vap->va_type == VDIR) {
- if (flag & IS_REPLAY) {
- err = zap_create_claim(zfsvfs->z_os, *oid,
- DMU_OT_DIRECTORY_CONTENTS,
- DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
- ASSERT3U(err, ==, 0);
- } else {
- *oid = zap_create(zfsvfs->z_os,
- DMU_OT_DIRECTORY_CONTENTS,
- DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
- }
- } else {
- if (flag & IS_REPLAY) {
- err = dmu_object_claim(zfsvfs->z_os, *oid,
- DMU_OT_PLAIN_FILE_CONTENTS, 0,
- DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
- ASSERT3U(err, ==, 0);
- } else {
- *oid = dmu_object_alloc(zfsvfs->z_os,
- DMU_OT_PLAIN_FILE_CONTENTS, 0,
- DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
- }
- }
- VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, *oid, NULL, &dbp));
- dmu_buf_will_dirty(dbp, tx);
-
- /*
- * Initialize the znode physical data to zero.
- */
- ASSERT(dbp->db_size >= sizeof (znode_phys_t));
- bzero(dbp->db_data, dbp->db_size);
- pzp = dbp->db_data;
-
- /*
- * If this is the root, fix up the half-initialized parent pointer
- * to reference the just-allocated physical data area.
- */
- if (flag & IS_ROOT_NODE) {
- dzp->z_phys = pzp;
- dzp->z_id = *oid;
- }
-
- /*
- * If parent is an xattr, so am I.
- */
- if (dzp->z_phys->zp_flags & ZFS_XATTR)
- flag |= IS_XATTR;
-
- if (vap->va_type == VBLK || vap->va_type == VCHR) {
- pzp->zp_rdev = zfs_expldev(vap->va_rdev);
- }
-
- if (vap->va_type == VDIR) {
- pzp->zp_size = 2; /* contents ("." and "..") */
- pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
- }
-
- pzp->zp_parent = dzp->z_id;
- if (flag & IS_XATTR)
- pzp->zp_flags |= ZFS_XATTR;
-
- pzp->zp_gen = gen;
-
- ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
- ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
-
- /* Caller-supplied atime/mtime win over the creation timestamp. */
- if (vap->va_mask & AT_ATIME) {
- ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
- } else {
- ZFS_TIME_ENCODE(&now, pzp->zp_atime);
- }
-
- if (vap->va_mask & AT_MTIME) {
- ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
- } else {
- ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
- }
-
- pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode);
- zp = zfs_znode_alloc(zfsvfs, dbp, *oid, 0);
-
- zfs_perm_init(zp, dzp, flag, vap, tx, cr);
-
- if (zpp) {
- kmutex_t *hash_mtx = ZFS_OBJ_MUTEX(zp);
-
- /* Attach the znode to its dbuf under the per-object mutex. */
- mutex_enter(hash_mtx);
- zfs_znode_dmu_init(zp);
- mutex_exit(hash_mtx);
-
- *zpp = zp;
- } else {
- /*
- * The caller does not want the in-core znode: zero the vnode
- * count (if there is a vnode), drop the bonus hold and free it.
- */
- if (ZTOV(zp) != NULL)
- ZTOV(zp)->v_count = 0;
- dmu_buf_rele(dbp, NULL);
- zfs_znode_free(zp);
- }
-}
-
-/*
- * Look up, or instantiate, the in-core znode for an object number.
- *
- * IN: zfsvfs - filesystem the object belongs to
- * obj_num - object number to look up
- *
- * OUT: zpp - the znode on success; set to NULL on entry and left
- * NULL on failure
- *
- * RETURN: 0 if success
- * the dmu_bonus_hold() error if the bonus buffer cannot be held
- * EINVAL if the object's bonus buffer is not a znode
- * ENOENT if the znode has been unlinked
- *
- * The whole lookup runs under the per-object hold
- * (ZFS_OBJ_HOLD_ENTER/EXIT) for obj_num.
- */
-int
-zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
-{
- dmu_object_info_t doi;
- dmu_buf_t *db;
- znode_t *zp;
- vnode_t *vp;
- int err;
-
- *zpp = NULL;
-
- ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
-
- err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
- if (err) {
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- return (err);
- }
-
- /* Reject objects whose bonus buffer cannot contain a znode_phys_t. */
- dmu_object_info_from_db(db, &doi);
- if (doi.doi_bonus_type != DMU_OT_ZNODE ||
- doi.doi_bonus_size < sizeof (znode_phys_t)) {
- dmu_buf_rele(db, NULL);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- return (EINVAL);
- }
-
- ASSERT(db->db_object == obj_num);
- ASSERT(db->db_offset == -1);
- ASSERT(db->db_data != NULL);
-
- zp = dmu_buf_get_user(db);
-
- if (zp != NULL) {
- /* A znode is already attached to this dbuf: reuse it. */
- mutex_enter(&zp->z_lock);
-
- ASSERT3U(zp->z_id, ==, obj_num);
- if (zp->z_unlinked) {
- dmu_buf_rele(db, NULL);
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- return (ENOENT);
- } else if (zp->z_dbuf_held) {
- /* The znode already owns a dbuf hold; drop ours. */
- dmu_buf_rele(db, NULL);
- } else {
- /* Hand our dbuf hold over to the znode. */
- zp->z_dbuf_held = 1;
- VFS_HOLD(zfsvfs->z_vfs);
- }
-
- if (ZTOV(zp) != NULL)
- VN_HOLD(ZTOV(zp));
- else {
- /* The znode has no vnode at the moment: create one. */
- err = getnewvnode("zfs", zfsvfs->z_vfs, &zfs_vnodeops,
- &zp->z_vnode);
- ASSERT(err == 0);
- vp = ZTOV(zp);
- vp->v_data = (caddr_t)zp;
- VN_LOCK_AREC(vp);
- VN_LOCK_ASHARE(vp);
- vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
- if (vp->v_type == VDIR)
- zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
- err = insmntque(vp, zfsvfs->z_vfs);
- KASSERT(err == 0, ("insmntque() failed: error %d", err));
- }
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- *zpp = zp;
- return (0);
- }
-
- /*
- * Not found create new znode/vnode
- */
- zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size);
- ASSERT3U(zp->z_id, ==, obj_num);
- zfs_znode_dmu_init(zp);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- *zpp = zp;
- return (0);
-}
-
-/*
- * Free the on-disk object backing a znode within transaction tx.
- *
- * Under the per-object hold this frees the external ACL object (when
- * zp_acl.z_acl_extern_obj is non-zero) and then the znode's own object,
- * and clears z_dbuf_held. The dbuf reference itself is released after
- * the per-object hold has been dropped.
- */
-void
-zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- int error;
-
- ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
- if (zp->z_phys->zp_acl.z_acl_extern_obj) {
- error = dmu_object_free(zfsvfs->z_os,
- zp->z_phys->zp_acl.z_acl_extern_obj, tx);
- ASSERT3U(error, ==, 0);
- }
- error = dmu_object_free(zfsvfs->z_os, zp->z_id, tx);
- ASSERT3U(error, ==, 0);
- zp->z_dbuf_held = 0;
- ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
- dmu_buf_rele(zp->z_dbuf, NULL);
-}
-
-/*
- * Handle a znode whose vnode is going inactive.
- *
- * If the vnode has picked up a new reference in the meantime nothing is
- * done. If the znode is unlinked, the vnode is detached and recycled,
- * the file is removed with zfs_rmnode() and the VFS hold is released.
- * Otherwise the znode is left as it is.
- */
-void
-zfs_zinactive(znode_t *zp)
-{
- vnode_t *vp = ZTOV(zp);
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- uint64_t z_id = zp->z_id;
-
- ASSERT(zp->z_dbuf_held && zp->z_phys);
-
- /*
- * Don't allow a zfs_zget() while we're trying to release this znode
- */
- ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
-
- mutex_enter(&zp->z_lock);
- VI_LOCK(vp);
- if (vp->v_count > 0) {
- /*
- * If the hold count is greater than zero, somebody has
- * obtained a new reference on this znode while we were
- * processing it here, so we are done.
- */
- VI_UNLOCK(vp);
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
- return;
- }
- VI_UNLOCK(vp);
-
- /*
- * If this was the last reference to a file with no links,
- * remove the file from the file system.
- */
- if (zp->z_unlinked) {
- /* Detach the vnode first; both locks are dropped before removal. */
- ZTOV(zp) = NULL;
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
- ASSERT(vp->v_count == 0);
- vrecycle(vp, curthread);
- zfs_rmnode(zp);
- VFS_RELE(zfsvfs->z_vfs);
- return;
- }
- ASSERT(zp->z_phys);
- ASSERT(zp->z_dbuf_held);
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
-}
-
-/*
- * Release an in-core znode: unlink it from the filesystem's z_all_znodes
- * list (under z_znodes_lock) and return it to znode_cache.
- */
-void
-zfs_znode_free(znode_t *zp)
-{
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-
- mutex_enter(&zfsvfs->z_znodes_lock);
- list_remove(&zfsvfs->z_all_znodes, zp);
- mutex_exit(&zfsvfs->z_znodes_lock);
-
- kmem_cache_free(znode_cache, zp);
-}
-
-/*
- * Stamp the timestamps selected by flag (AT_ATIME, AT_MTIME, AT_CTIME)
- * with the current time. The caller must hold zp->z_lock.
- *
- * With a transaction, the znode's dbuf is dirtied in tx, z_atime_dirty
- * is cleared and z_seq is incremented. Without one, the znode is only
- * marked as having a dirty atime.
- */
-void
-zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx)
-{
- timestruc_t now;
-
- ASSERT(MUTEX_HELD(&zp->z_lock));
-
- gethrestime(&now);
-
- if (tx) {
- dmu_buf_will_dirty(zp->z_dbuf, tx);
- zp->z_atime_dirty = 0;
- zp->z_seq++;
- } else {
- zp->z_atime_dirty = 1;
- }
-
- if (flag & AT_ATIME)
- ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime);
-
- if (flag & AT_MTIME)
- ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime);
-
- if (flag & AT_CTIME)
- ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime);
-}
-
-/*
- * Update the requested znode timestamps with the current time.
- * If we are in a transaction, then go ahead and mark the znode
- * dirty in the transaction so the timestamps will go to disk.
- * Otherwise, we will get pushed next time the znode is updated
- * in a transaction, or when this znode eventually goes inactive.
- *
- * Why is this OK?
- * 1 - Only the ACCESS time is ever updated outside of a transaction.
- * 2 - Multiple consecutive updates will be collapsed into a single
- * znode update by the transaction grouping semantics of the DMU.
- *
- * This is the locking wrapper: it takes zp->z_lock around
- * zfs_time_stamper_locked().
- */
-void
-zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx)
-{
- mutex_enter(&zp->z_lock);
- zfs_time_stamper_locked(zp, flag, tx);
- mutex_exit(&zp->z_lock);
-}
-
-/*
- * Grow the block size for a file.
- *
- * IN: zp - znode of file whose block size is to be grown.
- * size - requested block size
- * tx - open transaction.
- *
- * NOTE: this function assumes that the znode is write locked.
- *
- * The request is silently ignored when size does not exceed the current
- * block size, when the file already extends past its first block, or
- * when dmu_object_set_blocksize() reports ENOTSUP.
- */
-void
-zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
-{
- int error;
- u_longlong_t dummy;
-
- if (size <= zp->z_blksz)
- return;
- /*
- * If the file size is already greater than the current blocksize,
- * we will not grow. If there is more than one block in a file,
- * the blocksize cannot change.
- */
- if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz)
- return;
-
- error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
- size, 0, tx);
- if (error == ENOTSUP)
- return;
- ASSERT3U(error, ==, 0);
-
- /* What blocksize did we actually get? */
- dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy);
-}
-
-/*
- * Free space in a file.
- *
- * IN: zp - znode of file to free data in.
- * off - start of section to free.
- * len - length of section to free (0 => to EOF).
- * flag - current file open mode flags (not referenced in the
- * body of this function).
- * log - if true, update the file's timestamps and record the
- * truncate in the intent log.
- *
- * RETURN: 0 if success
- * error code if failure
- *
- * When off + len lies beyond the current file size the file is extended
- * to that offset rather than freed; FIFOs are left untouched.
- */
-int
-zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
-{
- vnode_t *vp = ZTOV(zp);
- dmu_tx_t *tx;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- rl_t *rl;
- uint64_t end = off + len;
- uint64_t size, new_blksz;
- int error;
-
- if (ZTOV(zp)->v_type == VFIFO)
- return (0);
-
- /*
- * If we will change zp_size then lock the whole file,
- * otherwise just lock the range being freed.
- */
- if (len == 0 || off + len > zp->z_phys->zp_size) {
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- } else {
- rl = zfs_range_lock(zp, off, len, RL_WRITER);
- /* recheck, in case zp_size changed */
- if (off + len > zp->z_phys->zp_size) {
- /* lost race: file size changed, lock whole file */
- zfs_range_unlock(rl);
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- }
- }
-
- /*
- * Nothing to do if file already at desired length.
- */
- size = zp->z_phys->zp_size;
- if (len == 0 && size == off && off != 0) {
- zfs_range_unlock(rl);
- return (0);
- }
-
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
- new_blksz = 0;
- if (end > size &&
- (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
- /*
- * We are growing the file past the current block size.
- */
- if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
- ASSERT(!ISP2(zp->z_blksz));
- new_blksz = MIN(end, SPA_MAXBLOCKSIZE);
- } else {
- new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
- }
- dmu_tx_hold_write(tx, zp->z_id, 0, MIN(end, new_blksz));
- } else if (off < size) {
- /*
- * If len == 0, we are truncating the file.
- */
- dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END);
- }
-
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- /*
- * On ERESTART with TXG_NOWAIT, wait on the tx before aborting;
- * the error is still returned to the caller.
- */
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- zfs_range_unlock(rl);
- return (error);
- }
-
- if (new_blksz)
- zfs_grow_blocksize(zp, new_blksz, tx);
-
- /* Extending the file, or truncating it to off (len == 0 => end == off). */
- if (end > size || len == 0)
- zp->z_phys->zp_size = end;
-
- if (off < size) {
- objset_t *os = zfsvfs->z_os;
- uint64_t rlen = len;
-
- /* Free to EOF on truncate; otherwise clamp to the old size. */
- if (len == 0)
- rlen = -1;
- else if (end > size)
- rlen = size - off;
- VERIFY(0 == dmu_free_range(os, zp->z_id, off, rlen, tx));
- }
-
- if (log) {
- zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
- zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
- }
-
- zfs_range_unlock(rl);
-
- dmu_tx_commit(tx);
-
- /*
- * Clear any mapped pages in the truncated region. This has to
- * happen outside of the transaction to avoid the possibility of
- * a deadlock with someone trying to push a page that we are
- * about to invalidate.
- */
- rw_enter(&zp->z_map_lock, RW_WRITER);
- if (end > size)
- vnode_pager_setsize(vp, end);
- else if (len == 0) {
-#if 0
- error = vtruncbuf(vp, curthread->td_ucred, curthread, end, PAGE_SIZE);
-#else
- /* NOTE(review): the vinvalbuf() result is stored but never used. */
- error = vinvalbuf(vp, V_SAVE, curthread, 0, 0);
- vnode_pager_setsize(vp, end);
-#endif
- }
- rw_exit(&zp->z_map_lock);
-
- return (0);
-}
-
-/*
- * Populate a new, empty objset with the minimal ZPL layout: the master
- * node (recording the ZPL version), the unlinked set ("delete queue")
- * and the root directory.
- *
- * IN: os - objset to populate
- * cr - credentials passed through to zfs_mknode()
- * tx - open transaction in which everything is created
- *
- * A zfsvfs_t on the stack and a throw-away znode taken from znode_cache
- * stand in for the real filesystem and parent directory while
- * zfs_mknode() runs; both are torn down before returning.
- */
-void
-zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx)
-{
- zfsvfs_t zfsvfs;
- uint64_t moid, doid, roid = 0;
- uint64_t version = ZPL_VERSION;
- int error;
- znode_t *rootzp = NULL;
- vattr_t vattr;
-
- /*
- * First attempt to create master node.
- */
- /*
- * In an empty objset, there are no blocks to read and thus
- * there can be no i/o errors (which we assert below).
- */
- moid = MASTER_NODE_OBJ;
- error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
- DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
-
- /*
- * Set starting attributes.
- */
-
- error = zap_update(os, moid, ZPL_VERSION_OBJ, 8, 1, &version, tx);
- ASSERT(error == 0);
-
- /*
- * Create a delete queue.
- */
- doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
-
- error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx);
- ASSERT(error == 0);
-
- /*
- * Create root znode. Create minimal znode/vnode/zfsvfs
- * to allow zfs_mknode to work.
- */
- vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
- vattr.va_type = VDIR;
- vattr.va_mode = S_IFDIR|0755;
- vattr.va_uid = UID_ROOT;
- vattr.va_gid = GID_WHEEL;
-
- rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
- zfs_znode_cache_constructor(rootzp, NULL, 0);
- rootzp->z_zfsvfs = &zfsvfs;
- rootzp->z_unlinked = 0;
- rootzp->z_atime_dirty = 0;
- rootzp->z_dbuf_held = 0;
-
- bzero(&zfsvfs, sizeof (zfsvfs_t));
-
- zfsvfs.z_os = os;
- zfsvfs.z_assign = TXG_NOWAIT;
- zfsvfs.z_parent = &zfsvfs;
-
- mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
- offsetof(znode_t, z_link_node));
-
- /*
- * With IS_ROOT_NODE, zfs_mknode() points rootzp at the new object
- * (z_phys, z_id). Passing NULL for zpp means the in-core znode it
- * allocates is freed again before it returns.
- */
- zfs_mknode(rootzp, &vattr, &roid, tx, cr, IS_ROOT_NODE, NULL, 0);
- ASSERT3U(rootzp->z_id, ==, roid);
- error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &roid, tx);
- ASSERT(error == 0);
-
- mutex_destroy(&zfsvfs.z_znodes_lock);
- kmem_cache_free(znode_cache, rootzp);
-}
-#endif /* _KERNEL */
-
-/*
- * Given an object number, return its parent object number and whether
- * or not the object is an extended attribute directory.
- *
- * IN: osp - objset containing the object
- * obj - object number to examine
- *
- * OUT: pobjp - parent object number (zp_parent)
- * is_xattrdir - non-zero if the object has ZFS_XATTR set and
- * is a directory
- *
- * RETURN: 0 if success
- * the dmu_bonus_hold() error if the bonus buffer cannot be held
- * EINVAL if the object's bonus buffer is not a znode
- */
-static int
-zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir)
-{
- dmu_buf_t *db;
- dmu_object_info_t doi;
- znode_phys_t *zp;
- int error;
-
- if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0)
- return (error);
-
- dmu_object_info_from_db(db, &doi);
- if (doi.doi_bonus_type != DMU_OT_ZNODE ||
- doi.doi_bonus_size < sizeof (znode_phys_t)) {
- dmu_buf_rele(db, FTAG);
- return (EINVAL);
- }
-
- zp = db->db_data;
- *pobjp = zp->zp_parent;
- *is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) &&
- S_ISDIR(zp->zp_mode);
- dmu_buf_rele(db, FTAG);
-
- return (0);
-}
-
-/*
- * Build the path of an object, relative to the root of its objset.
- *
- * IN: osp - objset containing the object
- * obj - object number to resolve
- * len - size of buf in bytes
- *
- * OUT: buf - NUL-terminated path on success
- *
- * RETURN: 0 if success
- * error from zfs_obj_to_pobj() or zap_value_search() otherwise
- *
- * The path is assembled backwards from the end of buf by walking parent
- * pointers until an object that is its own parent is reached, and is
- * then moved to the start of buf. Extended attribute directories appear
- * as the component "<xattrdir>".
- */
-int
-zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
-{
- char *path = buf + len - 1;
- int error;
-
- *path = '\0';
-
- for (;;) {
- uint64_t pobj;
- char component[MAXNAMELEN + 2];
- size_t complen;
- int is_xattrdir;
-
- if ((error = zfs_obj_to_pobj(osp, obj, &pobj,
- &is_xattrdir)) != 0)
- break;
-
- if (pobj == obj) {
- /* Reached the top: make sure the path starts with '/'. */
- if (path[0] != '/')
- *--path = '/';
- break;
- }
-
- component[0] = '/';
- if (is_xattrdir) {
- (void) sprintf(component + 1, "<xattrdir>");
- } else {
- error = zap_value_search(osp, pobj, obj, component + 1);
- if (error != 0)
- break;
- }
-
- complen = strlen(component);
- path -= complen;
- /*
- * NOTE(review): this assertion is the only check in this
- * function that the path still fits in buf.
- */
- ASSERT(path >= buf);
- bcopy(component, path, complen);
- obj = pobj;
- }
-
- if (error == 0)
- (void) memmove(buf, path, buf + len - path);
- return (error);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zil.c
deleted file mode 100644
index 69ee509..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ /dev/null
@@ -1,1607 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/dmu.h>
-#include <sys/zap.h>
-#include <sys/arc.h>
-#include <sys/stat.h>
-#include <sys/resource.h>
-#include <sys/zil.h>
-#include <sys/zil_impl.h>
-#include <sys/dsl_dataset.h>
-#include <sys/vdev.h>
-#include <sys/dmu_tx.h>
-
-/*
- * The zfs intent log (ZIL) saves transaction records of system calls
- * that change the file system in memory with enough information
- * to be able to replay them. These are stored in memory until
- * either the DMU transaction group (txg) commits them to the stable pool
- * and they can be discarded, or they are flushed to the stable log
- * (also in the pool) due to a fsync, O_DSYNC or other synchronous
- * requirement. In the event of a panic or power failure, those log
- * records (transactions) are replayed.
- *
- * There is one ZIL per file system. Its on-disk (pool) format consists
- * of 3 parts:
- *
- * - ZIL header
- * - ZIL blocks
- * - ZIL records
- *
- * A log record holds a system call transaction. Log blocks can
- * hold many log records and the blocks are chained together.
- * Each ZIL block contains a block pointer (blkptr_t) to the next
- * ZIL block in the chain. The ZIL header points to the first
- * block in the chain. Note there is not a fixed place in the pool
- * to hold blocks. They are dynamically allocated and freed as
- * needed from the blocks available. Figure X shows the ZIL structure:
- */
-
-/*
- * This global ZIL switch affects all pools
- *
- * It is exported both as the loader tunable and as the read-write
- * sysctl vfs.zfs.zil_disable.
- */
-int zil_disable = 0; /* disable intent logging */
-SYSCTL_DECL(_vfs_zfs);
-TUNABLE_INT("vfs.zfs.zil_disable", &zil_disable);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, zil_disable, CTLFLAG_RW, &zil_disable, 0,
- "Disable ZFS Intent Log (ZIL)");
-
-/*
- * Tunable parameter for debugging or performance analysis. Setting
- * zfs_nocacheflush will cause corruption on power loss if a volatile
- * out-of-order write cache is enabled.
- *
- * When set, zil_add_vdev() returns without recording any vdev to flush.
- * It is exported as vfs.zfs.cache_flush_disable.
- */
-boolean_t zfs_nocacheflush = B_FALSE;
-TUNABLE_INT("vfs.zfs.cache_flush_disable", &zfs_nocacheflush);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, cache_flush_disable, CTLFLAG_RDTUN,
- &zfs_nocacheflush, 0, "Disable cache flush");
-
-/* kmem cache from which log write buffers (lwb_t) are allocated. */
-static kmem_cache_t *zil_lwb_cache;
-
-/*
- * Ordering function for the DVA AVL tree (see zil_dva_tree_init()).
- * DVAs are ordered by vdev first and by offset second; returns -1, 0
- * or 1.
- */
-static int
-zil_dva_compare(const void *x1, const void *x2)
-{
- const dva_t *dva1 = x1;
- const dva_t *dva2 = x2;
-
- if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2))
- return (-1);
- if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2))
- return (1);
-
- if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2))
- return (-1);
- if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2))
- return (1);
-
- return (0);
-}
-
-/*
- * Create an empty AVL tree of zil_dva_node_t entries, ordered by
- * zil_dva_compare().
- */
-static void
-zil_dva_tree_init(avl_tree_t *t)
-{
- avl_create(t, zil_dva_compare, sizeof (zil_dva_node_t),
- offsetof(zil_dva_node_t, zn_node));
-}
-
-/*
- * Free every node remaining in a DVA tree and destroy the tree itself.
- */
-static void
-zil_dva_tree_fini(avl_tree_t *t)
-{
- zil_dva_node_t *zn;
- void *cookie = NULL;
-
- while ((zn = avl_destroy_nodes(t, &cookie)) != NULL)
- kmem_free(zn, sizeof (zil_dva_node_t));
-
- avl_destroy(t);
-}
-
-/*
- * Record a DVA in the tree.
- *
- * RETURN: 0 if the DVA was inserted
- * EEXIST if it was already present (the tree is unchanged)
- */
-static int
-zil_dva_tree_add(avl_tree_t *t, dva_t *dva)
-{
- zil_dva_node_t *zn;
- avl_index_t where;
-
- if (avl_find(t, dva, &where) != NULL)
- return (EEXIST);
-
- zn = kmem_alloc(sizeof (zil_dva_node_t), KM_SLEEP);
- zn->zn_dva = *dva;
- avl_insert(t, zn, where);
-
- return (0);
-}
-
-/*
- * Return zilog->zl_header cast to a plain (writable) zil_header_t
- * pointer. Judging by the name, this is presumably meant for callers
- * that are allowed to modify the header -- TODO confirm.
- */
-static zil_header_t *
-zil_header_in_syncing_context(zilog_t *zilog)
-{
- return ((zil_header_t *)zilog->zl_header);
-}
-
-/*
- * Seed the checksum words of the first block pointer of a new log chain:
- * two random GUID words, the id of the log's objset, and a starting
- * sequence number of 1.
- */
-static void
-zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
-{
- zio_cksum_t *zc = &bp->blk_cksum;
-
- zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL);
- zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL);
- zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os);
- zc->zc_word[ZIL_ZC_SEQ] = 1ULL;
-}
-
-/*
- * Read a log block, make sure it's valid, and byteswap it if necessary.
- *
- * IN: zilog - log the block belongs to
- * bp - block pointer of the log block to read
- *
- * OUT: abufpp - ARC buffer holding the block on success; NULL if
- * the block failed validation
- *
- * RETURN: 0 if success
- * the arc_read() error if the read fails
- * ESTALE if the trailer's next-block checksum is not bp's
- * checksum with the sequence word incremented
- * ENOENT if the trailer's next block pointer is a hole
- * EOVERFLOW if zit_nused exceeds the block's data area
- */
-static int
-zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp)
-{
- blkptr_t blk = *bp;
- zbookmark_t zb;
- uint32_t aflags = ARC_WAIT;
- int error;
-
- zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
-
- *abufpp = NULL;
-
- error = arc_read(NULL, zilog->zl_spa, &blk, byteswap_uint64_array,
- arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, &aflags, &zb);
-
- if (error == 0) {
- char *data = (*abufpp)->b_data;
- uint64_t blksz = BP_GET_LSIZE(bp);
- /* The trailer occupies the last bytes of the block. */
- zil_trailer_t *ztp = (zil_trailer_t *)(data + blksz) - 1;
- zio_cksum_t cksum = bp->blk_cksum;
-
- /*
- * Sequence numbers should be... sequential. The checksum
- * verifier for the next block should be bp's checksum plus 1.
- */
- cksum.zc_word[ZIL_ZC_SEQ]++;
-
- if (bcmp(&cksum, &ztp->zit_next_blk.blk_cksum, sizeof (cksum)))
- error = ESTALE;
- else if (BP_IS_HOLE(&ztp->zit_next_blk))
- error = ENOENT;
- else if (ztp->zit_nused > (blksz - sizeof (zil_trailer_t)))
- error = EOVERFLOW;
-
- if (error) {
- /* Validation failed: drop the buffer we just read. */
- VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1);
- *abufpp = NULL;
- }
- }
-
- dprintf("error %d on %llu:%llu\n", error, zb.zb_objset, zb.zb_blkid);
-
- return (error);
-}
-
-/*
- * Parse the intent log, and call parse_func for each valid record within.
- * Return the highest sequence number.
- *
- * IN: zilog - log to walk
- * parse_blk_func - called for every block pointer in the chain,
- * including the one whose read fails; may be NULL
- * parse_lr_func - called for every record of every valid block;
- * may be NULL
- * arg, txg - passed through unchanged to both callbacks
- *
- * zilog->zl_dva_tree is created before the walk and destroyed after it,
- * so the callbacks can use it while the walk is in progress.
- */
-uint64_t
-zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg)
-{
- const zil_header_t *zh = zilog->zl_header;
- uint64_t claim_seq = zh->zh_claim_seq;
- uint64_t seq = 0;
- uint64_t max_seq = 0;
- blkptr_t blk = zh->zh_log;
- arc_buf_t *abuf;
- char *lrbuf, *lrp;
- zil_trailer_t *ztp;
- int reclen, error;
-
- if (BP_IS_HOLE(&blk))
- return (max_seq);
-
- /*
- * Starting at the block pointed to by zh_log we read the log chain.
- * For each block in the chain we strongly check that block to
- * ensure its validity. We stop when an invalid block is found.
- * For each block pointer in the chain we call parse_blk_func().
- * For each record in each valid block we call parse_lr_func().
- * If the log has been claimed, stop if we encounter a sequence
- * number greater than the highest claimed sequence number.
- */
- zil_dva_tree_init(&zilog->zl_dva_tree);
- for (;;) {
- seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
-
- if (claim_seq != 0 && seq > claim_seq)
- break;
-
- ASSERT(max_seq < seq);
- max_seq = seq;
-
- error = zil_read_log_block(zilog, &blk, &abuf);
-
- if (parse_blk_func != NULL)
- parse_blk_func(zilog, &blk, arg, txg);
-
- if (error)
- break;
-
- /* Pick up the next block pointer from this block's trailer. */
- lrbuf = abuf->b_data;
- ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1;
- blk = ztp->zit_next_blk;
-
- if (parse_lr_func == NULL) {
- VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
- continue;
- }
-
- /*
- * NOTE(review): the loop advances by lrc_reclen, which is only
- * checked by the assertion below.
- */
- for (lrp = lrbuf; lrp < lrbuf + ztp->zit_nused; lrp += reclen) {
- lr_t *lr = (lr_t *)lrp;
- reclen = lr->lrc_reclen;
- ASSERT3U(reclen, >=, sizeof (lr_t));
- parse_lr_func(zilog, lr, arg, txg);
- }
- VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
- }
- zil_dva_tree_fini(&zilog->zl_dva_tree);
-
- return (max_seq);
-}
-
-/*
- * Block callback used by zil_claim(): claim bp in first_txg unless it
- * was born before first_txg or its DVA is already in zl_dva_tree.
- * The tx argument is unused.
- */
-/* ARGSUSED */
-static void
-zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
-{
- spa_t *spa = zilog->zl_spa;
- int err;
-
- /*
- * Claim log block if not already committed and not already claimed.
- */
- if (bp->blk_birth >= first_txg &&
- zil_dva_tree_add(&zilog->zl_dva_tree, BP_IDENTITY(bp)) == 0) {
- err = zio_wait(zio_claim(NULL, spa, first_txg, bp, NULL, NULL));
- ASSERT(err == 0);
- }
-}
-
-/*
- * Record callback used by zil_claim(): for TX_WRITE records, claim the
- * block referenced by the record's block pointer. Other record types
- * are ignored.
- */
-static void
-zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
-{
- if (lrc->lrc_txtype == TX_WRITE) {
- lr_write_t *lr = (lr_write_t *)lrc;
- zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg);
- }
-}
-
-/*
- * Block callback used by zil_destroy(): free the log block bp in the
- * txg of the transaction tx. The claim_txg argument is unused.
- */
-/* ARGSUSED */
-static void
-zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg)
-{
- zio_free_blk(zilog->zl_spa, bp, dmu_tx_get_txg(tx));
-}
-
-/*
- * Record callback used by zil_destroy(): free the data block referenced
- * by a TX_WRITE record, but only when the log was claimed (claim_txg is
- * non-zero), the block was born at or after claim_txg, and its DVA has
- * not already been seen during this walk.
- */
-static void
-zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
-{
- /*
- * If we previously claimed it, we need to free it.
- */
- if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE) {
- lr_write_t *lr = (lr_write_t *)lrc;
- blkptr_t *bp = &lr->lr_blkptr;
- if (bp->blk_birth >= claim_txg &&
- !zil_dva_tree_add(&zilog->zl_dva_tree, BP_IDENTITY(bp))) {
- (void) arc_free(NULL, zilog->zl_spa,
- dmu_tx_get_txg(tx), bp, NULL, NULL, ARC_WAIT);
- }
- }
-}
-
-/*
- * Create an on-disk intent log.
- *
- * If the header has no log block yet, one of ZIL_MIN_BLKSZ bytes is
- * allocated in a new transaction and its checksum seeded with
- * zil_init_log_chain(). If that succeeded (or a block already existed),
- * a log write buffer for the block is appended to zl_lwb_list.
- */
-static void
-zil_create(zilog_t *zilog)
-{
- const zil_header_t *zh = zilog->zl_header;
- lwb_t *lwb;
- uint64_t txg = 0;
- dmu_tx_t *tx = NULL;
- blkptr_t blk;
- int error = 0;
-
- /*
- * Wait for any previous destroy to complete.
- */
- txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
-
- ASSERT(zh->zh_claim_txg == 0);
- ASSERT(zh->zh_replay_seq == 0);
-
- blk = zh->zh_log;
-
- /*
- * If we don't already have an initial log block, allocate one now.
- */
- if (BP_IS_HOLE(&blk)) {
- tx = dmu_tx_create(zilog->zl_os);
- (void) dmu_tx_assign(tx, TXG_WAIT);
- dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
- txg = dmu_tx_get_txg(tx);
-
- error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk,
- NULL, txg);
-
- if (error == 0)
- zil_init_log_chain(zilog, &blk);
- }
-
- /*
- * Allocate a log write buffer (lwb) for the first log block.
- */
- if (error == 0) {
- lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
- lwb->lwb_zilog = zilog;
- lwb->lwb_blk = blk;
- lwb->lwb_nused = 0;
- lwb->lwb_sz = BP_GET_LSIZE(&lwb->lwb_blk);
- lwb->lwb_buf = zio_buf_alloc(lwb->lwb_sz);
- lwb->lwb_max_txg = txg;
- lwb->lwb_zio = NULL;
-
- mutex_enter(&zilog->zl_lock);
- list_insert_tail(&zilog->zl_lwb_list, lwb);
- mutex_exit(&zilog->zl_lock);
- }
-
- /*
- * If we just allocated the first log block, commit our transaction
- * and wait for zil_sync() to stuff the block pointer into zh_log.
- * (zh is part of the MOS, so we cannot modify it in open context.)
- */
- if (tx != NULL) {
- dmu_tx_commit(tx);
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- }
-
- ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0);
-}
-
-/*
- * In one tx, free all log blocks and clear the log header.
- * If keep_first is set, then we're replaying a log with no content.
- * We want to keep the first block, however, so that the first
- * synchronous transaction doesn't require a txg_wait_synced()
- * in zil_create(). We don't need to txg_wait_synced() here either
- * when keep_first is set, because both zil_create() and zil_destroy()
- * will wait for any in-progress destroys to complete.
- *
- * Nothing is done when the header has no log block.
- */
-void
-zil_destroy(zilog_t *zilog, boolean_t keep_first)
-{
- const zil_header_t *zh = zilog->zl_header;
- lwb_t *lwb;
- dmu_tx_t *tx;
- uint64_t txg;
-
- /*
- * Wait for any previous destroy to complete.
- */
- txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
-
- if (BP_IS_HOLE(&zh->zh_log))
- return;
-
- tx = dmu_tx_create(zilog->zl_os);
- (void) dmu_tx_assign(tx, TXG_WAIT);
- dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
- txg = dmu_tx_get_txg(tx);
-
- mutex_enter(&zilog->zl_lock);
-
- ASSERT3U(zilog->zl_destroy_txg, <, txg);
- zilog->zl_destroy_txg = txg;
- zilog->zl_keep_first = keep_first;
-
- if (!list_is_empty(&zilog->zl_lwb_list)) {
- /*
- * In-core log write buffers exist: free each one together with
- * the log block it describes.
- */
- ASSERT(zh->zh_claim_txg == 0);
- ASSERT(!keep_first);
- while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
- list_remove(&zilog->zl_lwb_list, lwb);
- if (lwb->lwb_buf != NULL)
- zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
- zio_free_blk(zilog->zl_spa, &lwb->lwb_blk, txg);
- kmem_cache_free(zil_lwb_cache, lwb);
- }
- } else {
- /* No in-core buffers: walk the on-disk chain and free it. */
- if (!keep_first) {
- (void) zil_parse(zilog, zil_free_log_block,
- zil_free_log_record, tx, zh->zh_claim_txg);
- }
- }
- mutex_exit(&zilog->zl_lock);
-
- dmu_tx_commit(tx);
-
- if (keep_first) /* no need to wait in this case */
- return;
-
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- ASSERT(BP_IS_HOLE(&zh->zh_log));
-}
-
-/*
- * Claim the intent log blocks of the objset named osname.
- *
- * IN: osname - name of the objset whose log is to be claimed
- * txarg - the dmu_tx_t of the claiming transaction, passed as
- * void *
- *
- * RETURN: always 0; failure to open the objset is only reported with a
- * warning.
- */
-int
-zil_claim(char *osname, void *txarg)
-{
- dmu_tx_t *tx = txarg;
- uint64_t first_txg = dmu_tx_get_txg(tx);
- zilog_t *zilog;
- zil_header_t *zh;
- objset_t *os;
- int error;
-
- error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_STANDARD, &os);
- if (error) {
- cmn_err(CE_WARN, "can't process intent log for %s", osname);
- return (0);
- }
-
- zilog = dmu_objset_zil(os);
- zh = zil_header_in_syncing_context(zilog);
-
- /*
- * Claim all log blocks if we haven't already done so, and remember
- * the highest claimed sequence number. This ensures that if we can
- * read only part of the log now (e.g. due to a missing device),
- * but we can read the entire log later, we will not try to replay
- * or destroy beyond the last block we successfully claimed.
- */
- ASSERT3U(zh->zh_claim_txg, <=, first_txg);
- if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
- zh->zh_claim_txg = first_txg;
- zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block,
- zil_claim_log_record, tx, first_txg);
- dsl_dataset_dirty(dmu_objset_ds(os), tx);
- }
-
- ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
- dmu_objset_close(os);
- return (0);
-}
-
-/*
- * Remember that vdev needs its write cache flushed (see
- * zil_flush_vdevs()). Does nothing when zfs_nocacheflush is set.
- *
- * Vdev ids that fit in the zl_vdev_bmap bitmap are recorded there with
- * an atomic OR. Larger ids are kept in zl_vdev_list, sorted by id and
- * without duplicates, under zl_lock.
- */
-void
-zil_add_vdev(zilog_t *zilog, uint64_t vdev)
-{
- zil_vdev_t *zv, *new;
- uint64_t bmap_sz = sizeof (zilog->zl_vdev_bmap) << 3;
- uchar_t *cp;
-
- if (zfs_nocacheflush)
- return;
-
- if (vdev < bmap_sz) {
- cp = zilog->zl_vdev_bmap + (vdev / 8);
- atomic_or_8(cp, 1 << (vdev % 8));
- } else {
- /*
- * insert into ordered list
- */
- mutex_enter(&zilog->zl_lock);
- for (zv = list_head(&zilog->zl_vdev_list); zv != NULL;
- zv = list_next(&zilog->zl_vdev_list, zv)) {
- if (zv->vdev == vdev) {
- /* duplicate found - just return */
- mutex_exit(&zilog->zl_lock);
- return;
- }
- if (zv->vdev > vdev) {
- /* insert before this entry */
- new = kmem_alloc(sizeof (zil_vdev_t),
- KM_SLEEP);
- new->vdev = vdev;
- list_insert_before(&zilog->zl_vdev_list,
- zv, new);
- mutex_exit(&zilog->zl_lock);
- return;
- }
- }
- /* ran off end of list, insert at the end */
- ASSERT(zv == NULL);
- new = kmem_alloc(sizeof (zil_vdev_t), KM_SLEEP);
- new->vdev = vdev;
- list_insert_tail(&zilog->zl_vdev_list, new);
- mutex_exit(&zilog->zl_lock);
- }
-}
-
-/*
- * start an async flush of the write cache for this vdev
- *
- * *zio is the root zio that collects the flushes; it is created here on
- * first use when the caller passes in a NULL pointer.
- */
-void
-zil_flush_vdev(spa_t *spa, uint64_t vdev, zio_t **zio)
-{
- vdev_t *vd;
-
- if (*zio == NULL)
- *zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
-
- vd = vdev_lookup_top(spa, vdev);
- ASSERT(vd);
-
- (void) zio_nowait(zio_ioctl(*zio, spa, vd, DKIOCFLUSHWRITECACHE,
- NULL, NULL, ZIO_PRIORITY_NOW,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY));
-}
-
-/*
- * Flush the write cache of every vdev recorded by zil_add_vdev() and
- * forget them: bits in zl_vdev_bmap are cleared and the entries of
- * zl_vdev_list are freed. Waits for all issued flushes to finish.
- */
-void
-zil_flush_vdevs(zilog_t *zilog)
-{
- zil_vdev_t *zv;
- zio_t *zio = NULL;
- spa_t *spa = zilog->zl_spa;
- uint64_t vdev;
- uint8_t b;
- int i, j;
-
- ASSERT(zilog->zl_writer);
-
- /* First the vdevs recorded in the bitmap, one byte at a time. */
- for (i = 0; i < sizeof (zilog->zl_vdev_bmap); i++) {
- b = zilog->zl_vdev_bmap[i];
- if (b == 0)
- continue;
- for (j = 0; j < 8; j++) {
- if (b & (1 << j)) {
- vdev = (i << 3) + j;
- zil_flush_vdev(spa, vdev, &zio);
- }
- }
- zilog->zl_vdev_bmap[i] = 0;
- }
-
- /* Then the vdevs kept in the overflow list. */
- while ((zv = list_head(&zilog->zl_vdev_list)) != NULL) {
- zil_flush_vdev(spa, zv->vdev, &zio);
- list_remove(&zilog->zl_vdev_list, zv);
- kmem_free(zv, sizeof (zil_vdev_t));
- }
- /*
- * Wait for all the flushes to complete. Not all devices actually
- * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails.
- */
- if (zio)
- (void) zio_wait(zio);
-}
-
-/*
- * Function called when a log block write completes
- *
- * Releases the txg held by the lwb, frees the lwb's data buffer and,
- * if the write failed, sets zl_log_error on the log.
- */
-static void
-zil_lwb_write_done(zio_t *zio)
-{
- lwb_t *lwb = zio->io_private;
- zilog_t *zilog = lwb->lwb_zilog;
-
- /*
- * Now that we've written this log block, we have a stable pointer
- * to the next block in the chain, so it's OK to let the txg in
- * which we allocated the next block sync.
- */
- txg_rele_to_sync(&lwb->lwb_txgh);
-
- zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
- mutex_enter(&zilog->zl_lock);
- lwb->lwb_buf = NULL;
- if (zio->io_error) {
- zilog->zl_log_error = B_TRUE;
- mutex_exit(&zilog->zl_lock);
- return;
- }
- mutex_exit(&zilog->zl_lock);
-}
-
-/*
- * Initialize the io for a log block.
- *
- * Note, we should not initialize the IO until we are about
- * to use it, since zio_rewrite() does a spa_config_enter().
- */
-static void
-zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
-{
- zbookmark_t zb;
-
- zb.zb_objset = lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET];
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
-
- if (zilog->zl_root_zio == NULL) {
- zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL,
- ZIO_FLAG_CANFAIL);
- }
- if (lwb->lwb_zio == NULL) {
- lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
- ZIO_CHECKSUM_ZILOG, 0, &lwb->lwb_blk, lwb->lwb_buf,
- lwb->lwb_sz, zil_lwb_write_done, lwb,
- ZIO_PRIORITY_LOG_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
- }
-}
-
-/*
- * Start a log block write and advance to the next log block.
- * Calls are serialized.
- */
-static lwb_t *
-zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
-{
- lwb_t *nlwb;
- zil_trailer_t *ztp = (zil_trailer_t *)(lwb->lwb_buf + lwb->lwb_sz) - 1;
- spa_t *spa = zilog->zl_spa;
- blkptr_t *bp = &ztp->zit_next_blk;
- uint64_t txg;
- uint64_t zil_blksz;
- int error;
-
- ASSERT(lwb->lwb_nused <= ZIL_BLK_DATA_SZ(lwb));
-
- /*
- * Allocate the next block and save its address in this block
- * before writing it in order to establish the log chain.
- * Note that if the allocation of nlwb synced before we wrote
- * the block that points at it (lwb), we'd leak it if we crashed.
- * Therefore, we don't do txg_rele_to_sync() until zil_lwb_write_done().
- */
- txg = txg_hold_open(zilog->zl_dmu_pool, &lwb->lwb_txgh);
- txg_rele_to_quiesce(&lwb->lwb_txgh);
-
- /*
- * Pick a ZIL blocksize. We request a size that is the
- * maximum of the previous used size, the current used size and
- * the amount waiting in the queue.
- */
- zil_blksz = MAX(zilog->zl_prev_used,
- zilog->zl_cur_used + sizeof (*ztp));
- zil_blksz = MAX(zil_blksz, zilog->zl_itx_list_sz + sizeof (*ztp));
- zil_blksz = P2ROUNDUP_TYPED(zil_blksz, ZIL_MIN_BLKSZ, uint64_t);
- if (zil_blksz > ZIL_MAX_BLKSZ)
- zil_blksz = ZIL_MAX_BLKSZ;
-
- BP_ZERO(bp);
- /* pass the old blkptr in order to spread log blocks across devs */
- error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg);
- if (error) {
- dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg);
-
- /*
- * We dirty the dataset to ensure that zil_sync() will
- * be called to remove this lwb from our zl_lwb_list.
- * Failing to do so, may leave an lwb with a NULL lwb_buf
- * hanging around on the zl_lwb_list.
- */
- dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
- dmu_tx_commit(tx);
-
- /*
- * Since we've just experienced an allocation failure so we
- * terminate the current lwb and send it on its way.
- */
- ztp->zit_pad = 0;
- ztp->zit_nused = lwb->lwb_nused;
- ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum;
- zio_nowait(lwb->lwb_zio);
-
- /*
- * By returning NULL the caller will call tx_wait_synced()
- */
- return (NULL);
- }
-
- ASSERT3U(bp->blk_birth, ==, txg);
- ztp->zit_pad = 0;
- ztp->zit_nused = lwb->lwb_nused;
- ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum;
- bp->blk_cksum = lwb->lwb_blk.blk_cksum;
- bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
-
- /*
- * Allocate a new log write buffer (lwb).
- */
- nlwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
-
- nlwb->lwb_zilog = zilog;
- nlwb->lwb_blk = *bp;
- nlwb->lwb_nused = 0;
- nlwb->lwb_sz = BP_GET_LSIZE(&nlwb->lwb_blk);
- nlwb->lwb_buf = zio_buf_alloc(nlwb->lwb_sz);
- nlwb->lwb_max_txg = txg;
- nlwb->lwb_zio = NULL;
-
- /*
- * Put new lwb at the end of the log chain
- */
- mutex_enter(&zilog->zl_lock);
- list_insert_tail(&zilog->zl_lwb_list, nlwb);
- mutex_exit(&zilog->zl_lock);
-
- /* Record the vdev for later flushing */
- zil_add_vdev(zilog, DVA_GET_VDEV(BP_IDENTITY(&(lwb->lwb_blk))));
-
- /*
- * kick off the write for the old log block
- */
- dprintf_bp(&lwb->lwb_blk, "lwb %p txg %llu: ", lwb, txg);
- ASSERT(lwb->lwb_zio);
- zio_nowait(lwb->lwb_zio);
-
- return (nlwb);
-}
-
-static lwb_t *
-zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
-{
- lr_t *lrc = &itx->itx_lr; /* common log record */
- lr_write_t *lr = (lr_write_t *)lrc;
- uint64_t txg = lrc->lrc_txg;
- uint64_t reclen = lrc->lrc_reclen;
- uint64_t dlen;
-
- if (lwb == NULL)
- return (NULL);
- ASSERT(lwb->lwb_buf != NULL);
-
- if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
- dlen = P2ROUNDUP_TYPED(
- lr->lr_length, sizeof (uint64_t), uint64_t);
- else
- dlen = 0;
-
- zilog->zl_cur_used += (reclen + dlen);
-
- zil_lwb_write_init(zilog, lwb);
-
- /*
- * If this record won't fit in the current log block, start a new one.
- */
- if (lwb->lwb_nused + reclen + dlen > ZIL_BLK_DATA_SZ(lwb)) {
- lwb = zil_lwb_write_start(zilog, lwb);
- if (lwb == NULL)
- return (NULL);
- zil_lwb_write_init(zilog, lwb);
- ASSERT(lwb->lwb_nused == 0);
- if (reclen + dlen > ZIL_BLK_DATA_SZ(lwb)) {
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- return (lwb);
- }
- }
-
- /*
- * Update the lrc_seq, to be log record sequence number. See zil.h
- * Then copy the record to the log buffer.
- */
- lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
- bcopy(lrc, lwb->lwb_buf + lwb->lwb_nused, reclen);
-
- /*
- * If it's a write, fetch the data or get its blkptr as appropriate.
- */
- if (lrc->lrc_txtype == TX_WRITE) {
- if (txg > spa_freeze_txg(zilog->zl_spa))
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- if (itx->itx_wr_state != WR_COPIED) {
- char *dbuf;
- int error;
-
- /* alignment is guaranteed */
- lr = (lr_write_t *)(lwb->lwb_buf + lwb->lwb_nused);
- if (dlen) {
- ASSERT(itx->itx_wr_state == WR_NEED_COPY);
- dbuf = lwb->lwb_buf + lwb->lwb_nused + reclen;
- lr->lr_common.lrc_reclen += dlen;
- } else {
- ASSERT(itx->itx_wr_state == WR_INDIRECT);
- dbuf = NULL;
- }
- error = zilog->zl_get_data(
- itx->itx_private, lr, dbuf, lwb->lwb_zio);
- if (error) {
- ASSERT(error == ENOENT || error == EEXIST ||
- error == EALREADY);
- return (lwb);
- }
- }
- }
-
- lwb->lwb_nused += reclen + dlen;
- lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg);
- ASSERT3U(lwb->lwb_nused, <=, ZIL_BLK_DATA_SZ(lwb));
- ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0);
-
- return (lwb);
-}
-
-itx_t *
-zil_itx_create(int txtype, size_t lrsize)
-{
- itx_t *itx;
-
- lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t);
-
- itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP);
- itx->itx_lr.lrc_txtype = txtype;
- itx->itx_lr.lrc_reclen = lrsize;
- itx->itx_lr.lrc_seq = 0; /* defensive */
-
- return (itx);
-}
-
-uint64_t
-zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
-{
- uint64_t seq;
-
- ASSERT(itx->itx_lr.lrc_seq == 0);
-
- mutex_enter(&zilog->zl_lock);
- list_insert_tail(&zilog->zl_itx_list, itx);
- zilog->zl_itx_list_sz += itx->itx_lr.lrc_reclen;
- itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx);
- itx->itx_lr.lrc_seq = seq = ++zilog->zl_itx_seq;
- mutex_exit(&zilog->zl_lock);
-
- return (seq);
-}
-
-/*
- * Free up all in-memory intent log transactions that have now been synced.
- */
-static void
-zil_itx_clean(zilog_t *zilog)
-{
- uint64_t synced_txg = spa_last_synced_txg(zilog->zl_spa);
- uint64_t freeze_txg = spa_freeze_txg(zilog->zl_spa);
- list_t clean_list;
- itx_t *itx;
-
- list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node));
-
- mutex_enter(&zilog->zl_lock);
- /* wait for a log writer to finish walking list */
- while (zilog->zl_writer) {
- cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
- }
-
- /*
- * Move the sync'd log transactions to a separate list so we can call
- * kmem_free without holding the zl_lock.
- *
- * There is no need to set zl_writer as we don't drop zl_lock here
- */
- while ((itx = list_head(&zilog->zl_itx_list)) != NULL &&
- itx->itx_lr.lrc_txg <= MIN(synced_txg, freeze_txg)) {
- list_remove(&zilog->zl_itx_list, itx);
- zilog->zl_itx_list_sz -= itx->itx_lr.lrc_reclen;
- list_insert_tail(&clean_list, itx);
- }
- cv_broadcast(&zilog->zl_cv_writer);
- mutex_exit(&zilog->zl_lock);
-
- /* destroy sync'd log transactions */
- while ((itx = list_head(&clean_list)) != NULL) {
- list_remove(&clean_list, itx);
- kmem_free(itx, offsetof(itx_t, itx_lr)
- + itx->itx_lr.lrc_reclen);
- }
- list_destroy(&clean_list);
-}
-
-/*
- * If there are any in-memory intent log transactions which have now been
- * synced then start up a taskq to free them.
- */
-void
-zil_clean(zilog_t *zilog)
-{
- itx_t *itx;
-
- mutex_enter(&zilog->zl_lock);
- itx = list_head(&zilog->zl_itx_list);
- if ((itx != NULL) &&
- (itx->itx_lr.lrc_txg <= spa_last_synced_txg(zilog->zl_spa))) {
- (void) taskq_dispatch(zilog->zl_clean_taskq,
- (void (*)(void *))zil_itx_clean, zilog, TQ_NOSLEEP);
- }
- mutex_exit(&zilog->zl_lock);
-}
-
-void
-zil_commit_writer(zilog_t *zilog, uint64_t seq, uint64_t foid)
-{
- uint64_t txg;
- uint64_t reclen;
- uint64_t commit_seq = 0;
- itx_t *itx, *itx_next = (itx_t *)-1;
- lwb_t *lwb;
- spa_t *spa;
-
- zilog->zl_writer = B_TRUE;
- zilog->zl_root_zio = NULL;
- spa = zilog->zl_spa;
-
- if (zilog->zl_suspend) {
- lwb = NULL;
- } else {
- lwb = list_tail(&zilog->zl_lwb_list);
- if (lwb == NULL) {
- /*
- * Return if there's nothing to flush before we
- * dirty the fs by calling zil_create()
- */
- if (list_is_empty(&zilog->zl_itx_list)) {
- zilog->zl_writer = B_FALSE;
- return;
- }
- mutex_exit(&zilog->zl_lock);
- zil_create(zilog);
- mutex_enter(&zilog->zl_lock);
- lwb = list_tail(&zilog->zl_lwb_list);
- }
- }
-
- /* Loop through in-memory log transactions filling log blocks. */
- DTRACE_PROBE1(zil__cw1, zilog_t *, zilog);
- for (;;) {
- /*
- * Find the next itx to push:
- * Push all transactions related to specified foid and all
- * other transactions except TX_WRITE, TX_TRUNCATE,
- * TX_SETATTR and TX_ACL for all other files.
- */
- if (itx_next != (itx_t *)-1)
- itx = itx_next;
- else
- itx = list_head(&zilog->zl_itx_list);
- for (; itx != NULL; itx = list_next(&zilog->zl_itx_list, itx)) {
- if (foid == 0) /* push all foids? */
- break;
- if (itx->itx_sync) /* push all O_[D]SYNC */
- break;
- switch (itx->itx_lr.lrc_txtype) {
- case TX_SETATTR:
- case TX_WRITE:
- case TX_TRUNCATE:
- case TX_ACL:
- /* lr_foid is same offset for these records */
- if (((lr_write_t *)&itx->itx_lr)->lr_foid
- != foid) {
- continue; /* skip this record */
- }
- }
- break;
- }
- if (itx == NULL)
- break;
-
- reclen = itx->itx_lr.lrc_reclen;
- if ((itx->itx_lr.lrc_seq > seq) &&
- ((lwb == NULL) || (lwb->lwb_nused == 0) ||
- (lwb->lwb_nused + reclen > ZIL_BLK_DATA_SZ(lwb)))) {
- break;
- }
-
- /*
- * Save the next pointer. Even though we soon drop
- * zl_lock all threads that may change the list
- * (another writer or zil_itx_clean) can't do so until
- * they have zl_writer.
- */
- itx_next = list_next(&zilog->zl_itx_list, itx);
- list_remove(&zilog->zl_itx_list, itx);
- mutex_exit(&zilog->zl_lock);
- txg = itx->itx_lr.lrc_txg;
- ASSERT(txg);
-
- if (txg > spa_last_synced_txg(spa) ||
- txg > spa_freeze_txg(spa))
- lwb = zil_lwb_commit(zilog, itx, lwb);
- kmem_free(itx, offsetof(itx_t, itx_lr)
- + itx->itx_lr.lrc_reclen);
- mutex_enter(&zilog->zl_lock);
- zilog->zl_itx_list_sz -= reclen;
- }
- DTRACE_PROBE1(zil__cw2, zilog_t *, zilog);
- /* determine commit sequence number */
- itx = list_head(&zilog->zl_itx_list);
- if (itx)
- commit_seq = itx->itx_lr.lrc_seq;
- else
- commit_seq = zilog->zl_itx_seq;
- mutex_exit(&zilog->zl_lock);
-
- /* write the last block out */
- if (lwb != NULL && lwb->lwb_zio != NULL)
- lwb = zil_lwb_write_start(zilog, lwb);
-
- zilog->zl_prev_used = zilog->zl_cur_used;
- zilog->zl_cur_used = 0;
-
- /*
- * Wait if necessary for the log blocks to be on stable storage.
- */
- if (zilog->zl_root_zio) {
- DTRACE_PROBE1(zil__cw3, zilog_t *, zilog);
- (void) zio_wait(zilog->zl_root_zio);
- DTRACE_PROBE1(zil__cw4, zilog_t *, zilog);
- if (!zfs_nocacheflush)
- zil_flush_vdevs(zilog);
- }
-
- if (zilog->zl_log_error || lwb == NULL) {
- zilog->zl_log_error = 0;
- txg_wait_synced(zilog->zl_dmu_pool, 0);
- }
-
- mutex_enter(&zilog->zl_lock);
- zilog->zl_writer = B_FALSE;
-
- ASSERT3U(commit_seq, >=, zilog->zl_commit_seq);
- zilog->zl_commit_seq = commit_seq;
-}
-
-/*
- * Push zfs transactions to stable storage up to the supplied sequence number.
- * If foid is 0 push out all transactions, otherwise push only those
- * for that file or might have been used to create that file.
- */
-void
-zil_commit(zilog_t *zilog, uint64_t seq, uint64_t foid)
-{
- if (zilog == NULL || seq == 0)
- return;
-
- mutex_enter(&zilog->zl_lock);
-
- seq = MIN(seq, zilog->zl_itx_seq); /* cap seq at largest itx seq */
-
- while (zilog->zl_writer) {
- cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
- if (seq < zilog->zl_commit_seq) {
- mutex_exit(&zilog->zl_lock);
- return;
- }
- }
- zil_commit_writer(zilog, seq, foid); /* drops zl_lock */
- /* wake up others waiting on the commit */
- cv_broadcast(&zilog->zl_cv_writer);
- mutex_exit(&zilog->zl_lock);
-}
-
-/*
- * Called in syncing context to free committed log blocks and update log header.
- */
-void
-zil_sync(zilog_t *zilog, dmu_tx_t *tx)
-{
- zil_header_t *zh = zil_header_in_syncing_context(zilog);
- uint64_t txg = dmu_tx_get_txg(tx);
- spa_t *spa = zilog->zl_spa;
- lwb_t *lwb;
-
- mutex_enter(&zilog->zl_lock);
-
- ASSERT(zilog->zl_stop_sync == 0);
-
- zh->zh_replay_seq = zilog->zl_replay_seq[txg & TXG_MASK];
-
- if (zilog->zl_destroy_txg == txg) {
- blkptr_t blk = zh->zh_log;
-
- ASSERT(list_head(&zilog->zl_lwb_list) == NULL);
- ASSERT(spa_sync_pass(spa) == 1);
-
- bzero(zh, sizeof (zil_header_t));
- bzero(zilog->zl_replay_seq, sizeof (zilog->zl_replay_seq));
-
- if (zilog->zl_keep_first) {
- /*
- * If this block was part of log chain that couldn't
- * be claimed because a device was missing during
- * zil_claim(), but that device later returns,
- * then this block could erroneously appear valid.
- * To guard against this, assign a new GUID to the new
- * log chain so it doesn't matter what blk points to.
- */
- zil_init_log_chain(zilog, &blk);
- zh->zh_log = blk;
- }
- }
-
- for (;;) {
- lwb = list_head(&zilog->zl_lwb_list);
- if (lwb == NULL) {
- mutex_exit(&zilog->zl_lock);
- return;
- }
- zh->zh_log = lwb->lwb_blk;
- if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
- break;
- list_remove(&zilog->zl_lwb_list, lwb);
- zio_free_blk(spa, &lwb->lwb_blk, txg);
- kmem_cache_free(zil_lwb_cache, lwb);
-
- /*
- * If we don't have anything left in the lwb list then
- * we've had an allocation failure and we need to zero
- * out the zil_header blkptr so that we don't end
- * up freeing the same block twice.
- */
- if (list_head(&zilog->zl_lwb_list) == NULL)
- BP_ZERO(&zh->zh_log);
- }
- mutex_exit(&zilog->zl_lock);
-}
-
-void
-zil_init(void)
-{
- zil_lwb_cache = kmem_cache_create("zil_lwb_cache",
- sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0);
-}
-
-void
-zil_fini(void)
-{
- kmem_cache_destroy(zil_lwb_cache);
-}
-
-zilog_t *
-zil_alloc(objset_t *os, zil_header_t *zh_phys)
-{
- zilog_t *zilog;
-
- zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP);
-
- zilog->zl_header = zh_phys;
- zilog->zl_os = os;
- zilog->zl_spa = dmu_objset_spa(os);
- zilog->zl_dmu_pool = dmu_objset_pool(os);
- zilog->zl_destroy_txg = TXG_INITIAL - 1;
-
- mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL);
- cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL);
-
- list_create(&zilog->zl_itx_list, sizeof (itx_t),
- offsetof(itx_t, itx_node));
-
- list_create(&zilog->zl_lwb_list, sizeof (lwb_t),
- offsetof(lwb_t, lwb_node));
-
- list_create(&zilog->zl_vdev_list, sizeof (zil_vdev_t),
- offsetof(zil_vdev_t, vdev_seq_node));
-
- return (zilog);
-}
-
-void
-zil_free(zilog_t *zilog)
-{
- lwb_t *lwb;
- zil_vdev_t *zv;
-
- zilog->zl_stop_sync = 1;
-
- while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
- list_remove(&zilog->zl_lwb_list, lwb);
- if (lwb->lwb_buf != NULL)
- zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
- kmem_cache_free(zil_lwb_cache, lwb);
- }
- list_destroy(&zilog->zl_lwb_list);
-
- while ((zv = list_head(&zilog->zl_vdev_list)) != NULL) {
- list_remove(&zilog->zl_vdev_list, zv);
- kmem_free(zv, sizeof (zil_vdev_t));
- }
- list_destroy(&zilog->zl_vdev_list);
-
- ASSERT(list_head(&zilog->zl_itx_list) == NULL);
- list_destroy(&zilog->zl_itx_list);
- cv_destroy(&zilog->zl_cv_suspend);
- cv_destroy(&zilog->zl_cv_writer);
- mutex_destroy(&zilog->zl_lock);
-
- kmem_free(zilog, sizeof (zilog_t));
-}
-
-/*
- * return true if the initial log block is not valid
- */
-static int
-zil_empty(zilog_t *zilog)
-{
- const zil_header_t *zh = zilog->zl_header;
- arc_buf_t *abuf = NULL;
-
- if (BP_IS_HOLE(&zh->zh_log))
- return (1);
-
- if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
- return (1);
-
- VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
- return (0);
-}
-
-/*
- * Open an intent log.
- */
-zilog_t *
-zil_open(objset_t *os, zil_get_data_t *get_data)
-{
- zilog_t *zilog = dmu_objset_zil(os);
-
- zilog->zl_get_data = get_data;
- zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
- 2, 2, TASKQ_PREPOPULATE);
-
- return (zilog);
-}
-
-/*
- * Close an intent log.
- */
-void
-zil_close(zilog_t *zilog)
-{
- /*
- * If the log isn't already committed, mark the objset dirty
- * (so zil_sync() will be called) and wait for that txg to sync.
- */
- if (!zil_is_committed(zilog)) {
- uint64_t txg;
- dmu_tx_t *tx = dmu_tx_create(zilog->zl_os);
- (void) dmu_tx_assign(tx, TXG_WAIT);
- dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
- txg = dmu_tx_get_txg(tx);
- dmu_tx_commit(tx);
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- }
-
- taskq_destroy(zilog->zl_clean_taskq);
- zilog->zl_clean_taskq = NULL;
- zilog->zl_get_data = NULL;
-
- zil_itx_clean(zilog);
- ASSERT(list_head(&zilog->zl_itx_list) == NULL);
-}
-
-/*
- * Suspend an intent log. While in suspended mode, we still honor
- * synchronous semantics, but we rely on txg_wait_synced() to do it.
- * We suspend the log briefly when taking a snapshot so that the snapshot
- * contains all the data it's supposed to, and has an empty intent log.
- */
-int
-zil_suspend(zilog_t *zilog)
-{
- const zil_header_t *zh = zilog->zl_header;
-
- mutex_enter(&zilog->zl_lock);
- if (zh->zh_claim_txg != 0) { /* unplayed log */
- mutex_exit(&zilog->zl_lock);
- return (EBUSY);
- }
- if (zilog->zl_suspend++ != 0) {
- /*
- * Someone else already began a suspend.
- * Just wait for them to finish.
- */
- while (zilog->zl_suspending)
- cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
- ASSERT(BP_IS_HOLE(&zh->zh_log));
- mutex_exit(&zilog->zl_lock);
- return (0);
- }
- zilog->zl_suspending = B_TRUE;
- mutex_exit(&zilog->zl_lock);
-
- zil_commit(zilog, UINT64_MAX, 0);
-
- /*
- * Wait for any in-flight log writes to complete.
- */
- mutex_enter(&zilog->zl_lock);
- while (zilog->zl_writer)
- cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
- mutex_exit(&zilog->zl_lock);
-
- zil_destroy(zilog, B_FALSE);
-
- mutex_enter(&zilog->zl_lock);
- ASSERT(BP_IS_HOLE(&zh->zh_log));
- zilog->zl_suspending = B_FALSE;
- cv_broadcast(&zilog->zl_cv_suspend);
- mutex_exit(&zilog->zl_lock);
-
- return (0);
-}
-
-void
-zil_resume(zilog_t *zilog)
-{
- mutex_enter(&zilog->zl_lock);
- ASSERT(zilog->zl_suspend != 0);
- zilog->zl_suspend--;
- mutex_exit(&zilog->zl_lock);
-}
-
-typedef struct zil_replay_arg {
- objset_t *zr_os;
- zil_replay_func_t **zr_replay;
- void *zr_arg;
- uint64_t *zr_txgp;
- boolean_t zr_byteswap;
- char *zr_lrbuf;
-} zil_replay_arg_t;
-
-static void
-zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
-{
- zil_replay_arg_t *zr = zra;
- const zil_header_t *zh = zilog->zl_header;
- uint64_t reclen = lr->lrc_reclen;
- uint64_t txtype = lr->lrc_txtype;
- char *name;
- int pass, error, sunk;
-
- if (zilog->zl_stop_replay)
- return;
-
- if (lr->lrc_txg < claim_txg) /* already committed */
- return;
-
- if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */
- return;
-
- /*
- * Make a copy of the data so we can revise and extend it.
- */
- bcopy(lr, zr->zr_lrbuf, reclen);
-
- /*
- * The log block containing this lr may have been byteswapped
- * so that we can easily examine common fields like lrc_txtype.
- * However, the log is a mix of different data types, and only the
- * replay vectors know how to byteswap their records. Therefore, if
- * the lr was byteswapped, undo it before invoking the replay vector.
- */
- if (zr->zr_byteswap)
- byteswap_uint64_array(zr->zr_lrbuf, reclen);
-
- /*
- * If this is a TX_WRITE with a blkptr, suck in the data.
- */
- if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
- lr_write_t *lrw = (lr_write_t *)lr;
- blkptr_t *wbp = &lrw->lr_blkptr;
- uint64_t wlen = lrw->lr_length;
- char *wbuf = zr->zr_lrbuf + reclen;
-
- if (BP_IS_HOLE(wbp)) { /* compressed to a hole */
- bzero(wbuf, wlen);
- } else {
- /*
- * A subsequent write may have overwritten this block,
- * in which case wbp may have been been freed and
- * reallocated, and our read of wbp may fail with a
- * checksum error. We can safely ignore this because
- * the later write will provide the correct data.
- */
- zbookmark_t zb;
-
- zb.zb_objset = dmu_objset_id(zilog->zl_os);
- zb.zb_object = lrw->lr_foid;
- zb.zb_level = -1;
- zb.zb_blkid = lrw->lr_offset / BP_GET_LSIZE(wbp);
-
- (void) zio_wait(zio_read(NULL, zilog->zl_spa,
- wbp, wbuf, BP_GET_LSIZE(wbp), NULL, NULL,
- ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb));
- (void) memmove(wbuf, wbuf + lrw->lr_blkoff, wlen);
- }
- }
-
- /*
- * We must now do two things atomically: replay this log record,
- * and update the log header to reflect the fact that we did so.
- * We use the DMU's ability to assign into a specific txg to do this.
- */
- for (pass = 1, sunk = B_FALSE; /* CONSTANTCONDITION */; pass++) {
- uint64_t replay_txg;
- dmu_tx_t *replay_tx;
-
- replay_tx = dmu_tx_create(zr->zr_os);
- error = dmu_tx_assign(replay_tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(replay_tx);
- break;
- }
-
- replay_txg = dmu_tx_get_txg(replay_tx);
-
- if (txtype == 0 || txtype >= TX_MAX_TYPE) {
- error = EINVAL;
- } else {
- /*
- * On the first pass, arrange for the replay vector
- * to fail its dmu_tx_assign(). That's the only way
- * to ensure that those code paths remain well tested.
- */
- *zr->zr_txgp = replay_txg - (pass == 1);
- error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf,
- zr->zr_byteswap);
- *zr->zr_txgp = TXG_NOWAIT;
- }
-
- if (error == 0) {
- dsl_dataset_dirty(dmu_objset_ds(zr->zr_os), replay_tx);
- zilog->zl_replay_seq[replay_txg & TXG_MASK] =
- lr->lrc_seq;
- }
-
- dmu_tx_commit(replay_tx);
-
- if (!error)
- return;
-
- /*
- * The DMU's dnode layer doesn't see removes until the txg
- * commits, so a subsequent claim can spuriously fail with
- * EEXIST. So if we receive any error other than ERESTART
- * we try syncing out any removes then retrying the
- * transaction.
- */
- if (error != ERESTART && !sunk) {
- txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
- sunk = B_TRUE;
- continue; /* retry */
- }
-
- if (error != ERESTART)
- break;
-
- if (pass != 1)
- txg_wait_open(spa_get_dsl(zilog->zl_spa),
- replay_txg + 1);
-
- dprintf("pass %d, retrying\n", pass);
- }
-
- ASSERT(error && error != ERESTART);
- name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- dmu_objset_name(zr->zr_os, name);
- cmn_err(CE_WARN, "ZFS replay transaction error %d, "
- "dataset %s, seq 0x%llx, txtype %llu\n",
- error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)txtype);
- zilog->zl_stop_replay = 1;
- kmem_free(name, MAXNAMELEN);
-}
-
-/* ARGSUSED */
-static void
-zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
-{
- zilog->zl_replay_blks++;
-}
-
-/*
- * If this dataset has a non-empty intent log, replay it and destroy it.
- */
-void
-zil_replay(objset_t *os, void *arg, uint64_t *txgp,
- zil_replay_func_t *replay_func[TX_MAX_TYPE])
-{
- zilog_t *zilog = dmu_objset_zil(os);
- const zil_header_t *zh = zilog->zl_header;
- zil_replay_arg_t zr;
-
- if (zil_empty(zilog)) {
- zil_destroy(zilog, B_TRUE);
- return;
- }
- //printf("ZFS: Replaying ZIL on %s...\n", os->os->os_spa->spa_name);
-
- zr.zr_os = os;
- zr.zr_replay = replay_func;
- zr.zr_arg = arg;
- zr.zr_txgp = txgp;
- zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
- zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
-
- /*
- * Wait for in-progress removes to sync before starting replay.
- */
- txg_wait_synced(zilog->zl_dmu_pool, 0);
-
- zilog->zl_stop_replay = 0;
- zilog->zl_replay_time = LBOLT;
- ASSERT(zilog->zl_replay_blks == 0);
- (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
- zh->zh_claim_txg);
- kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE);
-
- zil_destroy(zilog, B_FALSE);
- //printf("ZFS: Replay of ZIL on %s finished.\n", os->os->os_spa->spa_name);
-}
-
-/*
- * Report whether all transactions are committed
- */
-int
-zil_is_committed(zilog_t *zilog)
-{
- lwb_t *lwb;
- int ret;
-
- mutex_enter(&zilog->zl_lock);
- while (zilog->zl_writer)
- cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
-
- /* recent unpushed intent log transactions? */
- if (!list_is_empty(&zilog->zl_itx_list)) {
- ret = B_FALSE;
- goto out;
- }
-
- /* intent log never used? */
- lwb = list_head(&zilog->zl_lwb_list);
- if (lwb == NULL) {
- ret = B_TRUE;
- goto out;
- }
-
- /*
- * more than 1 log buffer means zil_sync() hasn't yet freed
- * entries after a txg has committed
- */
- if (list_next(&zilog->zl_lwb_list, lwb)) {
- ret = B_FALSE;
- goto out;
- }
-
- ASSERT(zil_empty(zilog));
- ret = B_TRUE;
-out:
- cv_broadcast(&zilog->zl_cv_writer);
- mutex_exit(&zilog->zl_lock);
- return (ret);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zio.c
deleted file mode 100644
index b5dd35f..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ /dev/null
@@ -1,1861 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/fm/fs/zfs.h>
-#include <sys/spa.h>
-#include <sys/txg.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev_impl.h>
-#include <sys/zio_impl.h>
-#include <sys/zio_compress.h>
-#include <sys/zio_checksum.h>
-
-/*
- * ==========================================================================
- * I/O priority table
- * ==========================================================================
- */
-uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = {
- 0, /* ZIO_PRIORITY_NOW */
- 0, /* ZIO_PRIORITY_SYNC_READ */
- 0, /* ZIO_PRIORITY_SYNC_WRITE */
- 6, /* ZIO_PRIORITY_ASYNC_READ */
- 4, /* ZIO_PRIORITY_ASYNC_WRITE */
- 4, /* ZIO_PRIORITY_FREE */
- 0, /* ZIO_PRIORITY_CACHE_FILL */
- 0, /* ZIO_PRIORITY_LOG_WRITE */
- 10, /* ZIO_PRIORITY_RESILVER */
- 20, /* ZIO_PRIORITY_SCRUB */
-};
-
-/*
- * ==========================================================================
- * I/O type descriptions
- * ==========================================================================
- */
-char *zio_type_name[ZIO_TYPES] = {
- "null", "read", "write", "free", "claim", "ioctl" };
-
-/* At or above this size, force gang blocking - for testing */
-uint64_t zio_gang_bang = SPA_MAXBLOCKSIZE + 1;
-
-/* Force an allocation failure when non-zero */
-uint16_t zio_zil_fail_shift = 0;
-
-typedef struct zio_sync_pass {
- int zp_defer_free; /* defer frees after this pass */
- int zp_dontcompress; /* don't compress after this pass */
- int zp_rewrite; /* rewrite new bps after this pass */
-} zio_sync_pass_t;
-
-zio_sync_pass_t zio_sync_pass = {
- 1, /* zp_defer_free */
- 4, /* zp_dontcompress */
- 1, /* zp_rewrite */
-};
-
-/*
- * ==========================================================================
- * I/O kmem caches
- * ==========================================================================
- */
-kmem_cache_t *zio_cache;
-#ifdef ZIO_USE_UMA
-kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
-kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
-#endif
-
-#ifdef _KERNEL
-extern vmem_t *zio_alloc_arena;
-#endif
-
-void
-zio_init(void)
-{
-#ifdef ZIO_USE_UMA
- size_t c;
-#endif
-#if 0
- vmem_t *data_alloc_arena = NULL;
-
-#ifdef _KERNEL
- data_alloc_arena = zio_alloc_arena;
-#endif
-#endif
-
- zio_cache = kmem_cache_create("zio_cache", sizeof (zio_t), 0,
- NULL, NULL, NULL, NULL, NULL, 0);
-
-#ifdef ZIO_USE_UMA
- /*
- * For small buffers, we want a cache for each multiple of
- * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache
- * for each quarter-power of 2. For large buffers, we want
- * a cache for each multiple of PAGESIZE.
- */
- for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
- size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
- size_t p2 = size;
- size_t align = 0;
-
- while (p2 & (p2 - 1))
- p2 &= p2 - 1;
-
- if (size <= 4 * SPA_MINBLOCKSIZE) {
- align = SPA_MINBLOCKSIZE;
- } else if (P2PHASE(size, PAGESIZE) == 0) {
- align = PAGESIZE;
- } else if (P2PHASE(size, p2 >> 2) == 0) {
- align = p2 >> 2;
- }
-
- if (align != 0) {
- char name[36];
- (void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
- zio_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, NULL, KMC_NODEBUG);
-
- (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
- zio_data_buf_cache[c] = kmem_cache_create(name, size,
- align, NULL, NULL, NULL, NULL, data_alloc_arena,
- KMC_NODEBUG);
-
- dprintf("creating cache for size %5lx align %5lx\n",
- size, align);
- }
- }
-
- while (--c != 0) {
- ASSERT(zio_buf_cache[c] != NULL);
- if (zio_buf_cache[c - 1] == NULL)
- zio_buf_cache[c - 1] = zio_buf_cache[c];
-
- ASSERT(zio_data_buf_cache[c] != NULL);
- if (zio_data_buf_cache[c - 1] == NULL)
- zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
- }
-#endif
-
- zio_inject_init();
-}
-
-void
-zio_fini(void)
-{
-#ifdef ZIO_USE_UMA
- size_t c;
- kmem_cache_t *last_cache = NULL;
- kmem_cache_t *last_data_cache = NULL;
-
- for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
- if (zio_buf_cache[c] != last_cache) {
- last_cache = zio_buf_cache[c];
- kmem_cache_destroy(zio_buf_cache[c]);
- }
- zio_buf_cache[c] = NULL;
-
- if (zio_data_buf_cache[c] != last_data_cache) {
- last_data_cache = zio_data_buf_cache[c];
- kmem_cache_destroy(zio_data_buf_cache[c]);
- }
- zio_data_buf_cache[c] = NULL;
- }
-#endif
-
- kmem_cache_destroy(zio_cache);
-
- zio_inject_fini();
-}
-
-/*
- * ==========================================================================
- * Allocate and free I/O buffers
- * ==========================================================================
- */
-
-/*
- * Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a
- * crashdump if the kernel panics, so use it judiciously. Obviously, it's
- * useful to inspect ZFS metadata, but if possible, we should avoid keeping
- * excess / transient data in-core during a crashdump.
- */
-void *
-zio_buf_alloc(size_t size)
-{
-#ifdef ZIO_USE_UMA
- size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
-
- ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
-
- return (kmem_cache_alloc(zio_buf_cache[c], KM_SLEEP));
-#else
- return (kmem_alloc(size, KM_SLEEP));
-#endif
-}
-
-/*
- * Use zio_data_buf_alloc to allocate data. The data will not appear in a
- * crashdump if the kernel panics. This exists so that we will limit the amount
- * of ZFS data that shows up in a kernel crashdump. (Thus reducing the amount
- * of kernel heap dumped to disk when the kernel panics)
- */
-void *
-zio_data_buf_alloc(size_t size)
-{
-#ifdef ZIO_USE_UMA
- size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
-
- ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
-
- return (kmem_cache_alloc(zio_data_buf_cache[c], KM_SLEEP));
-#else
- return (kmem_alloc(size, KM_SLEEP));
-#endif
-}
-
-void
-zio_buf_free(void *buf, size_t size)
-{
-#ifdef ZIO_USE_UMA
- size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
-
- ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
-
- kmem_cache_free(zio_buf_cache[c], buf);
-#else
- kmem_free(buf, size);
-#endif
-}
-
-void
-zio_data_buf_free(void *buf, size_t size)
-{
-#ifdef ZIO_USE_UMA
- size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
-
- ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
-
- kmem_cache_free(zio_data_buf_cache[c], buf);
-#else
- kmem_free(buf, size);
-#endif
-}
-
-/*
- * ==========================================================================
- * Push and pop I/O transform buffers
- * ==========================================================================
- */
-static void
-zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize)
-{
- zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP);
-
- zt->zt_data = data;
- zt->zt_size = size;
- zt->zt_bufsize = bufsize;
-
- zt->zt_next = zio->io_transform_stack;
- zio->io_transform_stack = zt;
-
- zio->io_data = data;
- zio->io_size = size;
-}
-
-static void
-zio_pop_transform(zio_t *zio, void **data, uint64_t *size, uint64_t *bufsize)
-{
- zio_transform_t *zt = zio->io_transform_stack;
-
- *data = zt->zt_data;
- *size = zt->zt_size;
- *bufsize = zt->zt_bufsize;
-
- zio->io_transform_stack = zt->zt_next;
- kmem_free(zt, sizeof (zio_transform_t));
-
- if ((zt = zio->io_transform_stack) != NULL) {
- zio->io_data = zt->zt_data;
- zio->io_size = zt->zt_size;
- }
-}
-
-static void
-zio_clear_transform_stack(zio_t *zio)
-{
- void *data;
- uint64_t size, bufsize;
-
- ASSERT(zio->io_transform_stack != NULL);
-
- zio_pop_transform(zio, &data, &size, &bufsize);
- while (zio->io_transform_stack != NULL) {
- zio_buf_free(data, bufsize);
- zio_pop_transform(zio, &data, &size, &bufsize);
- }
-}
-
-/*
- * ==========================================================================
- * Create the various types of I/O (read, write, free)
- * ==========================================================================
- */
-static zio_t *
-zio_create(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, zio_done_func_t *done, void *private,
- zio_type_t type, int priority, int flags, uint8_t stage, uint32_t pipeline)
-{
- zio_t *zio;
-
- ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
- ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
-
- zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
- bzero(zio, sizeof (zio_t));
- zio->io_parent = pio;
- zio->io_spa = spa;
- zio->io_txg = txg;
- if (bp != NULL) {
- zio->io_bp = bp;
- zio->io_bp_copy = *bp;
- zio->io_bp_orig = *bp;
- }
- zio->io_done = done;
- zio->io_private = private;
- zio->io_type = type;
- zio->io_priority = priority;
- zio->io_stage = stage;
- zio->io_pipeline = pipeline;
- zio->io_async_stages = ZIO_ASYNC_PIPELINE_STAGES;
- zio->io_timestamp = lbolt64;
- zio->io_flags = flags;
- mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);
- zio_push_transform(zio, data, size, size);
-
- /*
- * Note on config lock:
- *
- * If CONFIG_HELD is set, then the caller already has the config
- * lock, so we don't need it for this io.
- *
- * We set CONFIG_GRABBED to indicate that we have grabbed the
- * config lock on behalf of this io, so it should be released
- * in zio_done.
- *
- * Unless CONFIG_HELD is set, we will grab the config lock for
- * any top-level (parent-less) io, *except* NULL top-level ios.
- * The NULL top-level ios rarely have any children, so we delay
- * grabbing the lock until the first child is added (but it is
- * still grabbed on behalf of the top-level i/o, so additional
- * children don't need to also grab it). This greatly reduces
- * contention on the config lock.
- */
- if (pio == NULL) {
- if (type != ZIO_TYPE_NULL &&
- !(flags & ZIO_FLAG_CONFIG_HELD)) {
- spa_config_enter(zio->io_spa, RW_READER, zio);
- zio->io_flags |= ZIO_FLAG_CONFIG_GRABBED;
- }
- zio->io_root = zio;
- } else {
- zio->io_root = pio->io_root;
- if (!(flags & ZIO_FLAG_NOBOOKMARK))
- zio->io_logical = pio->io_logical;
- mutex_enter(&pio->io_lock);
- if (pio->io_parent == NULL &&
- pio->io_type == ZIO_TYPE_NULL &&
- !(pio->io_flags & ZIO_FLAG_CONFIG_GRABBED) &&
- !(pio->io_flags & ZIO_FLAG_CONFIG_HELD)) {
- pio->io_flags |= ZIO_FLAG_CONFIG_GRABBED;
- spa_config_enter(zio->io_spa, RW_READER, pio);
- }
- if (stage < ZIO_STAGE_READY)
- pio->io_children_notready++;
- pio->io_children_notdone++;
- zio->io_sibling_next = pio->io_child;
- zio->io_sibling_prev = NULL;
- if (pio->io_child != NULL)
- pio->io_child->io_sibling_prev = zio;
- pio->io_child = zio;
- zio->io_ndvas = pio->io_ndvas;
- mutex_exit(&pio->io_lock);
- }
-
- return (zio);
-}
-
-zio_t *
-zio_null(zio_t *pio, spa_t *spa, zio_done_func_t *done, void *private,
- int flags)
-{
- zio_t *zio;
-
- zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
- ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, ZIO_STAGE_OPEN,
- ZIO_WAIT_FOR_CHILDREN_PIPELINE);
-
- return (zio);
-}
-
-zio_t *
-zio_root(spa_t *spa, zio_done_func_t *done, void *private, int flags)
-{
- return (zio_null(NULL, spa, done, private, flags));
-}
-
-zio_t *
-zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
- uint64_t size, zio_done_func_t *done, void *private,
- int priority, int flags, zbookmark_t *zb)
-{
- zio_t *zio;
-
- ASSERT3U(size, ==, BP_GET_LSIZE(bp));
-
- zio = zio_create(pio, spa, bp->blk_birth, bp, data, size, done, private,
- ZIO_TYPE_READ, priority, flags | ZIO_FLAG_USER,
- ZIO_STAGE_OPEN, ZIO_READ_PIPELINE);
- zio->io_bookmark = *zb;
-
- zio->io_logical = zio;
-
- /*
- * Work off our copy of the bp so the caller can free it.
- */
- zio->io_bp = &zio->io_bp_copy;
-
- if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
- uint64_t csize = BP_GET_PSIZE(bp);
- void *cbuf = zio_buf_alloc(csize);
-
- zio_push_transform(zio, cbuf, csize, csize);
- zio->io_pipeline |= 1U << ZIO_STAGE_READ_DECOMPRESS;
- }
-
- if (BP_IS_GANG(bp)) {
- uint64_t gsize = SPA_GANGBLOCKSIZE;
- void *gbuf = zio_buf_alloc(gsize);
-
- zio_push_transform(zio, gbuf, gsize, gsize);
- zio->io_pipeline |= 1U << ZIO_STAGE_READ_GANG_MEMBERS;
- }
-
- return (zio);
-}
-
-zio_t *
-zio_write(zio_t *pio, spa_t *spa, int checksum, int compress, int ncopies,
- uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
- int flags, zbookmark_t *zb)
-{
- zio_t *zio;
-
- ASSERT(checksum >= ZIO_CHECKSUM_OFF &&
- checksum < ZIO_CHECKSUM_FUNCTIONS);
-
- ASSERT(compress >= ZIO_COMPRESS_OFF &&
- compress < ZIO_COMPRESS_FUNCTIONS);
-
- zio = zio_create(pio, spa, txg, bp, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_USER,
- ZIO_STAGE_OPEN, ZIO_WRITE_PIPELINE);
-
- zio->io_ready = ready;
-
- zio->io_bookmark = *zb;
-
- zio->io_logical = zio;
-
- zio->io_checksum = checksum;
- zio->io_compress = compress;
- zio->io_ndvas = ncopies;
-
- if (compress != ZIO_COMPRESS_OFF)
- zio->io_async_stages |= 1U << ZIO_STAGE_WRITE_COMPRESS;
-
- if (bp->blk_birth != txg) {
- /* XXX the bp usually (always?) gets re-zeroed later */
- BP_ZERO(bp);
- BP_SET_LSIZE(bp, size);
- BP_SET_PSIZE(bp, size);
- } else {
- /* Make sure someone doesn't change their mind on overwrites */
- ASSERT(MIN(zio->io_ndvas + BP_IS_GANG(bp),
- spa_max_replication(spa)) == BP_GET_NDVAS(bp));
- }
-
- return (zio);
-}
-
-zio_t *
-zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
- uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *done, void *private, int priority, int flags,
- zbookmark_t *zb)
-{
- zio_t *zio;
-
- zio = zio_create(pio, spa, txg, bp, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_USER,
- ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
-
- zio->io_bookmark = *zb;
- zio->io_checksum = checksum;
- zio->io_compress = ZIO_COMPRESS_OFF;
-
- if (pio != NULL)
- ASSERT3U(zio->io_ndvas, <=, BP_GET_NDVAS(bp));
-
- return (zio);
-}
-
-static zio_t *
-zio_write_allocate(zio_t *pio, spa_t *spa, int checksum,
- uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
- zio_done_func_t *done, void *private, int priority, int flags)
-{
- zio_t *zio;
-
- BP_ZERO(bp);
- BP_SET_LSIZE(bp, size);
- BP_SET_PSIZE(bp, size);
- BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
-
- zio = zio_create(pio, spa, txg, bp, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags,
- ZIO_STAGE_OPEN, ZIO_WRITE_ALLOCATE_PIPELINE);
-
- zio->io_checksum = checksum;
- zio->io_compress = ZIO_COMPRESS_OFF;
-
- return (zio);
-}
-
-zio_t *
-zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private)
-{
- zio_t *zio;
-
- ASSERT(!BP_IS_HOLE(bp));
-
- if (txg == spa->spa_syncing_txg &&
- spa->spa_sync_pass > zio_sync_pass.zp_defer_free) {
- bplist_enqueue_deferred(&spa->spa_sync_bplist, bp);
- return (zio_null(pio, spa, NULL, NULL, 0));
- }
-
- zio = zio_create(pio, spa, txg, bp, NULL, 0, done, private,
- ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, ZIO_FLAG_USER,
- ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
-
- zio->io_bp = &zio->io_bp_copy;
-
- return (zio);
-}
-
-zio_t *
-zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- zio_done_func_t *done, void *private)
-{
- zio_t *zio;
-
- /*
- * A claim is an allocation of a specific block. Claims are needed
- * to support immediate writes in the intent log. The issue is that
- * immediate writes contain committed data, but in a txg that was
- * *not* committed. Upon opening the pool after an unclean shutdown,
- * the intent log claims all blocks that contain immediate write data
- * so that the SPA knows they're in use.
- *
- * All claims *must* be resolved in the first txg -- before the SPA
- * starts allocating blocks -- so that nothing is allocated twice.
- */
- ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
- ASSERT3U(spa_first_txg(spa), <=, txg);
-
- zio = zio_create(pio, spa, txg, bp, NULL, 0, done, private,
- ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, 0,
- ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
-
- zio->io_bp = &zio->io_bp_copy;
-
- return (zio);
-}
-
-zio_t *
-zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *private, int priority, int flags)
-{
- zio_t *zio;
- int c;
-
- if (vd->vdev_children == 0) {
- zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
- ZIO_TYPE_IOCTL, priority, flags,
- ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
-
- zio->io_vd = vd;
- zio->io_cmd = cmd;
- } else {
- zio = zio_null(pio, spa, NULL, NULL, flags);
-
- for (c = 0; c < vd->vdev_children; c++)
- zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
- done, private, priority, flags));
- }
-
- return (zio);
-}
-
-static void
-zio_phys_bp_init(vdev_t *vd, blkptr_t *bp, uint64_t offset, uint64_t size,
- int checksum)
-{
- ASSERT(vd->vdev_children == 0);
-
- ASSERT(size <= SPA_MAXBLOCKSIZE);
- ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
- ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
-
- ASSERT(offset + size <= VDEV_LABEL_START_SIZE ||
- offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
- ASSERT3U(offset + size, <=, vd->vdev_psize);
-
- BP_ZERO(bp);
-
- BP_SET_LSIZE(bp, size);
- BP_SET_PSIZE(bp, size);
-
- BP_SET_CHECKSUM(bp, checksum);
- BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
- BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
-
- if (checksum != ZIO_CHECKSUM_OFF)
- ZIO_SET_CHECKSUM(&bp->blk_cksum, offset, 0, 0, 0);
-}
-
-zio_t *
-zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
- void *data, int checksum, zio_done_func_t *done, void *private,
- int priority, int flags)
-{
- zio_t *zio;
- blkptr_t blk;
-
- zio_phys_bp_init(vd, &blk, offset, size, checksum);
-
- zio = zio_create(pio, vd->vdev_spa, 0, &blk, data, size, done, private,
- ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL,
- ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
-
- zio->io_vd = vd;
- zio->io_offset = offset;
-
- /*
- * Work off our copy of the bp so the caller can free it.
- */
- zio->io_bp = &zio->io_bp_copy;
-
- return (zio);
-}
-
-zio_t *
-zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
- void *data, int checksum, zio_done_func_t *done, void *private,
- int priority, int flags)
-{
- zio_block_tail_t *zbt;
- void *wbuf;
- zio_t *zio;
- blkptr_t blk;
-
- zio_phys_bp_init(vd, &blk, offset, size, checksum);
-
- zio = zio_create(pio, vd->vdev_spa, 0, &blk, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL,
- ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
-
- zio->io_vd = vd;
- zio->io_offset = offset;
-
- zio->io_bp = &zio->io_bp_copy;
- zio->io_checksum = checksum;
-
- if (zio_checksum_table[checksum].ci_zbt) {
- /*
- * zbt checksums are necessarily destructive -- they modify
- * one word of the write buffer to hold the verifier/checksum.
- * Therefore, we must make a local copy in case the data is
- * being written to multiple places.
- */
- wbuf = zio_buf_alloc(size);
- bcopy(data, wbuf, size);
- zio_push_transform(zio, wbuf, size, size);
-
- zbt = (zio_block_tail_t *)((char *)wbuf + size) - 1;
- zbt->zbt_cksum = blk.blk_cksum;
- }
-
- return (zio);
-}
-
-/*
- * Create a child I/O to do some work for us. It has no associated bp.
- */
-zio_t *
-zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
- void *data, uint64_t size, int type, int priority, int flags,
- zio_done_func_t *done, void *private)
-{
- uint32_t pipeline = ZIO_VDEV_CHILD_PIPELINE;
- zio_t *cio;
-
- if (type == ZIO_TYPE_READ && bp != NULL) {
- /*
- * If we have the bp, then the child should perform the
- * checksum and the parent need not. This pushes error
- * detection as close to the leaves as possible and
- * eliminates redundant checksums in the interior nodes.
- */
- pipeline |= 1U << ZIO_STAGE_CHECKSUM_VERIFY;
- zio->io_pipeline &= ~(1U << ZIO_STAGE_CHECKSUM_VERIFY);
- }
-
- cio = zio_create(zio, zio->io_spa, zio->io_txg, bp, data, size,
- done, private, type, priority,
- (zio->io_flags & ZIO_FLAG_VDEV_INHERIT) | ZIO_FLAG_CANFAIL | flags,
- ZIO_STAGE_VDEV_IO_START - 1, pipeline);
-
- cio->io_vd = vd;
- cio->io_offset = offset;
-
- return (cio);
-}
-
-/*
- * ==========================================================================
- * Initiate I/O, either sync or async
- * ==========================================================================
- */
-int
-zio_wait(zio_t *zio)
-{
- int error;
-
- ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
-
- zio->io_waiter = curthread;
-
- zio_next_stage_async(zio);
-
- mutex_enter(&zio->io_lock);
- while (zio->io_stalled != ZIO_STAGE_DONE)
- cv_wait(&zio->io_cv, &zio->io_lock);
- mutex_exit(&zio->io_lock);
-
- error = zio->io_error;
- cv_destroy(&zio->io_cv);
- mutex_destroy(&zio->io_lock);
- kmem_cache_free(zio_cache, zio);
-
- return (error);
-}
-
-void
-zio_nowait(zio_t *zio)
-{
- zio_next_stage_async(zio);
-}
-
-/*
- * ==========================================================================
- * I/O pipeline interlocks: parent/child dependency scoreboarding
- * ==========================================================================
- */
-static void
-zio_wait_for_children(zio_t *zio, uint32_t stage, uint64_t *countp)
-{
- mutex_enter(&zio->io_lock);
- if (*countp == 0) {
- ASSERT(zio->io_stalled == 0);
- mutex_exit(&zio->io_lock);
- zio_next_stage(zio);
- } else {
- zio->io_stalled = stage;
- mutex_exit(&zio->io_lock);
- }
-}
-
-static void
-zio_notify_parent(zio_t *zio, uint32_t stage, uint64_t *countp)
-{
- zio_t *pio = zio->io_parent;
-
- mutex_enter(&pio->io_lock);
- if (pio->io_error == 0 && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
- pio->io_error = zio->io_error;
- if (--*countp == 0 && pio->io_stalled == stage) {
- pio->io_stalled = 0;
- mutex_exit(&pio->io_lock);
- zio_next_stage_async(pio);
- } else {
- mutex_exit(&pio->io_lock);
- }
-}
-
-static void
-zio_wait_children_ready(zio_t *zio)
-{
- zio_wait_for_children(zio, ZIO_STAGE_WAIT_CHILDREN_READY,
- &zio->io_children_notready);
-}
-
-void
-zio_wait_children_done(zio_t *zio)
-{
- zio_wait_for_children(zio, ZIO_STAGE_WAIT_CHILDREN_DONE,
- &zio->io_children_notdone);
-}
-
-static void
-zio_ready(zio_t *zio)
-{
- zio_t *pio = zio->io_parent;
-
- if (zio->io_ready)
- zio->io_ready(zio);
-
- if (pio != NULL)
- zio_notify_parent(zio, ZIO_STAGE_WAIT_CHILDREN_READY,
- &pio->io_children_notready);
-
- if (zio->io_bp)
- zio->io_bp_copy = *zio->io_bp;
-
- zio_next_stage(zio);
-}
-
-static void
-zio_done(zio_t *zio)
-{
- zio_t *pio = zio->io_parent;
- spa_t *spa = zio->io_spa;
- blkptr_t *bp = zio->io_bp;
- vdev_t *vd = zio->io_vd;
-
- ASSERT(zio->io_children_notready == 0);
- ASSERT(zio->io_children_notdone == 0);
-
- if (bp != NULL) {
- ASSERT(bp->blk_pad[0] == 0);
- ASSERT(bp->blk_pad[1] == 0);
- ASSERT(bp->blk_pad[2] == 0);
- ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0);
- if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) &&
- !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
- ASSERT(!BP_SHOULD_BYTESWAP(bp));
- if (zio->io_ndvas != 0)
- ASSERT3U(zio->io_ndvas, <=, BP_GET_NDVAS(bp));
- ASSERT(BP_COUNT_GANG(bp) == 0 ||
- (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp)));
- }
- }
-
- if (vd != NULL)
- vdev_stat_update(zio);
-
- if (zio->io_error) {
- /*
- * If this I/O is attached to a particular vdev,
- * generate an error message describing the I/O failure
- * at the block level. We ignore these errors if the
- * device is currently unavailable.
- */
- if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd))
- zfs_ereport_post(FM_EREPORT_ZFS_IO,
- zio->io_spa, vd, zio, 0, 0);
-
- if ((zio->io_error == EIO ||
- !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) &&
- zio->io_logical == zio) {
- /*
- * For root I/O requests, tell the SPA to log the error
- * appropriately. Also, generate a logical data
- * ereport.
- */
- spa_log_error(zio->io_spa, zio);
-
- zfs_ereport_post(FM_EREPORT_ZFS_DATA,
- zio->io_spa, NULL, zio, 0, 0);
- }
-
- /*
- * For I/O requests that cannot fail, panic appropriately.
- */
- if (!(zio->io_flags & ZIO_FLAG_CANFAIL)) {
- char *blkbuf;
-
- blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_NOSLEEP);
- if (blkbuf) {
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN,
- bp ? bp : &zio->io_bp_copy);
- }
- panic("ZFS: %s (%s on %s off %llx: zio %p %s): error "
- "%d", zio->io_error == ECKSUM ?
- "bad checksum" : "I/O failure",
- zio_type_name[zio->io_type],
- vdev_description(vd),
- (u_longlong_t)zio->io_offset,
- zio, blkbuf ? blkbuf : "", zio->io_error);
- }
- }
- zio_clear_transform_stack(zio);
-
- if (zio->io_done)
- zio->io_done(zio);
-
- ASSERT(zio->io_delegate_list == NULL);
- ASSERT(zio->io_delegate_next == NULL);
-
- if (pio != NULL) {
- zio_t *next, *prev;
-
- mutex_enter(&pio->io_lock);
- next = zio->io_sibling_next;
- prev = zio->io_sibling_prev;
- if (next != NULL)
- next->io_sibling_prev = prev;
- if (prev != NULL)
- prev->io_sibling_next = next;
- if (pio->io_child == zio)
- pio->io_child = next;
- mutex_exit(&pio->io_lock);
-
- zio_notify_parent(zio, ZIO_STAGE_WAIT_CHILDREN_DONE,
- &pio->io_children_notdone);
- }
-
- /*
- * Note: this I/O is now done, and will shortly be freed, so there is no
- * need to clear this (or any other) flag.
- */
- if (zio->io_flags & ZIO_FLAG_CONFIG_GRABBED)
- spa_config_exit(spa, zio);
-
- if (zio->io_waiter != NULL) {
- mutex_enter(&zio->io_lock);
- ASSERT(zio->io_stage == ZIO_STAGE_DONE);
- zio->io_stalled = zio->io_stage;
- cv_broadcast(&zio->io_cv);
- mutex_exit(&zio->io_lock);
- } else {
- cv_destroy(&zio->io_cv);
- mutex_destroy(&zio->io_lock);
- kmem_cache_free(zio_cache, zio);
- }
-}
-
-/*
- * ==========================================================================
- * Compression support
- * ==========================================================================
- */
-static void
-zio_write_compress(zio_t *zio)
-{
- int compress = zio->io_compress;
- blkptr_t *bp = zio->io_bp;
- void *cbuf;
- uint64_t lsize = zio->io_size;
- uint64_t csize = lsize;
- uint64_t cbufsize = 0;
- int pass;
-
- if (bp->blk_birth == zio->io_txg) {
- /*
- * We're rewriting an existing block, which means we're
- * working on behalf of spa_sync(). For spa_sync() to
- * converge, it must eventually be the case that we don't
- * have to allocate new blocks. But compression changes
- * the blocksize, which forces a reallocate, and makes
- * convergence take longer. Therefore, after the first
- * few passes, stop compressing to ensure convergence.
- */
- pass = spa_sync_pass(zio->io_spa);
- if (pass > zio_sync_pass.zp_dontcompress)
- compress = ZIO_COMPRESS_OFF;
- } else {
- ASSERT(BP_IS_HOLE(bp));
- pass = 1;
- }
-
- if (compress != ZIO_COMPRESS_OFF)
- if (!zio_compress_data(compress, zio->io_data, zio->io_size,
- &cbuf, &csize, &cbufsize))
- compress = ZIO_COMPRESS_OFF;
-
- if (compress != ZIO_COMPRESS_OFF && csize != 0)
- zio_push_transform(zio, cbuf, csize, cbufsize);
-
- /*
- * The final pass of spa_sync() must be all rewrites, but the first
- * few passes offer a trade-off: allocating blocks defers convergence,
- * but newly allocated blocks are sequential, so they can be written
- * to disk faster. Therefore, we allow the first few passes of
- * spa_sync() to reallocate new blocks, but force rewrites after that.
- * There should only be a handful of blocks after pass 1 in any case.
- */
- if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == csize &&
- pass > zio_sync_pass.zp_rewrite) {
- ASSERT(csize != 0);
- BP_SET_LSIZE(bp, lsize);
- BP_SET_COMPRESS(bp, compress);
- zio->io_pipeline = ZIO_REWRITE_PIPELINE;
- } else {
- if (bp->blk_birth == zio->io_txg)
- BP_ZERO(bp);
- if (csize == 0) {
- BP_ZERO(bp);
- zio->io_pipeline = ZIO_WAIT_FOR_CHILDREN_PIPELINE;
- } else {
- ASSERT3U(BP_GET_NDVAS(bp), ==, 0);
- BP_SET_LSIZE(bp, lsize);
- BP_SET_PSIZE(bp, csize);
- BP_SET_COMPRESS(bp, compress);
- zio->io_pipeline = ZIO_WRITE_ALLOCATE_PIPELINE;
- }
- }
-
- zio_next_stage(zio);
-}
-
-static void
-zio_read_decompress(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
- void *data;
- uint64_t size;
- uint64_t bufsize;
- int compress = BP_GET_COMPRESS(bp);
-
- ASSERT(compress != ZIO_COMPRESS_OFF);
-
- zio_pop_transform(zio, &data, &size, &bufsize);
-
- if (zio_decompress_data(compress, data, size,
- zio->io_data, zio->io_size))
- zio->io_error = EIO;
-
- zio_buf_free(data, bufsize);
-
- zio_next_stage(zio);
-}
-
-/*
- * ==========================================================================
- * Gang block support
- * ==========================================================================
- */
-static void
-zio_gang_pipeline(zio_t *zio)
-{
- /*
- * By default, the pipeline assumes that we're dealing with a gang
- * block. If we're not, strip out any gang-specific stages.
- */
- if (!BP_IS_GANG(zio->io_bp))
- zio->io_pipeline &= ~ZIO_GANG_STAGES;
-
- zio_next_stage(zio);
-}
-
-static void
-zio_gang_byteswap(zio_t *zio)
-{
- ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
-
- if (BP_SHOULD_BYTESWAP(zio->io_bp))
- byteswap_uint64_array(zio->io_data, zio->io_size);
-}
-
-static void
-zio_get_gang_header(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
- uint64_t gsize = SPA_GANGBLOCKSIZE;
- void *gbuf = zio_buf_alloc(gsize);
-
- ASSERT(BP_IS_GANG(bp));
-
- zio_push_transform(zio, gbuf, gsize, gsize);
-
- zio_nowait(zio_create(zio, zio->io_spa, bp->blk_birth, bp, gbuf, gsize,
- NULL, NULL, ZIO_TYPE_READ, zio->io_priority,
- zio->io_flags & ZIO_FLAG_GANG_INHERIT,
- ZIO_STAGE_OPEN, ZIO_READ_PIPELINE));
-
- zio_wait_children_done(zio);
-}
-
-static void
-zio_read_gang_members(zio_t *zio)
-{
- zio_gbh_phys_t *gbh;
- uint64_t gsize, gbufsize, loff, lsize;
- int i;
-
- ASSERT(BP_IS_GANG(zio->io_bp));
-
- zio_gang_byteswap(zio);
- zio_pop_transform(zio, (void **)&gbh, &gsize, &gbufsize);
-
- for (loff = 0, i = 0; loff != zio->io_size; loff += lsize, i++) {
- blkptr_t *gbp = &gbh->zg_blkptr[i];
- lsize = BP_GET_PSIZE(gbp);
-
- ASSERT(BP_GET_COMPRESS(gbp) == ZIO_COMPRESS_OFF);
- ASSERT3U(lsize, ==, BP_GET_LSIZE(gbp));
- ASSERT3U(loff + lsize, <=, zio->io_size);
- ASSERT(i < SPA_GBH_NBLKPTRS);
- ASSERT(!BP_IS_HOLE(gbp));
-
- zio_nowait(zio_read(zio, zio->io_spa, gbp,
- (char *)zio->io_data + loff, lsize, NULL, NULL,
- zio->io_priority, zio->io_flags & ZIO_FLAG_GANG_INHERIT,
- &zio->io_bookmark));
- }
-
- zio_buf_free(gbh, gbufsize);
- zio_wait_children_done(zio);
-}
-
-static void
-zio_rewrite_gang_members(zio_t *zio)
-{
- zio_gbh_phys_t *gbh;
- uint64_t gsize, gbufsize, loff, lsize;
- int i;
-
- ASSERT(BP_IS_GANG(zio->io_bp));
- ASSERT3U(zio->io_size, ==, SPA_GANGBLOCKSIZE);
-
- zio_gang_byteswap(zio);
- zio_pop_transform(zio, (void **)&gbh, &gsize, &gbufsize);
-
- ASSERT(gsize == gbufsize);
-
- for (loff = 0, i = 0; loff != zio->io_size; loff += lsize, i++) {
- blkptr_t *gbp = &gbh->zg_blkptr[i];
- lsize = BP_GET_PSIZE(gbp);
-
- ASSERT(BP_GET_COMPRESS(gbp) == ZIO_COMPRESS_OFF);
- ASSERT3U(lsize, ==, BP_GET_LSIZE(gbp));
- ASSERT3U(loff + lsize, <=, zio->io_size);
- ASSERT(i < SPA_GBH_NBLKPTRS);
- ASSERT(!BP_IS_HOLE(gbp));
-
- zio_nowait(zio_rewrite(zio, zio->io_spa, zio->io_checksum,
- zio->io_txg, gbp, (char *)zio->io_data + loff, lsize,
- NULL, NULL, zio->io_priority, zio->io_flags,
- &zio->io_bookmark));
- }
-
- zio_push_transform(zio, gbh, gsize, gbufsize);
- zio_wait_children_ready(zio);
-}
-
-static void
-zio_free_gang_members(zio_t *zio)
-{
- zio_gbh_phys_t *gbh;
- uint64_t gsize, gbufsize;
- int i;
-
- ASSERT(BP_IS_GANG(zio->io_bp));
-
- zio_gang_byteswap(zio);
- zio_pop_transform(zio, (void **)&gbh, &gsize, &gbufsize);
-
- for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
- blkptr_t *gbp = &gbh->zg_blkptr[i];
-
- if (BP_IS_HOLE(gbp))
- continue;
- zio_nowait(zio_free(zio, zio->io_spa, zio->io_txg,
- gbp, NULL, NULL));
- }
-
- zio_buf_free(gbh, gbufsize);
- zio_next_stage(zio);
-}
-
-static void
-zio_claim_gang_members(zio_t *zio)
-{
- zio_gbh_phys_t *gbh;
- uint64_t gsize, gbufsize;
- int i;
-
- ASSERT(BP_IS_GANG(zio->io_bp));
-
- zio_gang_byteswap(zio);
- zio_pop_transform(zio, (void **)&gbh, &gsize, &gbufsize);
-
- for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
- blkptr_t *gbp = &gbh->zg_blkptr[i];
- if (BP_IS_HOLE(gbp))
- continue;
- zio_nowait(zio_claim(zio, zio->io_spa, zio->io_txg,
- gbp, NULL, NULL));
- }
-
- zio_buf_free(gbh, gbufsize);
- zio_next_stage(zio);
-}
-
-static void
-zio_write_allocate_gang_member_done(zio_t *zio)
-{
- zio_t *pio = zio->io_parent;
- dva_t *cdva = zio->io_bp->blk_dva;
- dva_t *pdva = pio->io_bp->blk_dva;
- uint64_t asize;
- int d;
-
- ASSERT3U(pio->io_ndvas, ==, zio->io_ndvas);
- ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp));
- ASSERT3U(zio->io_ndvas, <=, BP_GET_NDVAS(zio->io_bp));
- ASSERT3U(pio->io_ndvas, <=, BP_GET_NDVAS(pio->io_bp));
-
- mutex_enter(&pio->io_lock);
- for (d = 0; d < BP_GET_NDVAS(pio->io_bp); d++) {
- ASSERT(DVA_GET_GANG(&pdva[d]));
- asize = DVA_GET_ASIZE(&pdva[d]);
- asize += DVA_GET_ASIZE(&cdva[d]);
- DVA_SET_ASIZE(&pdva[d], asize);
- }
- mutex_exit(&pio->io_lock);
-}
-
-static void
-zio_write_allocate_gang_members(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
- dva_t *dva = bp->blk_dva;
- spa_t *spa = zio->io_spa;
- zio_gbh_phys_t *gbh;
- uint64_t txg = zio->io_txg;
- uint64_t resid = zio->io_size;
- uint64_t maxalloc = P2ROUNDUP(zio->io_size >> 1, SPA_MINBLOCKSIZE);
- uint64_t gsize, loff, lsize;
- uint32_t gbps_left;
- int ndvas = zio->io_ndvas;
- int gbh_ndvas = MIN(ndvas + 1, spa_max_replication(spa));
- int error;
- int i, d;
-
- gsize = SPA_GANGBLOCKSIZE;
- gbps_left = SPA_GBH_NBLKPTRS;
-
- error = metaslab_alloc(spa, gsize, bp, gbh_ndvas, txg, NULL, B_FALSE);
- if (error == ENOSPC)
- panic("can't allocate gang block header");
- ASSERT(error == 0);
-
- for (d = 0; d < gbh_ndvas; d++)
- DVA_SET_GANG(&dva[d], 1);
-
- bp->blk_birth = txg;
-
- gbh = zio_buf_alloc(gsize);
- bzero(gbh, gsize);
-
- /* We need to test multi-level gang blocks */
- if (maxalloc >= zio_gang_bang && (LBOLT & 0x1) == 0)
- maxalloc = MAX(maxalloc >> 2, SPA_MINBLOCKSIZE);
-
- for (loff = 0, i = 0; loff != zio->io_size;
- loff += lsize, resid -= lsize, gbps_left--, i++) {
- blkptr_t *gbp = &gbh->zg_blkptr[i];
- dva = gbp->blk_dva;
-
- ASSERT(gbps_left != 0);
- maxalloc = MIN(maxalloc, resid);
-
- while (resid <= maxalloc * gbps_left) {
- error = metaslab_alloc(spa, maxalloc, gbp, ndvas,
- txg, bp, B_FALSE);
- if (error == 0)
- break;
- ASSERT3U(error, ==, ENOSPC);
- if (maxalloc == SPA_MINBLOCKSIZE)
- panic("really out of space");
- maxalloc = P2ROUNDUP(maxalloc >> 1, SPA_MINBLOCKSIZE);
- }
-
- if (resid <= maxalloc * gbps_left) {
- lsize = maxalloc;
- BP_SET_LSIZE(gbp, lsize);
- BP_SET_PSIZE(gbp, lsize);
- BP_SET_COMPRESS(gbp, ZIO_COMPRESS_OFF);
- gbp->blk_birth = txg;
- zio_nowait(zio_rewrite(zio, spa,
- zio->io_checksum, txg, gbp,
- (char *)zio->io_data + loff, lsize,
- zio_write_allocate_gang_member_done, NULL,
- zio->io_priority, zio->io_flags,
- &zio->io_bookmark));
- } else {
- lsize = P2ROUNDUP(resid / gbps_left, SPA_MINBLOCKSIZE);
- ASSERT(lsize != SPA_MINBLOCKSIZE);
- zio_nowait(zio_write_allocate(zio, spa,
- zio->io_checksum, txg, gbp,
- (char *)zio->io_data + loff, lsize,
- zio_write_allocate_gang_member_done, NULL,
- zio->io_priority, zio->io_flags));
- }
- }
-
- ASSERT(resid == 0 && loff == zio->io_size);
-
- zio->io_pipeline |= 1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE;
-
- zio_push_transform(zio, gbh, gsize, gsize);
- /*
- * As much as we'd like this to be zio_wait_children_ready(),
- * updating our ASIZE doesn't happen until the io_done callback,
- * so we have to wait for that to finish in order for our BP
- * to be stable.
- */
- zio_wait_children_done(zio);
-}
-
-/*
- * ==========================================================================
- * Allocate and free blocks
- * ==========================================================================
- */
-static void
-zio_dva_allocate(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
- int error;
-
- ASSERT(BP_IS_HOLE(bp));
- ASSERT3U(BP_GET_NDVAS(bp), ==, 0);
- ASSERT3U(zio->io_ndvas, >, 0);
- ASSERT3U(zio->io_ndvas, <=, spa_max_replication(zio->io_spa));
-
- /* For testing, make some blocks above a certain size be gang blocks */
- if (zio->io_size >= zio_gang_bang && (LBOLT & 0x3) == 0) {
- zio_write_allocate_gang_members(zio);
- return;
- }
-
- ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
-
- error = metaslab_alloc(zio->io_spa, zio->io_size, bp, zio->io_ndvas,
- zio->io_txg, NULL, B_FALSE);
-
- if (error == 0) {
- bp->blk_birth = zio->io_txg;
- } else if (error == ENOSPC) {
- if (zio->io_size == SPA_MINBLOCKSIZE)
- panic("really, truly out of space");
- zio_write_allocate_gang_members(zio);
- return;
- } else {
- zio->io_error = error;
- }
- zio_next_stage(zio);
-}
-
-static void
-zio_dva_free(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
-
- metaslab_free(zio->io_spa, bp, zio->io_txg, B_FALSE);
-
- BP_ZERO(bp);
-
- zio_next_stage(zio);
-}
-
-static void
-zio_dva_claim(zio_t *zio)
-{
- zio->io_error = metaslab_claim(zio->io_spa, zio->io_bp, zio->io_txg);
-
- zio_next_stage(zio);
-}
-
-/*
- * ==========================================================================
- * Read and write to physical devices
- * ==========================================================================
- */
-
-static void
-zio_vdev_io_start(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_t *tvd = vd ? vd->vdev_top : NULL;
- blkptr_t *bp = zio->io_bp;
- uint64_t align;
-
- if (vd == NULL) {
- /* The mirror_ops handle multiple DVAs in a single BP */
- vdev_mirror_ops.vdev_op_io_start(zio);
- return;
- }
-
- align = 1ULL << tvd->vdev_ashift;
-
- if (zio->io_retries == 0 && vd == tvd)
- zio->io_flags |= ZIO_FLAG_FAILFAST;
-
- if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) &&
- vd->vdev_children == 0) {
- zio->io_flags |= ZIO_FLAG_PHYSICAL;
- zio->io_offset += VDEV_LABEL_START_SIZE;
- }
-
- if (P2PHASE(zio->io_size, align) != 0) {
- uint64_t asize = P2ROUNDUP(zio->io_size, align);
- char *abuf = zio_buf_alloc(asize);
- ASSERT(vd == tvd);
- if (zio->io_type == ZIO_TYPE_WRITE) {
- bcopy(zio->io_data, abuf, zio->io_size);
- bzero(abuf + zio->io_size, asize - zio->io_size);
- }
- zio_push_transform(zio, abuf, asize, asize);
- ASSERT(!(zio->io_flags & ZIO_FLAG_SUBBLOCK));
- zio->io_flags |= ZIO_FLAG_SUBBLOCK;
- }
-
- ASSERT(P2PHASE(zio->io_offset, align) == 0);
- ASSERT(P2PHASE(zio->io_size, align) == 0);
- ASSERT(bp == NULL ||
- P2ROUNDUP(ZIO_GET_IOSIZE(zio), align) == zio->io_size);
- ASSERT(zio->io_type != ZIO_TYPE_WRITE || (spa_mode & FWRITE));
-
- vdev_io_start(zio);
-
- /* zio_next_stage_async() gets called from io completion interrupt */
-}
-
-static void
-zio_vdev_io_done(zio_t *zio)
-{
- if (zio->io_vd == NULL)
- /* The mirror_ops handle multiple DVAs in a single BP */
- vdev_mirror_ops.vdev_op_io_done(zio);
- else
- vdev_io_done(zio);
-}
-
-/* XXPOLICY */
-boolean_t
-zio_should_retry(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
-
- if (zio->io_error == 0)
- return (B_FALSE);
- if (zio->io_delegate_list != NULL)
- return (B_FALSE);
- if (vd && vd != vd->vdev_top)
- return (B_FALSE);
- if (zio->io_flags & ZIO_FLAG_DONT_RETRY)
- return (B_FALSE);
- if (zio->io_retries > 0)
- return (B_FALSE);
-
- return (B_TRUE);
-}
-
-static void
-zio_vdev_io_assess(zio_t *zio)
-{
- vdev_t *vd = zio->io_vd;
- vdev_t *tvd = vd ? vd->vdev_top : NULL;
-
- ASSERT(zio->io_vsd == NULL);
-
- if (zio->io_flags & ZIO_FLAG_SUBBLOCK) {
- void *abuf;
- uint64_t asize;
- ASSERT(vd == tvd);
- zio_pop_transform(zio, &abuf, &asize, &asize);
- if (zio->io_type == ZIO_TYPE_READ)
- bcopy(abuf, zio->io_data, zio->io_size);
- zio_buf_free(abuf, asize);
- zio->io_flags &= ~ZIO_FLAG_SUBBLOCK;
- }
-
- if (zio_injection_enabled && !zio->io_error)
- zio->io_error = zio_handle_fault_injection(zio, EIO);
-
- /*
- * If the I/O failed, determine whether we should attempt to retry it.
- */
- /* XXPOLICY */
- if (zio_should_retry(zio)) {
- ASSERT(tvd == vd);
-
- zio->io_retries++;
- zio->io_error = 0;
- zio->io_flags &= ZIO_FLAG_VDEV_INHERIT |
- ZIO_FLAG_CONFIG_GRABBED;
- /* XXPOLICY */
- zio->io_flags &= ~ZIO_FLAG_FAILFAST;
- zio->io_flags |= ZIO_FLAG_DONT_CACHE;
- zio->io_stage = ZIO_STAGE_VDEV_IO_START - 1;
-
- dprintf("retry #%d for %s to %s offset %llx\n",
- zio->io_retries, zio_type_name[zio->io_type],
- vdev_description(vd), zio->io_offset);
-
- zio_next_stage_async(zio);
- return;
- }
-
- if (zio->io_error != 0 && zio->io_error != ECKSUM &&
- !(zio->io_flags & ZIO_FLAG_SPECULATIVE) && vd) {
- /*
- * Poor man's hotplug support. Even if we're done retrying this
- * I/O, try to reopen the vdev to see if it's still attached.
- * To avoid excessive thrashing, we only try it once a minute.
- * This also has the effect of detecting when missing devices
- * have come back, by polling the device once a minute.
- *
- * We need to do this asynchronously because we can't grab
- * all the necessary locks way down here.
- */
- if (gethrtime() - vd->vdev_last_try > 60ULL * NANOSEC) {
- vd->vdev_last_try = gethrtime();
- tvd->vdev_reopen_wanted = 1;
- spa_async_request(vd->vdev_spa, SPA_ASYNC_REOPEN);
- }
- }
-
- zio_next_stage(zio);
-}
-
-void
-zio_vdev_io_reissue(zio_t *zio)
-{
- ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
-
- zio->io_stage--;
-}
-
-void
-zio_vdev_io_redone(zio_t *zio)
-{
- ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_DONE);
-
- zio->io_stage--;
-}
-
-void
-zio_vdev_io_bypass(zio_t *zio)
-{
- ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START);
- ASSERT(zio->io_error == 0);
-
- zio->io_flags |= ZIO_FLAG_IO_BYPASS;
- zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS - 1;
-}
-
-/*
- * ==========================================================================
- * Generate and verify checksums
- * ==========================================================================
- */
-static void
-zio_checksum_generate(zio_t *zio)
-{
- int checksum = zio->io_checksum;
- blkptr_t *bp = zio->io_bp;
-
- ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
-
- BP_SET_CHECKSUM(bp, checksum);
- BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
-
- zio_checksum(checksum, &bp->blk_cksum, zio->io_data, zio->io_size);
-
- zio_next_stage(zio);
-}
-
-static void
-zio_gang_checksum_generate(zio_t *zio)
-{
- zio_cksum_t zc;
- zio_gbh_phys_t *gbh = zio->io_data;
-
- ASSERT(BP_IS_GANG(zio->io_bp));
- ASSERT3U(zio->io_size, ==, SPA_GANGBLOCKSIZE);
-
- zio_set_gang_verifier(zio, &gbh->zg_tail.zbt_cksum);
-
- zio_checksum(ZIO_CHECKSUM_GANG_HEADER, &zc, zio->io_data, zio->io_size);
-
- zio_next_stage(zio);
-}
-
-static void
-zio_checksum_verify(zio_t *zio)
-{
- if (zio->io_bp != NULL) {
- zio->io_error = zio_checksum_error(zio);
- if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE))
- zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM,
- zio->io_spa, zio->io_vd, zio, 0, 0);
- }
-
- zio_next_stage(zio);
-}
-
-/*
- * Called by RAID-Z to ensure we don't compute the checksum twice.
- */
-void
-zio_checksum_verified(zio_t *zio)
-{
- zio->io_pipeline &= ~(1U << ZIO_STAGE_CHECKSUM_VERIFY);
-}
-
-/*
- * Set the external verifier for a gang block based on stuff in the bp
- */
-void
-zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp)
-{
- blkptr_t *bp = zio->io_bp;
-
- zcp->zc_word[0] = DVA_GET_VDEV(BP_IDENTITY(bp));
- zcp->zc_word[1] = DVA_GET_OFFSET(BP_IDENTITY(bp));
- zcp->zc_word[2] = bp->blk_birth;
- zcp->zc_word[3] = 0;
-}
-
-/*
- * ==========================================================================
- * Define the pipeline
- * ==========================================================================
- */
-typedef void zio_pipe_stage_t(zio_t *zio);
-
-static void
-zio_badop(zio_t *zio)
-{
- panic("Invalid I/O pipeline stage %u for zio %p", zio->io_stage, zio);
-}
-
-zio_pipe_stage_t *zio_pipeline[ZIO_STAGE_DONE + 2] = {
- zio_badop,
- zio_wait_children_ready,
- zio_write_compress,
- zio_checksum_generate,
- zio_gang_pipeline,
- zio_get_gang_header,
- zio_rewrite_gang_members,
- zio_free_gang_members,
- zio_claim_gang_members,
- zio_dva_allocate,
- zio_dva_free,
- zio_dva_claim,
- zio_gang_checksum_generate,
- zio_ready,
- zio_vdev_io_start,
- zio_vdev_io_done,
- zio_vdev_io_assess,
- zio_wait_children_done,
- zio_checksum_verify,
- zio_read_gang_members,
- zio_read_decompress,
- zio_done,
- zio_badop
-};
-
-/*
- * Move an I/O to the next stage of the pipeline and execute that stage.
- * There's no locking on io_stage because there's no legitimate way for
- * multiple threads to be attempting to process the same I/O.
- */
-void
-zio_next_stage(zio_t *zio)
-{
- uint32_t pipeline = zio->io_pipeline;
-
- ASSERT(!MUTEX_HELD(&zio->io_lock));
-
- if (zio->io_error) {
- dprintf("zio %p vdev %s offset %llx stage %d error %d\n",
- zio, vdev_description(zio->io_vd),
- zio->io_offset, zio->io_stage, zio->io_error);
- if (((1U << zio->io_stage) & ZIO_VDEV_IO_PIPELINE) == 0)
- pipeline &= ZIO_ERROR_PIPELINE_MASK;
- }
-
- while (((1U << ++zio->io_stage) & pipeline) == 0)
- continue;
-
- ASSERT(zio->io_stage <= ZIO_STAGE_DONE);
- ASSERT(zio->io_stalled == 0);
-
- /*
- * See the comment in zio_next_stage_async() about per-CPU taskqs.
- */
- if (((1U << zio->io_stage) & zio->io_async_stages) &&
- (zio->io_stage == ZIO_STAGE_WRITE_COMPRESS) &&
- !(zio->io_flags & ZIO_FLAG_METADATA)) {
- taskq_t *tq = zio->io_spa->spa_zio_issue_taskq[zio->io_type];
- (void) taskq_dispatch(tq,
- (task_func_t *)zio_pipeline[zio->io_stage], zio, TQ_SLEEP);
- } else {
- zio_pipeline[zio->io_stage](zio);
- }
-}
-
-void
-zio_next_stage_async(zio_t *zio)
-{
- taskq_t *tq;
- uint32_t pipeline = zio->io_pipeline;
-
- ASSERT(!MUTEX_HELD(&zio->io_lock));
-
- if (zio->io_error) {
- dprintf("zio %p vdev %s offset %llx stage %d error %d\n",
- zio, vdev_description(zio->io_vd),
- zio->io_offset, zio->io_stage, zio->io_error);
- if (((1U << zio->io_stage) & ZIO_VDEV_IO_PIPELINE) == 0)
- pipeline &= ZIO_ERROR_PIPELINE_MASK;
- }
-
- while (((1U << ++zio->io_stage) & pipeline) == 0)
- continue;
-
- ASSERT(zio->io_stage <= ZIO_STAGE_DONE);
- ASSERT(zio->io_stalled == 0);
-
- /*
- * For performance, we'll probably want two sets of task queues:
- * per-CPU issue taskqs and per-CPU completion taskqs. The per-CPU
- * part is for read performance: since we have to make a pass over
- * the data to checksum it anyway, we want to do this on the same CPU
- * that issued the read, because (assuming CPU scheduling affinity)
- * that thread is probably still there. Getting this optimization
- * right avoids performance-hostile cache-to-cache transfers.
- *
- * Note that having two sets of task queues is also necessary for
- * correctness: if all of the issue threads get bogged down waiting
- * for dependent reads (e.g. metaslab freelist) to complete, then
- * there won't be any threads available to service I/O completion
- * interrupts.
- */
- if ((1U << zio->io_stage) & zio->io_async_stages) {
- if (zio->io_stage < ZIO_STAGE_VDEV_IO_DONE)
- tq = zio->io_spa->spa_zio_issue_taskq[zio->io_type];
- else
- tq = zio->io_spa->spa_zio_intr_taskq[zio->io_type];
- (void) taskq_dispatch(tq,
- (task_func_t *)zio_pipeline[zio->io_stage], zio, TQ_SLEEP);
- } else {
- zio_pipeline[zio->io_stage](zio);
- }
-}
-
-static boolean_t
-zio_alloc_should_fail(void)
-{
- static uint16_t allocs = 0;
-
- return (P2PHASE(allocs++, 1U<<zio_zil_fail_shift) == 0);
-}
-
-/*
- * Try to allocate an intent log block. Return 0 on success, errno on failure.
- */
-int
-zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp,
- uint64_t txg)
-{
- int error;
-
- spa_config_enter(spa, RW_READER, FTAG);
-
- if (zio_zil_fail_shift && zio_alloc_should_fail()) {
- spa_config_exit(spa, FTAG);
- return (ENOSPC);
- }
-
- /*
- * We were passed the previous log blocks dva_t in bp->blk_dva[0].
- */
- error = metaslab_alloc(spa, size, new_bp, 1, txg, old_bp, B_TRUE);
-
- if (error == 0) {
- BP_SET_LSIZE(new_bp, size);
- BP_SET_PSIZE(new_bp, size);
- BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
- BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG);
- BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG);
- BP_SET_LEVEL(new_bp, 0);
- BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);
- new_bp->blk_birth = txg;
- }
-
- spa_config_exit(spa, FTAG);
-
- return (error);
-}
-
-/*
- * Free an intent log block. We know it can't be a gang block, so there's
- * nothing to do except metaslab_free() it.
- */
-void
-zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg)
-{
- ASSERT(!BP_IS_GANG(bp));
-
- spa_config_enter(spa, RW_READER, FTAG);
-
- metaslab_free(spa, bp, txg, B_FALSE);
-
- spa_config_exit(spa, FTAG);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c
deleted file mode 100644
index f0d9a14..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-
-/*
- * Checksum vectors.
- *
- * In the SPA, everything is checksummed. We support checksum vectors
- * for three distinct reasons:
- *
- * 1. Different kinds of data need different levels of protection.
- * For SPA metadata, we always want a very strong checksum.
- * For user data, we let users make the trade-off between speed
- * and checksum strength.
- *
- * 2. Cryptographic hash and MAC algorithms are an area of active research.
- * It is likely that in future hash functions will be at least as strong
- * as current best-of-breed, and may be substantially faster as well.
- * We want the ability to take advantage of these new hashes as soon as
- * they become available.
- *
- * 3. If someone develops hardware that can compute a strong hash quickly,
- * we want the ability to take advantage of that hardware.
- *
- * Of course, we don't want a checksum upgrade to invalidate existing
- * data, so we store the checksum *function* in five bits of the DVA.
- * This gives us room for up to 32 different checksum functions.
- *
- * When writing a block, we always checksum it with the latest-and-greatest
- * checksum function of the appropriate strength. When reading a block,
- * we compare the expected checksum against the actual checksum, which we
- * compute via the checksum function specified in the DVA encoding.
- */
-
-/*ARGSUSED*/
-static void
-zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
-{
- ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
-}
-
-zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
- {{NULL, NULL}, 0, 0, "inherit"},
- {{NULL, NULL}, 0, 0, "on"},
- {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"},
- {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, "zilog"},
- {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"},
- {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"},
-};
-
-uint8_t
-zio_checksum_select(uint8_t child, uint8_t parent)
-{
- ASSERT(child < ZIO_CHECKSUM_FUNCTIONS);
- ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS);
- ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
-
- if (child == ZIO_CHECKSUM_INHERIT)
- return (parent);
-
- if (child == ZIO_CHECKSUM_ON)
- return (ZIO_CHECKSUM_ON_VALUE);
-
- return (child);
-}
-
-/*
- * Generate the checksum.
- */
-void
-zio_checksum(uint_t checksum, zio_cksum_t *zcp, void *data, uint64_t size)
-{
- zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1;
- zio_checksum_info_t *ci = &zio_checksum_table[checksum];
- zio_cksum_t zbt_cksum;
-
- ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS);
- ASSERT(ci->ci_func[0] != NULL);
-
- if (ci->ci_zbt) {
- *zcp = zbt->zbt_cksum;
- zbt->zbt_magic = ZBT_MAGIC;
- ci->ci_func[0](data, size, &zbt_cksum);
- zbt->zbt_cksum = zbt_cksum;
- } else {
- ci->ci_func[0](data, size, zcp);
- }
-}
-
-int
-zio_checksum_error(zio_t *zio)
-{
- blkptr_t *bp = zio->io_bp;
- zio_cksum_t zc = bp->blk_cksum;
- uint_t checksum = BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER :
- BP_GET_CHECKSUM(bp);
- int byteswap = BP_SHOULD_BYTESWAP(bp);
- void *data = zio->io_data;
- uint64_t size = ZIO_GET_IOSIZE(zio);
- zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1;
- zio_checksum_info_t *ci = &zio_checksum_table[checksum];
- zio_cksum_t actual_cksum, expected_cksum;
-
- if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
- return (EINVAL);
-
- if (ci->ci_zbt) {
- if (checksum == ZIO_CHECKSUM_GANG_HEADER)
- zio_set_gang_verifier(zio, &zc);
-
- if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) {
- expected_cksum = zbt->zbt_cksum;
- byteswap_uint64_array(&expected_cksum,
- sizeof (zio_cksum_t));
- zbt->zbt_cksum = zc;
- byteswap_uint64_array(&zbt->zbt_cksum,
- sizeof (zio_cksum_t));
- ci->ci_func[1](data, size, &actual_cksum);
- zbt->zbt_cksum = expected_cksum;
- byteswap_uint64_array(&zbt->zbt_cksum,
- sizeof (zio_cksum_t));
- } else {
- expected_cksum = zbt->zbt_cksum;
- zbt->zbt_cksum = zc;
- ci->ci_func[0](data, size, &actual_cksum);
- zbt->zbt_cksum = expected_cksum;
- }
- zc = expected_cksum;
- } else {
- ASSERT(!BP_IS_GANG(bp));
- ci->ci_func[byteswap](data, size, &actual_cksum);
- }
-
- if (!ZIO_CHECKSUM_EQUAL(actual_cksum, zc))
- return (ECKSUM);
-
- if (zio_injection_enabled && !zio->io_error)
- return (zio_handle_fault_injection(zio, ECKSUM));
-
- return (0);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
deleted file mode 100644
index c563be4..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/zfs_context.h>
-#include <sys/compress.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/zio_compress.h>
-
-/*
- * Compression vectors.
- */
-
-zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
- {NULL, NULL, 0, "inherit"},
- {NULL, NULL, 0, "on"},
- {NULL, NULL, 0, "uncompressed"},
- {lzjb_compress, lzjb_decompress, 0, "lzjb"},
- {NULL, NULL, 0, "empty"},
- {gzip_compress, gzip_decompress, 1, "gzip-1"},
- {gzip_compress, gzip_decompress, 2, "gzip-2"},
- {gzip_compress, gzip_decompress, 3, "gzip-3"},
- {gzip_compress, gzip_decompress, 4, "gzip-4"},
- {gzip_compress, gzip_decompress, 5, "gzip-5"},
- {gzip_compress, gzip_decompress, 6, "gzip-6"},
- {gzip_compress, gzip_decompress, 7, "gzip-7"},
- {gzip_compress, gzip_decompress, 8, "gzip-8"},
- {gzip_compress, gzip_decompress, 9, "gzip-9"},
-};
-
-uint8_t
-zio_compress_select(uint8_t child, uint8_t parent)
-{
- ASSERT(child < ZIO_COMPRESS_FUNCTIONS);
- ASSERT(parent < ZIO_COMPRESS_FUNCTIONS);
- ASSERT(parent != ZIO_COMPRESS_INHERIT && parent != ZIO_COMPRESS_ON);
-
- if (child == ZIO_COMPRESS_INHERIT)
- return (parent);
-
- if (child == ZIO_COMPRESS_ON)
- return (ZIO_COMPRESS_ON_VALUE);
-
- return (child);
-}
-
-int
-zio_compress_data(int cpfunc, void *src, uint64_t srcsize, void **destp,
- uint64_t *destsizep, uint64_t *destbufsizep)
-{
- uint64_t *word, *word_end;
- uint64_t ciosize, gapsize, destbufsize;
- zio_compress_info_t *ci = &zio_compress_table[cpfunc];
- char *dest;
- uint_t allzero;
-
- ASSERT((uint_t)cpfunc < ZIO_COMPRESS_FUNCTIONS);
- ASSERT((uint_t)cpfunc == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL);
-
- /*
- * If the data is all zeroes, we don't even need to allocate
- * a block for it. We indicate this by setting *destsizep = 0.
- */
- allzero = 1;
- word = src;
- word_end = (uint64_t *)(uintptr_t)((uintptr_t)word + srcsize);
- while (word < word_end) {
- if (*word++ != 0) {
- allzero = 0;
- break;
- }
- }
- if (allzero) {
- *destp = NULL;
- *destsizep = 0;
- *destbufsizep = 0;
- return (1);
- }
-
- if (cpfunc == ZIO_COMPRESS_EMPTY)
- return (0);
-
- /* Compress at least 12.5% */
- destbufsize = P2ALIGN(srcsize - (srcsize >> 3), SPA_MINBLOCKSIZE);
- if (destbufsize == 0)
- return (0);
- dest = zio_buf_alloc(destbufsize);
-
- ciosize = ci->ci_compress(src, dest, (size_t)srcsize,
- (size_t)destbufsize, ci->ci_level);
- if (ciosize > destbufsize) {
- zio_buf_free(dest, destbufsize);
- return (0);
- }
-
- /* Cool. We compressed at least as much as we were hoping to. */
-
- /* For security, make sure we don't write random heap crap to disk */
- gapsize = P2ROUNDUP(ciosize, SPA_MINBLOCKSIZE) - ciosize;
- if (gapsize != 0) {
- bzero(dest + ciosize, gapsize);
- ciosize += gapsize;
- }
-
- ASSERT3U(ciosize, <=, destbufsize);
- ASSERT(P2PHASE(ciosize, SPA_MINBLOCKSIZE) == 0);
- *destp = dest;
- *destsizep = ciosize;
- *destbufsizep = destbufsize;
-
- return (1);
-}
-
-int
-zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
- void *dest, uint64_t destsize)
-{
- zio_compress_info_t *ci = &zio_compress_table[cpfunc];
-
- ASSERT((uint_t)cpfunc < ZIO_COMPRESS_FUNCTIONS);
-
- return (ci->ci_decompress(src, dest, srcsize, destsize, ci->ci_level));
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
deleted file mode 100644
index 4cada09..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ZFS fault injection
- *
- * To handle fault injection, we keep track of a series of zinject_record_t
- * structures which describe which logical block(s) should be injected with a
- * fault. These are kept in a global list. Each record corresponds to a given
- * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
- * or exported while the injection record exists.
- *
- * Device level injection is done using the 'zi_guid' field. If this is set, it
- * means that the error is destined for a particular device, not a piece of
- * data.
- *
- * This is a rather poor data structure and algorithm, but we don't expect more
- * than a few faults at any one time, so it should be sufficient for our needs.
- */
-
-#include <sys/arc.h>
-#include <sys/zio_impl.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev_impl.h>
-
-uint32_t zio_injection_enabled;
-
-typedef struct inject_handler {
- int zi_id;
- spa_t *zi_spa;
- zinject_record_t zi_record;
- list_node_t zi_link;
-} inject_handler_t;
-
-static list_t inject_handlers;
-static krwlock_t inject_lock;
-static int inject_next_id = 1;
-
-/*
- * Returns true if the given record matches the I/O in progress.
- */
-static boolean_t
-zio_match_handler(zbookmark_t *zb, uint64_t type,
- zinject_record_t *record, int error)
-{
- /*
- * Check for a match against the MOS, which is based on type
- */
- if (zb->zb_objset == 0 && record->zi_objset == 0 &&
- record->zi_object == 0) {
- if (record->zi_type == DMU_OT_NONE ||
- type == record->zi_type)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
- else
- return (B_FALSE);
- }
-
- /*
- * Check for an exact match.
- */
- if (zb->zb_objset == record->zi_objset &&
- zb->zb_object == record->zi_object &&
- zb->zb_level == record->zi_level &&
- zb->zb_blkid >= record->zi_start &&
- zb->zb_blkid <= record->zi_end &&
- error == record->zi_error)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
-
- return (B_FALSE);
-}
-
-/*
- * Determine if the I/O in question should return failure. Returns the errno
- * to be returned to the caller.
- */
-int
-zio_handle_fault_injection(zio_t *zio, int error)
-{
- int ret = 0;
- inject_handler_t *handler;
-
- /*
- * Ignore I/O not associated with any logical data.
- */
- if (zio->io_logical == NULL)
- return (0);
-
- /*
- * Currently, we only support fault injection on reads.
- */
- if (zio->io_type != ZIO_TYPE_READ)
- return (0);
-
- rw_enter(&inject_lock, RW_READER);
-
- for (handler = list_head(&inject_handlers); handler != NULL;
- handler = list_next(&inject_handlers, handler)) {
-
- /* Ignore errors not destined for this pool */
- if (zio->io_spa != handler->zi_spa)
- continue;
-
- /* Ignore device errors */
- if (handler->zi_record.zi_guid != 0)
- continue;
-
- /* If this handler matches, return EIO */
- if (zio_match_handler(&zio->io_logical->io_bookmark,
- zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
- &handler->zi_record, error)) {
- ret = error;
- break;
- }
- }
-
- rw_exit(&inject_lock);
-
- return (ret);
-}
-
-int
-zio_handle_device_injection(vdev_t *vd, int error)
-{
- inject_handler_t *handler;
- int ret = 0;
-
- rw_enter(&inject_lock, RW_READER);
-
- for (handler = list_head(&inject_handlers); handler != NULL;
- handler = list_next(&inject_handlers, handler)) {
-
- if (vd->vdev_guid == handler->zi_record.zi_guid) {
- if (handler->zi_record.zi_error == error) {
- /*
- * For a failed open, pretend like the device
- * has gone away.
- */
- if (error == ENXIO)
- vd->vdev_stat.vs_aux =
- VDEV_AUX_OPEN_FAILED;
- ret = error;
- break;
- }
- if (handler->zi_record.zi_error == ENXIO) {
- ret = EIO;
- break;
- }
- }
- }
-
- rw_exit(&inject_lock);
-
- return (ret);
-}
-
-/*
- * Create a new handler for the given record. We add it to the list, adding
- * a reference to the spa_t in the process. We increment zio_injection_enabled,
- * which is the switch to trigger all fault injection.
- */
-int
-zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
-{
- inject_handler_t *handler;
- int error;
- spa_t *spa;
-
- /*
- * If this is pool-wide metadata, make sure we unload the corresponding
- * spa_t, so that the next attempt to load it will trigger the fault.
- * We call spa_reset() to unload the pool appropriately.
- */
- if (flags & ZINJECT_UNLOAD_SPA)
- if ((error = spa_reset(name)) != 0)
- return (error);
-
- if (!(flags & ZINJECT_NULL)) {
- /*
- * spa_inject_ref() will add an injection reference, which will
- * prevent the pool from being removed from the namespace while
- * still allowing it to be unloaded.
- */
- if ((spa = spa_inject_addref(name)) == NULL)
- return (ENOENT);
-
- handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
-
- rw_enter(&inject_lock, RW_WRITER);
-
- *id = handler->zi_id = inject_next_id++;
- handler->zi_spa = spa;
- handler->zi_record = *record;
- list_insert_tail(&inject_handlers, handler);
- atomic_add_32(&zio_injection_enabled, 1);
-
- rw_exit(&inject_lock);
- }
-
- /*
- * Flush the ARC, so that any attempts to read this data will end up
- * going to the ZIO layer. Note that this is a little overkill, but
- * we don't have the necessary ARC interfaces to do anything else, and
- * fault injection isn't a performance critical path.
- */
- if (flags & ZINJECT_FLUSH_ARC)
- arc_flush();
-
- return (0);
-}
-
-/*
- * Returns the next record with an ID greater than that supplied to the
- * function. Used to iterate over all handlers in the system.
- */
-int
-zio_inject_list_next(int *id, char *name, size_t buflen,
- zinject_record_t *record)
-{
- inject_handler_t *handler;
- int ret;
-
- mutex_enter(&spa_namespace_lock);
- rw_enter(&inject_lock, RW_READER);
-
- for (handler = list_head(&inject_handlers); handler != NULL;
- handler = list_next(&inject_handlers, handler))
- if (handler->zi_id > *id)
- break;
-
- if (handler) {
- *record = handler->zi_record;
- *id = handler->zi_id;
- (void) strncpy(name, spa_name(handler->zi_spa), buflen);
- ret = 0;
- } else {
- ret = ENOENT;
- }
-
- rw_exit(&inject_lock);
- mutex_exit(&spa_namespace_lock);
-
- return (ret);
-}
-
-/*
- * Clear the fault handler with the given identifier, or return ENOENT if none
- * exists.
- */
-int
-zio_clear_fault(int id)
-{
- inject_handler_t *handler;
- int ret;
-
- rw_enter(&inject_lock, RW_WRITER);
-
- for (handler = list_head(&inject_handlers); handler != NULL;
- handler = list_next(&inject_handlers, handler))
- if (handler->zi_id == id)
- break;
-
- if (handler == NULL) {
- ret = ENOENT;
- } else {
- list_remove(&inject_handlers, handler);
- spa_inject_delref(handler->zi_spa);
- kmem_free(handler, sizeof (inject_handler_t));
- atomic_add_32(&zio_injection_enabled, -1);
- ret = 0;
- }
-
- rw_exit(&inject_lock);
-
- return (ret);
-}
-
-void
-zio_inject_init(void)
-{
- list_create(&inject_handlers, sizeof (inject_handler_t),
- offsetof(inject_handler_t, zi_link));
-}
-
-void
-zio_inject_fini(void)
-{
- list_destroy(&inject_handlers);
-}
diff --git a/sys/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/contrib/opensolaris/uts/common/fs/zfs/zvol.c
deleted file mode 100644
index fedae03..0000000
--- a/sys/contrib/opensolaris/uts/common/fs/zfs/zvol.c
+++ /dev/null
@@ -1,801 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
- * All rights reserved.
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ZFS volume emulation driver.
- *
- * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
- * Volumes are accessed through the symbolic links named:
- *
- * /dev/zvol/dsk/<pool_name>/<dataset_name>
- * /dev/zvol/rdsk/<pool_name>/<dataset_name>
- *
- * These links are created by the ZFS-specific devfsadm link generator.
- * Volumes are persistent through reboot. No user command needs to be
- * run before opening and using a device.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/kernel.h>
-#include <sys/errno.h>
-#include <sys/uio.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/kmem.h>
-#include <sys/conf.h>
-#include <sys/cmn_err.h>
-#include <sys/stat.h>
-#include <sys/zap.h>
-#include <sys/spa.h>
-#include <sys/zio.h>
-#include <sys/dsl_prop.h>
-#include <sys/dkio.h>
-#include <sys/byteorder.h>
-#include <sys/sunddi.h>
-#include <sys/dirent.h>
-#include <sys/policy.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zil.h>
-#include <sys/refcount.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_rlock.h>
-#include <geom/geom.h>
-
-#include "zfs_namecheck.h"
-
-struct g_class zfs_zvol_class = {
- .name = "ZFS::ZVOL",
- .version = G_VERSION,
-};
-
-DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
-
-#define ZVOL_OBJ 1ULL
-#define ZVOL_ZAP_OBJ 2ULL
-
-static uint32_t zvol_minors;
-
-/*
- * The in-core state of each volume.
- */
-typedef struct zvol_state {
- char zv_name[MAXPATHLEN]; /* pool/dd name */
- uint64_t zv_volsize; /* amount of space we advertise */
- uint64_t zv_volblocksize; /* volume block size */
- struct g_provider *zv_provider; /* GEOM provider */
- uint8_t zv_min_bs; /* minimum addressable block shift */
- uint8_t zv_readonly; /* hard readonly; like write-protect */
- objset_t *zv_objset; /* objset handle */
- uint32_t zv_mode; /* DS_MODE_* flags at open time */
- uint32_t zv_total_opens; /* total open count */
- zilog_t *zv_zilog; /* ZIL handle */
- uint64_t zv_txg_assign; /* txg to assign during ZIL replay */
- znode_t zv_znode; /* for range locking */
- int zv_state;
- struct bio_queue_head zv_queue;
- struct mtx zv_queue_mtx; /* zv_queue mutex */
-} zvol_state_t;
-
-/*
- * zvol maximum transfer in one DMU tx.
- */
-int zvol_maxphys = DMU_MAX_ACCESS/2;
-
-static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
-
-int
-zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
-{
- if (volsize == 0)
- return (EINVAL);
-
- if (volsize % blocksize != 0)
- return (EINVAL);
-
-#ifdef _ILP32
- if (volsize - 1 > SPEC_MAXOFFSET_T)
- return (EOVERFLOW);
-#endif
- return (0);
-}
-
-int
-zvol_check_volblocksize(uint64_t volblocksize)
-{
- if (volblocksize < SPA_MINBLOCKSIZE ||
- volblocksize > SPA_MAXBLOCKSIZE ||
- !ISP2(volblocksize))
- return (EDOM);
-
- return (0);
-}
-
-static void
-zvol_readonly_changed_cb(void *arg, uint64_t newval)
-{
- zvol_state_t *zv = arg;
-
- zv->zv_readonly = (uint8_t)newval;
-}
-
-int
-zvol_get_stats(objset_t *os, nvlist_t *nv)
-{
- int error;
- dmu_object_info_t doi;
- uint64_t val;
-
-
- error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
- if (error)
- return (error);
-
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
-
- error = dmu_object_info(os, ZVOL_OBJ, &doi);
-
- if (error == 0) {
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
- doi.doi_data_block_size);
- }
-
- return (error);
-}
-
-static zvol_state_t *
-zvol_minor_lookup(const char *name)
-{
- struct g_provider *pp;
- struct g_geom *gp;
-
- g_topology_assert();
-
- LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
- LIST_FOREACH(pp, &gp->provider, provider) {
- if (strcmp(pp->name + sizeof(ZVOL_DEV_DIR), name) == 0)
- return (pp->private);
- }
- }
-
- return (NULL);
-}
-
-static int
-zvol_access(struct g_provider *pp, int acr, int acw, int ace)
-{
- zvol_state_t *zv;
-
- g_topology_assert();
-
- zv = pp->private;
- if (zv == NULL) {
- if (acr <= 0 && acw <= 0 && ace <= 0)
- return (0);
- return (pp->error);
- }
-
- ASSERT(zv->zv_objset != NULL);
-
- if (acw > 0 && (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)))
- return (EROFS);
-
- zv->zv_total_opens += acr + acw + ace;
-
- return (0);
-}
-
-/*
- * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
- *
- * We store data in the log buffers if it's small enough.
- * Otherwise we will later flush the data out via dmu_sync().
- */
-ssize_t zvol_immediate_write_sz = 32768;
-
-static void
-zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len)
-{
- uint32_t blocksize = zv->zv_volblocksize;
- lr_write_t *lr;
-
- while (len) {
- ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
- itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr));
-
- itx->itx_wr_state =
- len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY;
- itx->itx_private = zv;
- lr = (lr_write_t *)&itx->itx_lr;
- lr->lr_foid = ZVOL_OBJ;
- lr->lr_offset = off;
- lr->lr_length = nbytes;
- lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t);
- BP_ZERO(&lr->lr_blkptr);
-
- (void) zil_itx_assign(zv->zv_zilog, itx, tx);
- len -= nbytes;
- off += nbytes;
- }
-}
-
-static void
-zvol_start(struct bio *bp)
-{
- zvol_state_t *zv;
-
- switch (bp->bio_cmd) {
- case BIO_READ:
- case BIO_WRITE:
- case BIO_FLUSH:
- zv = bp->bio_to->private;
- ASSERT(zv != NULL);
- mtx_lock(&zv->zv_queue_mtx);
- bioq_insert_tail(&zv->zv_queue, bp);
- wakeup_one(&zv->zv_queue);
- mtx_unlock(&zv->zv_queue_mtx);
- break;
- case BIO_DELETE:
- case BIO_GETATTR:
- default:
- g_io_deliver(bp, EOPNOTSUPP);
- break;
- }
-}
-
-static void
-zvol_serve_one(zvol_state_t *zv, struct bio *bp)
-{
- uint64_t off, volsize;
- size_t size, resid;
- char *addr;
- objset_t *os;
- rl_t *rl;
- int error = 0;
- boolean_t reading;
-
- off = bp->bio_offset;
- volsize = zv->zv_volsize;
-
- os = zv->zv_objset;
- ASSERT(os != NULL);
-
- addr = bp->bio_data;
- resid = bp->bio_length;
-
- error = 0;
-
- /*
- * There must be no buffer changes when doing a dmu_sync() because
- * we can't change the data whilst calculating the checksum.
- * A better approach than a per zvol rwlock would be to lock ranges.
- */
- reading = (bp->bio_cmd == BIO_READ);
- rl = zfs_range_lock(&zv->zv_znode, off, resid,
- reading ? RL_READER : RL_WRITER);
-
- while (resid != 0 && off < volsize) {
-
- size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
-
- if (size > volsize - off) /* don't write past the end */
- size = volsize - off;
-
- if (reading) {
- error = dmu_read(os, ZVOL_OBJ, off, size, addr);
- } else {
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
- zvol_log_write(zv, tx, off, size);
- dmu_tx_commit(tx);
- }
- }
- if (error)
- break;
- off += size;
- addr += size;
- resid -= size;
- }
- zfs_range_unlock(rl);
-
- bp->bio_completed = bp->bio_length - resid;
- if (bp->bio_completed < bp->bio_length)
- bp->bio_error = (off > volsize ? EINVAL : error);
-}
-
-static void
-zvol_worker(void *arg)
-{
- zvol_state_t *zv;
- struct bio *bp;
-
- zv = arg;
- for (;;) {
- mtx_lock(&zv->zv_queue_mtx);
- bp = bioq_takefirst(&zv->zv_queue);
- if (bp == NULL) {
- if (zv->zv_state == 1) {
- zv->zv_state = 2;
- wakeup(&zv->zv_state);
- mtx_unlock(&zv->zv_queue_mtx);
- kproc_exit(0);
- }
- msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
- "zvol:io", 0);
- continue;
- }
- mtx_unlock(&zv->zv_queue_mtx);
- switch (bp->bio_cmd) {
- case BIO_FLUSH:
- break;
- case BIO_READ:
- case BIO_WRITE:
- zvol_serve_one(zv, bp);
- break;
- }
-
- if (bp->bio_cmd != BIO_READ && !zil_disable)
- zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
-
- g_io_deliver(bp, bp->bio_error);
- }
-}
-
-void
-zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
-{
- zfs_create_data_t *zc = arg;
- int error;
- uint64_t volblocksize, volsize;
-
- VERIFY(nvlist_lookup_uint64(zc->zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
- if (nvlist_lookup_uint64(zc->zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
- volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
-
- /*
- * These properites must be removed from the list so the generic
- * property setting step won't apply to them.
- */
- VERIFY(nvlist_remove_all(zc->zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
- (void) nvlist_remove_all(zc->zc_props,
- zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
-
- error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
- DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
-
- error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
- DMU_OT_NONE, 0, tx);
- ASSERT(error == 0);
-
- error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
- ASSERT(error == 0);
-}
-
-/*
- * Replay a TX_WRITE ZIL transaction that didn't get committed
- * after a system failure
- */
-static int
-zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
-{
- objset_t *os = zv->zv_objset;
- char *data = (char *)(lr + 1); /* data follows lr_write_t */
- uint64_t off = lr->lr_offset;
- uint64_t len = lr->lr_length;
- dmu_tx_t *tx;
- int error;
-
- if (byteswap)
- byteswap_uint64_array(lr, sizeof (*lr));
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
- error = dmu_tx_assign(tx, zv->zv_txg_assign);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- dmu_write(os, ZVOL_OBJ, off, len, data, tx);
- dmu_tx_commit(tx);
- }
-
- return (error);
-}
-
-/* ARGSUSED */
-static int
-zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
-{
- return (ENOTSUP);
-}
-
-/*
- * Callback vectors for replaying records.
- * Only TX_WRITE is needed for zvol.
- */
-zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
- zvol_replay_err, /* 0 no such transaction type */
- zvol_replay_err, /* TX_CREATE */
- zvol_replay_err, /* TX_MKDIR */
- zvol_replay_err, /* TX_MKXATTR */
- zvol_replay_err, /* TX_SYMLINK */
- zvol_replay_err, /* TX_REMOVE */
- zvol_replay_err, /* TX_RMDIR */
- zvol_replay_err, /* TX_LINK */
- zvol_replay_err, /* TX_RENAME */
- zvol_replay_write, /* TX_WRITE */
- zvol_replay_err, /* TX_TRUNCATE */
- zvol_replay_err, /* TX_SETATTR */
- zvol_replay_err, /* TX_ACL */
-};
-
-/*
- * Create a minor node for the specified volume.
- */
-int
-zvol_create_minor(const char *name, dev_t dev)
-{
- struct g_provider *pp;
- struct g_geom *gp;
- zvol_state_t *zv;
- objset_t *os;
- dmu_object_info_t doi;
- uint64_t volsize;
- int ds_mode = DS_MODE_PRIMARY;
- int error;
-
- DROP_GIANT();
- g_topology_lock();
-
- if ((zv = zvol_minor_lookup(name)) != NULL) {
- error = EEXIST;
- goto end;
- }
-
- if (strchr(name, '@') != 0)
- ds_mode |= DS_MODE_READONLY;
-
- error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);
- if (error)
- goto end;
-
- g_topology_unlock();
- PICKUP_GIANT();
- error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
- DROP_GIANT();
- g_topology_lock();
- if (error) {
- dmu_objset_close(os);
- goto end;
- }
-
- gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
- gp->start = zvol_start;
- gp->access = zvol_access;
- pp = g_new_providerf(gp, "%s/%s", ZVOL_DEV_DIR, name);
- pp->mediasize = volsize;
- pp->sectorsize = DEV_BSIZE;
-
- zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
- (void) strcpy(zv->zv_name, name);
- zv->zv_min_bs = DEV_BSHIFT;
- zv->zv_provider = pp;
- zv->zv_volsize = pp->mediasize;
- zv->zv_objset = os;
- zv->zv_mode = ds_mode;
- zv->zv_zilog = zil_open(os, zvol_get_data);
- mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
- avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
- sizeof (rl_t), offsetof(rl_t, r_node));
-
-
- /* get and cache the blocksize */
- error = dmu_object_info(os, ZVOL_OBJ, &doi);
- ASSERT(error == 0);
- zv->zv_volblocksize = doi.doi_data_block_size;
-
- zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector);
-
- /* XXX this should handle the possible i/o error */
- VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
- "readonly", zvol_readonly_changed_cb, zv) == 0);
-
- pp->private = zv;
- g_error_provider(pp, 0);
-
- bioq_init(&zv->zv_queue);
- mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);
- zv->zv_state = 0;
- kproc_create(zvol_worker, zv, NULL, 0, 0, "zvol:worker %s", pp->name);
-
- zvol_minors++;
-end:
- g_topology_unlock();
- PICKUP_GIANT();
-
- return (error);
-}
-
-/*
- * Remove minor node for the specified volume.
- */
-int
-zvol_remove_minor(const char *name)
-{
- struct g_provider *pp;
- zvol_state_t *zv;
- int error = 0;
-
- DROP_GIANT();
- g_topology_lock();
-
- if ((zv = zvol_minor_lookup(name)) == NULL) {
- error = ENXIO;
- goto end;
- }
-
- if (zv->zv_total_opens != 0) {
- error = EBUSY;
- goto end;
- }
-
- VERIFY(dsl_prop_unregister(dmu_objset_ds(zv->zv_objset),
- "readonly", zvol_readonly_changed_cb, zv) == 0);
-
- mtx_lock(&zv->zv_queue_mtx);
- zv->zv_state = 1;
- wakeup_one(&zv->zv_queue);
- while (zv->zv_state != 2)
- msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
- mtx_unlock(&zv->zv_queue_mtx);
- mtx_destroy(&zv->zv_queue_mtx);
-
- pp = zv->zv_provider;
- pp->private = NULL;
- g_wither_geom(pp->geom, ENXIO);
-
- zil_close(zv->zv_zilog);
- zv->zv_zilog = NULL;
- dmu_objset_close(zv->zv_objset);
- zv->zv_objset = NULL;
- avl_destroy(&zv->zv_znode.z_range_avl);
- mutex_destroy(&zv->zv_znode.z_range_lock);
-
- kmem_free(zv, sizeof(*zv));
-
- zvol_minors--;
-end:
- g_topology_unlock();
- PICKUP_GIANT();
-
- return (error);
-}
-
-int
-zvol_set_volsize(const char *name, dev_t dev, uint64_t volsize)
-{
- zvol_state_t *zv;
- dmu_tx_t *tx;
- int error;
- dmu_object_info_t doi;
-
- DROP_GIANT();
- g_topology_lock();
-
- if ((zv = zvol_minor_lookup(name)) == NULL) {
- error = ENXIO;
- goto end;
- }
-
- if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 ||
- (error = zvol_check_volsize(volsize,
- doi.doi_data_block_size)) != 0) {
- goto end;
- }
-
- if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) {
- error = EROFS;
- goto end;
- }
-
- tx = dmu_tx_create(zv->zv_objset);
- dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
- dmu_tx_hold_free(tx, ZVOL_OBJ, volsize, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- goto end;
- }
-
- error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1,
- &volsize, tx);
- if (error == 0) {
- error = dmu_free_range(zv->zv_objset, ZVOL_OBJ, volsize,
- DMU_OBJECT_END, tx);
- }
-
- dmu_tx_commit(tx);
-
- if (error == 0) {
- zv->zv_volsize = volsize;
- zv->zv_provider->mediasize = volsize; /* XXX: Not supported. */
- }
-end:
- g_topology_unlock();
- PICKUP_GIANT();
-
- return (error);
-}
-
-int
-zvol_set_volblocksize(const char *name, uint64_t volblocksize)
-{
- zvol_state_t *zv;
- dmu_tx_t *tx;
- int error;
-
- DROP_GIANT();
- g_topology_lock();
-
- if ((zv = zvol_minor_lookup(name)) == NULL) {
- error = ENXIO;
- goto end;
- }
-
- if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) {
- error = EROFS;
- goto end;
- }
-
- tx = dmu_tx_create(zv->zv_objset);
- dmu_tx_hold_bonus(tx, ZVOL_OBJ);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ,
- volblocksize, 0, tx);
- if (error == ENOTSUP)
- error = EBUSY;
- dmu_tx_commit(tx);
- /* XXX: Not supported. */
-#if 0
- if (error == 0)
- zv->zv_provider->sectorsize = zc->zc_volblocksize;
-#endif
- }
-end:
- g_topology_unlock();
- PICKUP_GIANT();
-
- return (error);
-}
-
-void
-zvol_get_done(dmu_buf_t *db, void *vzgd)
-{
- zgd_t *zgd = (zgd_t *)vzgd;
- rl_t *rl = zgd->zgd_rl;
-
- dmu_buf_rele(db, vzgd);
- zfs_range_unlock(rl);
- zil_add_vdev(zgd->zgd_zilog, DVA_GET_VDEV(BP_IDENTITY(zgd->zgd_bp)));
- kmem_free(zgd, sizeof (zgd_t));
-}
-
-/*
- * Get data to generate a TX_WRITE intent log record.
- */
-static int
-zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
-{
- zvol_state_t *zv = arg;
- objset_t *os = zv->zv_objset;
- dmu_buf_t *db;
- rl_t *rl;
- zgd_t *zgd;
- uint64_t boff; /* block starting offset */
- int dlen = lr->lr_length; /* length of user data */
- int error;
-
- ASSERT(zio);
- ASSERT(dlen != 0);
-
- /*
- * Write records come in two flavors: immediate and indirect.
- * For small writes it's cheaper to store the data with the
- * log record (immediate); for large writes it's cheaper to
- * sync the data and get a pointer to it (indirect) so that
- * we don't have to write the data twice.
- */
- if (buf != NULL) /* immediate write */
- return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf));
-
- zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
- zgd->zgd_zilog = zv->zv_zilog;
- zgd->zgd_bp = &lr->lr_blkptr;
-
- /*
- * Lock the range of the block to ensure that when the data is
- * written out and it's checksum is being calculated that no other
- * thread can change the block.
- */
- boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t);
- rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize,
- RL_READER);
- zgd->zgd_rl = rl;
-
- VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db));
- error = dmu_sync(zio, db, &lr->lr_blkptr,
- lr->lr_common.lrc_txg, zvol_get_done, zgd);
- if (error == 0)
- zil_add_vdev(zv->zv_zilog,
- DVA_GET_VDEV(BP_IDENTITY(&lr->lr_blkptr)));
- /*
- * If we get EINPROGRESS, then we need to wait for a
- * write IO initiated by dmu_sync() to complete before
- * we can release this dbuf. We will finish everything
- * up in the zvol_get_done() callback.
- */
- if (error == EINPROGRESS)
- return (0);
- dmu_buf_rele(db, zgd);
- zfs_range_unlock(rl);
- kmem_free(zgd, sizeof (zgd_t));
- return (error);
-}
-
-int
-zvol_busy(void)
-{
- return (zvol_minors != 0);
-}
-
-void
-zvol_init(void)
-{
- ZFS_LOG(1, "ZVOL Initialized.");
-}
-
-void
-zvol_fini(void)
-{
- ZFS_LOG(1, "ZVOL Deinitialized.");
-}
diff --git a/sys/contrib/opensolaris/uts/common/os/callb.c b/sys/contrib/opensolaris/uts/common/os/callb.c
deleted file mode 100644
index c6e357e..0000000
--- a/sys/contrib/opensolaris/uts/common/os/callb.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/mutex.h>
-#include <sys/condvar.h>
-#include <sys/callb.h>
-#include <sys/kmem.h>
-#include <sys/cmn_err.h>
-#include <sys/debug.h>
-#include <sys/kobj.h>
-#include <sys/systm.h> /* for delay() */
-#include <sys/taskq.h> /* For TASKQ_NAMELEN */
-#include <sys/kernel.h>
-
-#define CB_MAXNAME TASKQ_NAMELEN
-
-/*
- * The callb mechanism provides generic event scheduling/echoing.
- * A callb function is registered and called on behalf of the event.
- */
-typedef struct callb {
- struct callb *c_next; /* next in class or on freelist */
- kthread_id_t c_thread; /* ptr to caller's thread struct */
- char c_flag; /* info about the callb state */
- uchar_t c_class; /* this callb's class */
- kcondvar_t c_done_cv; /* signal callb completion */
- boolean_t (*c_func)(); /* cb function: returns true if ok */
- void *c_arg; /* arg to c_func */
- char c_name[CB_MAXNAME+1]; /* debug:max func name length */
-} callb_t;
-
-/*
- * callb c_flag bitmap definitions
- */
-#define CALLB_FREE 0x0
-#define CALLB_TAKEN 0x1
-#define CALLB_EXECUTING 0x2
-
-/*
- * Basic structure for a callb table.
- * All callbs are organized into different class groups described
- * by ct_class array.
- * The callbs within a class are single-linked and normally run by a
- * serial execution.
- */
-typedef struct callb_table {
- kmutex_t ct_lock; /* protect all callb states */
- callb_t *ct_freelist; /* free callb structures */
- int ct_busy; /* != 0 prevents additions */
- kcondvar_t ct_busy_cv; /* to wait for not busy */
- int ct_ncallb; /* num of callbs allocated */
- callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */
-} callb_table_t;
-
-int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC;
-
-static callb_id_t callb_add_common(boolean_t (*)(void *, int),
- void *, int, char *, kthread_id_t);
-
-static callb_table_t callb_table; /* system level callback table */
-static callb_table_t *ct = &callb_table;
-static kmutex_t callb_safe_mutex;
-callb_cpr_t callb_cprinfo_safe = {
- &callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, 0, 0 };
-
-/*
- * Init all callb tables in the system.
- */
-void
-callb_init(void *dummy __unused)
-{
- callb_table.ct_busy = 0; /* mark table open for additions */
- mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL);
-}
-
-void
-callb_fini(void *dummy __unused)
-{
- callb_t *cp;
-
- mutex_enter(&ct->ct_lock);
- while ((cp = ct->ct_freelist) != NULL) {
- ct->ct_freelist = cp->c_next;
- ct->ct_ncallb--;
- kmem_free(cp, sizeof (callb_t));
- }
- ASSERT(ct->ct_ncallb == 0);
- mutex_exit(&ct->ct_lock);
- mutex_destroy(&callb_safe_mutex);
- mutex_destroy(&callb_table.ct_lock);
-}
-
-/*
- * callout_add() is called to register func() be called later.
- */
-static callb_id_t
-callb_add_common(boolean_t (*func)(void *arg, int code),
- void *arg, int class, char *name, kthread_id_t t)
-{
- callb_t *cp;
-
- ASSERT(class < NCBCLASS);
-
- mutex_enter(&ct->ct_lock);
- while (ct->ct_busy)
- cv_wait(&ct->ct_busy_cv, &ct->ct_lock);
- if ((cp = ct->ct_freelist) == NULL) {
- ct->ct_ncallb++;
- cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP);
- }
- ct->ct_freelist = cp->c_next;
- cp->c_thread = t;
- cp->c_func = func;
- cp->c_arg = arg;
- cp->c_class = (uchar_t)class;
- cp->c_flag |= CALLB_TAKEN;
-#ifdef DEBUG
- if (strlen(name) > CB_MAXNAME)
- cmn_err(CE_WARN, "callb_add: name of callback function '%s' "
- "too long -- truncated to %d chars",
- name, CB_MAXNAME);
-#endif
- (void) strncpy(cp->c_name, name, CB_MAXNAME);
- cp->c_name[CB_MAXNAME] = '\0';
-
- /*
- * Insert the new callb at the head of its class list.
- */
- cp->c_next = ct->ct_first_cb[class];
- ct->ct_first_cb[class] = cp;
-
- mutex_exit(&ct->ct_lock);
- return ((callb_id_t)cp);
-}
-
-/*
- * The default function to add an entry to the callback table. Since
- * it uses curthread as the thread identifier to store in the table,
- * it should be used for the normal case of a thread which is calling
- * to add ITSELF to the table.
- */
-callb_id_t
-callb_add(boolean_t (*func)(void *arg, int code),
- void *arg, int class, char *name)
-{
- return (callb_add_common(func, arg, class, name, curthread));
-}
-
-/*
- * A special version of callb_add() above for use by threads which
- * might be adding an entry to the table on behalf of some other
- * thread (for example, one which is constructed but not yet running).
- * In this version the thread id is an argument.
- */
-callb_id_t
-callb_add_thread(boolean_t (*func)(void *arg, int code),
- void *arg, int class, char *name, kthread_id_t t)
-{
- return (callb_add_common(func, arg, class, name, t));
-}
-
-/*
- * callout_delete() is called to remove an entry identified by id
- * that was originally placed there by a call to callout_add().
- * return -1 if fail to delete a callb entry otherwise return 0.
- */
-int
-callb_delete(callb_id_t id)
-{
- callb_t **pp;
- callb_t *me = (callb_t *)id;
-
- mutex_enter(&ct->ct_lock);
-
- for (;;) {
- pp = &ct->ct_first_cb[me->c_class];
- while (*pp != NULL && *pp != me)
- pp = &(*pp)->c_next;
-
-#ifdef DEBUG
- if (*pp != me) {
- cmn_err(CE_WARN, "callb delete bogus entry 0x%p",
- (void *)me);
- mutex_exit(&ct->ct_lock);
- return (-1);
- }
-#endif /* DEBUG */
-
- /*
- * It is not allowed to delete a callb in the middle of
- * executing otherwise, the callb_execute() will be confused.
- */
- if (!(me->c_flag & CALLB_EXECUTING))
- break;
-
- cv_wait(&me->c_done_cv, &ct->ct_lock);
- }
- /* relink the class list */
- *pp = me->c_next;
-
- /* clean up myself and return the free callb to the head of freelist */
- me->c_flag = CALLB_FREE;
- me->c_next = ct->ct_freelist;
- ct->ct_freelist = me;
-
- mutex_exit(&ct->ct_lock);
- return (0);
-}
-
-/*
- * class: indicates to execute all callbs in the same class;
- * code: optional argument for the callb functions.
- * return: = 0: success
- * != 0: ptr to string supplied when callback was registered
- */
-void *
-callb_execute_class(int class, int code)
-{
- callb_t *cp;
- void *ret = NULL;
-
- ASSERT(class < NCBCLASS);
-
- mutex_enter(&ct->ct_lock);
-
- for (cp = ct->ct_first_cb[class];
- cp != NULL && ret == 0; cp = cp->c_next) {
- while (cp->c_flag & CALLB_EXECUTING)
- cv_wait(&cp->c_done_cv, &ct->ct_lock);
- /*
- * cont if the callb is deleted while we're sleeping
- */
- if (cp->c_flag == CALLB_FREE)
- continue;
- cp->c_flag |= CALLB_EXECUTING;
-
-#ifdef CALLB_DEBUG
- printf("callb_execute: name=%s func=%p arg=%p\n",
- cp->c_name, (void *)cp->c_func, (void *)cp->c_arg);
-#endif /* CALLB_DEBUG */
-
- mutex_exit(&ct->ct_lock);
- /* If callback function fails, pass back client's name */
- if (!(*cp->c_func)(cp->c_arg, code))
- ret = cp->c_name;
- mutex_enter(&ct->ct_lock);
-
- cp->c_flag &= ~CALLB_EXECUTING;
- cv_broadcast(&cp->c_done_cv);
- }
- mutex_exit(&ct->ct_lock);
- return (ret);
-}
-
-/*
- * callers make sure no recursive entries to this func.
- * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure.
- *
- * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we
- * use a cv_timedwait() in case the kernel thread is blocked.
- *
- * Note that this is a generic callback handler for daemon CPR and
- * should NOT be changed to accommodate any specific requirement in a daemon.
- * Individual daemons that require changes to the handler shall write
- * callback routines in their own daemon modules.
- */
-boolean_t
-callb_generic_cpr(void *arg, int code)
-{
- callb_cpr_t *cp = (callb_cpr_t *)arg;
- clock_t ret = 0; /* assume success */
-
- mutex_enter(cp->cc_lockp);
-
- switch (code) {
- case CB_CODE_CPR_CHKPT:
- cp->cc_events |= CALLB_CPR_START;
- while (!(cp->cc_events & CALLB_CPR_SAFE))
- /* cv_timedwait() returns -1 if it times out. */
- if ((ret = cv_timedwait(&cp->cc_callb_cv,
- cp->cc_lockp,
- callb_timeout_sec * hz)) == -1)
- break;
- break;
-
- case CB_CODE_CPR_RESUME:
- cp->cc_events &= ~CALLB_CPR_START;
- cv_signal(&cp->cc_stop_cv);
- break;
- }
- mutex_exit(cp->cc_lockp);
- return (ret != -1);
-}
-
-/*
- * The generic callback function associated with kernel threads which
- * are always considered safe.
- */
-/* ARGSUSED */
-boolean_t
-callb_generic_cpr_safe(void *arg, int code)
-{
- return (B_TRUE);
-}
-/*
- * Prevent additions to callback table.
- */
-void
-callb_lock_table(void)
-{
- mutex_enter(&ct->ct_lock);
- ASSERT(ct->ct_busy == 0);
- ct->ct_busy = 1;
- mutex_exit(&ct->ct_lock);
-}
-
-/*
- * Allow additions to callback table.
- */
-void
-callb_unlock_table(void)
-{
- mutex_enter(&ct->ct_lock);
- ASSERT(ct->ct_busy != 0);
- ct->ct_busy = 0;
- cv_broadcast(&ct->ct_busy_cv);
- mutex_exit(&ct->ct_lock);
-}
-
-SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL);
-SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL);
diff --git a/sys/contrib/opensolaris/uts/common/os/list.c b/sys/contrib/opensolaris/uts/common/os/list.c
deleted file mode 100644
index f9b6fcb..0000000
--- a/sys/contrib/opensolaris/uts/common/os/list.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Generic doubly-linked list implementation
- */
-
-#include <sys/list.h>
-#include <sys/list_impl.h>
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <sys/debug.h>
-
-#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset))
-#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset))
-#define list_empty(a) ((a)->list_head.list_next == &(a)->list_head)
-
-#define list_insert_after_node(list, node, object) { \
- list_node_t *lnew = list_d2l(list, object); \
- lnew->list_prev = node; \
- lnew->list_next = node->list_next; \
- node->list_next->list_prev = lnew; \
- node->list_next = lnew; \
-}
-
-#define list_insert_before_node(list, node, object) { \
- list_node_t *lnew = list_d2l(list, object); \
- lnew->list_next = node; \
- lnew->list_prev = node->list_prev; \
- node->list_prev->list_next = lnew; \
- node->list_prev = lnew; \
-}
-
-void
-list_create(list_t *list, size_t size, size_t offset)
-{
- ASSERT(list);
- ASSERT(size > 0);
- ASSERT(size >= offset + sizeof (list_node_t));
-
- list->list_size = size;
- list->list_offset = offset;
- list->list_head.list_next = list->list_head.list_prev =
- &list->list_head;
-}
-
-void
-list_destroy(list_t *list)
-{
- list_node_t *node = &list->list_head;
-
- ASSERT(list);
- ASSERT(list->list_head.list_next == node);
- ASSERT(list->list_head.list_prev == node);
-
- node->list_next = node->list_prev = NULL;
-}
-
-void
-list_insert_after(list_t *list, void *object, void *nobject)
-{
- list_node_t *lold = list_d2l(list, object);
- list_insert_after_node(list, lold, nobject);
-}
-
-void
-list_insert_before(list_t *list, void *object, void *nobject)
-{
- list_node_t *lold = list_d2l(list, object);
- list_insert_before_node(list, lold, nobject)
-}
-
-void
-list_insert_head(list_t *list, void *object)
-{
- list_node_t *lold = &list->list_head;
- list_insert_after_node(list, lold, object);
-}
-
-void
-list_insert_tail(list_t *list, void *object)
-{
- list_node_t *lold = &list->list_head;
- list_insert_before_node(list, lold, object);
-}
-
-void
-list_remove(list_t *list, void *object)
-{
- list_node_t *lold = list_d2l(list, object);
- ASSERT(!list_empty(list));
- lold->list_prev->list_next = lold->list_next;
- lold->list_next->list_prev = lold->list_prev;
- lold->list_next = lold->list_prev = NULL;
-}
-
-void *
-list_head(list_t *list)
-{
- if (list_empty(list))
- return (NULL);
- return (list_object(list, list->list_head.list_next));
-}
-
-void *
-list_tail(list_t *list)
-{
- if (list_empty(list))
- return (NULL);
- return (list_object(list, list->list_head.list_prev));
-}
-
-void *
-list_next(list_t *list, void *object)
-{
- list_node_t *node = list_d2l(list, object);
-
- if (node->list_next != &list->list_head)
- return (list_object(list, node->list_next));
-
- return (NULL);
-}
-
-void *
-list_prev(list_t *list, void *object)
-{
- list_node_t *node = list_d2l(list, object);
-
- if (node->list_prev != &list->list_head)
- return (list_object(list, node->list_prev));
-
- return (NULL);
-}
-
-/*
- * Insert src list after dst list. Empty src list thereafter.
- */
-void
-list_move_tail(list_t *dst, list_t *src)
-{
- list_node_t *dstnode = &dst->list_head;
- list_node_t *srcnode = &src->list_head;
-
- ASSERT(dst->list_size == src->list_size);
- ASSERT(dst->list_offset == src->list_offset);
-
- if (list_empty(src))
- return;
-
- dstnode->list_prev->list_next = srcnode->list_next;
- srcnode->list_next->list_prev = dstnode->list_prev;
- dstnode->list_prev = srcnode->list_prev;
- srcnode->list_prev->list_next = dstnode;
-
- /* empty src list */
- srcnode->list_next = srcnode->list_prev = srcnode;
-}
-
-int
-list_link_active(list_node_t *link)
-{
- return (link->list_next != NULL);
-}
-
-int
-list_is_empty(list_t *list)
-{
- return (list_empty(list));
-}
diff --git a/sys/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c b/sys/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c
deleted file mode 100644
index 3682853..0000000
--- a/sys/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/nvpair.h>
-
-static void *
-nv_alloc_sys(nv_alloc_t *nva, size_t size)
-{
- return (kmem_alloc(size, (int)(uintptr_t)nva->nva_arg));
-}
-
-/*ARGSUSED*/
-static void
-nv_free_sys(nv_alloc_t *nva, void *buf, size_t size)
-{
- kmem_free(buf, size);
-}
-
-static const nv_alloc_ops_t system_ops = {
- NULL, /* nv_ao_init() */
- NULL, /* nv_ao_fini() */
- nv_alloc_sys, /* nv_ao_alloc() */
- nv_free_sys, /* nv_ao_free() */
- NULL /* nv_ao_reset() */
-};
-
-nv_alloc_t nv_alloc_sleep_def = {
- &system_ops,
- (void *)KM_SLEEP
-};
-
-nv_alloc_t nv_alloc_nosleep_def = {
- &system_ops,
- (void *)KM_NOSLEEP
-};
-
-nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def;
-nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
diff --git a/sys/contrib/opensolaris/uts/common/os/taskq.c b/sys/contrib/opensolaris/uts/common/os/taskq.c
deleted file mode 100644
index 1558c1f..0000000
--- a/sys/contrib/opensolaris/uts/common/os/taskq.c
+++ /dev/null
@@ -1,1020 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Kernel task queues: general-purpose asynchronous task scheduling.
- *
- * A common problem in kernel programming is the need to schedule tasks
- * to be performed later, by another thread. There are several reasons
- * you may want or need to do this:
- *
- * (1) The task isn't time-critical, but your current code path is.
- *
- * (2) The task may require grabbing locks that you already hold.
- *
- * (3) The task may need to block (e.g. to wait for memory), but you
- * cannot block in your current context.
- *
- * (4) Your code path can't complete because of some condition, but you can't
- * sleep or fail, so you queue the task for later execution when condition
- * disappears.
- *
- * (5) You just want a simple way to launch multiple tasks in parallel.
- *
- * Task queues provide such a facility. In its simplest form (used when
- * performance is not a critical consideration) a task queue consists of a
- * single list of tasks, together with one or more threads to service the
- * list. There are some cases when this simple queue is not sufficient:
- *
- * (1) The task queues are very hot and there is a need to avoid data and lock
- * contention over global resources.
- *
- * (2) Some tasks may depend on other tasks to complete, so they can't be put in
- * the same list managed by the same thread.
- *
- * (3) Some tasks may block for a long time, and this should not block other
- * tasks in the queue.
- *
- * To provide useful service in such cases we define a "dynamic task queue"
- * which has an individual thread for each of the tasks. These threads are
- * dynamically created as they are needed and destroyed when they are not in
- * use. The API for managing task pools is the same as for managing task queues
- * with the exception of a taskq creation flag TASKQ_DYNAMIC which tells that
- * dynamic task pool behavior is desired.
- *
- * Dynamic task queues may also place tasks in the normal queue (called "backing
- * queue") when task pool runs out of resources. Users of task queues may
- * disallow such queued scheduling by specifying TQ_NOQUEUE in the dispatch
- * flags.
- *
- * The backing task queue is also used for scheduling internal tasks needed for
- * dynamic task queue maintenance.
- *
- * INTERFACES:
- *
- * taskq_t *taskq_create(name, nthreads, pri_t pri, minalloc, maxall, flags);
- *
- * Create a taskq with specified properties.
- * Possible 'flags':
- *
- * TASKQ_DYNAMIC: Create task pool for task management. If this flag is
- * specified, 'nthreads' specifies the maximum number of threads in
- * the task queue. Task execution order for dynamic task queues is
- * not predictable.
- *
- * If this flag is not specified (default case) a
- * single-list task queue is created with 'nthreads' threads
- * servicing it. Entries in this queue are managed by
- * taskq_ent_alloc() and taskq_ent_free() which try to keep the
- * task population between 'minalloc' and 'maxalloc', but the
- * latter limit is only advisory for TQ_SLEEP dispatches and the
- * former limit is only advisory for TQ_NOALLOC dispatches. If
- * TASKQ_PREPOPULATE is set in 'flags', the taskq will be
- * prepopulated with 'minalloc' task structures.
- *
- * Since non-DYNAMIC taskqs are queues, tasks are guaranteed to be
- * executed in the order they are scheduled if nthreads == 1.
- * If nthreads > 1, task execution order is not predictable.
- *
- * TASKQ_PREPOPULATE: Prepopulate task queue with threads.
- * Also prepopulate the task queue with 'minalloc' task structures.
- *
- * TASKQ_CPR_SAFE: This flag specifies that users of the task queue will
- * use their own protocol for handling CPR issues. This flag is not
- * supported for DYNAMIC task queues.
- *
- * The 'pri' field specifies the default priority for the threads that
- * service all scheduled tasks.
- *
- * void taskq_destroy(tap):
- *
- * Waits for any scheduled tasks to complete, then destroys the taskq.
- * Caller should guarantee that no new tasks are scheduled in the closing
- * taskq.
- *
- * taskqid_t taskq_dispatch(tq, func, arg, flags):
- *
- * Dispatches the task "func(arg)" to taskq. The 'flags' indicates whether
- * the caller is willing to block for memory. The function returns an
- * opaque value which is zero iff dispatch fails. If flags is TQ_NOSLEEP
- * or TQ_NOALLOC and the task can't be dispatched, taskq_dispatch() fails
- * and returns (taskqid_t)0.
- *
- * ASSUMES: func != NULL.
- *
- * Possible flags:
- * TQ_NOSLEEP: Do not wait for resources; may fail.
- *
- * TQ_NOALLOC: Do not allocate memory; may fail. May only be used with
- * non-dynamic task queues.
- *
- * TQ_NOQUEUE: Do not enqueue a task if it can't dispatch it due to
- * lack of available resources and fail. If this flag is not
- * set, and the task pool is exhausted, the task may be scheduled
- * in the backing queue. This flag may ONLY be used with dynamic
- * task queues.
- *
- * NOTE: This flag should always be used when a task queue is used
- * for tasks that may depend on each other for completion.
- * Enqueueing dependent tasks may create deadlocks.
- *
- * TQ_SLEEP: May block waiting for resources. May still fail for
- * dynamic task queues if TQ_NOQUEUE is also specified, otherwise
- * always succeed.
- *
- * NOTE: Dynamic task queues are much more likely to fail in
- * taskq_dispatch() (especially if TQ_NOQUEUE was specified), so it
- * is important to have backup strategies handling such failures.
- *
- * void taskq_wait(tq):
- *
- * Waits for all previously scheduled tasks to complete.
- *
- * NOTE: It does not stop any new task dispatches.
- * Do NOT call taskq_wait() from a task: it will cause deadlock.
- *
- * void taskq_suspend(tq)
- *
- * Suspend all task execution. Tasks already scheduled for a dynamic task
- * queue will still be executed, but all new scheduled tasks will be
- * suspended until taskq_resume() is called.
- *
- * int taskq_suspended(tq)
- *
- * Returns 1 if taskq is suspended and 0 otherwise. It is intended to
- * ASSERT that the task queue is suspended.
- *
- * void taskq_resume(tq)
- *
- * Resume task queue execution.
- *
- * int taskq_member(tq, thread)
- *
- * Returns 1 if 'thread' belongs to taskq 'tq' and 0 otherwise. The
- * intended use is to ASSERT that a given function is called in taskq
- * context only.
- *
- * system_taskq
- *
- * Global system-wide dynamic task queue for common uses. It may be used by
- * any subsystem that needs to schedule tasks and does not need to manage
- * its own task queues. It is initialized quite early during system boot.
- *
- * IMPLEMENTATION.
- *
- * This is schematic representation of the task queue structures.
- *
- * taskq:
- * +-------------+
- * |tq_lock | +---< taskq_ent_free()
- * +-------------+ |
- * |... | | tqent: tqent:
- * +-------------+ | +------------+ +------------+
- * | tq_freelist |-->| tqent_next |--> ... ->| tqent_next |
- * +-------------+ +------------+ +------------+
- * |... | | ... | | ... |
- * +-------------+ +------------+ +------------+
- * | tq_task | |
- * | | +-------------->taskq_ent_alloc()
- * +--------------------------------------------------------------------------+
- * | | | tqent tqent |
- * | +---------------------+ +--> +------------+ +--> +------------+ |
- * | | ... | | | func, arg | | | func, arg | |
- * +>+---------------------+ <---|-+ +------------+ <---|-+ +------------+ |
- * | tq_taskq.tqent_next | ----+ | | tqent_next | --->+ | | tqent_next |--+
- * +---------------------+ | +------------+ ^ | +------------+
- * +-| tq_task.tqent_prev | +--| tqent_prev | | +--| tqent_prev | ^
- * | +---------------------+ +------------+ | +------------+ |
- * | |... | | ... | | | ... | |
- * | +---------------------+ +------------+ | +------------+ |
- * | ^ | |
- * | | | |
- * +--------------------------------------+--------------+ TQ_APPEND() -+
- * | | |
- * |... | taskq_thread()-----+
- * +-------------+
- * | tq_buckets |--+-------> [ NULL ] (for regular task queues)
- * +-------------+ |
- * | DYNAMIC TASK QUEUES:
- * |
- * +-> taskq_bucket[nCPU] taskq_bucket_dispatch()
- * +-------------------+ ^
- * +--->| tqbucket_lock | |
- * | +-------------------+ +--------+ +--------+
- * | | tqbucket_freelist |-->| tqent |-->...| tqent | ^
- * | +-------------------+<--+--------+<--...+--------+ |
- * | | ... | | thread | | thread | |
- * | +-------------------+ +--------+ +--------+ |
- * | +-------------------+ |
- * taskq_dispatch()--+--->| tqbucket_lock | TQ_APPEND()------+
- * TQ_HASH() | +-------------------+ +--------+ +--------+
- * | | tqbucket_freelist |-->| tqent |-->...| tqent |
- * | +-------------------+<--+--------+<--...+--------+
- * | | ... | | thread | | thread |
- * | +-------------------+ +--------+ +--------+
- * +---> ...
- *
- *
- * Task queues use tq_task field to link new entry in the queue. The queue is a
- * circular doubly-linked list. Entries are put in the end of the list with
- * TQ_APPEND() and processed from the front of the list by taskq_thread() in
- * FIFO order. Task queue entries are cached in the free list managed by
- * taskq_ent_alloc() and taskq_ent_free() functions.
- *
- * All threads used by task queues mark t_taskq field of the thread to
- * point to the task queue.
- *
- * Dynamic Task Queues Implementation.
- *
- * For a dynamic task queues there is a 1-to-1 mapping between a thread and
- * taskq_ent_structure. Each entry is serviced by its own thread and each thread
- * is controlled by a single entry.
- *
- * Entries are distributed over a set of buckets. To avoid using modulo
- * arithmetics the number of buckets is 2^n and is determined as the nearest
- * power of two roundown of the number of CPUs in the system. Tunable
- * variable 'taskq_maxbuckets' limits the maximum number of buckets. Each entry
- * is attached to a bucket for its lifetime and can't migrate to other buckets.
- *
- * Entries that have scheduled tasks are not placed in any list. The dispatch
- * function sets their "func" and "arg" fields and signals the corresponding
- * thread to execute the task. Once the thread executes the task it clears the
- * "func" field and places an entry on the bucket cache of free entries pointed
- * by "tqbucket_freelist" field. ALL entries on the free list should have "func"
- * field equal to NULL. The free list is a circular doubly-linked list identical
- * in structure to the tq_task list above, but entries are taken from it in LIFO
- * order - the last freed entry is the first to be allocated. The
- * taskq_bucket_dispatch() function gets the most recently used entry from the
- * free list, sets its "func" and "arg" fields and signals a worker thread.
- *
- * After executing each task a per-entry thread taskq_d_thread() places its
- * entry on the bucket free list and goes to a timed sleep. If it wakes up
- * without getting new task it removes the entry from the free list and destroys
- * itself. The thread sleep time is controlled by a tunable variable
- * `taskq_thread_timeout'.
- *
- * There is various statistics kept in the bucket which allows for later
- * analysis of taskq usage patterns. Also, a global copy of taskq creation and
- * death statistics is kept in the global taskq data structure. Since thread
- * creation and death happen rarely, updating such global data does not present
- * a performance problem.
- *
- * NOTE: Threads are not bound to any CPU and there is absolutely no association
- * between the bucket and actual thread CPU, so buckets are used only to
- * split resources and reduce resource contention. Having threads attached
- * to the CPU denoted by a bucket may reduce number of times the job
- * switches between CPUs.
- *
- * Current algorithm creates a thread whenever a bucket has no free
- * entries. It would be nice to know how many threads are in the running
- * state and don't create threads if all CPUs are busy with existing
- * tasks, but it is unclear how such strategy can be implemented.
- *
- * Currently buckets are created statically as an array attached to task
- * queue. On some system with nCPUs < max_ncpus it may waste system
- * memory. One solution may be allocation of buckets when they are first
- * touched, but it is not clear how useful it is.
- *
- * SUSPEND/RESUME implementation.
- *
- * Before executing a task taskq_thread() (executing non-dynamic task
- * queues) obtains taskq's thread lock as a reader. The taskq_suspend()
- * function gets the same lock as a writer blocking all non-dynamic task
- * execution. The taskq_resume() function releases the lock allowing
- * taskq_thread to continue execution.
- *
- * For dynamic task queues, each bucket is marked as TQBUCKET_SUSPEND by
- * taskq_suspend() function. After that taskq_bucket_dispatch() always
- * fails, so that taskq_dispatch() will either enqueue tasks for a
- * suspended backing queue or fail if TQ_NOQUEUE is specified in dispatch
- * flags.
- *
- * NOTE: taskq_suspend() does not immediately block any tasks already
- * scheduled for dynamic task queues. It only suspends new tasks
- * scheduled after taskq_suspend() was called.
- *
- * taskq_member() function works by comparing a thread t_taskq pointer with
- * the passed thread pointer.
- *
- * LOCKS and LOCK Hierarchy:
- *
- * There are two locks used in task queues.
- *
- * 1) Task queue structure has a lock, protecting global task queue state.
- *
- * 2) Each per-CPU bucket has a lock for bucket management.
- *
- * If both locks are needed, task queue lock should be taken only after bucket
- * lock.
- *
- * DEBUG FACILITIES.
- *
- * For DEBUG kernels it is possible to induce random failures to
- * taskq_dispatch() function when it is given TQ_NOSLEEP argument. The value of
- * taskq_dmtbf and taskq_smtbf tunables control the mean time between induced
- * failures for dynamic and static task queues respectively.
- *
- * Setting TASKQ_STATISTIC to 0 will disable per-bucket statistics.
- *
- * TUNABLES
- *
- * system_taskq_size - Size of the global system_taskq.
- * This value is multiplied by nCPUs to determine
- * actual size.
- * Default value: 1 (SYSTEM_TASKQ_SIZE)
- *
- * taskq_thread_timeout - Maximum idle time for taskq_d_thread()
- * Default value: 5 minutes
- *
- * taskq_maxbuckets - Maximum number of buckets in any task queue
- * Default value: 128
- *
- * taskq_search_depth - Maximum # of buckets searched for a free entry
- * Default value: 4
- *
- * taskq_dmtbf - Mean time between induced dispatch failures
- * for dynamic task queues.
- * Default value: UINT_MAX (no induced failures)
- *
- * taskq_smtbf - Mean time between induced dispatch failures
- * for static task queues.
- * Default value: UINT_MAX (no induced failures)
- *
- * CONDITIONAL compilation.
- *
- * TASKQ_STATISTIC - If set will enable bucket statistic (default).
- *
- */
-
-#include <sys/taskq_impl.h>
-#include <sys/proc.h>
-#include <sys/kmem.h>
-#include <sys/callb.h>
-#include <sys/systm.h>
-#include <sys/cmn_err.h>
-#include <sys/debug.h>
-#include <sys/sysmacros.h>
-#include <sys/sdt.h>
-#include <sys/mutex.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-
-static kmem_cache_t *taskq_ent_cache, *taskq_cache; /* created in taskq_init(), destroyed in taskq_fini() */
-
-/* Global system task queue for common use */
-taskq_t *system_taskq;
-
-/*
- * Maximum number of entries in global system taskq is
- * system_taskq_size * max_ncpus
- *
- * NOTE(review): system_taskq_init() passes this product in the position of
- * the nthreads argument of taskq_create_common().
- */
-#define SYSTEM_TASKQ_SIZE 1
-int system_taskq_size = SYSTEM_TASKQ_SIZE;
-
-/*
- * Dynamic task queue threads that don't get any work within
- * taskq_thread_timeout destroy themselves
- */
-#define TASKQ_THREAD_TIMEOUT (60 * 5)
-int taskq_thread_timeout = TASKQ_THREAD_TIMEOUT;
-
-#define TASKQ_MAXBUCKETS 128
-int taskq_maxbuckets = TASKQ_MAXBUCKETS; /* upper bound on tq_nbuckets, see taskq_create_common() */
-
-/*
- * When a bucket has no available entries, other buckets are tried.
- * taskq_search_depth parameter limits the number of buckets that we search
- * before failing. This is mostly useful in systems with many CPUs where we may
- * spend too much time scanning busy buckets.
- */
-#define TASKQ_SEARCH_DEPTH 4
-int taskq_search_depth = TASKQ_SEARCH_DEPTH;
-
-/*
- * Hashing function: mix various bits of x. May be pretty much anything.
- *
- * x is XORed with copies of itself shifted right by 11, 17 and 27 bits.
- * The last term has to be a shift: XORing in ((x) ^ 27) would cancel the
- * leading (x) term and drop the low-order bits of the input from the result.
- * Note that x is evaluated several times, so it must not have side effects.
- */
-#define	TQ_HASH(x) ((x) ^ ((x) >> 11) ^ ((x) >> 17) ^ ((x) >> 27))
-
-/*
- * We do not create any new threads when the system is low on memory and start
- * throttling memory allocations. The following macro tries to estimate such
- * condition.
- *
- * Evaluates to non-zero while freemem is greater than throttlefree; both
- * names are expected to come from outside this file.
- */
-#define ENOUGH_MEMORY() (freemem > throttlefree)
-
-/*
- * Static functions.
- *
- * Forward declarations of the file-local helpers defined further down.
- * The arguments of taskq_create_common() are, in order: name, instance,
- * nthreads, pri, minalloc, maxalloc, flags.
- */
-static taskq_t *taskq_create_common(const char *, int, int, pri_t, int,
- int, uint_t);
-static void taskq_thread(void *);
-static int taskq_constructor(void *, void *, int);
-static void taskq_destructor(void *, void *);
-static int taskq_ent_constructor(void *, void *, int);
-static void taskq_ent_destructor(void *, void *);
-static taskq_ent_t *taskq_ent_alloc(taskq_t *, int);
-static void taskq_ent_free(taskq_t *, taskq_ent_t *);
-
-/*
- * Collect per-bucket statistics when TASKQ_STATISTIC is non-zero.
- *
- * TQ_STAT(b, x) increments counter `x' inside the tqbucket_stat member of
- * the bucket pointed to by `b'.  With statistics disabled it expands to
- * nothing.  The bucket argument is parenthesized so that any pointer
- * expression may be passed.
- */
-#define	TASKQ_STATISTIC 1
-
-#if TASKQ_STATISTIC
-#define	TQ_STAT(b, x) ((b)->tqbucket_stat.x++)
-#else
-#define	TQ_STAT(b, x)
-#endif
-
-/*
- * Random fault injection.
- *
- * taskq_random holds the state of the pseudo-random sequence advanced by the
- * two macros below; taskq_dmtbf and taskq_smtbf are the divisors they use
- * for dynamic and static task queues respectively.
- */
-uint_t taskq_random;
-uint_t taskq_dmtbf = UINT_MAX; /* mean time between injected failures */
-uint_t taskq_smtbf = UINT_MAX; /* mean time between injected failures */
-
-/*
- * TQ_NOSLEEP dispatches on dynamic task queues are always allowed to fail.
- *
- * TQ_NOSLEEP dispatches on static task queues can't arbitrarily fail because
- * they could prepopulate the cache and make sure that they do not use more
- * than minalloc entries. So, fault injection in this case ensures that
- * either TASKQ_PREPOPULATE is not set or there are more entries allocated
- * than is specified by minalloc. TQ_NOALLOC dispatches are always allowed
- * to fail, but for simplicity we treat them identically to TQ_NOSLEEP
- * dispatches.
- *
- * Both macros expand to plain statements that contain a `return', so they
- * can only be used directly inside a function whose return type accepts the
- * returned value. The static variant releases tq->tq_lock before returning,
- * i.e. it expects to be invoked with that lock held. Each macro divides by
- * its mtbf tunable, so a tunable value of 0 would divide by zero. Without
- * DEBUG both macros expand to nothing.
- */
-#ifdef DEBUG
-#define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag) \
- taskq_random = (taskq_random * 2416 + 374441) % 1771875;\
- if ((flag & TQ_NOSLEEP) && \
- taskq_random < 1771875 / taskq_dmtbf) { \
- return (NULL); \
- }
-
-#define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag) \
- taskq_random = (taskq_random * 2416 + 374441) % 1771875;\
- if ((flag & (TQ_NOSLEEP | TQ_NOALLOC)) && \
- (!(tq->tq_flags & TASKQ_PREPOPULATE) || \
- (tq->tq_nalloc > tq->tq_minalloc)) && \
- (taskq_random < (1771875 / taskq_smtbf))) { \
- mutex_exit(&tq->tq_lock); \
- return ((taskqid_t)0); \
- }
-#else
-#define TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flag)
-#define TASKQ_D_RANDOM_DISPATCH_FAILURE(tq, flag)
-#endif
-
-#define IS_EMPTY(l) (((l).tqent_prev == (l).tqent_next) && \
- ((l).tqent_prev == &(l)))
-
-/*
- * Append `tqe' in the end of the doubly-linked list denoted by l.
- */
-#define TQ_APPEND(l, tqe) { \
- tqe->tqent_next = &l; \
- tqe->tqent_prev = l.tqent_prev; \
- tqe->tqent_next->tqent_prev = tqe; \
- tqe->tqent_prev->tqent_next = tqe; \
-}
-
-/*
- * Schedule a task specified by func and arg into the task queue entry tqe.
- */
-#define TQ_ENQUEUE(tq, tqe, func, arg) { \
- ASSERT(MUTEX_HELD(&tq->tq_lock)); \
- TQ_APPEND(tq->tq_task, tqe); \
- tqe->tqent_func = (func); \
- tqe->tqent_arg = (arg); \
- tq->tq_tasks++; \
- if (tq->tq_tasks - tq->tq_executed > tq->tq_maxtasks) \
- tq->tq_maxtasks = tq->tq_tasks - tq->tq_executed; \
- cv_signal(&tq->tq_dispatch_cv); \
- DTRACE_PROBE2(taskq__enqueue, taskq_t *, tq, taskq_ent_t *, tqe); \
-}
-
-/*
- * No-op task body.  It can be dispatched when only the side effects of a
- * dispatch are wanted, e.g. to prepopulate thread caches.
- */
-/*ARGSUSED*/
-void
-nulltask(void *unused)
-{
-	/* Intentionally empty. */
-}
-
-
-/*
- * kmem cache constructor for taskq_t objects (registered in taskq_init()).
- * Zeroes the object, initializes its lock, rwlock and condition variables,
- * and sets up tq_task as an empty circular list.  Always returns 0.
- */
-/*ARGSUSED*/
-static int
-taskq_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	taskq_t *q = buf;
-	taskq_ent_t *head;
-
-	bzero(q, sizeof (*q));
-
-	/* An empty list is a head entry that points at itself both ways. */
-	head = &q->tq_task;
-	head->tqent_prev = head;
-	head->tqent_next = head;
-
-	mutex_init(&q->tq_lock, NULL, MUTEX_DEFAULT, NULL);
-	rw_init(&q->tq_threadlock, NULL, RW_DEFAULT, NULL);
-	cv_init(&q->tq_wait_cv, NULL, CV_DEFAULT, NULL);
-	cv_init(&q->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
-
-	return (0);
-}
-
-/*
- * kmem cache destructor for taskq_t objects: tears down the synchronization
- * primitives set up by taskq_constructor().
- */
-/*ARGSUSED*/
-static void
-taskq_destructor(void *buf, void *cdrarg)
-{
-	taskq_t *q = buf;
-
-	cv_destroy(&q->tq_wait_cv);
-	cv_destroy(&q->tq_dispatch_cv);
-	rw_destroy(&q->tq_threadlock);
-	mutex_destroy(&q->tq_lock);
-}
-
-/*
- * kmem cache constructor for taskq_ent_t objects: initializes the entry's
- * condition variable and clears its thread pointer.  Always returns 0.
- */
-/*ARGSUSED*/
-static int
-taskq_ent_constructor(void *buf, void *cdrarg, int kmflags)
-{
-	taskq_ent_t *ent = buf;
-
-	cv_init(&ent->tqent_cv, NULL, CV_DEFAULT, NULL);
-	ent->tqent_thread = NULL;
-
-	return (0);
-}
-
-/*
- * kmem cache destructor for taskq_ent_t objects.  The entry must no longer
- * be bound to a thread (asserted); its condition variable is destroyed.
- */
-/*ARGSUSED*/
-static void
-taskq_ent_destructor(void *buf, void *cdrarg)
-{
-	taskq_ent_t *ent = buf;
-
-	ASSERT(ent->tqent_thread == NULL);
-	cv_destroy(&ent->tqent_cv);
-}
-
-/*
- * Create global system task queue.
- *
- * The queue is created through taskq_create_common() with instance 0,
- * system_taskq_size * max_ncpus as the thread count, priority minclsyspri,
- * minalloc 4, maxalloc 512 and the TASKQ_PREPOPULATE flag. TASKQ_DYNAMIC is
- * not passed, so the queue is not created as a dynamic one. The result is
- * stored in the global system_taskq.
- */
-void
-system_taskq_init(void)
-{
- system_taskq = taskq_create_common("system_taskq", 0,
- system_taskq_size * max_ncpus, minclsyspri, 4, 512,
- TASKQ_PREPOPULATE);
-}
-
-/*
- * Destroy the global system task queue created by system_taskq_init().
- * The system_taskq pointer itself is left untouched.
- */
-void
-system_taskq_fini(void)
-{
-	taskq_t *sysq = system_taskq;
-
-	taskq_destroy(sysq);
-}
-
-/*
- * Subsystem initialization hook (registered via SYSINIT): creates the kmem
- * caches for taskq_t and taskq_ent_t objects and then the global
- * system_taskq, which needs both caches.
- */
-static void
-taskq_init(void *dummy __unused)
-{
-	taskq_cache = kmem_cache_create("taskq_cache", sizeof (taskq_t),
-	    0, taskq_constructor, taskq_destructor, NULL, NULL, NULL, 0);
-	taskq_ent_cache = kmem_cache_create("taskq_ent_cache",
-	    sizeof (taskq_ent_t), 0, taskq_ent_constructor,
-	    taskq_ent_destructor, NULL, NULL, NULL, 0);
-
-	system_taskq_init();
-}
-
-/*
- * Subsystem teardown hook (registered via SYSUNINIT): destroys the global
- * system_taskq first and then releases both kmem caches.
- */
-static void
-taskq_fini(void *dummy __unused)
-{
-	system_taskq_fini();
-
-	kmem_cache_destroy(taskq_ent_cache);
-	kmem_cache_destroy(taskq_cache);
-}
-
-/*
- * taskq_ent_alloc()
- *
- * Allocates a new taskq_ent_t structure either from the free list or from the
- * cache. Returns NULL if it can't be allocated.
- *
- * flags:
- *   TQ_NOSLEEP - allocate with KM_NOSLEEP and fail instead of throttling
- *                once tq_nalloc has reached tq_maxalloc;
- *   TQ_NOALLOC - only the free list may be used; never allocate.
- *
- * Assumes: tq->tq_lock is held.  The lock is dropped and re-acquired around
- * the throttling delay and the cache allocation, so the caller must not rely
- * on queue state staying unchanged across this call.
- */
-static taskq_ent_t *
-taskq_ent_alloc(taskq_t *tq, int flags)
-{
-	int kmflags = (flags & TQ_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
-	taskq_ent_t *tqe;
-	int throttle;
-
-	ASSERT(MUTEX_HELD(&tq->tq_lock));
-
-	/*
-	 * TQ_NOALLOC allocations are allowed to use the freelist, even if
-	 * we are below tq_minalloc.
-	 */
-	if ((tqe = tq->tq_freelist) != NULL &&
-	    ((flags & TQ_NOALLOC) || tq->tq_nalloc >= tq->tq_minalloc)) {
-		tq->tq_freelist = tqe->tqent_next;
-		return (tqe);
-	}
-
-	if (flags & TQ_NOALLOC)
-		return (NULL);
-
-	/*
-	 * tq_nalloc is only modified with tq_lock held, so compare it with
-	 * tq_maxalloc before the lock is dropped.
-	 */
-	throttle = (tq->tq_nalloc >= tq->tq_maxalloc);
-	if (throttle && (kmflags & KM_NOSLEEP))
-		return (NULL);
-
-	mutex_exit(&tq->tq_lock);
-	if (throttle) {
-		/*
-		 * We don't want to exceed tq_maxalloc, but we can't
-		 * wait for other tasks to complete (and thus free up
-		 * task structures) without risking deadlock with
-		 * the caller.  So, we just delay for one second
-		 * to throttle the allocation rate.
-		 */
-		delay(hz);
-	}
-	tqe = kmem_cache_alloc(taskq_ent_cache, kmflags);
-	mutex_enter(&tq->tq_lock);
-	if (tqe != NULL)
-		tq->tq_nalloc++;
-
-	return (tqe);
-}
-
-/*
- * taskq_ent_free()
- *
- * Dispose of a taskq_ent_t.  While the queue holds no more than tq_minalloc
- * entries the structure is pushed onto the queue's free list; otherwise it
- * is handed back to the kmem cache and tq_nalloc is decremented.
- *
- * Assumes: tq->tq_lock is held.  It is dropped and re-acquired around the
- * call into the kmem cache.
- */
-static void
-taskq_ent_free(taskq_t *tq, taskq_ent_t *tqe)
-{
-	ASSERT(MUTEX_HELD(&tq->tq_lock));
-
-	if (tq->tq_nalloc > tq->tq_minalloc) {
-		/* Surplus entry: give it back to the cache. */
-		tq->tq_nalloc--;
-		mutex_exit(&tq->tq_lock);
-		kmem_cache_free(taskq_ent_cache, tqe);
-		mutex_enter(&tq->tq_lock);
-		return;
-	}
-
-	/* Keep the entry around for reuse. */
-	tqe->tqent_next = tq->tq_freelist;
-	tq->tq_freelist = tqe;
-}
-
-/*
- * Dispatch a task.
- *
- * Assumes: func != NULL, and tq is a non-dynamic task queue.  TQ_NOQUEUE
- * must not be set in flags because it only applies to dynamic queues.
- *
- * Returns: 0 if dispatch failed (no entry could be allocated, or a failure
- *	    was injected on a DEBUG kernel).
- *	    non-zero if task dispatched successfully.
- *	    The actual return value is the address of the taskq entry used
- *	    for the task, which is useful for debugging.
- */
-/* ARGSUSED */
-taskqid_t
-taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
-{
-	taskq_ent_t *ent;
-
-	ASSERT(tq != NULL);
-	ASSERT(func != NULL);
-	ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
-	ASSERT(!(flags & TQ_NOQUEUE));
-
-	mutex_enter(&tq->tq_lock);
-
-	/* May drop tq_lock and return 0 on DEBUG kernels. */
-	TASKQ_S_RANDOM_DISPATCH_FAILURE(tq, flags);
-
-	/* Queue the task on the backing list if an entry is available. */
-	ent = taskq_ent_alloc(tq, flags);
-	if (ent != NULL) {
-		TQ_ENQUEUE(tq, ent, func, arg);
-	}
-	mutex_exit(&tq->tq_lock);
-
-	return ((taskqid_t)ent);
-}
-
-/*
- * Block until the task list is empty and no worker is active.
- * Calling taskq_wait from a task will cause deadlock.
- */
-void
-taskq_wait(taskq_t *tq)
-{
-	mutex_enter(&tq->tq_lock);
-	for (;;) {
-		if (tq->tq_task.tqent_next == &tq->tq_task &&
-		    tq->tq_active == 0)
-			break;
-		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
-	}
-	mutex_exit(&tq->tq_lock);
-}
-
-/*
- * Suspend execution of tasks.
- *
- * Tasks in the queue part will be suspended immediately upon return from this
- * function. Pending tasks in the dynamic part will continue to execute, but all
- * new tasks will be suspended.
- *
- * Acquires tq_threadlock as a writer and returns with it still held; the
- * matching taskq_resume() releases it. Worker threads take the same lock as
- * a reader around each task (see taskq_thread()), so this call waits for
- * tasks that are currently running. The queue must not already be suspended
- * (asserted).
- */
-void
-taskq_suspend(taskq_t *tq)
-{
- rw_enter(&tq->tq_threadlock, RW_WRITER);
-
- /*
- * Mark task queue as being suspended. Needed for taskq_suspended().
- */
- mutex_enter(&tq->tq_lock);
- ASSERT(!(tq->tq_flags & TASKQ_SUSPENDED));
- tq->tq_flags |= TASKQ_SUSPENDED;
- mutex_exit(&tq->tq_lock);
-}
-
-/*
- * Report whether the queue is currently marked suspended.
- *
- * returns: 1 if tq is suspended, 0 otherwise.
- *
- * The flag word is read without taking tq_lock, so the answer is only a
- * snapshot.
- */
-int
-taskq_suspended(taskq_t *tq)
-{
-	return ((tq->tq_flags & TASKQ_SUSPENDED) ? 1 : 0);
-}
-
-/*
- * Resume taskq execution.
- *
- * Counterpart of taskq_suspend(): the caller must hold tq_threadlock as a
- * writer and the queue must be marked suspended (both asserted). Clears
- * TASKQ_SUSPENDED under tq_lock and then releases tq_threadlock.
- */
-void
-taskq_resume(taskq_t *tq)
-{
- ASSERT(RW_WRITE_HELD(&tq->tq_threadlock));
-
- mutex_enter(&tq->tq_lock);
- ASSERT(tq->tq_flags & TASKQ_SUSPENDED);
- tq->tq_flags &= ~TASKQ_SUSPENDED;
- mutex_exit(&tq->tq_lock);
-
- rw_exit(&tq->tq_threadlock);
-}
-
-/*
- * Worker thread for processing task queue.
- *
- * arg is the taskq_t this thread serves. The thread loops for as long as
- * TASKQ_ACTIVE is set in tq_flags:
- *
- * - With an empty task list it decrements tq_active (broadcasting on
- *   tq_wait_cv when the count reaches zero, which is what taskq_wait()
- *   sleeps on), waits on tq_dispatch_cv, increments tq_active again and
- *   re-evaluates the loop condition.
- * - Otherwise it unlinks the first entry, drops tq_lock, and calls the
- *   task function while holding tq_threadlock as a reader (the lock that
- *   taskq_suspend() takes as a writer). The elapsed gethrtime() interval is
- *   added to tq_totaltime, tq_executed is incremented and the entry is
- *   released through taskq_ent_free().
- *
- * On exit the thread decrements tq_nthreads and broadcasts on tq_wait_cv,
- * which taskq_destroy() waits on.
- *
- * NOTE(review): tq_lock is still held when CALLB_CPR_EXIT() is reached;
- * presumably that macro releases it - confirm against its definition.
- */
-static void
-taskq_thread(void *arg)
-{
- taskq_t *tq = arg;
- taskq_ent_t *tqe;
- callb_cpr_t cprinfo;
- hrtime_t start, end;
-
- CALLB_CPR_INIT(&cprinfo, &tq->tq_lock, callb_generic_cpr, tq->tq_name);
-
- mutex_enter(&tq->tq_lock);
- while (tq->tq_flags & TASKQ_ACTIVE) {
- if ((tqe = tq->tq_task.tqent_next) == &tq->tq_task) { /* list empty: go idle */
- if (--tq->tq_active == 0)
- cv_broadcast(&tq->tq_wait_cv);
- if (tq->tq_flags & TASKQ_CPR_SAFE) {
- cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
- } else {
- CALLB_CPR_SAFE_BEGIN(&cprinfo);
- cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
- CALLB_CPR_SAFE_END(&cprinfo, &tq->tq_lock);
- }
- tq->tq_active++;
- continue;
- }
- tqe->tqent_prev->tqent_next = tqe->tqent_next; /* unlink tqe from the task list */
- tqe->tqent_next->tqent_prev = tqe->tqent_prev;
- mutex_exit(&tq->tq_lock);
-
- rw_enter(&tq->tq_threadlock, RW_READER); /* blocks while the queue is suspended */
- start = gethrtime();
- DTRACE_PROBE2(taskq__exec__start, taskq_t *, tq,
- taskq_ent_t *, tqe);
- tqe->tqent_func(tqe->tqent_arg);
- DTRACE_PROBE2(taskq__exec__end, taskq_t *, tq,
- taskq_ent_t *, tqe);
- end = gethrtime();
- rw_exit(&tq->tq_threadlock);
-
- mutex_enter(&tq->tq_lock);
- tq->tq_totaltime += end - start;
- tq->tq_executed++;
-
- taskq_ent_free(tq, tqe);
- }
- tq->tq_nthreads--;
- cv_broadcast(&tq->tq_wait_cv);
- ASSERT(!(tq->tq_flags & TASKQ_CPR_SAFE));
- CALLB_CPR_EXIT(&cprinfo);
- thread_exit();
-}
-
-/*
- * Public taskq creation entry point.  May sleep for memory.
- *
- * Instance numbers are always generated automatically (instance 0 plus
- * TASKQ_NOINSTANCE) to avoid kstat name space collisions; everything else
- * is passed through unchanged to taskq_create_common().
- */
-
-taskq_t *
-taskq_create(const char *name, int nthreads, pri_t pri, int minalloc,
-    int maxalloc, uint_t flags)
-{
-	uint_t create_flags = flags | TASKQ_NOINSTANCE;
-
-	return (taskq_create_common(name, 0, nthreads, pri, minalloc,
-	    maxalloc, create_flags));
-}
-
-/*
- * Common creation path behind taskq_create() and system_taskq_init().
- *
- *   name      - queue name; copied, truncated to TASKQ_NAMELEN and
- *               canonicalized into a C identifier.
- *   instance  - must be 0 (asserted).
- *   nthreads  - number of worker threads to create.
- *   pri       - priority handed to thread_create().
- *   minalloc  - number of entries kept on the free list; with
- *               TASKQ_PREPOPULATE that many entries are allocated up front.
- *   maxalloc  - allocation count above which taskq_ent_alloc() throttles.
- *   flags     - only TASKQ_PREPOPULATE and TASKQ_NOINSTANCE are accepted
- *               (asserted).
- *
- * Returns the new queue.  The taskq_t comes from taskq_cache with KM_SLEEP.
- */
-static taskq_t *
-taskq_create_common(const char *name, int instance, int nthreads, pri_t pri,
-    int minalloc, int maxalloc, uint_t flags)
-{
-	taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_SLEEP);
-	uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus);
-	uint_t bsize;	/* # of buckets - always power of 2 */
-
-	ASSERT(instance == 0);
-	/*
-	 * Reject any flag other than TASKQ_PREPOPULATE and TASKQ_NOINSTANCE.
-	 * Written as a mask test because `flags == A | B' parses as
-	 * `(flags == A) | B' and can never fail; an exact-equality test would
-	 * in turn reject system_taskq_init(), which passes TASKQ_PREPOPULATE
-	 * alone.
-	 */
-	ASSERT((flags & ~(TASKQ_PREPOPULATE | TASKQ_NOINSTANCE)) == 0);
-
-	/*
-	 * TASKQ_CPR_SAFE and TASKQ_DYNAMIC flags are mutually exclusive.
-	 */
-	ASSERT((flags & (TASKQ_DYNAMIC | TASKQ_CPR_SAFE)) !=
-	    ((TASKQ_DYNAMIC | TASKQ_CPR_SAFE)));
-
-	ASSERT(tq->tq_buckets == NULL);
-
-	bsize = 1 << (highbit(ncpus) - 1);
-	ASSERT(bsize >= 1);
-	bsize = MIN(bsize, taskq_maxbuckets);
-
-	tq->tq_maxsize = nthreads;
-
-	(void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1);
-	tq->tq_name[TASKQ_NAMELEN] = '\0';
-	/* Make sure the name conforms to the rules for C identifiers */
-	strident_canon(tq->tq_name, TASKQ_NAMELEN);
-
-	tq->tq_flags = flags | TASKQ_ACTIVE;
-	tq->tq_active = nthreads;
-	tq->tq_nthreads = nthreads;
-	tq->tq_minalloc = minalloc;
-	tq->tq_maxalloc = maxalloc;
-	tq->tq_nbuckets = bsize;
-	tq->tq_pri = pri;
-
-	if (flags & TASKQ_PREPOPULATE) {
-		/*
-		 * Each alloc/free pair leaves one more entry on the free
-		 * list while tq_nalloc is still at or below tq_minalloc.
-		 */
-		mutex_enter(&tq->tq_lock);
-		while (minalloc-- > 0)
-			taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP));
-		mutex_exit(&tq->tq_lock);
-	}
-
-	if (nthreads == 1) {
-		tq->tq_thread = thread_create(NULL, 0, taskq_thread, tq,
-		    0, NULL, TS_RUN, pri);
-	} else {
-		kthread_t **tpp = kmem_alloc(sizeof (kthread_t *) * nthreads,
-		    KM_SLEEP);
-
-		tq->tq_threadlist = tpp;
-
-		/*
-		 * The new threads start by taking tq_lock, so they cannot
-		 * proceed until the whole list has been filled in.
-		 */
-		mutex_enter(&tq->tq_lock);
-		while (nthreads-- > 0) {
-			*tpp = thread_create(NULL, 0, taskq_thread, tq,
-			    0, NULL, TS_RUN, pri);
-			tpp++;
-		}
-		mutex_exit(&tq->tq_lock);
-	}
-
-	return (tq);
-}
-
-/*
- * taskq_destroy().
- *
- * Assumes: by the time taskq_destroy is called no one will use this task queue
- * in any way and no one will try to dispatch entries in it.
- *
- * Sequence, as implemented below:
- * 1. taskq_wait() until the task list is empty and no worker is active.
- * 2. Free the thread pointer array (only allocated for more than one
- *    thread), clear TASKQ_ACTIVE, wake all workers via tq_dispatch_cv and
- *    wait on tq_wait_cv until tq_nthreads drops to zero.
- * 3. Set tq_minalloc to 0 and cycle entries through taskq_ent_alloc() /
- *    taskq_ent_free() until tq_nalloc is zero, so that cached entries go
- *    back to the kmem cache.
- * 4. Close and tear down every bucket if tq_buckets is set (dynamic queues
- *    only), then free the bucket array.
- * 5. Reset the statistics fields and return the taskq_t to taskq_cache.
- *
- * Must not be used on a TASKQ_CPR_SAFE queue (asserted).
- */
-void
-taskq_destroy(taskq_t *tq)
-{
- taskq_bucket_t *b = tq->tq_buckets;
- int bid = 0;
-
- ASSERT(! (tq->tq_flags & TASKQ_CPR_SAFE));
-
- /*
- * Wait for any pending entries to complete.
- */
- taskq_wait(tq);
-
- mutex_enter(&tq->tq_lock);
- ASSERT((tq->tq_task.tqent_next == &tq->tq_task) &&
- (tq->tq_active == 0));
-
- if ((tq->tq_nthreads > 1) && (tq->tq_threadlist != NULL))
- kmem_free(tq->tq_threadlist, sizeof (kthread_t *) *
- tq->tq_nthreads);
-
- tq->tq_flags &= ~TASKQ_ACTIVE; /* makes the taskq_thread() loops terminate */
- cv_broadcast(&tq->tq_dispatch_cv);
- while (tq->tq_nthreads != 0)
- cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
-
- tq->tq_minalloc = 0; /* from now on taskq_ent_free() returns entries to the cache */
- while (tq->tq_nalloc != 0)
- taskq_ent_free(tq, taskq_ent_alloc(tq, TQ_SLEEP));
-
- mutex_exit(&tq->tq_lock);
-
- /*
- * Mark each bucket as closing and wakeup all sleeping threads.
- */
- for (; (b != NULL) && (bid < tq->tq_nbuckets); b++, bid++) {
- taskq_ent_t *tqe;
-
- mutex_enter(&b->tqbucket_lock);
-
- b->tqbucket_flags |= TQBUCKET_CLOSE;
- /* Wakeup all sleeping threads */
-
- for (tqe = b->tqbucket_freelist.tqent_next;
- tqe != &b->tqbucket_freelist; tqe = tqe->tqent_next)
- cv_signal(&tqe->tqent_cv);
-
- ASSERT(b->tqbucket_nalloc == 0);
-
- /*
- * At this point we waited for all pending jobs to complete (in
- * both the task queue and the bucket and no new jobs should
- * arrive. Wait for all threads to die.
- */
- while (b->tqbucket_nfree > 0)
- cv_wait(&b->tqbucket_cv, &b->tqbucket_lock);
- mutex_exit(&b->tqbucket_lock);
- mutex_destroy(&b->tqbucket_lock);
- cv_destroy(&b->tqbucket_cv);
- }
-
- if (tq->tq_buckets != NULL) {
- ASSERT(tq->tq_flags & TASKQ_DYNAMIC);
- kmem_free(tq->tq_buckets,
- sizeof (taskq_bucket_t) * tq->tq_nbuckets);
-
- /* Cleanup fields before returning tq to the cache */
- tq->tq_buckets = NULL;
- tq->tq_tcreates = 0;
- tq->tq_tdeaths = 0;
- } else {
- ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
- }
-
- tq->tq_totaltime = 0;
- tq->tq_tasks = 0;
- tq->tq_maxtasks = 0;
- tq->tq_executed = 0;
- kmem_cache_free(taskq_cache, tq);
-}
-
-SYSINIT(sol_taskq, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, taskq_init, NULL); /* registers taskq_init() as the startup hook */
-SYSUNINIT(sol_taskq, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, taskq_fini, NULL); /* registers taskq_fini() as the teardown hook */
diff --git a/sys/contrib/opensolaris/uts/common/rpc/xdr.c b/sys/contrib/opensolaris/uts/common/rpc/xdr.c
deleted file mode 100644
index e934668..0000000
--- a/sys/contrib/opensolaris/uts/common/rpc/xdr.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * Portions of this source code were derived from Berkeley 4.3 BSD
- * under license from the Regents of the University of California.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * xdr.c, generic XDR routines implementation.
- * These are the "generic" xdr routines used to serialize and de-serialize
- * most common data items. See xdr.h for more info on the interface to
- * xdr.
- */
-
-#include <sys/param.h>
-#include <sys/cmn_err.h>
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <rpc/types.h>
-#include <rpc/xdr.h>
-
-#pragma weak xdr_int32_t = xdr_int
-#pragma weak xdr_uint32_t = xdr_u_int
-#pragma weak xdr_int64_t = xdr_longlong_t
-#pragma weak xdr_uint64_t = xdr_u_longlong_t
-
-#if defined(sun)
-#if !defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
-#error "Exactly one of _BIG_ENDIAN or _LITTLE_ENDIAN must be defined"
-#elif defined(_BIG_ENDIAN) && defined(_LITTLE_ENDIAN)
-#error "Only one of _BIG_ENDIAN or _LITTLE_ENDIAN may be defined"
-#endif
-#endif
-
-/*
- * constants specific to the xdr "protocol"
- *
- * XDR_FALSE and XDR_TRUE are the encoded values xdr_bool() writes and
- * compares against. LASTUNSIGNED is (uint_t)0 minus 1, i.e. the largest
- * uint_t value; it is used in this file as an "unbounded" size argument.
- */
-#define XDR_FALSE ((int32_t)0)
-#define XDR_TRUE ((int32_t)1)
-#define LASTUNSIGNED ((uint_t)0-1)
-
-/*
- * for unit alignment
- *
- * Source of the zero padding bytes that xdr_opaque() emits when encoding
- * data whose length is not a multiple of BYTES_PER_XDR_UNIT.
- */
-static char xdr_zero[BYTES_PER_XDR_UNIT] = { 0, 0, 0, 0 };
-
-/*
- * Release the storage held by a data structure by running its XDR routine
- * in XDR_FREE mode.  This is a convenience utility rather than a filter.
- * Only the x_op field of the temporary XDR handle is set; the result of
- * the routine is ignored.
- */
-void
-xdr_free(xdrproc_t proc, char *objp)
-{
-	XDR free_handle;
-
-	free_handle.x_op = XDR_FREE;
-	(void) (*proc)(&free_handle, objp);
-}
-
-/*
- * XDR nothing
- *
- * Takes no arguments, transfers no data and always returns TRUE.
- */
-bool_t
-xdr_void(void)
-{
- return (TRUE);
-}
-
-/*
- * XDR integers
- *
- * Encodes or decodes *ip as one 32-bit XDR unit; XDR_FREE is a no-op that
- * succeeds.  Any other operation fails.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_int
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-bool_t
-xdr_int(XDR *xdrs, int *ip)
-{
-	switch (xdrs->x_op) {
-	case XDR_ENCODE:
-		return (XDR_PUTINT32(xdrs, ip));
-
-	case XDR_DECODE:
-		return (XDR_GETINT32(xdrs, ip));
-
-	case XDR_FREE:
-		return (TRUE);
-
-	default:
-		break;
-	}
-
-#ifdef DEBUG
-	printf("xdr_int: FAILED\n");
-#endif
-	return (FALSE);
-}
-
-/*
- * XDR unsigned integers
- *
- * Encodes or decodes *up as one 32-bit XDR unit (the value is passed to the
- * int32 stream primitives through a pointer cast); XDR_FREE is a no-op that
- * succeeds.  Any other operation fails, and on DEBUG kernels the failure is
- * reported under this function's own name.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_u_int
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-bool_t
-xdr_u_int(XDR *xdrs, uint_t *up)
-{
-	if (xdrs->x_op == XDR_ENCODE)
-		return (XDR_PUTINT32(xdrs, (int32_t *)up));
-
-	if (xdrs->x_op == XDR_DECODE)
-		return (XDR_GETINT32(xdrs, (int32_t *)up));
-
-	if (xdrs->x_op == XDR_FREE)
-		return (TRUE);
-
-#ifdef DEBUG
-	printf("xdr_u_int: FAILED\n");
-#endif
-	return (FALSE);
-}
-
-
-#if defined(_ILP32)
-/*
- * xdr_long and xdr_u_long for binary compatibility on ILP32 kernels.
- *
- * No prototypes since new code should not be using these interfaces.
- *
- * Both simply forward to xdr_int() / xdr_u_int() through a pointer cast,
- * which is why they are only compiled when _ILP32 is defined.
- */
-bool_t
-xdr_long(XDR *xdrs, long *ip)
-{
- return (xdr_int(xdrs, (int *)ip));
-}
-
-bool_t
-xdr_u_long(XDR *xdrs, unsigned long *up)
-{
- return (xdr_u_int(xdrs, (uint_t *)up));
-}
-#endif /* _ILP32 */
-
-
-/*
- * XDR long long integers
- *
- * The value is transferred as two 32-bit XDR units.  Which half of *hp goes
- * first depends on the host byte order: the half at offset
- * BYTES_PER_XDR_UNIT on little-endian hosts, the half at offset 0
- * otherwise.  Operations other than encode and decode succeed without
- * doing anything.
- */
-bool_t
-xdr_longlong_t(XDR *xdrs, longlong_t *hp)
-{
-	int32_t *first;		/* half transferred first */
-	int32_t *second;	/* half transferred second */
-
-#if BYTE_ORDER == _LITTLE_ENDIAN
-	first = (int32_t *)((char *)hp + BYTES_PER_XDR_UNIT);
-	second = (int32_t *)hp;
-#else
-	first = (int32_t *)hp;
-	second = (int32_t *)((char *)hp + BYTES_PER_XDR_UNIT);
-#endif
-
-	switch (xdrs->x_op) {
-	case XDR_ENCODE:
-		if (XDR_PUTINT32(xdrs, first) == TRUE)
-			return (XDR_PUTINT32(xdrs, second));
-		return (FALSE);
-
-	case XDR_DECODE:
-		if (XDR_GETINT32(xdrs, first) == TRUE)
-			return (XDR_GETINT32(xdrs, second));
-		return (FALSE);
-
-	default:
-		return (TRUE);
-	}
-}
-
-/*
- * XDR unsigned long long integers
- *
- * Same wire handling as the signed variant: two 32-bit XDR units, with the
- * half at offset BYTES_PER_XDR_UNIT transferred first on little-endian
- * hosts and the half at offset 0 first otherwise.  Operations other than
- * encode and decode succeed without doing anything.
- */
-bool_t
-xdr_u_longlong_t(XDR *xdrs, u_longlong_t *hp)
-{
-	int32_t *first;		/* half transferred first */
-	int32_t *second;	/* half transferred second */
-
-#if BYTE_ORDER == _LITTLE_ENDIAN
-	first = (int32_t *)((char *)hp + BYTES_PER_XDR_UNIT);
-	second = (int32_t *)hp;
-#else
-	first = (int32_t *)hp;
-	second = (int32_t *)((char *)hp + BYTES_PER_XDR_UNIT);
-#endif
-
-	switch (xdrs->x_op) {
-	case XDR_ENCODE:
-		if (XDR_PUTINT32(xdrs, first) == TRUE)
-			return (XDR_PUTINT32(xdrs, second));
-		return (FALSE);
-
-	case XDR_DECODE:
-		if (XDR_GETINT32(xdrs, first) == TRUE)
-			return (XDR_GETINT32(xdrs, second));
-		return (FALSE);
-
-	default:
-		return (TRUE);
-	}
-}
-
-/*
- * XDR short integers
- *
- * A short travels as a full 32-bit XDR unit: it is widened to int32_t on
- * encode and narrowed back on decode.  XDR_FREE succeeds without doing
- * anything; any other operation fails.
- */
-bool_t
-xdr_short(XDR *xdrs, short *sp)
-{
-	int32_t wire;
-
-	if (xdrs->x_op == XDR_ENCODE) {
-		wire = (int32_t)*sp;
-		return (XDR_PUTINT32(xdrs, &wire));
-	}
-
-	if (xdrs->x_op == XDR_DECODE) {
-		if (!XDR_GETINT32(xdrs, &wire))
-			return (FALSE);
-		*sp = (short)wire;
-		return (TRUE);
-	}
-
-	if (xdrs->x_op == XDR_FREE)
-		return (TRUE);
-
-	return (FALSE);
-}
-
-/*
- * XDR unsigned short integers
- *
- * An unsigned short travels as a full 32-bit XDR unit: it is widened to
- * uint32_t on encode and narrowed back on decode.  XDR_FREE succeeds
- * without doing anything; any other operation fails.
- */
-bool_t
-xdr_u_short(XDR *xdrs, ushort_t *usp)
-{
-	uint32_t wire;
-
-	if (xdrs->x_op == XDR_ENCODE) {
-		wire = (uint32_t)*usp;
-		return (XDR_PUTINT32(xdrs, (int32_t *)&wire));
-	}
-
-	if (xdrs->x_op == XDR_DECODE) {
-		if (XDR_GETINT32(xdrs, (int32_t *)&wire)) {
-			*usp = (ushort_t)wire;
-			return (TRUE);
-		}
-#ifdef DEBUG
-		printf("xdr_u_short: decode FAILED\n");
-#endif
-		return (FALSE);
-	}
-
-	if (xdrs->x_op == XDR_FREE)
-		return (TRUE);
-
-#ifdef DEBUG
-	printf("xdr_u_short: bad op FAILED\n");
-#endif
-	return (FALSE);
-}
-
-
-/*
- * XDR a char
- *
- * The character is converted to an int and handled by xdr_int(). *cp is
- * read before the call and, when xdr_int() succeeds, written back after it,
- * whatever the current operation is. Returns FALSE if xdr_int() fails.
- */
-bool_t
-xdr_char(XDR *xdrs, char *cp)
-{
- int i;
-
- i = (*cp);
- if (!xdr_int(xdrs, &i)) {
- return (FALSE);
- }
- *cp = (char)i;
- return (TRUE);
-}
-
-/*
- * XDR booleans
- *
- * On encode any non-zero *bp is sent as XDR_TRUE and zero as XDR_FALSE.  On
- * decode every received value other than XDR_FALSE yields TRUE.  XDR_FREE
- * succeeds without doing anything; any other operation fails.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_bool
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-bool_t
-xdr_bool(XDR *xdrs, bool_t *bp)
-{
-	int32_t wire;
-
-	if (xdrs->x_op == XDR_ENCODE) {
-		if (*bp)
-			wire = XDR_TRUE;
-		else
-			wire = XDR_FALSE;
-		return (XDR_PUTINT32(xdrs, &wire));
-	}
-
-	if (xdrs->x_op == XDR_DECODE) {
-		if (XDR_GETINT32(xdrs, &wire)) {
-			*bp = (wire != XDR_FALSE) ? TRUE : FALSE;
-			return (TRUE);
-		}
-#ifdef DEBUG
-		printf("xdr_bool: decode FAILED\n");
-#endif
-		return (FALSE);
-	}
-
-	if (xdrs->x_op == XDR_FREE)
-		return (TRUE);
-
-#ifdef DEBUG
-	printf("xdr_bool: bad op FAILED\n");
-#endif
-	return (FALSE);
-}
-
-/*
- * XDR enumerations
- *
- * The size of an enum is probed at compile time through sizecheckvar: an
- * enum the size of int32_t is handled by xdr_int(), one the size of a short
- * by xdr_short(), and any other size makes the routine return FALSE. The
- * lint-only branch calls both helpers and returns the xdr_int() result.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_enum
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-#ifndef lint
-enum sizecheck { SIZEVAL } sizecheckvar; /* used to find the size of */
- /* an enum */
-#endif
-bool_t
-xdr_enum(XDR *xdrs, enum_t *ep)
-{
-#ifndef lint
- /*
- * enums are treated as ints
- */
- if (sizeof (sizecheckvar) == sizeof (int32_t)) {
- return (xdr_int(xdrs, (int32_t *)ep));
- } else if (sizeof (sizecheckvar) == sizeof (short)) {
- return (xdr_short(xdrs, (short *)ep));
- } else {
- return (FALSE);
- }
-#else
- (void) (xdr_short(xdrs, (short *)ep));
- return (xdr_int(xdrs, (int32_t *)ep));
-#endif
-}
-
-/*
- * XDR opaque data
- * Allows the specification of a fixed size sequence of opaque bytes.
- * cp points to the opaque object and cnt gives the byte length.
- *
- * The data occupies a whole number of XDR units on the wire: on encode the
- * tail is padded with zero bytes from xdr_zero, on decode the padding bytes
- * are read into a scratch buffer and discarded.  The scratch buffer is an
- * automatic variable so that concurrent decoders do not write to shared
- * storage.  XDR_FREE and a zero cnt succeed without doing anything; any
- * other operation fails.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_opaque
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-bool_t
-xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt)
-{
-	uint_t rndup;
-	char crud[BYTES_PER_XDR_UNIT];	/* sink for decoded padding */
-
-	/*
-	 * if no data we are done
-	 */
-	if (cnt == 0)
-		return (TRUE);
-
-	/*
-	 * round byte count to full xdr units
-	 */
-	rndup = cnt % BYTES_PER_XDR_UNIT;
-	if (rndup != 0)
-		rndup = BYTES_PER_XDR_UNIT - rndup;
-
-	if (xdrs->x_op == XDR_DECODE) {
-		if (!XDR_GETBYTES(xdrs, cp, cnt)) {
-#ifdef DEBUG
-			printf("xdr_opaque: decode FAILED\n");
-#endif
-			return (FALSE);
-		}
-		if (rndup == 0)
-			return (TRUE);
-		return (XDR_GETBYTES(xdrs, (caddr_t)crud, rndup));
-	}
-
-	if (xdrs->x_op == XDR_ENCODE) {
-		if (!XDR_PUTBYTES(xdrs, cp, cnt)) {
-#ifdef DEBUG
-			printf("xdr_opaque: encode FAILED\n");
-#endif
-			return (FALSE);
-		}
-		if (rndup == 0)
-			return (TRUE);
-		return (XDR_PUTBYTES(xdrs, xdr_zero, rndup));
-	}
-
-	if (xdrs->x_op == XDR_FREE)
-		return (TRUE);
-
-#ifdef DEBUG
-	printf("xdr_opaque: bad op FAILED\n");
-#endif
-	return (FALSE);
-}
-
-/*
- * XDR counted bytes
- * *cpp is a pointer to the bytes, *sizep is the count.
- * If *cpp is NULL on decode, a buffer of the decoded length is allocated
- * and stored in *cpp.
- *
- * The count is transferred first and, except for XDR_FREE, must not exceed
- * maxsize.  If decoding the bytes fails, a buffer that was allocated by this
- * call is released again and *cpp is reset to NULL, matching what
- * xdr_string() does; a caller-supplied buffer is never freed on failure.
- * XDR_FREE releases *cpp (using *sizep as its size) and sets it to NULL.
- *
- * PSARC 2003/523 Contract Private Interface
- * xdr_bytes
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-bool_t
-xdr_bytes(XDR *xdrs, char **cpp, uint_t *sizep, const uint_t maxsize)
-{
-	char *sp = *cpp;	/* sp is the actual string pointer */
-	uint_t nodesize;
-	bool_t allocated = FALSE;
-
-	/*
-	 * first deal with the length since xdr bytes are counted
-	 */
-	if (!xdr_u_int(xdrs, sizep)) {
-#ifdef DEBUG
-		printf("xdr_bytes: size FAILED\n");
-#endif
-		return (FALSE);
-	}
-	nodesize = *sizep;
-	if ((nodesize > maxsize) && (xdrs->x_op != XDR_FREE)) {
-#ifdef DEBUG
-		printf("xdr_bytes: bad size (%u) FAILED (%u max)\n",
-		    nodesize, maxsize);
-#endif
-		return (FALSE);
-	}
-
-	/*
-	 * now deal with the actual bytes
-	 */
-	switch (xdrs->x_op) {
-	case XDR_DECODE:
-		if (nodesize == 0)
-			return (TRUE);
-		if (sp == NULL) {
-			*cpp = sp = (char *)mem_alloc(nodesize);
-			allocated = TRUE;
-		}
-		if (!xdr_opaque(xdrs, sp, nodesize)) {
-			/*
-			 * free up memory if allocated here
-			 */
-			if (allocated) {
-				mem_free(sp, nodesize);
-				*cpp = NULL;
-			}
-			return (FALSE);
-		}
-		return (TRUE);
-
-	case XDR_ENCODE:
-		return (xdr_opaque(xdrs, sp, nodesize));
-
-	case XDR_FREE:
-		if (sp != NULL) {
-			mem_free(sp, nodesize);
-			*cpp = NULL;
-		}
-		return (TRUE);
-	}
-#ifdef DEBUG
-	printf("xdr_bytes: bad op FAILED\n");
-#endif
-	return (FALSE);
-}
-
-/*
- * Implemented here due to commonality of the object.
- *
- * A netobj is handled as counted bytes: n_bytes and n_len are passed to
- * xdr_bytes() with MAX_NETOBJ_SZ as the size limit, and its result is
- * returned unchanged.
- */
-bool_t
-xdr_netobj(XDR *xdrs, struct netobj *np)
-{
- return (xdr_bytes(xdrs, &np->n_bytes, &np->n_len, MAX_NETOBJ_SZ));
-}
-
-/*
- * XDR a discriminated union
- * Support routine for discriminated unions.
- * The caller supplies an array of xdr_discrim structures terminated by an
- * entry with a null procedure pointer.  The discriminant is transferred
- * first (as an enum); the array is then searched for an entry with that
- * value and the procedure of the first match is applied to the union.
- * Without a match the default routine is used if one was given; with
- * neither a match nor a default routine the call fails.
- */
-bool_t
-xdr_union(XDR *xdrs, enum_t *dscmp, char *unp,
-    const struct xdr_discrim *choices, const xdrproc_t dfault)
-{
-	const struct xdr_discrim *arm;
-	enum_t dscm;
-
-	/*
-	 * the discriminant comes first; it is an enum
-	 */
-	if (!xdr_enum(xdrs, dscmp)) {
-#ifdef DEBUG
-		printf("xdr_enum: dscmp FAILED\n");
-#endif
-		return (FALSE);
-	}
-	dscm = *dscmp;
-
-	/*
-	 * run the xdr routine of the first arm whose value matches
-	 */
-	for (arm = choices; arm->proc != NULL_xdrproc_t; arm++) {
-		if (arm->value == dscm)
-			return ((*(arm->proc))(xdrs, unp, LASTUNSIGNED));
-	}
-
-	/*
-	 * nothing matched - fall back to the default routine, if any
-	 */
-	if (dfault == NULL_xdrproc_t)
-		return (FALSE);
-
-	return ((*dfault)(xdrs, unp, LASTUNSIGNED));
-}
-
-
-/*
- * Non-portable xdr primitives.
- * Care should be taken when moving these routines to new architectures.
- */
-
-
-/*
- * XDR null terminated ASCII strings
- * xdr_string deals with "C strings" - arrays of bytes that are
- * terminated by a NULL character. The parameter cpp references a
- * pointer to storage; If the pointer is null, then the necessary
- * storage is allocated. The last parameter is the max allowed length
- * of the string as specified by a protocol.
- *
- * The length is transferred first: for encode and free it is taken from
- * strlen() of the string (0 for a null pointer on encode), for decode it is
- * read from the stream. A length above maxsize fails. On decode size + 1
- * bytes are allocated when *cpp is NULL, the terminator is stored before
- * the bytes are read, and the result is rejected if the decoded data
- * contains an embedded NUL (strlen() differs from size); a buffer allocated
- * here is freed again on those failures. XDR_FREE releases size + 1 bytes
- * and resets *cpp to NULL.
- *
- * NOTE(review): the maxsize check is also applied for XDR_FREE, unlike in
- * xdr_bytes(); a string longer than maxsize would therefore not be freed.
- */
-bool_t
-xdr_string(XDR *xdrs, char **cpp, const uint_t maxsize)
-{
- char *sp = *cpp; /* sp is the actual string pointer */
- uint_t size;
- uint_t nodesize;
-
- /*
- * first deal with the length since xdr strings are counted-strings
- */
- switch (xdrs->x_op) {
- case XDR_FREE:
- if (sp == NULL)
- return (TRUE); /* already free */
- /* FALLTHROUGH */
- case XDR_ENCODE:
- size = (sp != NULL) ? (uint_t)strlen(sp) : 0;
- break;
- case XDR_DECODE:
- break;
- }
- if (!xdr_u_int(xdrs, &size)) {
-#ifdef DEBUG
- printf("xdr_string: size FAILED\n");
-#endif
- return (FALSE);
- }
- if (size > maxsize) {
-#ifdef DEBUG
- printf("xdr_string: bad size FAILED\n");
-#endif
- return (FALSE);
- }
- nodesize = size + 1; /* wraps to 0 when size is the largest uint_t */
-
- /*
- * now deal with the actual bytes
- */
- switch (xdrs->x_op) {
- case XDR_DECODE:
- if (nodesize == 0)
- return (TRUE);
- if (sp == NULL)
- sp = (char *)mem_alloc(nodesize);
- sp[size] = 0;
- if (!xdr_opaque(xdrs, sp, size)) {
- /*
- * free up memory if allocated here
- */
- if (*cpp == NULL) {
- mem_free(sp, nodesize);
- }
- return (FALSE);
- }
- if (strlen(sp) != size) {
- if (*cpp == NULL) {
- mem_free(sp, nodesize);
- }
- return (FALSE);
- }
- *cpp = sp;
- return (TRUE);
-
- case XDR_ENCODE:
- return (xdr_opaque(xdrs, sp, size));
-
- case XDR_FREE:
- mem_free(sp, nodesize);
- *cpp = NULL;
- return (TRUE);
- }
-#ifdef DEBUG
- printf("xdr_string: bad op FAILED\n");
-#endif
- return (FALSE);
-}
-
-/*
- * Two-argument front end to xdr_string() for callers that need the plain
- * xdrproc signature (e.g. routines like clnt_call).  The string length is
- * limited only by LASTUNSIGNED.  The result is normalized to TRUE or FALSE.
- */
-bool_t
-xdr_wrapstring(XDR *xdrs, char **cpp)
-{
-	return (xdr_string(xdrs, cpp, LASTUNSIGNED) ? TRUE : FALSE);
-}
diff --git a/sys/contrib/opensolaris/uts/common/rpc/xdr.h b/sys/contrib/opensolaris/uts/common/rpc/xdr.h
deleted file mode 100644
index d60809e..0000000
--- a/sys/contrib/opensolaris/uts/common/rpc/xdr.h
+++ /dev/null
@@ -1,605 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-/*
- * Portions of this source code were derived from Berkeley
- * 4.3 BSD under license from the Regents of the University of
- * California.
- */
-
-/*
- * xdr.h, External Data Representation Serialization Routines.
- *
- */
-
-#ifndef _RPC_XDR_H
-#define _RPC_XDR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/byteorder.h> /* For all ntoh* and hton*() kind of macros */
-#include <rpc/types.h> /* For all ntoh* and hton*() kind of macros */
-#ifndef _KERNEL
-#include <stdio.h> /* defines FILE *, used in ANSI C function prototypes */
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * XDR provides a conventional way for converting between C data
- * types and an external bit-string representation. Library supplied
- * routines provide for the conversion on built-in C data types. These
- * routines and utility routines defined here are used to help implement
- * a type encode/decode routine for each user-defined type.
- *
- * Each data type provides a single procedure which takes two arguments:
- *
- * bool_t
- * xdrproc(xdrs, argresp)
- * XDR *xdrs;
- * <type> *argresp;
- *
- * xdrs is an instance of a XDR handle, to which or from which the data
- * type is to be converted. argresp is a pointer to the structure to be
- * converted. The XDR handle contains an operation field which indicates
- * which of the operations (ENCODE, DECODE * or FREE) is to be performed.
- *
- * XDR_DECODE may allocate space if the pointer argresp is null. This
- * data can be freed with the XDR_FREE operation.
- *
- * We write only one procedure per data type to make it easy
- * to keep the encode and decode procedures for a data type consistent.
- * In many cases the same code performs all operations on a user defined type,
- * because all the hard work is done in the component type routines.
- * decode as a series of calls on the nested data types.
- */
-
-/*
- * Xdr operations. XDR_ENCODE causes the type to be encoded into the
- * stream. XDR_DECODE causes the type to be extracted from the stream.
- * XDR_FREE can be used to release the space allocated by an XDR_DECODE
- * request.
- */
-enum xdr_op {
- XDR_ENCODE = 0,
- XDR_DECODE = 1,
- XDR_FREE = 2
-};
-
-/*
- * This is the number of bytes per unit of external data.
- */
-#define BYTES_PER_XDR_UNIT (4)
-#define RNDUP(x) ((((x) + BYTES_PER_XDR_UNIT - 1) / BYTES_PER_XDR_UNIT) \
- * BYTES_PER_XDR_UNIT)
-
-/*
- * The XDR handle.
- * Contains operation which is being applied to the stream,
- * an operations vector for the paticular implementation (e.g. see xdr_mem.c),
- * and two private fields for the use of the particular impelementation.
- *
- * PSARC 2003/523 Contract Private Interface
- * XDR
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-typedef struct XDR {
- enum xdr_op x_op; /* operation; fast additional param */
- struct xdr_ops *x_ops;
- caddr_t x_public; /* users' data */
- caddr_t x_private; /* pointer to private data */
- caddr_t x_base; /* private used for position info */
- int x_handy; /* extra private word */
-} XDR;
-
-/*
- * PSARC 2003/523 Contract Private Interface
- * xdr_ops
- * Changes must be reviewed by Solaris File Sharing
- * Changes must be communicated to contract-2003-523@sun.com
- */
-struct xdr_ops {
-#ifdef __STDC__
-#if !defined(_KERNEL)
- bool_t (*x_getlong)(struct XDR *, long *);
- /* get a long from underlying stream */
- bool_t (*x_putlong)(struct XDR *, long *);
- /* put a long to " */
-#endif /* KERNEL */
- bool_t (*x_getbytes)(struct XDR *, caddr_t, int);
- /* get some bytes from " */
- bool_t (*x_putbytes)(struct XDR *, caddr_t, int);
- /* put some bytes to " */
- uint_t (*x_getpostn)(struct XDR *);
- /* returns bytes off from beginning */
- bool_t (*x_setpostn)(struct XDR *, uint_t);
- /* lets you reposition the stream */
- rpc_inline_t *(*x_inline)(struct XDR *, int);
- /* buf quick ptr to buffered data */
- void (*x_destroy)(struct XDR *);
- /* free privates of this xdr_stream */
- bool_t (*x_control)(struct XDR *, int, void *);
-#if defined(_LP64) || defined(_KERNEL)
- bool_t (*x_getint32)(struct XDR *, int32_t *);
- /* get a int from underlying stream */
- bool_t (*x_putint32)(struct XDR *, int32_t *);
- /* put an int to " */
-#endif /* _LP64 || _KERNEL */
-#else
-#if !defined(_KERNEL)
- bool_t (*x_getlong)(); /* get a long from underlying stream */
- bool_t (*x_putlong)(); /* put a long to " */
-#endif /* KERNEL */
- bool_t (*x_getbytes)(); /* get some bytes from " */
- bool_t (*x_putbytes)(); /* put some bytes to " */
- uint_t (*x_getpostn)(); /* returns bytes off from beginning */
- bool_t (*x_setpostn)(); /* lets you reposition the stream */
- rpc_inline_t *(*x_inline)();
- /* buf quick ptr to buffered data */
- void (*x_destroy)(); /* free privates of this xdr_stream */
- bool_t (*x_control)();
-#if defined(_LP64) || defined(_KERNEL)
- bool_t (*x_getint32)();
- bool_t (*x_putint32)();
-#endif /* _LP64 || defined(_KERNEL) */
-#endif
-};
-
-/*
- * Operations defined on a XDR handle
- *
- * XDR *xdrs;
- * long *longp;
- * caddr_t addr;
- * uint_t len;
- * uint_t pos;
- */
-#if !defined(_KERNEL)
-#define XDR_GETLONG(xdrs, longp) \
- (*(xdrs)->x_ops->x_getlong)(xdrs, longp)
-#define xdr_getlong(xdrs, longp) \
- (*(xdrs)->x_ops->x_getlong)(xdrs, longp)
-
-#define XDR_PUTLONG(xdrs, longp) \
- (*(xdrs)->x_ops->x_putlong)(xdrs, longp)
-#define xdr_putlong(xdrs, longp) \
- (*(xdrs)->x_ops->x_putlong)(xdrs, longp)
-#endif /* KERNEL */
-
-
-#if !defined(_LP64) && !defined(_KERNEL)
-
-/*
- * For binary compatability on ILP32 we do not change the shape
- * of the XDR structure and the GET/PUTINT32 functions just use
- * the get/putlong vectors which operate on identically-sized
- * units of data.
- */
-
-#define XDR_GETINT32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_getlong)(xdrs, (long *)int32p)
-#define xdr_getint32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_getlong)(xdrs, (long *)int32p)
-
-#define XDR_PUTINT32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_putlong)(xdrs, (long *)int32p)
-#define xdr_putint32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_putlong)(xdrs, (long *)int32p)
-
-#else /* !_LP64 && !_KERNEL */
-
-#define XDR_GETINT32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_getint32)(xdrs, int32p)
-#define xdr_getint32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_getint32)(xdrs, int32p)
-
-#define XDR_PUTINT32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_putint32)(xdrs, int32p)
-#define xdr_putint32(xdrs, int32p) \
- (*(xdrs)->x_ops->x_putint32)(xdrs, int32p)
-
-#endif /* !_LP64 && !_KERNEL */
-
-#define XDR_GETBYTES(xdrs, addr, len) \
- (*(xdrs)->x_ops->x_getbytes)(xdrs, addr, len)
-#define xdr_getbytes(xdrs, addr, len) \
- (*(xdrs)->x_ops->x_getbytes)(xdrs, addr, len)
-
-#define XDR_PUTBYTES(xdrs, addr, len) \
- (*(xdrs)->x_ops->x_putbytes)(xdrs, addr, len)
-#define xdr_putbytes(xdrs, addr, len) \
- (*(xdrs)->x_ops->x_putbytes)(xdrs, addr, len)
-
-#define XDR_GETPOS(xdrs) \
- (*(xdrs)->x_ops->x_getpostn)(xdrs)
-#define xdr_getpos(xdrs) \
- (*(xdrs)->x_ops->x_getpostn)(xdrs)
-
-#define XDR_SETPOS(xdrs, pos) \
- (*(xdrs)->x_ops->x_setpostn)(xdrs, pos)
-#define xdr_setpos(xdrs, pos) \
- (*(xdrs)->x_ops->x_setpostn)(xdrs, pos)
-
-#define XDR_INLINE(xdrs, len) \
- (*(xdrs)->x_ops->x_inline)(xdrs, len)
-#define xdr_inline(xdrs, len) \
- (*(xdrs)->x_ops->x_inline)(xdrs, len)
-
-#define XDR_DESTROY(xdrs) \
- (*(xdrs)->x_ops->x_destroy)(xdrs)
-#define xdr_destroy(xdrs) \
- (*(xdrs)->x_ops->x_destroy)(xdrs)
-
-#define XDR_CONTROL(xdrs, req, op) \
- (*(xdrs)->x_ops->x_control)(xdrs, req, op)
-#define xdr_control(xdrs, req, op) \
- (*(xdrs)->x_ops->x_control)(xdrs, req, op)
-
-/*
- * Support struct for discriminated unions.
- * You create an array of xdrdiscrim structures, terminated with
- * a entry with a null procedure pointer. The xdr_union routine gets
- * the discriminant value and then searches the array of structures
- * for a matching value. If a match is found the associated xdr routine
- * is called to handle that part of the union. If there is
- * no match, then a default routine may be called.
- * If there is no match and no default routine it is an error.
- */
-
-
-/*
- * A xdrproc_t exists for each data type which is to be encoded or decoded.
- *
- * The second argument to the xdrproc_t is a pointer to an opaque pointer.
- * The opaque pointer generally points to a structure of the data type
- * to be decoded. If this pointer is 0, then the type routines should
- * allocate dynamic storage of the appropriate size and return it.
- * bool_t (*xdrproc_t)(XDR *, void *);
- */
-#ifdef __cplusplus
-typedef bool_t (*xdrproc_t)(XDR *, void *);
-#else
-#ifdef __STDC__
-typedef bool_t (*xdrproc_t)(); /* For Backward compatibility */
-#else
-typedef bool_t (*xdrproc_t)();
-#endif
-#endif
-
-#define NULL_xdrproc_t ((xdrproc_t)0)
-
-#if defined(_LP64) || defined(_I32LPx)
-#define xdr_rpcvers(xdrs, versp) xdr_u_int(xdrs, versp)
-#define xdr_rpcprog(xdrs, progp) xdr_u_int(xdrs, progp)
-#define xdr_rpcproc(xdrs, procp) xdr_u_int(xdrs, procp)
-#define xdr_rpcprot(xdrs, protp) xdr_u_int(xdrs, protp)
-#define xdr_rpcport(xdrs, portp) xdr_u_int(xdrs, portp)
-#else
-#define xdr_rpcvers(xdrs, versp) xdr_u_long(xdrs, versp)
-#define xdr_rpcprog(xdrs, progp) xdr_u_long(xdrs, progp)
-#define xdr_rpcproc(xdrs, procp) xdr_u_long(xdrs, procp)
-#define xdr_rpcprot(xdrs, protp) xdr_u_long(xdrs, protp)
-#define xdr_rpcport(xdrs, portp) xdr_u_long(xdrs, portp)
-#endif
-
-struct xdr_discrim {
- int value;
- xdrproc_t proc;
-};
-
-/*
- * In-line routines for fast encode/decode of primitve data types.
- * Caveat emptor: these use single memory cycles to get the
- * data from the underlying buffer, and will fail to operate
- * properly if the data is not aligned. The standard way to use these
- * is to say:
- * if ((buf = XDR_INLINE(xdrs, count)) == NULL)
- * return (FALSE);
- * <<< macro calls >>>
- * where ``count'' is the number of bytes of data occupied
- * by the primitive data types.
- *
- * N.B. and frozen for all time: each data type here uses 4 bytes
- * of external representation.
- */
-
-#define IXDR_GET_INT32(buf) ((int32_t)ntohl((uint32_t)*(buf)++))
-#define IXDR_PUT_INT32(buf, v) (*(buf)++ = (int32_t)htonl((uint32_t)v))
-#define IXDR_GET_U_INT32(buf) ((uint32_t)IXDR_GET_INT32(buf))
-#define IXDR_PUT_U_INT32(buf, v) IXDR_PUT_INT32((buf), ((int32_t)(v)))
-
-#if !defined(_KERNEL) && !defined(_LP64)
-
-#define IXDR_GET_LONG(buf) ((long)ntohl((ulong_t)*(buf)++))
-#define IXDR_PUT_LONG(buf, v) (*(buf)++ = (long)htonl((ulong_t)v))
-#define IXDR_GET_U_LONG(buf) ((ulong_t)IXDR_GET_LONG(buf))
-#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG((buf), ((long)(v)))
-
-#define IXDR_GET_BOOL(buf) ((bool_t)IXDR_GET_LONG(buf))
-#define IXDR_GET_ENUM(buf, t) ((t)IXDR_GET_LONG(buf))
-#define IXDR_GET_SHORT(buf) ((short)IXDR_GET_LONG(buf))
-#define IXDR_GET_U_SHORT(buf) ((ushort_t)IXDR_GET_LONG(buf))
-
-#define IXDR_PUT_BOOL(buf, v) IXDR_PUT_LONG((buf), ((long)(v)))
-#define IXDR_PUT_ENUM(buf, v) IXDR_PUT_LONG((buf), ((long)(v)))
-#define IXDR_PUT_SHORT(buf, v) IXDR_PUT_LONG((buf), ((long)(v)))
-#define IXDR_PUT_U_SHORT(buf, v) IXDR_PUT_LONG((buf), ((long)(v)))
-
-#else
-
-#define IXDR_GET_BOOL(buf) ((bool_t)IXDR_GET_INT32(buf))
-#define IXDR_GET_ENUM(buf, t) ((t)IXDR_GET_INT32(buf))
-#define IXDR_GET_SHORT(buf) ((short)IXDR_GET_INT32(buf))
-#define IXDR_GET_U_SHORT(buf) ((ushort_t)IXDR_GET_INT32(buf))
-
-#define IXDR_PUT_BOOL(buf, v) IXDR_PUT_INT32((buf), ((int)(v)))
-#define IXDR_PUT_ENUM(buf, v) IXDR_PUT_INT32((buf), ((int)(v)))
-#define IXDR_PUT_SHORT(buf, v) IXDR_PUT_INT32((buf), ((int)(v)))
-#define IXDR_PUT_U_SHORT(buf, v) IXDR_PUT_INT32((buf), ((int)(v)))
-
-#endif
-
-#if BYTE_ORDER == _BIG_ENDIAN
-#define IXDR_GET_HYPER(buf, v) { \
- *((int32_t *)(&v)) = ntohl(*(uint32_t *)buf++); \
- *((int32_t *)(((char *)&v) + BYTES_PER_XDR_UNIT)) \
- = ntohl(*(uint32_t *)buf++); \
- }
-#define IXDR_PUT_HYPER(buf, v) { \
- *(buf)++ = (int32_t)htonl(*(uint32_t *) \
- ((char *)&v)); \
- *(buf)++ = \
- (int32_t)htonl(*(uint32_t *)(((char *)&v) \
- + BYTES_PER_XDR_UNIT)); \
- }
-#else
-
-#define IXDR_GET_HYPER(buf, v) { \
- *((int32_t *)(((char *)&v) + \
- BYTES_PER_XDR_UNIT)) \
- = ntohl(*(uint32_t *)buf++); \
- *((int32_t *)(&v)) = \
- ntohl(*(uint32_t *)buf++); \
- }
-
-#define IXDR_PUT_HYPER(buf, v) { \
- *(buf)++ = \
- (int32_t)htonl(*(uint32_t *)(((char *)&v) + \
- BYTES_PER_XDR_UNIT)); \
- *(buf)++ = \
- (int32_t)htonl(*(uint32_t *)((char *)&v)); \
- }
-#endif
-#define IXDR_GET_U_HYPER(buf, v) IXDR_GET_HYPER(buf, v)
-#define IXDR_PUT_U_HYPER(buf, v) IXDR_PUT_HYPER(buf, v)
-
-
-/*
- * These are the "generic" xdr routines.
- */
-#ifdef __STDC__
-extern bool_t xdr_void(void);
-extern bool_t xdr_int(XDR *, int *);
-extern bool_t xdr_u_int(XDR *, uint_t *);
-extern bool_t xdr_long(XDR *, long *);
-extern bool_t xdr_u_long(XDR *, ulong_t *);
-extern bool_t xdr_short(XDR *, short *);
-extern bool_t xdr_u_short(XDR *, ushort_t *);
-extern bool_t xdr_bool(XDR *, bool_t *);
-extern bool_t xdr_enum(XDR *, enum_t *);
-extern bool_t xdr_array(XDR *, caddr_t *, uint_t *, const uint_t,
- const uint_t, const xdrproc_t);
-extern bool_t xdr_bytes(XDR *, char **, uint_t *, const uint_t);
-extern bool_t xdr_opaque(XDR *, caddr_t, const uint_t);
-extern bool_t xdr_string(XDR *, char **, const uint_t);
-extern bool_t xdr_union(XDR *, enum_t *, char *,
- const struct xdr_discrim *, const xdrproc_t);
-extern unsigned int xdr_sizeof(xdrproc_t, void *);
-
-extern bool_t xdr_hyper(XDR *, longlong_t *);
-extern bool_t xdr_longlong_t(XDR *, longlong_t *);
-extern bool_t xdr_u_hyper(XDR *, u_longlong_t *);
-extern bool_t xdr_u_longlong_t(XDR *, u_longlong_t *);
-
-extern bool_t xdr_char(XDR *, char *);
-extern bool_t xdr_wrapstring(XDR *, char **);
-extern bool_t xdr_reference(XDR *, caddr_t *, uint_t, const xdrproc_t);
-extern bool_t xdr_pointer(XDR *, char **, uint_t, const xdrproc_t);
-extern void xdr_free(xdrproc_t, char *);
-extern bool_t xdr_time_t(XDR *, time_t *);
-
-extern bool_t xdr_int8_t(XDR *, int8_t *);
-extern bool_t xdr_uint8_t(XDR *, uint8_t *);
-extern bool_t xdr_int16_t(XDR *, int16_t *);
-extern bool_t xdr_uint16_t(XDR *, uint16_t *);
-extern bool_t xdr_int32_t(XDR *, int32_t *);
-extern bool_t xdr_uint32_t(XDR *, uint32_t *);
-#if defined(_INT64_TYPE)
-extern bool_t xdr_int64_t(XDR *, int64_t *);
-extern bool_t xdr_uint64_t(XDR *, uint64_t *);
-#endif
-
-#ifndef _KERNEL
-extern bool_t xdr_u_char(XDR *, uchar_t *);
-extern bool_t xdr_vector(XDR *, char *, const uint_t, const uint_t, const
-xdrproc_t);
-extern bool_t xdr_float(XDR *, float *);
-extern bool_t xdr_double(XDR *, double *);
-extern bool_t xdr_quadruple(XDR *, long double *);
-#endif /* !_KERNEL */
-#else
-extern bool_t xdr_void();
-extern bool_t xdr_int();
-extern bool_t xdr_u_int();
-extern bool_t xdr_long();
-extern bool_t xdr_u_long();
-extern bool_t xdr_short();
-extern bool_t xdr_u_short();
-extern bool_t xdr_bool();
-extern bool_t xdr_enum();
-extern bool_t xdr_array();
-extern bool_t xdr_bytes();
-extern bool_t xdr_opaque();
-extern bool_t xdr_string();
-extern bool_t xdr_union();
-
-extern bool_t xdr_hyper();
-extern bool_t xdr_longlong_t();
-extern bool_t xdr_u_hyper();
-extern bool_t xdr_u_longlong_t();
-extern bool_t xdr_char();
-extern bool_t xdr_reference();
-extern bool_t xdr_pointer();
-extern void xdr_free();
-extern bool_t xdr_wrapstring();
-extern bool_t xdr_time_t();
-
-extern bool_t xdr_int8_t();
-extern bool_t xdr_uint8_t();
-extern bool_t xdr_int16_t();
-extern bool_t xdr_uint16_t();
-extern bool_t xdr_int32_t();
-extern bool_t xdr_uint32_t();
-#if defined(_INT64_TYPE)
-extern bool_t xdr_int64_t();
-extern bool_t xdr_uint64_t();
-#endif
-
-#ifndef _KERNEL
-extern bool_t xdr_u_char();
-extern bool_t xdr_vector();
-extern bool_t xdr_float();
-extern bool_t xdr_double();
-extern bool_t xdr_quadruple();
-#endif /* !_KERNEL */
-#endif
-
-/*
- * Common opaque bytes objects used by many rpc protocols;
- * declared here due to commonality.
- */
-#define MAX_NETOBJ_SZ 1024
-struct netobj {
- uint_t n_len;
- char *n_bytes;
-};
-typedef struct netobj netobj;
-
-#ifdef __STDC__
-extern bool_t xdr_netobj(XDR *, netobj *);
-#else
-extern bool_t xdr_netobj();
-#endif
-
-/*
- * These are XDR control operators
- */
-
-#define XDR_GET_BYTES_AVAIL 1
-
-struct xdr_bytesrec {
- bool_t xc_is_last_record;
- size_t xc_num_avail;
-};
-
-typedef struct xdr_bytesrec xdr_bytesrec;
-
-/*
- * These are the request arguments to XDR_CONTROL.
- *
- * XDR_PEEK - returns the contents of the next XDR unit on the XDR stream.
- * XDR_SKIPBYTES - skips the next N bytes in the XDR stream.
- * XDR_RDMAGET - for xdr implementation over RDMA, gets private flags from
- * the XDR stream being moved over RDMA
- * XDR_RDMANOCHUNK - for xdr implementaion over RDMA, sets private flags in
- * the XDR stream moving over RDMA.
- */
-#ifdef _KERNEL
-#define XDR_PEEK 2
-#define XDR_SKIPBYTES 3
-#define XDR_RDMAGET 4
-#define XDR_RDMASET 5
-#endif
-
-/*
- * These are the public routines for the various implementations of
- * xdr streams.
- */
-#ifndef _KERNEL
-#ifdef __STDC__
-extern void xdrmem_create(XDR *, const caddr_t, const uint_t, const enum
-xdr_op);
- /* XDR using memory buffers */
-extern void xdrrec_create(XDR *, const uint_t, const uint_t, const caddr_t,
-int (*) (void *, caddr_t, int), int (*) (void *, caddr_t, int));
-/* XDR pseudo records for tcp */
-extern bool_t xdrrec_endofrecord(XDR *, bool_t);
-/* make end of xdr record */
-extern bool_t xdrrec_skiprecord(XDR *);
-/* move to beginning of next record */
-extern bool_t xdrrec_eof(XDR *);
-extern uint_t xdrrec_readbytes(XDR *, caddr_t, uint_t);
-/* true if no more input */
-#else
-extern void xdrmem_create();
-extern void xdrstdio_create();
-extern void xdrrec_create();
-extern bool_t xdrrec_endofrecord();
-extern bool_t xdrrec_skiprecord();
-extern bool_t xdrrec_eof();
-extern uint_t xdrrec_readbytes();
-#endif
-#else
-
-extern void xdrmem_create(XDR *, caddr_t, uint_t, enum xdr_op);
-
-extern struct xdr_ops xdrmblk_ops;
-
-struct rpc_msg;
-extern bool_t xdr_callmsg(XDR *, struct rpc_msg *);
-extern bool_t xdr_replymsg_body(XDR *, struct rpc_msg *);
-extern bool_t xdr_replymsg_hdr(XDR *, struct rpc_msg *);
-
-#include <sys/malloc.h>
-#ifdef mem_alloc
-#undef mem_alloc
-#define mem_alloc(size) malloc((size), M_TEMP, M_WAITOK | M_ZERO)
-#endif
-#ifdef mem_free
-#undef mem_free
-#define mem_free(ptr, size) free((ptr), M_TEMP)
-#endif
-
-#endif /* !_KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_RPC_XDR_H */
diff --git a/sys/contrib/opensolaris/uts/common/rpc/xdr_array.c b/sys/contrib/opensolaris/uts/common/rpc/xdr_array.c
deleted file mode 100644
index 3711e53..0000000
--- a/sys/contrib/opensolaris/uts/common/rpc/xdr_array.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * Portions of this source code were derived from Berkeley 4.3 BSD
- * under license from the Regents of the University of California.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * xdr_array.c, Generic XDR routines impelmentation.
- * These are the "non-trivial" xdr primitives used to serialize and de-serialize
- * arrays. See xdr.h for more info on the interface to xdr.
- */
-
-#include <sys/param.h>
-#include <sys/cmn_err.h>
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <rpc/types.h>
-#include <rpc/xdr.h>
-
-#define LASTUNSIGNED ((uint_t)0-1)
-
-/*
- * XDR an array of arbitrary elements
- * *addrp is a pointer to the array, *sizep is the number of elements.
- * If addrp is NULL (*sizep * elsize) bytes are allocated.
- * elsize is the size (in bytes) of each element, and elproc is the
- * xdr procedure to call to handle each element of the array.
- */
-bool_t
-xdr_array(XDR *xdrs, caddr_t *addrp, uint_t *sizep, const uint_t maxsize,
- const uint_t elsize, const xdrproc_t elproc)
-{
- uint_t i;
- caddr_t target = *addrp;
- uint_t c; /* the actual element count */
- bool_t stat = TRUE;
- uint_t nodesize;
-
- /* like strings, arrays are really counted arrays */
- if (!xdr_u_int(xdrs, sizep)) {
-#ifdef DEBUG
- printf("xdr_array: size FAILED\n");
-#endif
- return (FALSE);
- }
- c = *sizep;
- if ((c > maxsize || LASTUNSIGNED / elsize < c) &&
- xdrs->x_op != XDR_FREE) {
-#ifdef DEBUG
- printf("xdr_array: bad size FAILED\n");
-#endif
- return (FALSE);
- }
- nodesize = c * elsize;
-
- /*
- * if we are deserializing, we may need to allocate an array.
- * We also save time by checking for a null array if we are freeing.
- */
- if (target == NULL)
- switch (xdrs->x_op) {
- case XDR_DECODE:
- if (c == 0)
- return (TRUE);
- *addrp = target = (char *)mem_alloc(nodesize);
- bzero(target, nodesize);
- break;
-
- case XDR_FREE:
- return (TRUE);
-
- case XDR_ENCODE:
- break;
- }
-
- /*
- * now we xdr each element of array
- */
- for (i = 0; (i < c) && stat; i++) {
- stat = (*elproc)(xdrs, target, LASTUNSIGNED);
- target += elsize;
- }
-
- /*
- * the array may need freeing
- */
- if (xdrs->x_op == XDR_FREE) {
- mem_free(*addrp, nodesize);
- *addrp = NULL;
- }
- return (stat);
-}
diff --git a/sys/contrib/opensolaris/uts/common/rpc/xdr_mem.c b/sys/contrib/opensolaris/uts/common/rpc/xdr_mem.c
deleted file mode 100644
index 32ff32d..0000000
--- a/sys/contrib/opensolaris/uts/common/rpc/xdr_mem.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * Portions of this source code were derived from Berkeley 4.3 BSD
- * under license from the Regents of the University of California.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * xdr_mem.c, XDR implementation using memory buffers.
- *
- * If you have some data to be interpreted as external data representation
- * or to be converted to external data representation in a memory buffer,
- * then this is the package for you.
- */
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/systm.h>
-
-#include <rpc/types.h>
-#include <rpc/xdr.h>
-
-static struct xdr_ops *xdrmem_ops(void);
-
-/*
- * The procedure xdrmem_create initializes a stream descriptor for a
- * memory buffer.
- */
-void
-xdrmem_create(XDR *xdrs, caddr_t addr, uint_t size, enum xdr_op op)
-{
- xdrs->x_op = op;
- xdrs->x_ops = xdrmem_ops();
- xdrs->x_private = xdrs->x_base = addr;
- xdrs->x_handy = size;
- xdrs->x_public = NULL;
-}
-
-/* ARGSUSED */
-static void
-xdrmem_destroy(XDR *xdrs)
-{
-}
-
-static bool_t
-xdrmem_getint32(XDR *xdrs, int32_t *int32p)
-{
- if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0)
- return (FALSE);
- /* LINTED pointer alignment */
- *int32p = (int32_t)ntohl((uint32_t)(*((int32_t *)(xdrs->x_private))));
- xdrs->x_private += sizeof (int32_t);
- return (TRUE);
-}
-
-static bool_t
-xdrmem_putint32(XDR *xdrs, int32_t *int32p)
-{
- if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0)
- return (FALSE);
- /* LINTED pointer alignment */
- *(int32_t *)xdrs->x_private = (int32_t)htonl((uint32_t)(*int32p));
- xdrs->x_private += sizeof (int32_t);
- return (TRUE);
-}
-
-static bool_t
-xdrmem_getbytes(XDR *xdrs, caddr_t addr, int len)
-{
- if ((xdrs->x_handy -= len) < 0)
- return (FALSE);
- bcopy(xdrs->x_private, addr, len);
- xdrs->x_private += len;
- return (TRUE);
-}
-
-static bool_t
-xdrmem_putbytes(XDR *xdrs, caddr_t addr, int len)
-{
- if ((xdrs->x_handy -= len) < 0)
- return (FALSE);
- bcopy(addr, xdrs->x_private, len);
- xdrs->x_private += len;
- return (TRUE);
-}
-
-static uint_t
-xdrmem_getpos(XDR *xdrs)
-{
- return ((uint_t)((uintptr_t)xdrs->x_private - (uintptr_t)xdrs->x_base));
-}
-
-static bool_t
-xdrmem_setpos(XDR *xdrs, uint_t pos)
-{
- caddr_t newaddr = xdrs->x_base + pos;
- caddr_t lastaddr = xdrs->x_private + xdrs->x_handy;
- ptrdiff_t diff;
-
- if (newaddr > lastaddr)
- return (FALSE);
- xdrs->x_private = newaddr;
- diff = lastaddr - newaddr;
- xdrs->x_handy = (int)diff;
- return (TRUE);
-}
-
-static rpc_inline_t *
-xdrmem_inline(XDR *xdrs, int len)
-{
- rpc_inline_t *buf = NULL;
-
- if (xdrs->x_handy >= len) {
- xdrs->x_handy -= len;
- /* LINTED pointer alignment */
- buf = (rpc_inline_t *)xdrs->x_private;
- xdrs->x_private += len;
- }
- return (buf);
-}
-
-static bool_t
-xdrmem_control(XDR *xdrs, int request, void *info)
-{
- xdr_bytesrec *xptr;
- int32_t *int32p;
- int len;
-
- switch (request) {
-
- case XDR_GET_BYTES_AVAIL:
- xptr = (xdr_bytesrec *)info;
- xptr->xc_is_last_record = TRUE;
- xptr->xc_num_avail = xdrs->x_handy;
- return (TRUE);
-
- case XDR_PEEK:
- /*
- * Return the next 4 byte unit in the XDR stream.
- */
- if (xdrs->x_handy < sizeof (int32_t))
- return (FALSE);
- int32p = (int32_t *)info;
- *int32p = (int32_t)ntohl((uint32_t)
- (*((int32_t *)(xdrs->x_private))));
- return (TRUE);
-
- case XDR_SKIPBYTES:
- /*
- * Skip the next N bytes in the XDR stream.
- */
- int32p = (int32_t *)info;
- len = RNDUP((int)(*int32p));
- if ((xdrs->x_handy -= len) < 0)
- return (FALSE);
- xdrs->x_private += len;
- return (TRUE);
-
- }
- return (FALSE);
-}
-
-static struct xdr_ops *
-xdrmem_ops(void)
-{
- static struct xdr_ops ops;
-
- if (ops.x_getint32 == NULL) {
- ops.x_getbytes = xdrmem_getbytes;
- ops.x_putbytes = xdrmem_putbytes;
- ops.x_getpostn = xdrmem_getpos;
- ops.x_setpostn = xdrmem_setpos;
- ops.x_inline = xdrmem_inline;
- ops.x_destroy = xdrmem_destroy;
- ops.x_control = xdrmem_control;
- ops.x_getint32 = xdrmem_getint32;
- ops.x_putint32 = xdrmem_putint32;
- }
- return (&ops);
-}
diff --git a/sys/contrib/opensolaris/uts/common/sys/asm_linkage.h b/sys/contrib/opensolaris/uts/common/sys/asm_linkage.h
deleted file mode 100644
index b2a3c16..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/asm_linkage.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_ASM_LINKAGE_H
-#define _IA32_SYS_ASM_LINKAGE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _ASM /* The remainder of this file is only for assembly files */
-
-/*
- * make annoying differences in assembler syntax go away
- */
-
-#if defined(__i386__) || defined(__amd64__)
-
-#define ASM_ENTRY_ALIGN 16
-
-/*
- * ENTRY provides the standard procedure entry code and an easy way to
- * insert the calls to mcount for profiling. ENTRY_NP is identical, but
- * never calls mcount.
- */
-#define ENTRY(x) \
- .text; \
- .align ASM_ENTRY_ALIGN; \
- .globl x; \
- .type x, @function; \
-x:
-
-/*
- * ALTENTRY provides for additional entry points.
- */
-#define ALTENTRY(x) \
- .globl x; \
- .type x, @function; \
-x:
-
-/*
- * SET_SIZE trails a function and set the size for the ELF symbol table.
- */
-#define SET_SIZE(x) \
- .size x, [.-x]
-
-#elif defined(__sparc64__)
-
-/*
- * ENTRY provides the standard procedure entry code and an easy way to
- * insert the calls to mcount for profiling. ENTRY_NP is identical, but
- * never calls mcount.
- */
-#define ENTRY(x) \
- .section ".text"; \
- .align 4; \
- .global x; \
- .type x, @function; \
-x:
-
-/*
- * ALTENTRY provides for additional entry points.
- */
-#define ALTENTRY(x) \
- .global x; \
- .type x, @function; \
-x:
-
-/*
- * SET_SIZE trails a function and set the size for the ELF symbol table.
- */
-#define SET_SIZE(x) \
- .size x, (.-x)
-
-#else
-
-#error Unsupported architecture.
-
-#endif
-
-#endif /* _ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_ASM_LINKAGE_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/avl.h b/sys/contrib/opensolaris/uts/common/sys/avl.h
deleted file mode 100644
index bf9af89..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/avl.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _AVL_H
-#define _AVL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This is a private header file. Applications should not directly include
- * this file.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/avl_impl.h>
-
-/*
- * This is a generic implemenatation of AVL trees for use in the Solaris kernel.
- * The interfaces provide an efficient way of implementing an ordered set of
- * data structures.
- *
- * AVL trees provide an alternative to using an ordered linked list. Using AVL
- * trees will usually be faster, however they requires more storage. An ordered
- * linked list in general requires 2 pointers in each data structure. The
- * AVL tree implementation uses 3 pointers. The following chart gives the
- * approximate performance of operations with the different approaches:
- *
- * Operation Link List AVL tree
- * --------- -------- --------
- * lookup O(n) O(log(n))
- *
- * insert 1 node constant constant
- *
- * delete 1 node constant between constant and O(log(n))
- *
- * delete all nodes O(n) O(n)
- *
- * visit the next
- * or prev node constant between constant and O(log(n))
- *
- *
- * The data structure nodes are anchored at an "avl_tree_t" (the equivalent
- * of a list header) and the individual nodes will have a field of
- * type "avl_node_t" (corresponding to list pointers).
- *
- * The type "avl_index_t" is used to indicate a position in the list for
- * certain calls.
- *
- * The usage scenario is generally:
- *
- * 1. Create the list/tree with: avl_create()
- *
- * followed by any mixture of:
- *
- * 2a. Insert nodes with: avl_add(), or avl_find() and avl_insert()
- *
- * 2b. Visited elements with:
- * avl_first() - returns the lowest valued node
- * avl_last() - returns the highest valued node
- * AVL_NEXT() - given a node go to next higher one
- * AVL_PREV() - given a node go to previous lower one
- *
- * 2c. Find the node with the closest value either less than or greater
- * than a given value with avl_nearest().
- *
- * 2d. Remove individual nodes from the list/tree with avl_remove().
- *
- * and finally when the list is being destroyed
- *
- * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes.
- * Note that once you use avl_destroy_nodes(), you can no longer
- * use any routine except avl_destroy_nodes() and avl_destoy().
- *
- * 4. Use avl_destroy() to destroy the AVL tree itself.
- *
- * Any locking for multiple thread access is up to the user to provide, just
- * as is needed for any linked list implementation.
- */
-
-
-/*
- * Type used for the root of the AVL tree.
- */
-typedef struct avl_tree avl_tree_t;
-
-/*
- * The data nodes in the AVL tree must have a field of this type.
- */
-typedef struct avl_node avl_node_t;
-
-/*
- * An opaque type used to locate a position in the tree where a node
- * would be inserted.
- */
-typedef uintptr_t avl_index_t;
-
-
-/*
- * Direction constants used for avl_nearest().
- */
-#define AVL_BEFORE (0)
-#define AVL_AFTER (1)
-
-
-
-/*
- * Prototypes
- *
- * Where not otherwise mentioned, "void *" arguments are a pointer to the
- * user data structure which must contain a field of type avl_node_t.
- *
- * Also assume the user data structures looks like:
- * stuct my_type {
- * ...
- * avl_node_t my_link;
- * ...
- * };
- */
-
-/*
- * Initialize an AVL tree. Arguments are:
- *
- * tree - the tree to be initialized
- * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
- * -1 for <, 0 for ==, and +1 for >
- * size - the value of sizeof(struct my_type)
- * offset - the value of OFFSETOF(struct my_type, my_link)
- */
-extern void avl_create(avl_tree_t *tree,
- int (*compar) (const void *, const void *), size_t size, size_t offset);
-
-
-/*
- * Find a node with a matching value in the tree. Returns the matching node
- * found. If not found, it returns NULL and then if "where" is not NULL it sets
- * "where" for use with avl_insert() or avl_nearest().
- *
- * node - node that has the value being looked for
- * where - position for use with avl_nearest() or avl_insert(), may be NULL
- */
-extern void *avl_find(avl_tree_t *tree, void *node, avl_index_t *where);
-
-/*
- * Insert a node into the tree.
- *
- * node - the node to insert
- * where - position as returned from avl_find()
- */
-extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
-
-/*
- * Insert "new_data" in "tree" in the given "direction" either after
- * or before the data "here".
- *
- * This might be usefull for avl clients caching recently accessed
- * data to avoid doing avl_find() again for insertion.
- *
- * new_data - new data to insert
- * here - existing node in "tree"
- * direction - either AVL_AFTER or AVL_BEFORE the data "here".
- */
-extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
- int direction);
-
-
-/*
- * Return the first or last valued node in the tree. Will return NULL
- * if the tree is empty.
- *
- */
-extern void *avl_first(avl_tree_t *tree);
-extern void *avl_last(avl_tree_t *tree);
-
-
-/*
- * Return the next or previous valued node in the tree.
- * AVL_NEXT() will return NULL if at the last node.
- * AVL_PREV() will return NULL if at the first node.
- *
- * node - the node from which the next or previous node is found
- */
-#define AVL_NEXT(tree, node) avl_walk(tree, node, AVL_AFTER)
-#define AVL_PREV(tree, node) avl_walk(tree, node, AVL_BEFORE)
-
-
-/*
- * Find the node with the nearest value either greater or less than
- * the value from a previous avl_find(). Returns the node or NULL if
- * there isn't a matching one.
- *
- * where - position as returned from avl_find()
- * direction - either AVL_BEFORE or AVL_AFTER
- *
- * EXAMPLE get the greatest node that is less than a given value:
- *
- * avl_tree_t *tree;
- * struct my_data look_for_value = {....};
- * struct my_data *node;
- * struct my_data *less;
- * avl_index_t where;
- *
- * node = avl_find(tree, &look_for_value, &where);
- * if (node != NULL)
- * less = AVL_PREV(tree, node);
- * else
- * less = avl_nearest(tree, where, AVL_BEFORE);
- */
-extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
-
-
-/*
- * Add a single node to the tree.
- * The node must not be in the tree, and it must not
- * compare equal to any other node already in the tree.
- *
- * node - the node to add
- */
-extern void avl_add(avl_tree_t *tree, void *node);
-
-
-/*
- * Remove a single node from the tree. The node must be in the tree.
- *
- * node - the node to remove
- */
-extern void avl_remove(avl_tree_t *tree, void *node);
-
-
-/*
- * Return the number of nodes in the tree
- */
-extern ulong_t avl_numnodes(avl_tree_t *tree);
-
-
-/*
- * Used to destroy any remaining nodes in a tree. The cookie argument should
- * be initialized to NULL before the first call. Returns a node that has been
- * removed from the tree and may be free()'d. Returns NULL when the tree is
- * empty.
- *
- * Once you call avl_destroy_nodes(), you can only continuing calling it and
- * finally avl_destroy(). No other AVL routines will be valid.
- *
- * cookie - a "void *" used to save state between calls to avl_destroy_nodes()
- *
- * EXAMPLE:
- * avl_tree_t *tree;
- * struct my_data *node;
- * void *cookie;
- *
- * cookie = NULL;
- * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
- * free(node);
- * avl_destroy(tree);
- */
-extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
-
-
-/*
- * Final destroy of an AVL tree. Arguments are:
- *
- * tree - the empty tree to destroy
- */
-extern void avl_destroy(avl_tree_t *tree);
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AVL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/avl_impl.h b/sys/contrib/opensolaris/uts/common/sys/avl_impl.h
deleted file mode 100644
index 620685f..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/avl_impl.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _AVL_IMPL_H
-#define _AVL_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This is a private header file. Applications should not directly include
- * this file.
- */
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * generic AVL tree implementation for kernel use
- *
- * There are 5 pieces of information stored for each node in an AVL tree
- *
- * pointer to less than child
- * pointer to greater than child
- * a pointer to the parent of this node
- * an indication [0/1] of which child I am of my parent
- * a "balance" (-1, 0, +1) indicating which child tree is taller
- *
- * Since they only need 3 bits, the last two fields are packed into the
- * bottom bits of the parent pointer on 64 bit machines to save on space.
- */
-
-#ifndef _LP64
-
-struct avl_node {
- struct avl_node *avl_child[2]; /* left/right children */
- struct avl_node *avl_parent; /* this node's parent */
- unsigned short avl_child_index; /* my index in parent's avl_child[] */
- short avl_balance; /* balance value: -1, 0, +1 */
-};
-
-#define AVL_XPARENT(n) ((n)->avl_parent)
-#define AVL_SETPARENT(n, p) ((n)->avl_parent = (p))
-
-#define AVL_XCHILD(n) ((n)->avl_child_index)
-#define AVL_SETCHILD(n, c) ((n)->avl_child_index = (unsigned short)(c))
-
-#define AVL_XBALANCE(n) ((n)->avl_balance)
-#define AVL_SETBALANCE(n, b) ((n)->avl_balance = (short)(b))
-
-#else /* _LP64 */
-
-/*
- * for 64 bit machines, avl_pcb contains parent pointer, balance and child_index
- * values packed in the following manner:
- *
- * |63 3| 2 |1 0 |
- * |-------------------------------------|-----------------|-------------|
- * | avl_parent hi order bits | avl_child_index | avl_balance |
- * | | | + 1 |
- * |-------------------------------------|-----------------|-------------|
- *
- */
-struct avl_node {
- struct avl_node *avl_child[2]; /* left/right children nodes */
- uintptr_t avl_pcb; /* parent, child_index, balance */
-};
-
-/*
- * macros to extract/set fields in avl_pcb
- *
- * pointer to the parent of the current node is the high order bits
- */
-#define AVL_XPARENT(n) ((struct avl_node *)((n)->avl_pcb & ~7))
-#define AVL_SETPARENT(n, p) \
- ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p)))
-
-/*
- * index of this node in its parent's avl_child[]: bit #2
- */
-#define AVL_XCHILD(n) (((n)->avl_pcb >> 2) & 1)
-#define AVL_SETCHILD(n, c) \
- ((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2)))
-
-/*
- * balance indication for a node, lowest 2 bits. A valid balance is
- * -1, 0, or +1, and is encoded by adding 1 to the value to get the
- * unsigned values of 0, 1, 2.
- */
-#define AVL_XBALANCE(n) ((int)(((n)->avl_pcb & 3) - 1))
-#define AVL_SETBALANCE(n, b) \
- ((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1))))
-
-#endif /* _LP64 */
-
-
-
-/*
- * switch between a node and data pointer for a given tree
- * the value of "o" is tree->avl_offset
- */
-#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o)))
-#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o)))
-
-
-
-/*
- * macros used to create/access an avl_index_t
- */
-#define AVL_INDEX2NODE(x) ((avl_node_t *)((x) & ~1))
-#define AVL_INDEX2CHILD(x) ((x) & 1)
-#define AVL_MKINDEX(n, c) ((avl_index_t)(n) | (c))
-
-
-/*
- * The tree structure. The fields avl_root, avl_compar, and avl_offset come
- * first since they are needed for avl_find(). We want them to fit into
- * a single 64 byte cache line to make avl_find() as fast as possible.
- */
-struct avl_tree {
- struct avl_node *avl_root; /* root node in tree */
- int (*avl_compar)(const void *, const void *);
- size_t avl_offset; /* offsetof(type, avl_link_t field) */
- ulong_t avl_numnodes; /* number of nodes in the tree */
- size_t avl_size; /* sizeof user type struct */
-};
-
-
-/*
- * This will only by used via AVL_NEXT() or AVL_PREV()
- */
-extern void *avl_walk(struct avl_tree *, void *, int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AVL_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/bitmap.h b/sys/contrib/opensolaris/uts/common/sys/bitmap.h
deleted file mode 100644
index d0dd12b..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/bitmap.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-#ifndef _SYS_BITMAP_H
-#define _SYS_BITMAP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/feature_tests.h>
-#if defined(__GNUC__) && defined(_ASM_INLINES) && \
- (defined(__i386) || defined(__amd64))
-#include <asm/bitmap.h>
-#endif
-
-/*
- * Operations on bitmaps of arbitrary size
- * A bitmap is a vector of 1 or more ulong_t's.
- * The user of the package is responsible for range checks and keeping
- * track of sizes.
- */
-
-#ifdef _LP64
-#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */
-#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */
-#else
-#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */
-#endif
-
-#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */
-#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */
-
-#ifdef _LP64
-#define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per ulong_t */
-#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */
-#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */
-#else
-#define BT_ULMAXMASK 0xffffffff
-#endif
-
-/*
- * bitmap is a ulong_t *, bitindex an index_t
- *
- * The macros BT_WIM and BT_BIW internal; there is no need
- * for users of this package to use them.
- */
-
-/*
- * word in map
- */
-#define BT_WIM(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT])
-/*
- * bit in word
- */
-#define BT_BIW(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK))
-
-#ifdef _LP64
-#define BT_WIM32(bitmap, bitindex) \
- ((bitmap)[(bitindex) >> BT_ULSHIFT32])
-
-#define BT_BIW32(bitindex) \
- (1UL << ((bitindex) & BT_ULMASK32))
-#endif
-
-/*
- * These are public macros
- *
- * BT_BITOUL == n bits to n ulong_t's
- */
-#define BT_BITOUL(nbits) \
- (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
-#define BT_SIZEOFMAP(nbits) \
- (BT_BITOUL(nbits) * sizeof (ulong_t))
-#define BT_TEST(bitmap, bitindex) \
- ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
-#define BT_SET(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
-#define BT_CLEAR(bitmap, bitindex) \
- { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
-
-#ifdef _LP64
-#define BT_BITOUL32(nbits) \
- (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32)
-#define BT_SIZEOFMAP32(nbits) \
- (BT_BITOUL32(nbits) * sizeof (uint_t))
-#define BT_TEST32(bitmap, bitindex) \
- ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0)
-#define BT_SET32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); }
-#define BT_CLEAR32(bitmap, bitindex) \
- { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); }
-#endif /* _LP64 */
-
-
-/*
- * BIT_ONLYONESET is a private macro not designed for bitmaps of
- * arbitrary size. u must be an unsigned integer/long. It returns
- * true if one and only one bit is set in u.
- */
-#define BIT_ONLYONESET(u) \
- ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0))
-
-#if defined(_KERNEL) && !defined(_ASM)
-#include <sys/atomic.h>
-
-/*
- * return next available bit index from map with specified number of bits
- */
-extern index_t bt_availbit(ulong_t *bitmap, size_t nbits);
-/*
- * find the highest order bit that is on, and is within or below
- * the word specified by wx
- */
-extern int bt_gethighbit(ulong_t *mapp, int wx);
-extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2,
- size_t end_pos);
-/*
- * Find highest and lowest one bit set.
- * Returns bit number + 1 of bit that is set, otherwise returns 0.
- * Low order bit is 0, high order bit is 31.
- */
-extern int highbit(ulong_t);
-extern int lowbit(ulong_t);
-extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop);
-extern void bt_copy(ulong_t *, ulong_t *, ulong_t);
-
-/*
- * find the parity
- */
-extern int odd_parity(ulong_t);
-
-/*
- * Atomically set/clear bits
- * Atomic exclusive operations will set "result" to "-1"
- * if the bit is already set/cleared. "result" will be set
- * to 0 otherwise.
- */
-#define BT_ATOMIC_SET(bitmap, bitindex) \
- { atomic_or_long(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
-#define BT_ATOMIC_CLEAR(bitmap, bitindex) \
- { atomic_and_long(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
-
-#define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \
- { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \
- (bitindex) % BT_NBIPUL); }
-#define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \
- { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \
- (bitindex) % BT_NBIPUL); }
-
-/*
- * Extracts bits between index h (high, inclusive) and l (low, exclusive) from
- * u, which must be an unsigned integer.
- */
-#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
-
-#endif /* _KERNEL && !_ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_BITMAP_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/byteorder.h b/sys/contrib/opensolaris/uts/common/sys/byteorder.h
deleted file mode 100644
index 00afdd5..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/byteorder.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_BYTEORDER_H
-#define _SYS_BYTEORDER_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/isa_defs.h>
-#include <sys/int_types.h>
-
-#if defined(__GNUC__) && defined(_ASM_INLINES) && \
- (defined(__i386) || defined(__amd64))
-#include <asm/byteorder.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * macros for conversion between host and (internet) network byte order
- */
-
-#if BYTE_ORDER == _BIG_ENDIAN && !defined(ntohl) && !defined(__lint)
-/* big-endian */
-#define ntohl(x) (x)
-#define ntohs(x) (x)
-#define htonl(x) (x)
-#define htons(x) (x)
-
-#elif !defined(ntohl) /* little-endian */
-
-#ifndef _IN_PORT_T
-#define _IN_PORT_T
-typedef uint16_t in_port_t;
-#endif
-
-#ifndef _IN_ADDR_T
-#define _IN_ADDR_T
-typedef uint32_t in_addr_t;
-#endif
-
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5)
-extern uint32_t htonl(uint32_t);
-extern uint16_t htons(uint16_t);
-extern uint32_t ntohl(uint32_t);
-extern uint16_t ntohs(uint16_t);
-#else
-extern in_addr_t htonl(in_addr_t);
-extern in_port_t htons(in_port_t);
-extern in_addr_t ntohl(in_addr_t);
-extern in_port_t ntohs(in_port_t);
-#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) || defined(_XPG5) */
-#endif
-
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
-
-/*
- * Macros to reverse byte order
- */
-#define BSWAP_8(x) ((x) & 0xff)
-#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
-#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
-#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
-
-#define BMASK_8(x) ((x) & 0xff)
-#define BMASK_16(x) ((x) & 0xffff)
-#define BMASK_32(x) ((x) & 0xffffffff)
-#define BMASK_64(x) (x)
-
-/*
- * Macros to convert from a specific byte order to/from native byte order
- */
-#if BYTE_ORDER == _BIG_ENDIAN
-#define BE_8(x) BMASK_8(x)
-#define BE_16(x) BMASK_16(x)
-#define BE_32(x) BMASK_32(x)
-#define BE_64(x) BMASK_64(x)
-#define LE_8(x) BSWAP_8(x)
-#define LE_16(x) BSWAP_16(x)
-#define LE_32(x) BSWAP_32(x)
-#define LE_64(x) BSWAP_64(x)
-#else
-#define LE_8(x) BMASK_8(x)
-#define LE_16(x) BMASK_16(x)
-#define LE_32(x) BMASK_32(x)
-#define LE_64(x) BMASK_64(x)
-#define BE_8(x) BSWAP_8(x)
-#define BE_16(x) BSWAP_16(x)
-#define BE_32(x) BSWAP_32(x)
-#define BE_64(x) BSWAP_64(x)
-#endif
-
-#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_BYTEORDER_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/callb.h b/sys/contrib/opensolaris/uts/common/sys/callb.h
deleted file mode 100644
index b12b2e2..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/callb.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CALLB_H
-#define _SYS_CALLB_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/t_lock.h>
-#include <sys/thread.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * definitions of callback classes (c_class)
- *
- * Callbacks belong in the same class if (1) their callback routines
- * do the same kind of processing (ideally, using the same callback function)
- * and (2) they can/should be executed at the same time in a cpr
- * suspend/resume operation.
- *
- * Note: The DAEMON class, in particular, is for stopping kernel threads
- * and nothing else. The CALLB_* macros below should be used to deal
- * with kernel threads, and the callback function should be callb_generic_cpr.
- * Another idiosyncrasy of the DAEMON class is that if a suspend operation
- * fails, some of the callback functions may be called with the RESUME
- * code which were never called with SUSPEND. Not a problem currently,
- * but see bug 4201851.
- */
-#define CB_CL_CPR_DAEMON 0
-#define CB_CL_CPR_VM 1
-#define CB_CL_CPR_CALLOUT 2
-#define CB_CL_CPR_OBP 3
-#define CB_CL_CPR_FB 4
-#define CB_CL_PANIC 5
-#define CB_CL_CPR_RPC 6
-#define CB_CL_CPR_PROMPRINTF 7
-#define CB_CL_UADMIN 8
-#define CB_CL_CPR_PM 9
-#define CB_CL_HALT 10
-#define CB_CL_CPR_DMA 11
-#define CB_CL_CPR_POST_USER 12
-#define CB_CL_UADMIN_PRE_VFS 13
-#define CB_CL_MDBOOT CB_CL_UADMIN
-#define CB_CL_ENTER_DEBUGGER 14
-#define CB_CL_CPR_POST_KERNEL 15
-#define NCBCLASS 16 /* CHANGE ME if classes are added/removed */
-
-/*
- * CB_CL_CPR_DAEMON class specific definitions are given below:
- */
-
-/*
- * code for CPR callb_execute_class
- */
-#define CB_CODE_CPR_CHKPT 0
-#define CB_CODE_CPR_RESUME 1
-
-typedef void * callb_id_t;
-/*
- * Per kernel thread structure for CPR daemon callbacks.
- * Must be protected by either a existing lock in the daemon or
- * a new lock created for such a purpose.
- */
-typedef struct callb_cpr {
- kmutex_t *cc_lockp; /* lock to protect this struct */
- char cc_events; /* various events for CPR */
- callb_id_t cc_id; /* callb id address */
- kcondvar_t cc_callb_cv; /* cv for callback waiting */
- kcondvar_t cc_stop_cv; /* cv to checkpoint block */
-} callb_cpr_t;
-
-/*
- * cc_events definitions
- */
-#define CALLB_CPR_START 1 /* a checkpoint request's started */
-#define CALLB_CPR_SAFE 2 /* thread is safe for CPR */
-#define CALLB_CPR_ALWAYS_SAFE 4 /* thread is ALWAYS safe for CPR */
-
-/*
- * Used when checking that all kernel threads are stopped.
- */
-#define CALLB_MAX_RETRY 3 /* when waiting for kthread to sleep */
-#define CALLB_THREAD_DELAY 10 /* ticks allowed to reach sleep */
-#define CPR_KTHREAD_TIMEOUT_SEC 90 /* secs before callback times out -- */
- /* due to pwr mgmt of disks, make -- */
- /* big enough for worst spinup time */
-
-#ifdef _KERNEL
-/*
- *
- * CALLB_CPR_INIT macro is used by kernel threads to add their entry to
- * the callback table and perform other initialization. It automatically
- * adds the thread as being in the callback class CB_CL_CPR_DAEMON.
- *
- * cp - ptr to the callb_cpr_t structure for this kernel thread
- *
- * lockp - pointer to mutex protecting the callb_cpr_t stuct
- *
- * func - pointer to the callback function for this kernel thread.
- * It has the prototype boolean_t <func>(void *arg, int code)
- * where: arg - ptr to the callb_cpr_t structure
- * code - not used for this type of callback
- * returns: B_TRUE if successful; B_FALSE if unsuccessful.
- *
- * name - a string giving the name of the kernel thread
- *
- * Note: lockp is the lock to protect the callb_cpr_t (cp) structure
- * later on. No lock held is needed for this initialization.
- */
-#define CALLB_CPR_INIT(cp, lockp, func, name) { \
- bzero((caddr_t)(cp), sizeof (callb_cpr_t)); \
- (cp)->cc_lockp = lockp; \
- (cp)->cc_id = callb_add(func, (void *)(cp), \
- CB_CL_CPR_DAEMON, name); \
- }
-
-#ifndef __lock_lint
-#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp));
-#else
-#define CALLB_CPR_ASSERT(cp)
-#endif
-/*
- * Some threads (like the idle threads) do not adhere to the callback
- * protocol and are always considered safe. Such threads must never exit.
- * They register their presence by calling this macro during their
- * initialization.
- *
- * Args:
- * t - thread pointer of the client kernel thread
- * name - a string giving the name of the kernel thread
- */
-#define CALLB_CPR_INIT_SAFE(t, name) { \
- (void) callb_add_thread(callb_generic_cpr_safe, \
- (void *) &callb_cprinfo_safe, CB_CL_CPR_DAEMON, \
- name, t); \
- }
-/*
- * The lock to protect cp's content must be held before
- * calling the following two macros.
- *
- * Any code region between CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END
- * is safe for checkpoint/resume.
- */
-#define CALLB_CPR_SAFE_BEGIN(cp) { \
- CALLB_CPR_ASSERT(cp) \
- (cp)->cc_events |= CALLB_CPR_SAFE; \
- if ((cp)->cc_events & CALLB_CPR_START) \
- cv_signal(&(cp)->cc_callb_cv); \
- }
-#define CALLB_CPR_SAFE_END(cp, lockp) { \
- CALLB_CPR_ASSERT(cp) \
- while ((cp)->cc_events & CALLB_CPR_START) \
- cv_wait(&(cp)->cc_stop_cv, lockp); \
- (cp)->cc_events &= ~CALLB_CPR_SAFE; \
- }
-/*
- * cv_destroy is nop right now but may be needed in the future.
- */
-#define CALLB_CPR_EXIT(cp) { \
- CALLB_CPR_ASSERT(cp) \
- (cp)->cc_events |= CALLB_CPR_SAFE; \
- if ((cp)->cc_events & CALLB_CPR_START) \
- cv_signal(&(cp)->cc_callb_cv); \
- mutex_exit((cp)->cc_lockp); \
- (void) callb_delete((cp)->cc_id); \
- cv_destroy(&(cp)->cc_callb_cv); \
- cv_destroy(&(cp)->cc_stop_cv); \
- }
-
-extern callb_cpr_t callb_cprinfo_safe;
-extern callb_id_t callb_add(boolean_t (*)(void *, int), void *, int, char *);
-extern callb_id_t callb_add_thread(boolean_t (*)(void *, int),
- void *, int, char *, kthread_id_t);
-extern int callb_delete(callb_id_t);
-extern void callb_execute(callb_id_t, int);
-extern void *callb_execute_class(int, int);
-extern boolean_t callb_generic_cpr(void *, int);
-extern boolean_t callb_generic_cpr_safe(void *, int);
-extern boolean_t callb_is_stopped(kthread_id_t, caddr_t *);
-extern void callb_lock_table(void);
-extern void callb_unlock_table(void);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CALLB_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/ccompile.h b/sys/contrib/opensolaris/uts/common/sys/ccompile.h
deleted file mode 100644
index c9857b08..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/ccompile.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_CCOMPILE_H
-#define _SYS_CCOMPILE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This file contains definitions designed to enable different compilers
- * to be used harmoniously on Solaris systems.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Allow for version tests for compiler bugs and features.
- */
-#if defined(__GNUC__)
-#define __GNUC_VERSION \
- (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#else
-#define __GNUC_VERSION 0
-#endif
-
-#if defined(__ATTRIBUTE_IMPLEMENTED) || defined(__GNUC__)
-
-/*
- * analogous to lint's PRINTFLIKEn
- */
-#define __sun_attr___PRINTFLIKE__(__n) \
- __attribute__((__format__(printf, __n, (__n)+1)))
-#define __sun_attr___VPRINTFLIKE__(__n) \
- __attribute__((__format__(printf, __n, 0)))
-
-/*
- * Handle the kernel printf routines that can take '%b' too
- */
-#if __GNUC_VERSION < 30402
-/*
- * XX64 at least this doesn't work correctly yet with 3.4.1 anyway!
- */
-#define __sun_attr___KPRINTFLIKE__ __sun_attr___PRINTFLIKE__
-#define __sun_attr___KVPRINTFLIKE__ __sun_attr___VPRINTFLIKE__
-#else
-#define __sun_attr___KPRINTFLIKE__(__n) \
- __attribute__((__format__(cmn_err, __n, (__n)+1)))
-#define __sun_attr___KVPRINTFLIKE__(__n) \
- __attribute__((__format__(cmn_err, __n, 0)))
-#endif
-
-/*
- * This one's pretty obvious -- the function never returns
- */
-#define __sun_attr___noreturn__ __attribute__((__noreturn__))
-
-
-/*
- * This is an appropriate label for functions that do not
- * modify their arguments, e.g. strlen()
- */
-#define __sun_attr___pure__ __attribute__((__pure__))
-
-/*
- * This is a stronger form of __pure__. Can be used for functions
- * that do not modify their arguments and don't depend on global
- * memory.
- */
-#define __sun_attr___const__ __attribute__((__const__))
-
-/*
- * structure packing like #pragma pack(1)
- */
-#define __sun_attr___packed__ __attribute__((__packed__))
-
-#define ___sun_attr_inner(__a) __sun_attr_##__a
-#define __sun_attr__(__a) ___sun_attr_inner __a
-
-#else /* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */
-
-#define __sun_attr__(__a)
-
-#endif /* __ATTRIBUTE_IMPLEMENTED || __GNUC__ */
-
-/*
- * Shorthand versions for readability
- */
-
-#define __PRINTFLIKE(__n) __sun_attr__((__PRINTFLIKE__(__n)))
-#define __VPRINTFLIKE(__n) __sun_attr__((__VPRINTFLIKE__(__n)))
-#define __KPRINTFLIKE(__n) __sun_attr__((__KPRINTFLIKE__(__n)))
-#define __KVPRINTFLIKE(__n) __sun_attr__((__KVPRINTFLIKE__(__n)))
-#define __NORETURN __sun_attr__((__noreturn__))
-#define __CONST __sun_attr__((__const__))
-#define __PURE __sun_attr__((__pure__))
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CCOMPILE_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/compress.h b/sys/contrib/opensolaris/uts/common/sys/compress.h
deleted file mode 100644
index 3d79d95..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/compress.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#ifndef _SYS_COMPRESS_H
-#define _SYS_COMPRESS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern size_t compress(void *, void *, size_t);
-extern size_t decompress(void *, void *, size_t, size_t);
-extern uint32_t checksum32(void *, size_t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_COMPRESS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/cred.h b/sys/contrib/opensolaris/uts/common/sys/cred.h
deleted file mode 100644
index c1400b8..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/cred.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * Portions of this source code were derived from Berkeley 4.3 BSD
- * under license from the Regents of the University of California.
- */
-
-#ifndef _SYS_CRED_H
-#define _SYS_CRED_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * The credential is an opaque kernel private data structure defined in
- * <sys/cred_impl.h>.
- */
-
-typedef struct cred cred_t;
-
-#ifdef _KERNEL
-
-#define CRED() curthread->t_cred
-
-struct proc; /* cred.h is included in proc.h */
-struct prcred;
-
-struct auditinfo_addr; /* cred.h is included in audit.h */
-
-extern int ngroups_max;
-/*
- * kcred is used when you need all privileges.
- */
-extern struct cred *kcred;
-
-extern void cred_init(void);
-extern void crhold(cred_t *);
-extern void crfree(cred_t *);
-extern cred_t *cralloc(void); /* all but ref uninitialized */
-extern cred_t *crget(void); /* initialized */
-extern cred_t *crcopy(cred_t *);
-extern void crcopy_to(cred_t *, cred_t *);
-extern cred_t *crdup(cred_t *);
-extern void crdup_to(cred_t *, cred_t *);
-extern cred_t *crgetcred(void);
-extern void crset(struct proc *, cred_t *);
-extern int groupmember(gid_t, const cred_t *);
-extern int supgroupmember(gid_t, const cred_t *);
-extern int hasprocperm(const cred_t *, const cred_t *);
-extern int prochasprocperm(struct proc *, struct proc *, const cred_t *);
-extern int crcmp(const cred_t *, const cred_t *);
-extern cred_t *zone_kcred(void);
-
-extern uid_t crgetuid(const cred_t *);
-extern uid_t crgetruid(const cred_t *);
-extern uid_t crgetsuid(const cred_t *);
-extern gid_t crgetgid(const cred_t *);
-extern gid_t crgetrgid(const cred_t *);
-extern gid_t crgetsgid(const cred_t *);
-extern zoneid_t crgetzoneid(const cred_t *);
-extern projid_t crgetprojid(const cred_t *);
-
-
-extern const struct auditinfo_addr *crgetauinfo(const cred_t *);
-extern struct auditinfo_addr *crgetauinfo_modifiable(cred_t *);
-
-extern uint_t crgetref(const cred_t *);
-
-extern const gid_t *crgetgroups(const cred_t *);
-
-extern int crgetngroups(const cred_t *);
-
-/*
- * Sets real, effective and/or saved uid/gid;
- * -1 argument accepted as "no change".
- */
-extern int crsetresuid(cred_t *, uid_t, uid_t, uid_t);
-extern int crsetresgid(cred_t *, gid_t, gid_t, gid_t);
-
-/*
- * Sets real, effective and saved uids/gids all to the same
- * values. Both values must be non-negative and <= MAXUID
- */
-extern int crsetugid(cred_t *, uid_t, gid_t);
-
-extern int crsetgroups(cred_t *, int, gid_t *);
-
-/*
- * Private interface for setting zone association of credential.
- */
-struct zone;
-extern void crsetzone(cred_t *, struct zone *);
-extern struct zone *crgetzone(const cred_t *);
-
-/*
- * Private interface for setting project id in credential.
- */
-extern void crsetprojid(cred_t *, projid_t);
-
-/*
- * Private interface for nfs.
- */
-extern cred_t *crnetadjust(cred_t *);
-
-/*
- * Private interface for procfs.
- */
-extern void cred2prcred(const cred_t *, struct prcred *);
-
-/*
- * Private interfaces for Rampart Trusted Solaris.
- */
-struct ts_label_s;
-extern struct ts_label_s *crgetlabel(const cred_t *);
-extern boolean_t crisremote(const cred_t *);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_CRED_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/debug.h b/sys/contrib/opensolaris/uts/common/sys/debug.h
deleted file mode 100644
index c87c884..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/debug.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-#ifndef _SYS_DEBUG_H
-#define _SYS_DEBUG_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * ASSERT(ex) causes a panic or debugger entry if expression ex is not
- * true. ASSERT() is included only for debugging, and is a no-op in
- * production kernels. VERIFY(ex), on the other hand, behaves like
- * ASSERT and is evaluated on both debug and non-debug kernels.
- */
-
-#if defined(__STDC__)
-extern int assfail(const char *, const char *, int);
-#define VERIFY(EX) ((void)((EX) || assfail(#EX, __FILE__, __LINE__)))
-#if DEBUG
-#define ASSERT(EX) VERIFY(EX)
-#else
-#define ASSERT(x) ((void)0)
-#endif
-#else /* defined(__STDC__) */
-extern int assfail();
-#define VERIFY(EX) ((void)((EX) || assfail("EX", __FILE__, __LINE__)))
-#if DEBUG
-#define ASSERT(EX) VERIFY(EX)
-#else
-#define ASSERT(x) ((void)0)
-#endif
-#endif /* defined(__STDC__) */
-
-/*
- * Assertion variants sensitive to the compilation data model
- */
-#if defined(_LP64)
-#define ASSERT64(x) ASSERT(x)
-#define ASSERT32(x)
-#else
-#define ASSERT64(x)
-#define ASSERT32(x) ASSERT(x)
-#endif
-
-/*
- * ASSERT3() behaves like ASSERT() except that it is an explicit conditional,
- * and prints out the values of the left and right hand expressions as part of
- * the panic message to ease debugging. The three variants imply the type
- * of their arguments. ASSERT3S() is for signed data types, ASSERT3U() is
- * for unsigned, and ASSERT3P() is for pointers. The VERIFY3*() macros
- * have the same relationship as above.
- */
-extern void assfail3(const char *, uintmax_t, const char *, uintmax_t,
- const char *, int);
-#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
- const TYPE __left = (TYPE)(LEFT); \
- const TYPE __right = (TYPE)(RIGHT); \
- if (!(__left OP __right)) \
- assfail3(#LEFT " " #OP " " #RIGHT, \
- (uintmax_t)__left, #OP, (uintmax_t)__right, \
- __FILE__, __LINE__); \
-_NOTE(CONSTCOND) } while (0)
-
-#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
-#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
-#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
-#if DEBUG
-#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
-#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
-#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
-#else
-#define ASSERT3S(x, y, z) ((void)0)
-#define ASSERT3U(x, y, z) ((void)0)
-#define ASSERT3P(x, y, z) ((void)0)
-#endif
-
-#ifdef _KERNEL
-
-extern void abort_sequence_enter(char *);
-extern void debug_enter(char *);
-
-#endif /* _KERNEL */
-
-#if defined(DEBUG) && !defined(__sun)
-/* CSTYLED */
-#define STATIC
-#else
-/* CSTYLED */
-#define STATIC static
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DEBUG_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/dkio.h b/sys/contrib/opensolaris/uts/common/sys/dkio.h
deleted file mode 100644
index b0ddd07..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/dkio.h
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DKIO_H
-#define _SYS_DKIO_H
-
-#pragma ident "%Z%%M% %I% %E% SMI" /* SunOS-4.0 5.19 */
-
-#include <sys/dklabel.h> /* Needed for NDKMAP define */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Structures and definitions for disk io control commands
- */
-
-/*
- * Structures used as data by ioctl calls.
- */
-
-#define DK_DEVLEN 16 /* device name max length, including */
- /* unit # & NULL (ie - "xyc1") */
-
-/*
- * Used for controller info
- */
-struct dk_cinfo {
- char dki_cname[DK_DEVLEN]; /* controller name (no unit #) */
- ushort_t dki_ctype; /* controller type */
- ushort_t dki_flags; /* flags */
- ushort_t dki_cnum; /* controller number */
- uint_t dki_addr; /* controller address */
- uint_t dki_space; /* controller bus type */
- uint_t dki_prio; /* interrupt priority */
- uint_t dki_vec; /* interrupt vector */
- char dki_dname[DK_DEVLEN]; /* drive name (no unit #) */
- uint_t dki_unit; /* unit number */
- uint_t dki_slave; /* slave number */
- ushort_t dki_partition; /* partition number */
- ushort_t dki_maxtransfer; /* max. transfer size in DEV_BSIZE */
-};
-
-/*
- * Controller types
- */
-#define DKC_UNKNOWN 0
-#define DKC_CDROM 1 /* CD-ROM, SCSI or otherwise */
-#define DKC_WDC2880 2
-#define DKC_XXX_0 3 /* unassigned */
-#define DKC_XXX_1 4 /* unassigned */
-#define DKC_DSD5215 5
-#define DKC_ACB4000 7
-#define DKC_MD21 8
-#define DKC_XXX_2 9 /* unassigned */
-#define DKC_NCRFLOPPY 10
-#define DKC_SMSFLOPPY 12
-#define DKC_SCSI_CCS 13 /* SCSI CCS compatible */
-#define DKC_INTEL82072 14 /* native floppy chip */
-#define DKC_MD 16 /* meta-disk (virtual-disk) driver */
-#define DKC_INTEL82077 19 /* 82077 floppy disk controller */
-#define DKC_DIRECT 20 /* Intel direct attached device i.e. IDE */
-#define DKC_PCMCIA_MEM 21 /* PCMCIA memory disk-like type */
-#define DKC_PCMCIA_ATA 22 /* PCMCIA AT Attached type */
-
-/*
- * Sun reserves up through 1023
- */
-
-#define DKC_CUSTOMER_BASE 1024
-
-/*
- * Flags
- */
-#define DKI_BAD144 0x01 /* use DEC std 144 bad sector fwding */
-#define DKI_MAPTRK 0x02 /* controller does track mapping */
-#define DKI_FMTTRK 0x04 /* formats only full track at a time */
-#define DKI_FMTVOL 0x08 /* formats only full volume at a time */
-#define DKI_FMTCYL 0x10 /* formats only full cylinders at a time */
-#define DKI_HEXUNIT 0x20 /* unit number is printed as 3 hex digits */
-#define DKI_PCMCIA_PFD 0x40 /* PCMCIA pseudo-floppy memory card */
-
-/*
- * Used for all partitions
- */
-struct dk_allmap {
- struct dk_map dka_map[NDKMAP];
-};
-
-#if defined(_SYSCALL32)
-struct dk_allmap32 {
- struct dk_map32 dka_map[NDKMAP];
-};
-#endif /* _SYSCALL32 */
-
-/*
- * Definition of a disk's geometry
- */
-struct dk_geom {
- unsigned short dkg_ncyl; /* # of data cylinders */
- unsigned short dkg_acyl; /* # of alternate cylinders */
- unsigned short dkg_bcyl; /* cyl offset (for fixed head area) */
- unsigned short dkg_nhead; /* # of heads */
- unsigned short dkg_obs1; /* obsolete */
- unsigned short dkg_nsect; /* # of data sectors per track */
- unsigned short dkg_intrlv; /* interleave factor */
- unsigned short dkg_obs2; /* obsolete */
- unsigned short dkg_obs3; /* obsolete */
- unsigned short dkg_apc; /* alternates per cyl (SCSI only) */
- unsigned short dkg_rpm; /* revolutions per minute */
- unsigned short dkg_pcyl; /* # of physical cylinders */
- unsigned short dkg_write_reinstruct; /* # sectors to skip, writes */
- unsigned short dkg_read_reinstruct; /* # sectors to skip, reads */
- unsigned short dkg_extra[7]; /* for compatible expansion */
-};
-
-/*
- * These defines are for historic compatibility with old drivers.
- */
-#define dkg_bhead dkg_obs1 /* used to be head offset */
-#define dkg_gap1 dkg_obs2 /* used to be gap1 */
-#define dkg_gap2 dkg_obs3 /* used to be gap2 */
-
-/*
- * Disk io control commands
- * Warning: some other ioctls with the DIOC prefix exist elsewhere.
- * The Generic DKIOC numbers are from 0 - 50.
- * The Floppy Driver uses 51 - 100.
- * The Hard Disk (except SCSI) 101 - 106. (these are obsolete)
- * The CDROM Driver 151 - 200.
- * The USCSI ioctl 201 - 250.
- */
-#define DKIOC (0x04 << 8)
-
-/*
- * The following ioctls are generic in nature and need to be
- * supported as appropriate by all disk drivers
- */
-#define DKIOCGGEOM (DKIOC|1) /* Get geometry */
-#define DKIOCINFO (DKIOC|3) /* Get info */
-#define DKIOCEJECT (DKIOC|6) /* Generic 'eject' */
-#define DKIOCGVTOC (DKIOC|11) /* Get VTOC */
-#define DKIOCSVTOC (DKIOC|12) /* Set VTOC & Write to Disk */
-
-/*
- * Disk Cache Controls. These ioctls should be supported by
- * all disk drivers.
- *
- * DKIOCFLUSHWRITECACHE when used from user-mode ignores the ioctl
- * argument, but it should be passed as NULL to allow for future
- * reinterpretation. From user-mode, this ioctl request is synchronous.
- *
- * When invoked from within the kernel, the arg can be NULL to indicate
- * a synchronous request or can be the address of a struct dk_callback
- * to request an asynchronous callback when the flush request is complete.
- * In this case, the flag to the ioctl must include FKIOCTL and the
- * dkc_callback field of the pointed to struct must be non-null or the
- * request is made synchronously.
- *
- * In the callback case: if the ioctl returns 0, a callback WILL be performed.
- * If the ioctl returns non-zero, a callback will NOT be performed.
- * NOTE: In some cases, the callback may be done BEFORE the ioctl call
- * returns. The caller's locking strategy should be prepared for this case.
- */
-#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
-
-struct dk_callback {
- void (*dkc_callback)(void *dkc_cookie, int error);
- void *dkc_cookie;
-};
-
-#define DKIOCGETWCE (DKIOC|36) /* Get current write cache */
- /* enablement status */
-#define DKIOCSETWCE (DKIOC|37) /* Enable/Disable write cache */
-
-/*
- * The following ioctls are used by Sun drivers to communicate
- * with their associated format routines. Support of these ioctls
- * is not required of foreign drivers
- */
-#define DKIOCSGEOM (DKIOC|2) /* Set geometry */
-#define DKIOCSAPART (DKIOC|4) /* Set all partitions */
-#define DKIOCGAPART (DKIOC|5) /* Get all partitions */
-#define DKIOCG_PHYGEOM (DKIOC|32) /* get physical geometry */
-#define DKIOCG_VIRTGEOM (DKIOC|33) /* get virtual geometry */
-
-/*
- * The following ioctls are for removable media support
- */
-#define DKIOCLOCK (DKIOC|7) /* Generic 'lock' */
-#define DKIOCUNLOCK (DKIOC|8) /* Generic 'unlock' */
-#define DKIOCSTATE (DKIOC|13) /* Inquire insert/eject state */
-#define DKIOCREMOVABLE (DKIOC|16) /* is media removable */
-
-
-/*
- * ioctl for hotpluggable devices
- */
-#define DKIOCHOTPLUGGABLE (DKIOC|35) /* is hotpluggable */
-
-/*
- * Ioctl to force driver to re-read the alternate partition and rebuild
- * the internal defect map.
- */
-#define DKIOCADDBAD (DKIOC|20) /* Re-read the alternate map (IDE) */
-#define DKIOCGETDEF (DKIOC|21) /* read defect list (IDE) */
-
-/*
- * Used by applications to get disk defect information from IDE
- * drives.
- */
-#ifdef _SYSCALL32
-struct defect_header32 {
- int head;
- caddr32_t buffer;
-};
-#endif /* _SYSCALL32 */
-
-struct defect_header {
- int head;
- caddr_t buffer;
-};
-
-#define DKIOCPARTINFO (DKIOC|22) /* Get partition or slice parameters */
-
-/*
- * Used by applications to get partition or slice information
- */
-#ifdef _SYSCALL32
-struct part_info32 {
- daddr32_t p_start;
- int p_length;
-};
-#endif /* _SYSCALL32 */
-
-struct part_info {
- daddr_t p_start;
- int p_length;
-};
-
-/* The following ioctls are for Optical Memory Device */
-#define DKIOC_EBP_ENABLE (DKIOC|40) /* enable by pass erase on write */
-#define DKIOC_EBP_DISABLE (DKIOC|41) /* disable by pass erase on write */
-
-/*
- * This state enum is the argument passed to the DKIOCSTATE ioctl.
- */
-enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE };
-
-#define DKIOCGMEDIAINFO (DKIOC|42) /* get information about the media */
-
-/*
- * ioctls to read/write mboot info.
- */
-#define DKIOCGMBOOT (DKIOC|43) /* get mboot info */
-#define DKIOCSMBOOT (DKIOC|44) /* set mboot info */
-
-/*
- * ioctl to get the device temperature.
- */
-#define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */
-
-/*
- * Used for providing the temperature.
- */
-
-struct dk_temperature {
- uint_t dkt_flags; /* Flags */
- short dkt_cur_temp; /* Current disk temperature */
- short dkt_ref_temp; /* reference disk temperature */
-};
-
-#define DKT_BYPASS_PM 0x1
-#define DKT_INVALID_TEMP 0xFFFF
-
-
-/*
- * Used for Media info or the current profile info
- */
-struct dk_minfo {
- uint_t dki_media_type; /* Media type or profile info */
- uint_t dki_lbsize; /* Logical blocksize of media */
- diskaddr_t dki_capacity; /* Capacity as # of dki_lbsize blks */
-};
-
-/*
- * Media types or profiles known
- */
-#define DK_UNKNOWN 0x00 /* Media inserted - type unknown */
-
-
-/*
- * SFF 8090 Specification Version 3, media types 0x01 - 0xfffe are retained to
- * maintain compatibility with SFF 8090. The following define the
- * optical media types.
- */
-#define DK_REMOVABLE_DISK 0x02 /* Removable Disk */
-#define DK_MO_ERASABLE 0x03 /* MO Erasable */
-#define DK_MO_WRITEONCE 0x04 /* MO Write once */
-#define DK_AS_MO 0x05 /* AS MO */
-#define DK_CDROM 0x08 /* CDROM */
-#define DK_CDR 0x09 /* CD-R */
-#define DK_CDRW 0x0A /* CD-RW */
-#define DK_DVDROM 0x10 /* DVD-ROM */
-#define DK_DVDR 0x11 /* DVD-R */
-#define DK_DVDRAM 0x12 /* DVD_RAM or DVD-RW */
-
-/*
- * Media types for other rewritable magnetic media
- */
-#define DK_FIXED_DISK 0x10001 /* Fixed disk SCSI or otherwise */
-#define DK_FLOPPY 0x10002 /* Floppy media */
-#define DK_ZIP 0x10003 /* IOMEGA ZIP media */
-#define DK_JAZ 0x10004 /* IOMEGA JAZ media */
-
-#define DKIOCSETEFI (DKIOC|17) /* Set EFI info */
-#define DKIOCGETEFI (DKIOC|18) /* Get EFI info */
-
-#define DKIOCPARTITION (DKIOC|9) /* Get partition info */
-
-/*
- * Ioctls to get/set volume capabilities related to Logical Volume Managers.
- * They include the ability to get/set capabilities and to issue a read to a
- * specific underlying device of a replicated device.
- */
-
-#define DKIOCGETVOLCAP (DKIOC | 25) /* Get volume capabilities */
-#define DKIOCSETVOLCAP (DKIOC | 26) /* Set volume capabilities */
-#define DKIOCDMR (DKIOC | 27) /* Issue a directed read */
-
-typedef uint_t volcapinfo_t;
-
-typedef uint_t volcapset_t;
-
-#define DKV_ABR_CAP 0x00000001 /* Support Appl.Based Recovery */
-#define DKV_DMR_CAP 0x00000002 /* Support Directed Mirror Read */
-
-typedef struct volcap {
- volcapinfo_t vc_info; /* Capabilities available */
- volcapset_t vc_set; /* Capabilities set */
-} volcap_t;
-
-#define VOL_SIDENAME 256
-
-typedef struct vol_directed_rd {
- int vdr_flags;
- offset_t vdr_offset;
- size_t vdr_nbytes;
- size_t vdr_bytesread;
- void *vdr_data;
- int vdr_side;
- char vdr_side_name[VOL_SIDENAME];
-} vol_directed_rd_t;
-
-#define DKV_SIDE_INIT (-1)
-#define DKV_DMR_NEXT_SIDE 0x00000001
-#define DKV_DMR_DONE 0x00000002
-#define DKV_DMR_ERROR 0x00000004
-#define DKV_DMR_SUCCESS 0x00000008
-#define DKV_DMR_SHORT 0x00000010
-
-#ifdef _MULTI_DATAMODEL
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack(4)
-#endif
-typedef struct vol_directed_rd32 {
- int32_t vdr_flags;
- offset_t vdr_offset; /* 64-bit element on 32-bit alignment */
- size32_t vdr_nbytes;
- size32_t vdr_bytesread;
- caddr32_t vdr_data;
- int32_t vdr_side;
- char vdr_side_name[VOL_SIDENAME];
-} vol_directed_rd32_t;
-#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
-#pragma pack()
-#endif
-#endif /* _MULTI_DATAMODEL */
-
-/*
- * This ioctl is used to fetch the disk's device type, vendor ID,
- * model number/product ID, firmware revision and serial number together.
- *
- * Currently there are two device types - DKD_ATA_TYPE which means the
- * disk is driven by cmdk/ata or dad/uata driver, and DKD_SCSI_TYPE
- * which means the disk is driven by sd/scsi hba driver.
- */
-#define DKIOC_GETDISKID (DKIOC|46)
-
-/* These two labels are for dkd_dtype of dk_disk_id_t */
-#define DKD_ATA_TYPE 0x01 /* ATA disk or legacy mode SATA disk */
-#define DKD_SCSI_TYPE 0x02 /* SCSI disk or native mode SATA disk */
-
-#define DKD_ATA_MODEL 40 /* model number length */
-#define DKD_ATA_FWVER 8 /* firmware revision length */
-#define DKD_ATA_SERIAL 20 /* serial number length */
-
-#define DKD_SCSI_VENDOR 8 /* vendor ID length */
-#define DKD_SCSI_PRODUCT 16 /* product ID length */
-#define DKD_SCSI_REVLEVEL 4 /* revision level length */
-#define DKD_SCSI_SERIAL 12 /* serial number length */
-
-/*
- * The argument type for DKIOC_GETDISKID ioctl.
- */
-typedef struct dk_disk_id {
- uint_t dkd_dtype;
- union {
- struct {
- char dkd_amodel[DKD_ATA_MODEL]; /* 40 bytes */
- char dkd_afwver[DKD_ATA_FWVER]; /* 8 bytes */
- char dkd_aserial[DKD_ATA_SERIAL]; /* 20 bytes */
- } ata_disk_id;
- struct {
- char dkd_svendor[DKD_SCSI_VENDOR]; /* 8 bytes */
- char dkd_sproduct[DKD_SCSI_PRODUCT]; /* 16 bytes */
- char dkd_sfwver[DKD_SCSI_REVLEVEL]; /* 4 bytes */
- char dkd_sserial[DKD_SCSI_SERIAL]; /* 12 bytes */
- } scsi_disk_id;
- } disk_id;
-} dk_disk_id_t;
-
-/*
- * This ioctl is used to update the firmware of the device.
- */
-#define DKIOC_UPDATEFW (DKIOC|47)
-
-/* The argument type for DKIOC_UPDATEFW ioctl */
-typedef struct dk_updatefw {
- caddr_t dku_ptrbuf; /* pointer to firmware buf */
- uint_t dku_size; /* firmware buf length */
- uint8_t dku_type; /* firmware update type */
-} dk_updatefw_t;
-
-#ifdef _SYSCALL32
-typedef struct dk_updatefw_32 {
- caddr32_t dku_ptrbuf; /* pointer to firmware buf */
- uint_t dku_size; /* firmware buf length */
- uint8_t dku_type; /* firmware update type */
-} dk_updatefw_32_t;
-#endif /* _SYSCALL32 */
-
-/*
- * firmware update type - temporary or permanent use
- */
-#define FW_TYPE_TEMP 0x0 /* temporary use */
-#define FW_TYPE_PERM 0x1 /* permanent use */
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DKIO_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/dklabel.h b/sys/contrib/opensolaris/uts/common/sys/dklabel.h
deleted file mode 100644
index 92cb47a..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/dklabel.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 1990-2002 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DKLABEL_H
-#define _SYS_DKLABEL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/isa_defs.h>
-#include <sys/types32.h>
-#include <sys/isa_defs.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Miscellaneous defines
- */
-#define DKL_MAGIC 0xDABE /* magic number */
-#define FKL_MAGIC 0xff /* magic number for DOS floppies */
-
-#if defined(_SUNOS_VTOC_16)
-#define NDKMAP 16 /* # of logical partitions */
-#define DK_LABEL_LOC 1 /* location of disk label */
-#elif defined(_SUNOS_VTOC_8)
-#define NDKMAP 8 /* # of logical partitions */
-#define DK_LABEL_LOC 0 /* location of disk label */
-#else
-#error "No VTOC format defined."
-#endif
-
-#define LEN_DKL_ASCII 128 /* length of dkl_asciilabel */
-#define LEN_DKL_VVOL 8 /* length of v_volume */
-#define DK_LABEL_SIZE 512 /* size of disk label */
-#define DK_MAX_BLOCKS 0x7fffffff /* max # of blocks handled */
-
-/*
- * Reserve two cylinders on SCSI disks.
- * One is for the backup disk label and the other is for the deviceid.
- *
- * IPI disks only reserve one cylinder, but they will go away soon.
- * CDROMs do not reserve any cylinders.
- */
-#define DK_ACYL 2
-
-/*
- * Format of a Sun disk label.
- * Resides in cylinder 0, head 0, sector 0.
- *
- * sizeof (struct dk_label) should be 512 (the current sector size),
- * but should the sector size increase, this structure should remain
- * at the beginning of the sector.
- */
-
-/*
- * partition headers: section 1
- * Returned in struct dk_allmap by ioctl DKIOC[SG]APART (dkio(7I))
- */
-struct dk_map {
- daddr_t dkl_cylno; /* starting cylinder */
- daddr_t dkl_nblk; /* number of blocks; if == 0, */
- /* partition is undefined */
-};
-
-/*
- * partition headers: section 1
- * Fixed size for on-disk dk_label
- */
-struct dk_map32 {
- daddr32_t dkl_cylno; /* starting cylinder */
- daddr32_t dkl_nblk; /* number of blocks; if == 0, */
- /* partition is undefined */
-};
-
-/*
- * partition headers: section 2,
- * brought over from AT&T SVr4 vtoc structure.
- */
-struct dk_map2 {
- uint16_t p_tag; /* ID tag of partition */
- uint16_t p_flag; /* permission flag */
-};
-
-struct dkl_partition {
- uint16_t p_tag; /* ID tag of partition */
- uint16_t p_flag; /* permision flags */
- daddr32_t p_start; /* start sector no of partition */
- int32_t p_size; /* # of blocks in partition */
-};
-
-
-/*
- * VTOC inclusions from AT&T SVr4
- * Fixed sized types for on-disk VTOC
- */
-
-struct dk_vtoc {
-#if defined(_SUNOS_VTOC_16)
- uint32_t v_bootinfo[3]; /* info for mboot (unsupported) */
- uint32_t v_sanity; /* to verify vtoc sanity */
- uint32_t v_version; /* layout version */
- char v_volume[LEN_DKL_VVOL]; /* volume name */
- uint16_t v_sectorsz; /* sector size in bytes */
- uint16_t v_nparts; /* number of partitions */
- uint32_t v_reserved[10]; /* free space */
- struct dkl_partition v_part[NDKMAP]; /* partition headers */
- time32_t timestamp[NDKMAP]; /* partition timestamp (unsupported) */
- char v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
-#elif defined(_SUNOS_VTOC_8)
- uint32_t v_version; /* layout version */
- char v_volume[LEN_DKL_VVOL]; /* volume name */
- uint16_t v_nparts; /* number of partitions */
- struct dk_map2 v_part[NDKMAP]; /* partition hdrs, sec 2 */
- uint32_t v_bootinfo[3]; /* info needed by mboot */
- uint32_t v_sanity; /* to verify vtoc sanity */
- uint32_t v_reserved[10]; /* free space */
- time32_t v_timestamp[NDKMAP]; /* partition timestamp */
-#else
-#error "No VTOC format defined."
-#endif
-};
-
-/*
- * define the amount of disk label padding needed to make
- * the entire structure occupy 512 bytes.
- */
-#if defined(_SUNOS_VTOC_16)
-#define LEN_DKL_PAD (DK_LABEL_SIZE - \
- ((sizeof (struct dk_vtoc) + \
- (4 * sizeof (uint32_t)) + \
- (12 * sizeof (uint16_t)) + \
- (2 * (sizeof (uint16_t))))))
-#elif defined(_SUNOS_VTOC_8)
-#define LEN_DKL_PAD (DK_LABEL_SIZE \
- - ((LEN_DKL_ASCII) + \
- (sizeof (struct dk_vtoc)) + \
- (sizeof (struct dk_map32) * NDKMAP) + \
- (14 * (sizeof (uint16_t))) + \
- (2 * (sizeof (uint16_t)))))
-#else
-#error "No VTOC format defined."
-#endif
-
-
-struct dk_label {
-#if defined(_SUNOS_VTOC_16)
- struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */
- uint32_t dkl_pcyl; /* # of physical cylinders */
- uint32_t dkl_ncyl; /* # of data cylinders */
- uint16_t dkl_acyl; /* # of alternate cylinders */
- uint16_t dkl_bcyl; /* cyl offset (for fixed head area) */
- uint32_t dkl_nhead; /* # of heads */
- uint32_t dkl_nsect; /* # of data sectors per track */
- uint16_t dkl_intrlv; /* interleave factor */
- uint16_t dkl_skew; /* skew factor */
- uint16_t dkl_apc; /* alternates per cyl (SCSI only) */
- uint16_t dkl_rpm; /* revolutions per minute */
- uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */
- uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */
- uint16_t dkl_extra[4]; /* for compatible expansion */
- char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */
-#elif defined(_SUNOS_VTOC_8)
- char dkl_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
- struct dk_vtoc dkl_vtoc; /* vtoc inclusions from AT&T SVr4 */
- uint16_t dkl_write_reinstruct; /* # sectors to skip, writes */
- uint16_t dkl_read_reinstruct; /* # sectors to skip, reads */
- char dkl_pad[LEN_DKL_PAD]; /* unused part of 512 bytes */
- uint16_t dkl_rpm; /* rotations per minute */
- uint16_t dkl_pcyl; /* # physical cylinders */
- uint16_t dkl_apc; /* alternates per cylinder */
- uint16_t dkl_obs1; /* obsolete */
- uint16_t dkl_obs2; /* obsolete */
- uint16_t dkl_intrlv; /* interleave factor */
- uint16_t dkl_ncyl; /* # of data cylinders */
- uint16_t dkl_acyl; /* # of alternate cylinders */
- uint16_t dkl_nhead; /* # of heads in this partition */
- uint16_t dkl_nsect; /* # of 512 byte sectors per track */
- uint16_t dkl_obs3; /* obsolete */
- uint16_t dkl_obs4; /* obsolete */
- struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */
-#else
-#error "No VTOC format defined."
-#endif
- uint16_t dkl_magic; /* identifies this label format */
- uint16_t dkl_cksum; /* xor checksum of sector */
-};
-
-#if defined(_SUNOS_VTOC_16)
-#define dkl_asciilabel dkl_vtoc.v_asciilabel
-#define v_timestamp timestamp
-
-#elif defined(_SUNOS_VTOC_8)
-
-/*
- * These defines are for historic compatibility with old drivers.
- */
-#define dkl_gap1 dkl_obs1 /* used to be gap1 */
-#define dkl_gap2 dkl_obs2 /* used to be gap2 */
-#define dkl_bhead dkl_obs3 /* used to be label head offset */
-#define dkl_ppart dkl_obs4 /* used to by physical partition */
-#else
-#error "No VTOC format defined."
-#endif
-
-struct fk_label { /* DOS floppy label */
- uchar_t fkl_type;
- uchar_t fkl_magich;
- uchar_t fkl_magicl;
- uchar_t filler;
-};
-
-/*
- * Layout of stored fabricated device id (on-disk)
- */
-#define DK_DEVID_BLKSIZE (512)
-#define DK_DEVID_SIZE (DK_DEVID_BLKSIZE - ((sizeof (uchar_t) * 7)))
-#define DK_DEVID_REV_MSB (0)
-#define DK_DEVID_REV_LSB (1)
-
-struct dk_devid {
- uchar_t dkd_rev_hi; /* revision (MSB) */
- uchar_t dkd_rev_lo; /* revision (LSB) */
- uchar_t dkd_flags; /* flags (not used yet) */
- uchar_t dkd_devid[DK_DEVID_SIZE]; /* devid stored here */
- uchar_t dkd_checksum3; /* checksum (MSB) */
- uchar_t dkd_checksum2;
- uchar_t dkd_checksum1;
- uchar_t dkd_checksum0; /* checksum (LSB) */
-};
-
-#define DKD_GETCHKSUM(dkd) ((dkd)->dkd_checksum3 << 24) + \
- ((dkd)->dkd_checksum2 << 16) + \
- ((dkd)->dkd_checksum1 << 8) + \
- ((dkd)->dkd_checksum0)
-
-#define DKD_FORMCHKSUM(c, dkd) (dkd)->dkd_checksum3 = hibyte(hiword((c))); \
- (dkd)->dkd_checksum2 = lobyte(hiword((c))); \
- (dkd)->dkd_checksum1 = hibyte(loword((c))); \
- (dkd)->dkd_checksum0 = lobyte(loword((c)));
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DKLABEL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/errorq.h b/sys/contrib/opensolaris/uts/common/sys/errorq.h
deleted file mode 100644
index 971b19e..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/errorq.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ERRORQ_H
-#define _ERRORQ_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/nvpair.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct errorq errorq_t;
-typedef struct errorq_elem errorq_elem_t;
-typedef void (*errorq_func_t)(void *, const void *, const errorq_elem_t *);
-
-/*
- * Public flags for errorq_create(): bit range 0-15
- */
-#define ERRORQ_VITAL 0x0001 /* drain queue automatically on system reset */
-
-/*
- * Public flags for errorq_dispatch():
- */
-#define ERRORQ_ASYNC 0 /* schedule async queue drain for caller */
-#define ERRORQ_SYNC 1 /* do not schedule drain; caller will drain */
-
-#ifdef _KERNEL
-
-extern errorq_t *errorq_create(const char *, errorq_func_t, void *,
- ulong_t, size_t, uint_t, uint_t);
-
-extern errorq_t *errorq_nvcreate(const char *, errorq_func_t, void *,
- ulong_t, size_t, uint_t, uint_t);
-
-extern void errorq_destroy(errorq_t *);
-extern void errorq_dispatch(errorq_t *, const void *, size_t, uint_t);
-extern void errorq_drain(errorq_t *);
-extern void errorq_init(void);
-extern void errorq_panic(void);
-extern errorq_elem_t *errorq_reserve(errorq_t *);
-extern void errorq_commit(errorq_t *, errorq_elem_t *, uint_t);
-extern void errorq_cancel(errorq_t *, errorq_elem_t *);
-extern nvlist_t *errorq_elem_nvl(errorq_t *, const errorq_elem_t *);
-extern nv_alloc_t *errorq_elem_nva(errorq_t *, const errorq_elem_t *);
-extern void *errorq_elem_dup(errorq_t *, const errorq_elem_t *,
- errorq_elem_t **);
-extern void errorq_dump();
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ERRORQ_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/feature_tests.h b/sys/contrib/opensolaris/uts/common/sys/feature_tests.h
deleted file mode 100644
index bb79cb8..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/feature_tests.h
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FEATURE_TESTS_H
-#define _SYS_FEATURE_TESTS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/ccompile.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Values of _POSIX_C_SOURCE
- *
- * undefined not a POSIX compilation
- * 1 POSIX.1-1990 compilation
- * 2 POSIX.2-1992 compilation
- * 199309L POSIX.1b-1993 compilation (Real Time)
- * 199506L POSIX.1c-1995 compilation (POSIX Threads)
- * 200112L POSIX.1-2001 compilation (Austin Group Revision)
- */
-#if defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE)
-#define _POSIX_C_SOURCE 1
-#endif
-
-/*
- * The feature test macros __XOPEN_OR_POSIX, _STRICT_STDC, and _STDC_C99
- * are Sun implementation specific macros created in order to compress
- * common standards specified feature test macros for easier reading.
- * These macros should not be used by the application developer as
- * unexpected results may occur. Instead, the user should reference
- * standards(5) for correct usage of the standards feature test macros.
- *
- * __XOPEN_OR_POSIX Used in cases where a symbol is defined by both
- * X/Open or POSIX or in the negative, when neither
- * X/Open or POSIX defines a symbol.
- *
- * _STRICT_STDC __STDC__ is specified by the C Standards and defined
- * by the compiler. For Sun compilers the value of
- * __STDC__ is either 1, 0, or not defined based on the
- * compilation mode (see cc(1)). When the value of
- * __STDC__ is 1 and in the absence of any other feature
- * test macros, the namespace available to the application
- * is limited to only those symbols defined by the C
- * Standard. _STRICT_STDC provides a more readable means
- * of identifying symbols defined by the standard, or in
- * the negative, symbols that are extensions to the C
- * Standard. See additional comments for GNU C differences.
- *
- * _STDC_C99 __STDC_VERSION__ is specified by the C standards and
- * defined by the compiler and indicates the version of
- * the C standard. A value of 199901L indicates a
- * compiler that complies with ISO/IEC 9899:1999, other-
- * wise known as the C99 standard.
- */
-
-#if defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE)
-#define __XOPEN_OR_POSIX
-#endif
-
-/*
- * ISO/IEC 9899:1990 and it's revision, ISO/IEC 9899:1999 specify the
- * following predefined macro name:
- *
- * __STDC__ The integer constant 1, intended to indicate a conforming
- * implementation.
- *
- * Furthermore, a strictly conforming program shall use only those features
- * of the language and library specified in these standards. A conforming
- * implementation shall accept any strictly conforming program.
- *
- * Based on these requirements, Sun's C compiler defines __STDC__ to 1 for
- * strictly conforming environments and __STDC__ to 0 for environments that
- * use ANSI C semantics but allow extensions to the C standard. For non-ANSI
- * C semantics, Sun's C compiler does not define __STDC__.
- *
- * The GNU C project interpretation is that __STDC__ should always be defined
- * to 1 for compilation modes that accept ANSI C syntax regardless of whether
- * or not extensions to the C standard are used. Violations of conforming
- * behavior are conditionally flagged as warnings via the use of the
- * -pedantic option. In addition to defining __STDC__ to 1, the GNU C
- * compiler also defines __STRICT_ANSI__ as a means of specifying strictly
- * conforming environments using the -ansi or -std=<standard> options.
- *
- * In the absence of any other compiler options, Sun and GNU set the value
- * of __STDC__ as follows when using the following options:
- *
- * Value of __STDC__ __STRICT_ANSI__
- *
- * cc -Xa (default) 0 undefined
- * cc -Xt (transitional) 0 undefined
- * cc -Xc (strictly conforming) 1 undefined
- * cc -Xs (K&R C) undefined undefined
- *
- * gcc (default) 1 undefined
- * gcc -ansi, -std={c89, c99,...) 1 defined
- * gcc -traditional (K&R) undefined undefined
- *
- * The default compilation modes for Sun C compilers versus GNU C compilers
- * results in a differing value for __STDC__ which results in a more
- * restricted namespace when using Sun compilers. To allow both GNU and Sun
- * interpretations to peacefully co-exist, we use the following Sun
- * implementation _STRICT_STDC_ macro:
- */
-
-#if (__STDC__ - 0 == 1 && !defined(__GNUC__)) || \
- (defined(__GNUC__) && defined(__STRICT_ANSI__))
-#define _STRICT_STDC
-#else
-#undef _STRICT_STDC
-#endif
-
-/*
- * Compiler complies with ISO/IEC 9899:1999
- */
-
-#if __STDC_VERSION__ - 0 >= 199901L
-#ifndef _STDC_C99
-#define _STDC_C99
-#endif
-#endif
-
-/*
- * Large file interfaces:
- *
- * _LARGEFILE_SOURCE
- * 1 large file-related additions to POSIX
- * interfaces requested (fseeko, etc.)
- * _LARGEFILE64_SOURCE
- * 1 transitional large-file-related interfaces
- * requested (seek64, stat64, etc.)
- *
- * The corresponding announcement macros are respectively:
- * _LFS_LARGEFILE
- * _LFS64_LARGEFILE
- * (These are set in <unistd.h>.)
- *
- * Requesting _LARGEFILE64_SOURCE implies requesting _LARGEFILE_SOURCE as
- * well.
- *
- * The large file interfaces are made visible regardless of the initial values
- * of the feature test macros under certain circumstances:
- * - If no explicit standards-conforming environment is requested (neither
- * of _POSIX_SOURCE nor _XOPEN_SOURCE is defined and the value of
- * __STDC__ does not imply standards conformance).
- * - Extended system interfaces are explicitly requested (__EXTENSIONS__
- * is defined).
- * - Access to in-kernel interfaces is requested (_KERNEL or _KMEMUSER is
- * defined). (Note that this dependency is an artifact of the current
- * kernel implementation and may change in future releases.)
- */
-#if (!defined(_STRICT_STDC) && !defined(__XOPEN_OR_POSIX)) || \
- defined(_KERNEL) || defined(_KMEMUSER) || \
- defined(__EXTENSIONS__)
-#undef _LARGEFILE64_SOURCE
-#define _LARGEFILE64_SOURCE 1
-#endif
-#if _LARGEFILE64_SOURCE - 0 == 1
-#undef _LARGEFILE_SOURCE
-#define _LARGEFILE_SOURCE 1
-#endif
-
-/*
- * Large file compilation environment control:
- *
- * The setting of _FILE_OFFSET_BITS controls the size of various file-related
- * types and governs the mapping between file-related source function symbol
- * names and the corresponding binary entry points.
- *
- * In the 32-bit environment, the default value is 32; if not set, set it to
- * the default here, to simplify tests in other headers.
- *
- * In the 64-bit compilation environment, the only value allowed is 64.
- */
-#if defined(_LP64)
-#ifndef _FILE_OFFSET_BITS
-#define _FILE_OFFSET_BITS 64
-#endif
-#if _FILE_OFFSET_BITS - 0 != 64
-#error "invalid _FILE_OFFSET_BITS value specified"
-#endif
-#else /* _LP64 */
-#ifndef _FILE_OFFSET_BITS
-#define _FILE_OFFSET_BITS 32
-#endif
-#if _FILE_OFFSET_BITS - 0 != 32 && _FILE_OFFSET_BITS - 0 != 64
-#error "invalid _FILE_OFFSET_BITS value specified"
-#endif
-#endif /* _LP64 */
-
-/*
- * Use of _XOPEN_SOURCE
- *
- * The following X/Open specifications are supported:
- *
- * X/Open Portability Guide, Issue 3 (XPG3)
- * X/Open CAE Specification, Issue 4 (XPG4)
- * X/Open CAE Specification, Issue 4, Version 2 (XPG4v2)
- * X/Open CAE Specification, Issue 5 (XPG5)
- * Open Group Technical Standard, Issue 6 (XPG6), also referred to as
- * IEEE Std. 1003.1-2001 and ISO/IEC 9945:2002.
- *
- * XPG4v2 is also referred to as UNIX 95 (SUS or SUSv1).
- * XPG5 is also referred to as UNIX 98 or the Single Unix Specification,
- * Version 2 (SUSv2)
- * XPG6 is the result of a merge of the X/Open and POSIX specifications
- * and as such is also referred to as IEEE Std. 1003.1-2001 in
- * addition to UNIX 03 and SUSv3.
- *
- * When writing a conforming X/Open application, as per the specification
- * requirements, the appropriate feature test macros must be defined at
- * compile time. These are as follows. For more info, see standards(5).
- *
- * Feature Test Macro Specification
- * ------------------------------------------------ -------------
- * _XOPEN_SOURCE XPG3
- * _XOPEN_SOURCE && _XOPEN_VERSION = 4 XPG4
- * _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED = 1 XPG4v2
- * _XOPEN_SOURCE = 500 XPG5
- * _XOPEN_SOURCE = 600 (or POSIX_C_SOURCE=200112L) XPG6
- *
- * In order to simplify the guards within the headers, the following
- * implementation private test macros have been created. Applications
- * must NOT use these private test macros as unexpected results will
- * occur.
- *
- * Note that in general, the use of these private macros is cumulative.
- * For example, the use of _XPG3 with no other restrictions on the X/Open
- * namespace will make the symbols visible for XPG3 through XPG6
- * compilation environments. The use of _XPG4_2 with no other X/Open
- * namespace restrictions indicates that the symbols were introduced in
- * XPG4v2 and are therefore visible for XPG4v2 through XPG6 compilation
- * environments, but not for XPG3 or XPG4 compilation environments.
- *
- * _XPG3 X/Open Portability Guide, Issue 3 (XPG3)
- * _XPG4 X/Open CAE Specification, Issue 4 (XPG4)
- * _XPG4_2 X/Open CAE Specification, Issue 4, Version 2 (XPG4v2/UNIX 95/SUS)
- * _XPG5 X/Open CAE Specification, Issue 5 (XPG5/UNIX 98/SUSv2)
- * _XPG6 Open Group Technical Standard, Issue 6 (XPG6/UNIX 03/SUSv3)
- */
-
-/* X/Open Portability Guide, Issue 3 */
-#if defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE - 0 < 500) && \
- (_XOPEN_VERSION - 0 < 4) && !defined(_XOPEN_SOURCE_EXTENDED)
-#define _XPG3
-/* X/Open CAE Specification, Issue 4 */
-#elif (defined(_XOPEN_SOURCE) && _XOPEN_VERSION - 0 == 4)
-#define _XPG4
-#define _XPG3
-/* X/Open CAE Specification, Issue 4, Version 2 */
-#elif (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE_EXTENDED - 0 == 1)
-#define _XPG4_2
-#define _XPG4
-#define _XPG3
-/* X/Open CAE Specification, Issue 5 */
-#elif (_XOPEN_SOURCE - 0 == 500)
-#define _XPG5
-#define _XPG4_2
-#define _XPG4
-#define _XPG3
-#undef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 199506L
-/* Open Group Technical Standard , Issue 6 */
-#elif (_XOPEN_SOURCE - 0 == 600) || (_POSIX_C_SOURCE - 0 == 200112L)
-#define _XPG6
-#define _XPG5
-#define _XPG4_2
-#define _XPG4
-#define _XPG3
-#undef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 200112L
-#undef _XOPEN_SOURCE
-#define _XOPEN_SOURCE 600
-#endif
-
-/*
- * _XOPEN_VERSION is defined by the X/Open specifications and is not
- * normally defined by the application, except in the case of an XPG4
- * application. On the implementation side, _XOPEN_VERSION defined with
- * the value of 3 indicates an XPG3 application. _XOPEN_VERSION defined
- * with the value of 4 indicates an XPG4 or XPG4v2 (UNIX 95) application.
- * _XOPEN_VERSION defined with a value of 500 indicates an XPG5 (UNIX 98)
- * application and with a value of 600 indicates an XPG6 (UNIX 03)
- * application. The appropriate version is determined by the use of the
- * feature test macros described earlier. The value of _XOPEN_VERSION
- * defaults to 3 otherwise indicating support for XPG3 applications.
- */
-#ifndef _XOPEN_VERSION
-#ifdef _XPG6
-#define _XOPEN_VERSION 600
-#elif defined(_XPG5)
-#define _XOPEN_VERSION 500
-#elif defined(_XPG4_2)
-#define _XOPEN_VERSION 4
-#else
-#define _XOPEN_VERSION 3
-#endif
-#endif
-
-/*
- * ANSI C and ISO 9899:1990 say the type long long doesn't exist in strictly
- * conforming environments. ISO 9899:1999 says it does.
- *
- * The presence of _LONGLONG_TYPE says "long long exists" which is therefore
- * defined in all but strictly conforming environments that disallow it.
- */
-#if !defined(_STDC_C99) && defined(_STRICT_STDC) && !defined(__GNUC__)
-/*
- * Resist attempts to force the definition of long long in this case.
- */
-#if defined(_LONGLONG_TYPE)
-#error "No long long in strictly conforming ANSI C & 1990 ISO C environments"
-#endif
-#else
-#if !defined(_LONGLONG_TYPE)
-#define _LONGLONG_TYPE
-#endif
-#endif
-
-/*
- * It is invalid to compile an XPG3, XPG4, XPG4v2, or XPG5 application
- * using c99. The same is true for POSIX.1-1990, POSIX.2-1992, POSIX.1b,
- * and POSIX.1c applications. Likewise, it is invalid to compile an XPG6
- * or a POSIX.1-2001 application with anything other than a c99 or later
- * compiler. Therefore, we force an error in both cases.
- */
-#if defined(_STDC_C99) && (defined(__XOPEN_OR_POSIX) && !defined(_XPG6))
-#error "Compiler or options invalid for pre-UNIX 03 X/Open applications \
- and pre-2001 POSIX applications"
-#elif !defined(_STDC_C99) && \
- (defined(__XOPEN_OR_POSIX) && defined(_XPG6))
-#error "Compiler or options invalid; UNIX 03 and POSIX.1-2001 applications \
- require the use of c99"
-#endif
-
-/*
- * The following macro defines a value for the ISO C99 restrict
- * keyword so that _RESTRICT_KYWD resolves to "restrict" if
- * an ISO C99 compiler is used and "" (null string) if any other
- * compiler is used. This allows for the use of single prototype
- * declarations regardless of compiler version.
- */
-#if (defined(__STDC__) && defined(_STDC_C99))
-#define _RESTRICT_KYWD restrict
-#else
-#define _RESTRICT_KYWD
-#endif
-
-/*
- * The following macro indicates header support for the ANSI C++
- * standard. The ISO/IEC designation for this is ISO/IEC FDIS 14882.
- */
-#define _ISO_CPP_14882_1998
-
-/*
- * The following macro indicates header support for the C99 standard,
- * ISO/IEC 9899:1999, Programming Languages - C.
- */
-#define _ISO_C_9899_1999
-
-/*
- * The following macro indicates header support for DTrace. The value is an
- * integer that corresponds to the major version number for DTrace.
- */
-#define _DTRACE_VERSION 1
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FEATURE_TESTS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h b/sys/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h
deleted file mode 100644
index aa5c7ee..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FM_FS_ZFS_H
-#define _SYS_FM_FS_ZFS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZFS_ERROR_CLASS "fs.zfs"
-
-#define FM_EREPORT_ZFS_CHECKSUM "checksum"
-#define FM_EREPORT_ZFS_IO "io"
-#define FM_EREPORT_ZFS_DATA "data"
-#define FM_EREPORT_ZFS_POOL "zpool"
-#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
-#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
-#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data"
-#define FM_EREPORT_ZFS_DEVICE_NO_REPLICAS "vdev.no_replicas"
-#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum"
-#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
-#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
-
-#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
-#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid"
-#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context"
-#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid"
-#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type"
-#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
-#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
-#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
-#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
-#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
-#define FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID "parent_devid"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET "zio_objset"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT "zio_object"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL "zio_level"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID "zio_blkid"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR "zio_err"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
-#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
-#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
-
-#define FM_RESOURCE_OK "ok"
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FM_FS_ZFS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/fm/protocol.h b/sys/contrib/opensolaris/uts/common/sys/fm/protocol.h
deleted file mode 100644
index a9980fe..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/fm/protocol.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FM_PROTOCOL_H
-#define _SYS_FM_PROTOCOL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _KERNEL
-#include <sys/varargs.h>
-#include <sys/nvpair.h>
-#else
-#include <libnvpair.h>
-#include <stdarg.h>
-#endif
-
-/* FM common member names */
-#define FM_CLASS "class"
-#define FM_VERSION "version"
-
-/* FM event class values */
-#define FM_EREPORT_CLASS "ereport"
-#define FM_FAULT_CLASS "fault"
-#define FM_RSRC_CLASS "resource"
-#define FM_LIST_EVENT "list"
-
-/* FM list.* event class values */
-#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect"
-#define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated"
-#define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired"
-
-/* ereport class subcategory values */
-#define FM_ERROR_CPU "cpu"
-#define FM_ERROR_IO "io"
-
-/* ereport version and payload member names */
-#define FM_EREPORT_VERS0 0
-#define FM_EREPORT_VERSION FM_EREPORT_VERS0
-
-/* ereport payload member names */
-#define FM_EREPORT_DETECTOR "detector"
-#define FM_EREPORT_ENA "ena"
-
-/* list.* event payload member names */
-#define FM_LIST_EVENT_SIZE "list-sz"
-
-/* list.suspect, isolated, and repaired versions and payload member names */
-#define FM_SUSPECT_UUID "uuid"
-#define FM_SUSPECT_DIAG_CODE "code"
-#define FM_SUSPECT_DIAG_TIME "diag-time"
-#define FM_SUSPECT_DE "de"
-#define FM_SUSPECT_FAULT_LIST "fault-list"
-#define FM_SUSPECT_FAULT_SZ "fault-list-sz"
-#define FM_SUSPECT_FAULT_STATUS "fault-status"
-#define FM_SUSPECT_MESSAGE "message"
-
-#define FM_SUSPECT_VERS0 0
-#define FM_SUSPECT_VERSION FM_SUSPECT_VERS0
-
-/* fault event versions and payload member names */
-#define FM_FAULT_VERS0 0
-#define FM_FAULT_VERSION FM_FAULT_VERS0
-
-#define FM_FAULT_ASRU "asru"
-#define FM_FAULT_FRU "fru"
-#define FM_FAULT_FRU_LABEL "fru-label"
-#define FM_FAULT_CERTAINTY "certainty"
-#define FM_FAULT_RESOURCE "resource"
-#define FM_FAULT_LOCATION "location"
-
-/* resource event versions and payload member names */
-#define FM_RSRC_VERS0 0
-#define FM_RSRC_VERSION FM_RSRC_VERS0
-#define FM_RSRC_RESOURCE "resource"
-
-/* resource.fm.asru.* payload member names */
-#define FM_RSRC_ASRU_UUID "uuid"
-#define FM_RSRC_ASRU_CODE "code"
-#define FM_RSRC_ASRU_FAULTY "faulty"
-#define FM_RSRC_ASRU_UNUSABLE "unusable"
-#define FM_RSRC_ASRU_EVENT "event"
-
-/* resource.fm.xprt.* versions and payload member names */
-#define FM_RSRC_XPRT_VERS0 0
-#define FM_RSRC_XPRT_VERSION FM_RSRC_XPRT_VERS0
-#define FM_RSRC_XPRT_UUID "uuid"
-#define FM_RSRC_XPRT_SUBCLASS "subclass"
-
-/*
- * FM ENA Format Macros
- */
-#define ENA_FORMAT_MASK 0x3
-#define ENA_FORMAT(ena) ((ena) & ENA_FORMAT_MASK)
-
-/* ENA format types */
-#define FM_ENA_FMT0 0
-#define FM_ENA_FMT1 1
-#define FM_ENA_FMT2 2
-
-/* Format 1 */
-#define ENA_FMT1_GEN_MASK 0x00000000000003FCull
-#define ENA_FMT1_ID_MASK 0xFFFFFFFFFFFFFC00ull
-#define ENA_FMT1_CPUID_MASK 0x00000000000FFC00ull
-#define ENA_FMT1_TIME_MASK 0xFFFFFFFFFFF00000ull
-#define ENA_FMT1_GEN_SHFT 2
-#define ENA_FMT1_ID_SHFT 10
-#define ENA_FMT1_CPUID_SHFT ENA_FMT1_ID_SHFT
-#define ENA_FMT1_TIME_SHFT 20
-
-/* Format 2 */
-#define ENA_FMT2_GEN_MASK 0x00000000000003FCull
-#define ENA_FMT2_ID_MASK 0xFFFFFFFFFFFFFC00ull
-#define ENA_FMT2_TIME_MASK ENA_FMT2_ID_MASK
-#define ENA_FMT2_GEN_SHFT 2
-#define ENA_FMT2_ID_SHFT 10
-#define ENA_FMT2_TIME_SHFT ENA_FMT2_ID_SHFT
-
-/* Common FMRI type names */
-#define FM_FMRI_AUTHORITY "authority"
-#define FM_FMRI_SCHEME "scheme"
-#define FM_FMRI_SVC_AUTHORITY "svc-authority"
-
-/* FMRI authority-type member names */
-#define FM_FMRI_AUTH_CHASSIS "chassis-id"
-#define FM_FMRI_AUTH_PRODUCT "product-id"
-#define FM_FMRI_AUTH_DOMAIN "domain-id"
-#define FM_FMRI_AUTH_SERVER "server-id"
-#define FM_FMRI_AUTH_HOST "host-id"
-
-#define FM_AUTH_VERS0 0
-#define FM_FMRI_AUTH_VERSION FM_AUTH_VERS0
-
-/* scheme name values */
-#define FM_FMRI_SCHEME_FMD "fmd"
-#define FM_FMRI_SCHEME_DEV "dev"
-#define FM_FMRI_SCHEME_HC "hc"
-#define FM_FMRI_SCHEME_SVC "svc"
-#define FM_FMRI_SCHEME_CPU "cpu"
-#define FM_FMRI_SCHEME_MEM "mem"
-#define FM_FMRI_SCHEME_MOD "mod"
-#define FM_FMRI_SCHEME_PKG "pkg"
-#define FM_FMRI_SCHEME_LEGACY "legacy-hc"
-#define FM_FMRI_SCHEME_ZFS "zfs"
-
-/* Scheme versions */
-#define FMD_SCHEME_VERSION0 0
-#define FM_FMD_SCHEME_VERSION FMD_SCHEME_VERSION0
-#define DEV_SCHEME_VERSION0 0
-#define FM_DEV_SCHEME_VERSION DEV_SCHEME_VERSION0
-#define FM_HC_VERS0 0
-#define FM_HC_SCHEME_VERSION FM_HC_VERS0
-#define CPU_SCHEME_VERSION0 0
-#define CPU_SCHEME_VERSION1 1
-#define FM_CPU_SCHEME_VERSION CPU_SCHEME_VERSION1
-#define MEM_SCHEME_VERSION0 0
-#define FM_MEM_SCHEME_VERSION MEM_SCHEME_VERSION0
-#define MOD_SCHEME_VERSION0 0
-#define FM_MOD_SCHEME_VERSION MOD_SCHEME_VERSION0
-#define PKG_SCHEME_VERSION0 0
-#define FM_PKG_SCHEME_VERSION PKG_SCHEME_VERSION0
-#define LEGACY_SCHEME_VERSION0 0
-#define FM_LEGACY_SCHEME_VERSION LEGACY_SCHEME_VERSION0
-#define ZFS_SCHEME_VERSION0 0
-#define FM_ZFS_SCHEME_VERSION ZFS_SCHEME_VERSION0
-
-/* hc scheme member names */
-#define FM_FMRI_HC_SERIAL_ID "serial"
-#define FM_FMRI_HC_PART "part"
-#define FM_FMRI_HC_REVISION "revision"
-#define FM_FMRI_HC_ROOT "hc-root"
-#define FM_FMRI_HC_LIST_SZ "hc-list-sz"
-#define FM_FMRI_HC_LIST "hc-list"
-#define FM_FMRI_HC_SPECIFIC "hc-specific"
-
-/* hc-list version and member names */
-#define FM_FMRI_HC_NAME "hc-name"
-#define FM_FMRI_HC_ID "hc-id"
-
-#define HC_LIST_VERSION0 0
-#define FM_HC_LIST_VERSION HC_LIST_VERSION0
-
-/* hc-specific member names */
-#define FM_FMRI_HC_SPECIFIC_OFFSET "offset"
-
-/* fmd module scheme member names */
-#define FM_FMRI_FMD_NAME "mod-name"
-#define FM_FMRI_FMD_VERSION "mod-version"
-
-/* dev scheme member names */
-#define FM_FMRI_DEV_ID "devid"
-#define FM_FMRI_DEV_PATH "device-path"
-
-/* pkg scheme member names */
-#define FM_FMRI_PKG_BASEDIR "pkg-basedir"
-#define FM_FMRI_PKG_INST "pkg-inst"
-#define FM_FMRI_PKG_VERSION "pkg-version"
-
-/* svc scheme member names */
-#define FM_FMRI_SVC_NAME "service-name"
-#define FM_FMRI_SVC_VERSION "service-version"
-#define FM_FMRI_SVC_INSTANCE "instance"
-#define FM_FMRI_SVC_CONTRACT_ID "contract-id"
-
-/* svc-authority member names */
-#define FM_FMRI_SVC_AUTH_SCOPE "scope"
-#define FM_FMRI_SVC_AUTH_SYSTEM_FQN "system-FQN"
-
-/* cpu scheme member names */
-#define FM_FMRI_CPU_ID "cpuid"
-#define FM_FMRI_CPU_SERIAL_ID "serial"
-#define FM_FMRI_CPU_MASK "cpumask"
-#define FM_FMRI_CPU_VID "cpuvid"
-#define FM_FMRI_CPU_CPUFRU "cpufru"
-
-/* legacy-hc scheme member names */
-#define FM_FMRI_LEGACY_HC "component"
-#define FM_FMRI_LEGACY_HC_PREFIX FM_FMRI_SCHEME_HC":///" \
- FM_FMRI_LEGACY_HC"="
-
-/* mem scheme member names */
-#define FM_FMRI_MEM_UNUM "unum"
-#define FM_FMRI_MEM_SERIAL_ID "serial"
-#define FM_FMRI_MEM_PHYSADDR "physaddr"
-#define FM_FMRI_MEM_MEMCONFIG "memconfig"
-#define FM_FMRI_MEM_OFFSET "offset"
-
-/* mod scheme member names */
-#define FM_FMRI_MOD_PKG "mod-pkg"
-#define FM_FMRI_MOD_NAME "mod-name"
-#define FM_FMRI_MOD_ID "mod-id"
-#define FM_FMRI_MOD_DESC "mod-desc"
-
-/* zfs scheme member names */
-#define FM_FMRI_ZFS_POOL "pool"
-#define FM_FMRI_ZFS_VDEV "vdev"
-
-extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
-extern void fm_nva_xdestroy(nv_alloc_t *);
-
-extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
-extern void fm_nvlist_destroy(nvlist_t *, int);
-
-#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */
-#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */
-
-extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t,
- const nvlist_t *, ...);
-extern void fm_payload_set(nvlist_t *, ...);
-extern int i_fm_payload_set(nvlist_t *, const char *, va_list);
-extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *,
- int, ...);
-extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *,
- const char *);
-extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *);
-extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
- uint8_t *, const char *);
-extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
- const char *, uint64_t);
-extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
- const char *, const char *);
-extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
-
-extern uint64_t fm_ena_increment(uint64_t);
-extern uint64_t fm_ena_generate(uint64_t, uchar_t);
-extern uint64_t fm_ena_generation_get(uint64_t);
-extern uchar_t fm_ena_format_get(uint64_t);
-extern uint64_t fm_ena_id_get(uint64_t);
-extern uint64_t fm_ena_time_get(uint64_t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FM_PROTOCOL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/fm/util.h b/sys/contrib/opensolaris/uts/common/sys/fm/util.h
deleted file mode 100644
index f65e0ab4..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/fm/util.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FM_UTIL_H
-#define _SYS_FM_UTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/nvpair.h>
-#include <sys/errorq.h>
-
-/*
- * Shared user/kernel definitions for class length, error channel name,
- * and kernel event publisher string.
- */
-#define FM_MAX_CLASS 100
-#define FM_ERROR_CHAN "com.sun:fm:error"
-#define FM_PUB "fm"
-
-/*
- * ereport dump device transport support
- *
- * Ereports are written out to the dump device at a prescribed offset from the
- * end, similar to in-transit log messages. The ereports are represented as an
- * erpt_dump_t header followed by ed_size bytes of packed native nvlist data.
- *
- * NOTE: All of these constants and the header must be defined so they have the
- * same representation for *both* 32-bit and 64-bit producers and consumers.
- */
-#define ERPT_MAGIC 0xf00d4eddU
-#define ERPT_MAX_ERRS 16
-#define ERPT_DATA_SZ (6 * 1024)
-#define ERPT_EVCH_MAX 256
-#define ERPT_HIWAT 64
-
-typedef struct erpt_dump {
- uint32_t ed_magic; /* ERPT_MAGIC or zero to indicate end */
- uint32_t ed_chksum; /* checksum32() of packed nvlist data */
- uint32_t ed_size; /* ereport (nvl) fixed buf size */
- uint32_t ed_pad; /* reserved for future use */
- hrtime_t ed_hrt_nsec; /* hrtime of this ereport */
- hrtime_t ed_hrt_base; /* hrtime sample corresponding to ed_tod_base */
- struct {
- uint64_t sec; /* seconds since gettimeofday() Epoch */
- uint64_t nsec; /* nanoseconds past ed_tod_base.sec */
- } ed_tod_base;
-} erpt_dump_t;
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-
-#define FM_STK_DEPTH 20 /* maximum stack depth */
-#define FM_SYM_SZ 64 /* maximum symbol size */
-#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */
-
-#define FM_EREPORT_PAYLOAD_NAME_STACK "stack"
-
-extern errorq_t *ereport_errorq;
-extern void *ereport_dumpbuf;
-extern size_t ereport_dumplen;
-
-extern void fm_init(void);
-extern void fm_nvprint(nvlist_t *);
-extern void fm_panic(const char *, ...);
-extern void fm_banner(void);
-
-extern void fm_ereport_dump(void);
-extern void fm_ereport_post(nvlist_t *, int);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FM_UTIL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/contrib/opensolaris/uts/common/sys/fs/zfs.h
deleted file mode 100644
index bcf8594..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/fs/zfs.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_FS_ZFS_H
-#define _SYS_FS_ZFS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/ioccom.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Types and constants shared between userland and the kernel.
- */
-
-/*
- * Each dataset can be one of the following types. These constants can be
- * combined into masks that can be passed to various functions.
- */
-typedef enum {
- ZFS_TYPE_FILESYSTEM = 0x1,
- ZFS_TYPE_SNAPSHOT = 0x2,
- ZFS_TYPE_VOLUME = 0x4,
- ZFS_TYPE_POOL = 0x8
-} zfs_type_t;
-
-#define ZFS_TYPE_ANY \
- (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
-
-/*
- * Properties are identified by these constants and must be added to the
- * end of this list to ensure that external consumers are not affected
- * by the change. The property list also determines how 'zfs get' will
- * display them. If you make any changes to this list, be sure to update
- * the property table in usr/src/common/zfs/zfs_prop.c.
- */
-typedef enum {
- ZFS_PROP_CONT = -2,
- ZFS_PROP_INVAL = -1,
- ZFS_PROP_TYPE,
- ZFS_PROP_CREATION,
- ZFS_PROP_USED,
- ZFS_PROP_AVAILABLE,
- ZFS_PROP_REFERENCED,
- ZFS_PROP_COMPRESSRATIO,
- ZFS_PROP_MOUNTED,
- ZFS_PROP_ORIGIN,
- ZFS_PROP_QUOTA,
- ZFS_PROP_RESERVATION,
- ZFS_PROP_VOLSIZE,
- ZFS_PROP_VOLBLOCKSIZE,
- ZFS_PROP_RECORDSIZE,
- ZFS_PROP_MOUNTPOINT,
- ZFS_PROP_SHARENFS,
- ZFS_PROP_CHECKSUM,
- ZFS_PROP_COMPRESSION,
- ZFS_PROP_ATIME,
- ZFS_PROP_DEVICES,
- ZFS_PROP_EXEC,
- ZFS_PROP_SETUID,
- ZFS_PROP_READONLY,
- ZFS_PROP_ZONED,
- ZFS_PROP_SNAPDIR,
- ZFS_PROP_ACLMODE,
- ZFS_PROP_ACLINHERIT,
- ZFS_PROP_CREATETXG, /* not exposed to the user */
- ZFS_PROP_NAME, /* not exposed to the user */
- ZFS_PROP_CANMOUNT,
- ZFS_PROP_SHAREISCSI,
- ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
- ZFS_PROP_XATTR,
- ZFS_PROP_NUMCLONES, /* not exposed to the user */
- ZFS_PROP_COPIES,
- ZFS_PROP_BOOTFS
-} zfs_prop_t;
-
-typedef zfs_prop_t zpool_prop_t;
-
-#define ZFS_PROP_VALUE "value"
-#define ZFS_PROP_SOURCE "source"
-
-typedef enum {
- ZFS_SRC_NONE = 0x1,
- ZFS_SRC_DEFAULT = 0x2,
- ZFS_SRC_TEMPORARY = 0x4,
- ZFS_SRC_LOCAL = 0x8,
- ZFS_SRC_INHERITED = 0x10
-} zfs_source_t;
-
-#define ZFS_SRC_ALL 0x1f
-
-/*
- * The following functions are shared between libzfs and the kernel.
- */
-zfs_prop_t zfs_name_to_prop(const char *);
-zpool_prop_t zpool_name_to_prop(const char *);
-boolean_t zfs_prop_user(const char *);
-int zfs_prop_readonly(zfs_prop_t);
-const char *zfs_prop_default_string(zfs_prop_t);
-const char *zfs_prop_to_name(zfs_prop_t);
-const char *zpool_prop_to_name(zfs_prop_t);
-uint64_t zfs_prop_default_numeric(zfs_prop_t);
-int zfs_prop_inheritable(zfs_prop_t);
-int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
-int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
-
-/*
- * Property Iterator
- */
-typedef zfs_prop_t (*zfs_prop_f)(zfs_prop_t, void *);
-typedef zfs_prop_f zpool_prop_f;
-extern zfs_prop_t zfs_prop_iter(zfs_prop_f, void *, boolean_t);
-extern zpool_prop_t zpool_prop_iter(zpool_prop_f, void *, boolean_t);
-
-/*
- * On-disk version number.
- */
-#define ZFS_VERSION_1 1ULL
-#define ZFS_VERSION_2 2ULL
-#define ZFS_VERSION_3 3ULL
-#define ZFS_VERSION_4 4ULL
-#define ZFS_VERSION_5 5ULL
-#define ZFS_VERSION_6 6ULL
-/*
- * When bumping up ZFS_VERSION, make sure GRUB ZFS understands the on-disk
- * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
- * and do the appropriate changes.
- */
-#define ZFS_VERSION ZFS_VERSION_6
-#define ZFS_VERSION_STRING "6"
-
-/*
- * Symbolic names for the changes that caused a ZFS_VERSION switch.
- * Used in the code when checking for presence or absence of a feature.
- * Feel free to define multiple symbolic names for each version if there
- * were multiple changes to on-disk structures during that version.
- *
- * NOTE: When checking the current ZFS_VERSION in your code, be sure
- * to use spa_version() since it reports the version of the
- * last synced uberblock. Checking the in-flight version can
- * be dangerous in some cases.
- */
-#define ZFS_VERSION_INITIAL ZFS_VERSION_1
-#define ZFS_VERSION_DITTO_BLOCKS ZFS_VERSION_2
-#define ZFS_VERSION_SPARES ZFS_VERSION_3
-#define ZFS_VERSION_RAID6 ZFS_VERSION_3
-#define ZFS_VERSION_BPLIST_ACCOUNT ZFS_VERSION_3
-#define ZFS_VERSION_RAIDZ_DEFLATE ZFS_VERSION_3
-#define ZFS_VERSION_DNODE_BYTES ZFS_VERSION_3
-#define ZFS_VERSION_ZPOOL_HISTORY ZFS_VERSION_4
-#define ZFS_VERSION_GZIP_COMPRESSION ZFS_VERSION_5
-#define ZFS_VERSION_BOOTFS ZFS_VERSION_6
-
-/*
- * The following are configuration names used in the nvlist describing a pool's
- * configuration.
- */
-#define ZPOOL_CONFIG_VERSION "version"
-#define ZPOOL_CONFIG_POOL_NAME "name"
-#define ZPOOL_CONFIG_POOL_STATE "state"
-#define ZPOOL_CONFIG_POOL_TXG "txg"
-#define ZPOOL_CONFIG_POOL_GUID "pool_guid"
-#define ZPOOL_CONFIG_CREATE_TXG "create_txg"
-#define ZPOOL_CONFIG_TOP_GUID "top_guid"
-#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree"
-#define ZPOOL_CONFIG_TYPE "type"
-#define ZPOOL_CONFIG_CHILDREN "children"
-#define ZPOOL_CONFIG_ID "id"
-#define ZPOOL_CONFIG_GUID "guid"
-#define ZPOOL_CONFIG_PATH "path"
-#define ZPOOL_CONFIG_DEVID "devid"
-#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array"
-#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift"
-#define ZPOOL_CONFIG_ASHIFT "ashift"
-#define ZPOOL_CONFIG_ASIZE "asize"
-#define ZPOOL_CONFIG_DTL "DTL"
-#define ZPOOL_CONFIG_STATS "stats"
-#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
-#define ZPOOL_CONFIG_OFFLINE "offline"
-#define ZPOOL_CONFIG_ERRCOUNT "error_count"
-#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
-#define ZPOOL_CONFIG_SPARES "spares"
-#define ZPOOL_CONFIG_IS_SPARE "is_spare"
-#define ZPOOL_CONFIG_NPARITY "nparity"
-#define ZPOOL_CONFIG_HOSTID "hostid"
-#define ZPOOL_CONFIG_HOSTNAME "hostname"
-#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
-
-#define VDEV_TYPE_ROOT "root"
-#define VDEV_TYPE_MIRROR "mirror"
-#define VDEV_TYPE_REPLACING "replacing"
-#define VDEV_TYPE_RAIDZ "raidz"
-#define VDEV_TYPE_DISK "disk"
-#define VDEV_TYPE_FILE "file"
-#define VDEV_TYPE_MISSING "missing"
-#define VDEV_TYPE_SPARE "spare"
-
-/*
- * This is needed in userland to report the minimum necessary device size.
- */
-#define SPA_MINDEVSIZE (64ULL << 20)
-
-/*
- * The location of the pool configuration repository, shared between kernel and
- * userland.
- */
-#define ZPOOL_CACHE_DIR "/boot/zfs"
-#define ZPOOL_CACHE_FILE "zpool.cache"
-#define ZPOOL_CACHE_TMP ".zpool.cache"
-
-#define ZPOOL_CACHE ZPOOL_CACHE_DIR "/" ZPOOL_CACHE_FILE
-
-/*
- * vdev states are ordered from least to most healthy.
- * A vdev that's CANT_OPEN or below is considered unusable.
- */
-typedef enum vdev_state {
- VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */
- VDEV_STATE_CLOSED, /* Not currently open */
- VDEV_STATE_OFFLINE, /* Not allowed to open */
- VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
- VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
- VDEV_STATE_HEALTHY /* Presumed good */
-} vdev_state_t;
-
-/*
- * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field
- * of the vdev stats structure uses these constants to distinguish why.
- */
-typedef enum vdev_aux {
- VDEV_AUX_NONE, /* no error */
- VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */
- VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */
- VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */
- VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */
- VDEV_AUX_TOO_SMALL, /* vdev size is too small */
- VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
- VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
- VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
- VDEV_AUX_SPARED /* hot spare used in another pool */
-} vdev_aux_t;
-
-/*
- * pool state. The following states are written to disk as part of the normal
- * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are
- * software abstractions used at various levels to communicate pool state.
- */
-typedef enum pool_state {
- POOL_STATE_ACTIVE = 0, /* In active use */
- POOL_STATE_EXPORTED, /* Explicitly exported */
- POOL_STATE_DESTROYED, /* Explicitly destroyed */
- POOL_STATE_SPARE, /* Reserved for hot spare use */
- POOL_STATE_UNINITIALIZED, /* Internal spa_t state */
- POOL_STATE_UNAVAIL, /* Internal libzfs state */
- POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
-} pool_state_t;
-
-/*
- * Scrub types.
- */
-typedef enum pool_scrub_type {
- POOL_SCRUB_NONE,
- POOL_SCRUB_RESILVER,
- POOL_SCRUB_EVERYTHING,
- POOL_SCRUB_TYPES
-} pool_scrub_type_t;
-
-/*
- * ZIO types. Needed to interpret vdev statistics below.
- */
-typedef enum zio_type {
- ZIO_TYPE_NULL = 0,
- ZIO_TYPE_READ,
- ZIO_TYPE_WRITE,
- ZIO_TYPE_FREE,
- ZIO_TYPE_CLAIM,
- ZIO_TYPE_IOCTL,
- ZIO_TYPES
-} zio_type_t;
-
-/*
- * Vdev statistics. Note: all fields should be 64-bit because this
- * is passed between kernel and userland as an nvlist uint64 array.
- */
-typedef struct vdev_stat {
- hrtime_t vs_timestamp; /* time since vdev load */
- uint64_t vs_state; /* vdev state */
- uint64_t vs_aux; /* see vdev_aux_t */
- uint64_t vs_alloc; /* space allocated */
- uint64_t vs_space; /* total capacity */
- uint64_t vs_dspace; /* deflated capacity */
- uint64_t vs_rsize; /* replaceable dev size */
- uint64_t vs_ops[ZIO_TYPES]; /* operation count */
- uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
- uint64_t vs_read_errors; /* read errors */
- uint64_t vs_write_errors; /* write errors */
- uint64_t vs_checksum_errors; /* checksum errors */
- uint64_t vs_self_healed; /* self-healed bytes */
- uint64_t vs_scrub_type; /* pool_scrub_type_t */
- uint64_t vs_scrub_complete; /* completed? */
- uint64_t vs_scrub_examined; /* bytes examined; top */
- uint64_t vs_scrub_repaired; /* bytes repaired; leaf */
- uint64_t vs_scrub_errors; /* errors during scrub */
- uint64_t vs_scrub_start; /* UTC scrub start time */
- uint64_t vs_scrub_end; /* UTC scrub end time */
-} vdev_stat_t;
-
-#define ZFS_DRIVER "zfs"
-#define ZFS_DEV_NAME "zfs"
-#define ZFS_DEV "/dev/" ZFS_DEV_NAME
-
-/*
- * zvol paths. Irritatingly, the devfsadm interfaces want all these
- * paths without the /dev prefix, but for some things, we want the
- * /dev prefix. Below are the names without /dev.
- */
-#define ZVOL_DEV_DIR "zvol"
-
-/*
- * And here are the things we need with /dev, etc. in front of them.
- */
-#define ZVOL_PSEUDO_DEV "/devices/pseudo/zvol@0:"
-#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR
-
-#define ZVOL_PROP_NAME "name"
-
-/*
- * /dev/zfs ioctl numbers.
- */
-typedef unsigned long zfs_ioc_t;
-
-#define ZFS_IOC(ioreq) ((ioreq) & 0xff)
-
-#define ZFS_IOC_POOL_CREATE _IOWR('Z', 0, struct zfs_cmd)
-#define ZFS_IOC_POOL_DESTROY _IOWR('Z', 1, struct zfs_cmd)
-#define ZFS_IOC_POOL_IMPORT _IOWR('Z', 2, struct zfs_cmd)
-#define ZFS_IOC_POOL_EXPORT _IOWR('Z', 3, struct zfs_cmd)
-#define ZFS_IOC_POOL_CONFIGS _IOWR('Z', 4, struct zfs_cmd)
-#define ZFS_IOC_POOL_STATS _IOWR('Z', 5, struct zfs_cmd)
-#define ZFS_IOC_POOL_TRYIMPORT _IOWR('Z', 6, struct zfs_cmd)
-#define ZFS_IOC_POOL_SCRUB _IOWR('Z', 7, struct zfs_cmd)
-#define ZFS_IOC_POOL_FREEZE _IOWR('Z', 8, struct zfs_cmd)
-#define ZFS_IOC_POOL_UPGRADE _IOWR('Z', 9, struct zfs_cmd)
-#define ZFS_IOC_POOL_GET_HISTORY _IOWR('Z', 10, struct zfs_cmd)
-#define ZFS_IOC_POOL_LOG_HISTORY _IOWR('Z', 11, struct zfs_cmd)
-#define ZFS_IOC_VDEV_ADD _IOWR('Z', 12, struct zfs_cmd)
-#define ZFS_IOC_VDEV_REMOVE _IOWR('Z', 13, struct zfs_cmd)
-#define ZFS_IOC_VDEV_ONLINE _IOWR('Z', 14, struct zfs_cmd)
-#define ZFS_IOC_VDEV_OFFLINE _IOWR('Z', 15, struct zfs_cmd)
-#define ZFS_IOC_VDEV_ATTACH _IOWR('Z', 16, struct zfs_cmd)
-#define ZFS_IOC_VDEV_DETACH _IOWR('Z', 17, struct zfs_cmd)
-#define ZFS_IOC_VDEV_SETPATH _IOWR('Z', 18, struct zfs_cmd)
-#define ZFS_IOC_OBJSET_STATS _IOWR('Z', 19, struct zfs_cmd)
-#define ZFS_IOC_DATASET_LIST_NEXT _IOWR('Z', 20, struct zfs_cmd)
-#define ZFS_IOC_SNAPSHOT_LIST_NEXT _IOWR('Z', 21, struct zfs_cmd)
-#define ZFS_IOC_SET_PROP _IOWR('Z', 22, struct zfs_cmd)
-#define ZFS_IOC_CREATE_MINOR _IOWR('Z', 23, struct zfs_cmd)
-#define ZFS_IOC_REMOVE_MINOR _IOWR('Z', 24, struct zfs_cmd)
-#define ZFS_IOC_CREATE _IOWR('Z', 25, struct zfs_cmd)
-#define ZFS_IOC_DESTROY _IOWR('Z', 26, struct zfs_cmd)
-#define ZFS_IOC_ROLLBACK _IOWR('Z', 27, struct zfs_cmd)
-#define ZFS_IOC_RENAME _IOWR('Z', 28, struct zfs_cmd)
-#define ZFS_IOC_RECVBACKUP _IOWR('Z', 29, struct zfs_cmd)
-#define ZFS_IOC_SENDBACKUP _IOWR('Z', 30, struct zfs_cmd)
-#define ZFS_IOC_INJECT_FAULT _IOWR('Z', 31, struct zfs_cmd)
-#define ZFS_IOC_CLEAR_FAULT _IOWR('Z', 32, struct zfs_cmd)
-#define ZFS_IOC_INJECT_LIST_NEXT _IOWR('Z', 33, struct zfs_cmd)
-#define ZFS_IOC_ERROR_LOG _IOWR('Z', 34, struct zfs_cmd)
-#define ZFS_IOC_CLEAR _IOWR('Z', 35, struct zfs_cmd)
-#define ZFS_IOC_PROMOTE _IOWR('Z', 36, struct zfs_cmd)
-#define ZFS_IOC_DESTROY_SNAPS _IOWR('Z', 37, struct zfs_cmd)
-#define ZFS_IOC_SNAPSHOT _IOWR('Z', 38, struct zfs_cmd)
-#define ZFS_IOC_DSOBJ_TO_DSNAME _IOWR('Z', 39, struct zfs_cmd)
-#define ZFS_IOC_OBJ_TO_PATH _IOWR('Z', 40, struct zfs_cmd)
-#define ZFS_IOC_POOL_SET_PROPS _IOWR('Z', 41, struct zfs_cmd)
-#define ZFS_IOC_POOL_GET_PROPS _IOWR('Z', 42, struct zfs_cmd)
-#define ZFS_IOC_JAIL _IOWR('Z', 43, struct zfs_cmd)
-#define ZFS_IOC_UNJAIL _IOWR('Z', 44, struct zfs_cmd)
-
-/*
- * Internal SPA load state. Used by FMA diagnosis engine.
- */
-typedef enum {
- SPA_LOAD_NONE, /* no load in progress */
- SPA_LOAD_OPEN, /* normal open */
- SPA_LOAD_IMPORT, /* import in progress */
- SPA_LOAD_TRYIMPORT /* tryimport in progress */
-} spa_load_state_t;
-
-/*
- * Bookmark name values.
- */
-#define ZPOOL_ERR_LIST "error list"
-#define ZPOOL_ERR_DATASET "dataset"
-#define ZPOOL_ERR_OBJECT "object"
-
-#define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1)
-
-/*
- * The following are names used in the nvlist describing
- * the pool's history log.
- */
-#define ZPOOL_HIST_RECORD "history record"
-#define ZPOOL_HIST_TIME "history time"
-#define ZPOOL_HIST_CMD "history command"
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_FS_ZFS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/gfs.h b/sys/contrib/opensolaris/uts/common/sys/gfs.h
deleted file mode 100644
index 8e70f29..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/gfs.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_GFS_H
-#define _SYS_GFS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/vnode.h>
-#include <sys/mutex.h>
-#include <sys/dirent.h>
-#include <sys/uio.h>
-#include <sys/list.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GFS_CACHE_VNODE 0x1
-
-typedef struct gfs_dirent {
- char *gfse_name; /* entry name */
- vnode_t *(*gfse_ctor)(vnode_t *); /* constructor */
- int gfse_flags; /* flags */
- list_node_t gfse_link; /* dynamic list */
- vnode_t *gfse_vnode; /* cached vnode */
-} gfs_dirent_t;
-
-typedef enum gfs_type {
- GFS_DIR,
- GFS_FILE
-} gfs_type_t;
-
-typedef struct gfs_file {
- vnode_t *gfs_vnode; /* current vnode */
- vnode_t *gfs_parent; /* parent vnode */
- size_t gfs_size; /* size of private data structure */
- gfs_type_t gfs_type; /* type of vnode */
- int gfs_index; /* index in parent dir */
- ino64_t gfs_ino; /* inode for this vnode */
-} gfs_file_t;
-
-typedef int (*gfs_readdir_cb)(vnode_t *, struct dirent64 *, int *, offset_t *,
- offset_t *, void *);
-typedef int (*gfs_lookup_cb)(vnode_t *, const char *, vnode_t **, ino64_t *);
-typedef ino64_t (*gfs_inode_cb)(vnode_t *, int);
-
-typedef struct gfs_dir {
- gfs_file_t gfsd_file; /* generic file attributes */
- gfs_dirent_t *gfsd_static; /* statically defined entries */
- int gfsd_nstatic; /* # static entries */
- kmutex_t gfsd_lock; /* protects entries */
- int gfsd_maxlen; /* maximum name length */
- gfs_readdir_cb gfsd_readdir; /* readdir() callback */
- gfs_lookup_cb gfsd_lookup; /* lookup() callback */
- gfs_inode_cb gfsd_inode; /* get an inode number */
-} gfs_dir_t;
-
-struct vfs;
-
-extern vnode_t *gfs_file_create(size_t, vnode_t *, vfs_t *, vnodeops_t *);
-extern vnode_t *gfs_dir_create(size_t, vnode_t *, vfs_t *, vnodeops_t *,
- gfs_dirent_t *, gfs_inode_cb, int, gfs_readdir_cb, gfs_lookup_cb);
-extern vnode_t *gfs_root_create(size_t, vfs_t *, vnodeops_t *, ino64_t,
- gfs_dirent_t *, gfs_inode_cb, int, gfs_readdir_cb, gfs_lookup_cb);
-extern vnode_t *gfs_root_create_file(size_t, struct vfs *, vnodeops_t *,
- ino64_t);
-
-extern void *gfs_file_inactive(vnode_t *);
-extern void *gfs_dir_inactive(vnode_t *);
-
-extern int gfs_dir_lookup(vnode_t *, const char *, vnode_t **);
-extern int gfs_dir_readdir(vnode_t *, uio_t *, int *, int *, u_long **, void *);
-
-#define gfs_dir_lock(gd) mutex_enter(&(gd)->gfsd_lock)
-#define gfs_dir_unlock(gd) mutex_exit(&(gd)->gfsd_lock)
-
-#define gfs_file_parent(vp) (((gfs_file_t *)(vp)->v_data)->gfs_parent)
-
-#define gfs_file_index(vp) (((gfs_file_t *)(vp)->v_data)->gfs_index)
-#define gfs_file_set_index(vp, idx) \
- (((gfs_file_t *)(vp)->v_data)->gfs_index = (idx))
-
-#define gfs_file_inode(vp) (((gfs_file_t *)(vp)->v_data)->gfs_ino)
-#define gfs_file_set_inode(vp, ino) \
- (((gfs_file_t *)(vp)->v_data)->gfs_ino = (ino))
-
-typedef struct gfs_readdir_state {
- struct dirent64 *grd_dirent; /* directory entry buffer */
- size_t grd_namlen; /* max file name length */
- size_t grd_ureclen; /* exported record size */
- ssize_t grd_oresid; /* original uio_resid */
- ino64_t grd_parent; /* inode of parent */
- ino64_t grd_self; /* inode of self */
-} gfs_readdir_state_t;
-
-extern int gfs_readdir_init(gfs_readdir_state_t *, int, int, uio_t *, ino64_t,
- ino64_t);
-extern int gfs_readdir_emit(gfs_readdir_state_t *, uio_t *, offset_t, ino64_t,
- const char *, int *, u_long **);
-extern int gfs_readdir_pred(gfs_readdir_state_t *, uio_t *, offset_t *, int *,
- u_long **);
-extern int gfs_readdir_fini(gfs_readdir_state_t *, int, int *, int);
-
-extern int gfs_lookup_dot(vnode_t **, vnode_t *, vnode_t *, const char *);
-
-extern int gfs_vop_readdir(struct vop_readdir_args *);
-extern int gfs_vop_inactive(struct vop_inactive_args *);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_GFS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/isa_defs.h b/sys/contrib/opensolaris/uts/common/sys/isa_defs.h
deleted file mode 100644
index a65d16a..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/isa_defs.h
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ISA_DEFS_H
-#define _SYS_ISA_DEFS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This header file serves to group a set of well known defines and to
- * set these for each instruction set architecture. These defines may
- * be divided into two groups; characteristics of the processor and
- * implementation choices for Solaris on a processor.
- *
- * Processor Characteristics:
- *
- * _LITTLE_ENDIAN / _BIG_ENDIAN:
- * The natural byte order of the processor. A pointer to an int points
- * to the least/most significant byte of that int.
- *
- * _STACK_GROWS_UPWARD / _STACK_GROWS_DOWNWARD:
- * The processor specific direction of stack growth. A push onto the
- * stack increases/decreases the stack pointer, so it stores data at
- * successively higher/lower addresses. (Stackless machines ignored
- * without regrets).
- *
- * _LONG_LONG_HTOL / _LONG_LONG_LTOH:
- * A pointer to a long long points to the most/least significant long
- * within that long long.
- *
- * _BIT_FIELDS_HTOL / _BIT_FIELDS_LTOH:
- * The C compiler assigns bit fields from the high/low to the low/high end
- * of an int (most to least significant vs. least to most significant).
- *
- * _IEEE_754:
- * The processor (or supported implementations of the processor)
- * supports the ieee-754 floating point standard. No other floating
- * point standards are supported (or significant). Any other supported
- * floating point formats are expected to be cased on the ISA processor
- * symbol.
- *
- * _CHAR_IS_UNSIGNED / _CHAR_IS_SIGNED:
- * The C Compiler implements objects of type `char' as `unsigned' or
- * `signed' respectively. This is really an implementation choice of
- * the compiler writer, but it is specified in the ABI and tends to
- * be uniform across compilers for an instruction set architecture.
- * Hence, it has the properties of a processor characteristic.
- *
- * _CHAR_ALIGNMENT / _SHORT_ALIGNMENT / _INT_ALIGNMENT / _LONG_ALIGNMENT /
- * _LONG_LONG_ALIGNMENT / _DOUBLE_ALIGNMENT / _LONG_DOUBLE_ALIGNMENT /
- * _POINTER_ALIGNMENT / _FLOAT_ALIGNMENT:
- * The ABI defines alignment requirements of each of the primitive
- * object types. Some, if not all, may be hardware requirements as
- * well. The values are expressed in "byte-alignment" units.
- *
- * _MAX_ALIGNMENT:
- * The most stringent alignment requirement as specified by the ABI.
- * Equal to the maximum of all the above _XXX_ALIGNMENT values.
- *
- * _ALIGNMENT_REQUIRED:
- * True or false (1 or 0) whether or not the hardware requires the ABI
- * alignment.
- *
- * _LONG_LONG_ALIGNMENT_32
- * The 32-bit ABI supported by a 64-bit kernel may have different
- * alignment requirements for primitive object types. The value of this
- * identifier is expressed in "byte-alignment" units.
- *
- * _HAVE_CPUID_INSN
- * This indicates that the architecture supports the 'cpuid'
- * instruction as defined by Intel. (Intel allows other vendors
- * to extend the instruction for their own purposes.)
- *
- *
- * Implementation Choices:
- *
- * _ILP32 / _LP64:
- * This specifies the compiler data type implementation as specified in
- * the relevant ABI. The choice between these is strongly influenced
- * by the underlying hardware, but is not absolutely tied to it.
- * Currently only two data type models are supported:
- *
- * _ILP32:
- * Int/Long/Pointer are 32 bits. This is the historical UNIX
- * and Solaris implementation. Due to its historical standing,
- * this is the default case.
- *
- * _LP64:
- * Long/Pointer are 64 bits, Int is 32 bits. This is the chosen
- * implementation for 64-bit ABIs such as SPARC V9.
- *
- * _I32LPx:
- * A compilation environment where 'int' is 32-bit, and
- * longs and pointers are simply the same size.
- *
- * In all cases, Char is 8 bits and Short is 16 bits.
- *
- * _SUNOS_VTOC_8 / _SUNOS_VTOC_16 / _SVR4_VTOC_16:
- * This specifies the form of the disk VTOC (or label):
- *
- * _SUNOS_VTOC_8:
- * This is a VTOC form which is upwardly compatible with the
- * SunOS 4.x disk label and allows 8 partitions per disk.
- *
- * _SUNOS_VTOC_16:
- * In this format the incore vtoc image matches the ondisk
- * version. It allows 16 slices per disk, and is not
- * compatible with the SunOS 4.x disk label.
- *
- * Note that these are not the only two VTOC forms possible and
- * additional forms may be added. One possible form would be the
- * SVr4 VTOC form. The symbol for that is reserved now, although
- * it is not implemented.
- *
- * _SVR4_VTOC_16:
- * This VTOC form is compatible with the System V Release 4
- * VTOC (as implemented on the SVr4 Intel and 3b ports) with
- * 16 partitions per disk.
- *
- *
- * _DMA_USES_PHYSADDR / _DMA_USES_VIRTADDR
- * This describes the type of addresses used by system DMA:
- *
- * _DMA_USES_PHYSADDR:
- * This type of DMA, used in the x86 implementation,
- * requires physical addresses for DMA buffers. The 24-bit
- * addresses used by some legacy boards are the source of the
- * "low-memory" (<16MB) requirement for some devices using DMA.
- *
- * _DMA_USES_VIRTADDR:
- * This method of DMA allows the use of virtual addresses for
- * DMA transfers.
- *
- * _FIRMWARE_NEEDS_FDISK / _NO_FDISK_PRESENT
- * This indicates the presence/absence of an fdisk table.
- *
- * _FIRMWARE_NEEDS_FDISK
- * The fdisk table is required by system firmware. If present,
- * it allows a disk to be subdivided into multiple fdisk
- * partitions, each of which is equivalent to a separate,
- * virtual disk. This enables the co-existence of multiple
- * operating systems on a shared hard disk.
- *
- * _NO_FDISK_PRESENT
- * If the fdisk table is absent, it is assumed that the entire
- * media is allocated for a single operating system.
- *
- * _HAVE_TEM_FIRMWARE
- * Defined if this architecture has the (fallback) option of
- * using prom_* calls for doing I/O if a suitable kernel driver
- * is not available to do it.
- *
- * _DONT_USE_1275_GENERIC_NAMES
- * Controls whether or not device tree node names should
- * comply with the IEEE 1275 "Generic Names" Recommended
- * Practice. With _DONT_USE_1275_GENERIC_NAMES, device-specific
- * names identifying the particular device will be used.
- *
- * __i386_COMPAT
- * This indicates whether the i386 ABI is supported as a *non-native*
- * mode for the platform. When this symbol is defined:
- * - 32-bit xstat-style system calls are enabled
- * - 32-bit xmknod-style system calls are enabled
- * - 32-bit system calls use i386 sizes -and- alignments
- *
- * Note that this is NOT defined for the i386 native environment!
- *
- * __x86
- * This is ONLY a synonym for defined(__i386) || defined(__amd64)
- * which is useful only insofar as these two architectures share
- * common attributes. Analogous to __sparc.
- *
- * _PSM_MODULES
- * This indicates whether or not the implementation uses PSM
- * modules for processor support, reading /etc/mach from inside
- * the kernel to extract a list.
- *
- * _RTC_CONFIG
- * This indicates whether or not the implementation uses /etc/rtc_config
- * to configure the real-time clock in the kernel.
- *
- * _UNIX_KRTLD
- * This indicates that the implementation uses a dynamically
- * linked unix + krtld to form the core kernel image at boot
- * time, or (in the absence of this symbol) a prelinked kernel image.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * The following set of definitions characterize Solaris on AMD's
- * 64-bit systems.
- */
-#if defined(__x86_64) || defined(__amd64)
-
-#if !defined(__amd64)
-#define __amd64 /* preferred guard */
-#endif
-
-#if !defined(__x86)
-#define __x86
-#endif
-
-/*
- * Define the appropriate "processor characteristics"
- */
-#if defined(sun)
-#define _LITTLE_ENDIAN
-#endif
-#define _STACK_GROWS_DOWNWARD
-#define _LONG_LONG_LTOH
-#define _BIT_FIELDS_LTOH
-#define _IEEE_754
-#define _CHAR_IS_SIGNED
-#define _BOOL_ALIGNMENT 1
-#define _CHAR_ALIGNMENT 1
-#define _SHORT_ALIGNMENT 2
-#define _INT_ALIGNMENT 4
-#define _FLOAT_ALIGNMENT 4
-#define _FLOAT_COMPLEX_ALIGNMENT 4
-#define _LONG_ALIGNMENT 8
-#define _LONG_LONG_ALIGNMENT 8
-#define _DOUBLE_ALIGNMENT 8
-#define _DOUBLE_COMPLEX_ALIGNMENT 8
-#define _LONG_DOUBLE_ALIGNMENT 16
-#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16
-#define _POINTER_ALIGNMENT 8
-#define _MAX_ALIGNMENT 16
-#define _ALIGNMENT_REQUIRED 1
-
-/*
- * Different alignment constraints for the i386 ABI in compatibility mode
- */
-#define _LONG_LONG_ALIGNMENT_32 4
-
-/*
- * Define the appropriate "implementation choices".
- */
-#if !defined(_LP64)
-#define _LP64
-#endif
-#if !defined(_I32LPx) && defined(_KERNEL)
-#define _I32LPx
-#endif
-#define _MULTI_DATAMODEL
-#define _SUNOS_VTOC_16
-#define _DMA_USES_PHYSADDR
-#define _FIRMWARE_NEEDS_FDISK
-#define __i386_COMPAT
-#define _PSM_MODULES
-#define _RTC_CONFIG
-#define _DONT_USE_1275_GENERIC_NAMES
-#define _HAVE_CPUID_INSN
-
-/*
- * The feature test macro __i386 is generic for all processors implementing
- * the Intel 386 instruction set or a superset of it. Specifically, this
- * includes all members of the 386, 486, and Pentium family of processors.
- */
-#elif defined(__i386) || defined(__i386__)
-
-#if !defined(__i386)
-#define __i386
-#endif
-
-#if !defined(__x86)
-#define __x86
-#endif
-
-/*
- * Define the appropriate "processor characteristics"
- */
-#if defined(sun)
-#define _LITTLE_ENDIAN
-#endif
-#define _STACK_GROWS_DOWNWARD
-#define _LONG_LONG_LTOH
-#define _BIT_FIELDS_LTOH
-#define _IEEE_754
-#define _CHAR_IS_SIGNED
-#define _BOOL_ALIGNMENT 1
-#define _CHAR_ALIGNMENT 1
-#define _SHORT_ALIGNMENT 2
-#define _INT_ALIGNMENT 4
-#define _FLOAT_ALIGNMENT 4
-#define _FLOAT_COMPLEX_ALIGNMENT 4
-#define _LONG_ALIGNMENT 4
-#define _LONG_LONG_ALIGNMENT 4
-#define _DOUBLE_ALIGNMENT 4
-#define _DOUBLE_COMPLEX_ALIGNMENT 4
-#define _LONG_DOUBLE_ALIGNMENT 4
-#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 4
-#define _POINTER_ALIGNMENT 4
-#define _MAX_ALIGNMENT 4
-#define _ALIGNMENT_REQUIRED 0
-
-#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT
-
-/*
- * Define the appropriate "implementation choices".
- */
-#define _ILP32
-#if !defined(_I32LPx) && defined(_KERNEL)
-#define _I32LPx
-#endif
-#define _SUNOS_VTOC_16
-#define _DMA_USES_PHYSADDR
-#define _FIRMWARE_NEEDS_FDISK
-#define _PSM_MODULES
-#define _RTC_CONFIG
-#define _DONT_USE_1275_GENERIC_NAMES
-#define _HAVE_CPUID_INSN
-
-/*
- * The following set of definitions characterize the Solaris on SPARC systems.
- *
- * The symbol __sparc indicates any of the SPARC family of processor
- * architectures. This includes SPARC V7, SPARC V8 and SPARC V9.
- *
- * The symbol __sparcv8 indicates the 32-bit SPARC V8 architecture as defined
- * by Version 8 of the SPARC Architecture Manual. (SPARC V7 is close enough
- * to SPARC V8 for the former to be subsumed into the latter definition.)
- *
- * The symbol __sparcv9 indicates the 64-bit SPARC V9 architecture as defined
- * by Version 9 of the SPARC Architecture Manual.
- *
- * The symbols __sparcv8 and __sparcv9 are mutually exclusive, and are only
- * relevant when the symbol __sparc is defined.
- */
-/*
- * XXX Due to the existence of 5110166, "defined(__sparcv9)" needs to be added
- * to support backwards builds. This workaround should be removed in s10_71.
- */
-#elif defined(__sparc) || defined(__sparcv9) || defined(__sparc__)
-#if !defined(__sparc)
-#define __sparc
-#endif
-
-/*
- * You can be 32-bit or 64-bit, but not both at the same time.
- */
-#if defined(__sparcv8) && defined(__sparcv9)
-#error "SPARC Versions 8 and 9 are mutually exclusive choices"
-#endif
-
-/*
- * Existing compilers do not set __sparcv8. Years will transpire before
- * the compilers can be depended on to set the feature test macro. In
- * the interim, we'll set it here on the basis of historical behaviour;
- * if you haven't asked for SPARC V9, then you must've meant SPARC V8.
- */
-#if !defined(__sparcv9) && !defined(__sparcv8)
-#define __sparcv8
-#endif
-
-/*
- * Define the appropriate "processor characteristics" shared between
- * all Solaris on SPARC systems.
- */
-#if defined(sun)
-#define _BIG_ENDIAN
-#endif
-#define _STACK_GROWS_DOWNWARD
-#define _LONG_LONG_HTOL
-#define _BIT_FIELDS_HTOL
-#define _IEEE_754
-#define _CHAR_IS_SIGNED
-#define _BOOL_ALIGNMENT 1
-#define _CHAR_ALIGNMENT 1
-#define _SHORT_ALIGNMENT 2
-#define _INT_ALIGNMENT 4
-#define _FLOAT_ALIGNMENT 4
-#define _FLOAT_COMPLEX_ALIGNMENT 4
-#define _LONG_LONG_ALIGNMENT 8
-#define _DOUBLE_ALIGNMENT 8
-#define _DOUBLE_COMPLEX_ALIGNMENT 8
-#define _ALIGNMENT_REQUIRED 1
-
-/*
- * Define the appropriate "implementation choices" shared between versions.
- */
-#define _SUNOS_VTOC_8
-#define _DMA_USES_VIRTADDR
-#define _NO_FDISK_PRESENT
-#define _HAVE_TEM_FIRMWARE
-#define _UNIX_KRTLD
-
-/*
- * The following set of definitions characterize the implementation of
- * 32-bit Solaris on SPARC V8 systems.
- */
-#if defined(__sparcv8)
-
-/*
- * Define the appropriate "processor characteristics"
- */
-#define _LONG_ALIGNMENT 4
-#define _LONG_DOUBLE_ALIGNMENT 8
-#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 8
-#define _POINTER_ALIGNMENT 4
-#define _MAX_ALIGNMENT 8
-
-#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT
-
-/*
- * Define the appropriate "implementation choices"
- */
-#define _ILP32
-#if !defined(_I32LPx) && defined(_KERNEL)
-#define _I32LPx
-#endif
-
-/*
- * The following set of definitions characterize the implementation of
- * 64-bit Solaris on SPARC V9 systems.
- */
-#elif defined(__sparcv9)
-
-/*
- * Define the appropriate "processor characteristics"
- */
-#define _LONG_ALIGNMENT 8
-#define _LONG_DOUBLE_ALIGNMENT 16
-#define _LONG_DOUBLE_COMPLEX_ALIGNMENT 16
-#define _POINTER_ALIGNMENT 8
-#define _MAX_ALIGNMENT 16
-
-#define _LONG_LONG_ALIGNMENT_32 _LONG_LONG_ALIGNMENT /* as in the V8 case */
-
-/*
- * Define the appropriate "implementation choices"
- */
-#if !defined(_LP64)
-#define _LP64
-#endif
-#if !defined(_I32LPx)
-#define _I32LPx
-#endif
-#define _MULTI_DATAMODEL
-
-#else
-#error "unknown SPARC version"
-#endif
-
-/*
- * #error is strictly ansi-C, but works as well as anything for K&R systems.
- */
-#else
-#error "ISA not supported"
-#endif
-
-#if defined(_ILP32) && defined(_LP64)
-#error "Both _ILP32 and _LP64 are defined"
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ISA_DEFS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/list.h b/sys/contrib/opensolaris/uts/common/sys/list.h
deleted file mode 100644
index 7e9d9aa..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/list.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_LIST_H
-#define _SYS_LIST_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/list_impl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct list_node list_node_t;
-typedef struct list list_t;
-
-void list_create(list_t *, size_t, size_t);
-void list_destroy(list_t *);
-
-void list_insert_after(list_t *, void *, void *);
-void list_insert_before(list_t *, void *, void *);
-void list_insert_head(list_t *, void *);
-void list_insert_tail(list_t *, void *);
-void list_remove(list_t *, void *);
-void list_move_tail(list_t *, list_t *);
-
-void *list_head(list_t *);
-void *list_tail(list_t *);
-void *list_next(list_t *, void *);
-void *list_prev(list_t *, void *);
-
-int list_link_active(list_node_t *);
-int list_is_empty(list_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_LIST_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/list_impl.h b/sys/contrib/opensolaris/uts/common/sys/list_impl.h
deleted file mode 100644
index 9c42f88..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/list_impl.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_LIST_IMPL_H
-#define _SYS_LIST_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct list_node {
- struct list_node *list_next;
- struct list_node *list_prev;
-};
-
-struct list {
- size_t list_size;
- size_t list_offset;
- struct list_node list_head;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_LIST_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/note.h b/sys/contrib/opensolaris/uts/common/sys/note.h
deleted file mode 100644
index 2cb7fd8..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/note.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1994 by Sun Microsystems, Inc.
- */
-
-/*
- * sys/note.h: interface for annotating source with info for tools
- *
- * This is the underlying interface; NOTE (/usr/include/note.h) is the
- * preferred interface, but all exported header files should include this
- * file directly and use _NOTE so as not to take "NOTE" from the user's
- * namespace. For consistency, *all* kernel source should use _NOTE.
- *
- * By default, annotations expand to nothing. This file implements
- * that. Tools using annotations will interpose a different version
- * of this file that will expand annotations as needed.
- */
-
-#ifndef _SYS_NOTE_H
-#define _SYS_NOTE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef _NOTE
-#define _NOTE(s)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_NOTE_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/nvpair.h b/sys/contrib/opensolaris/uts/common/sys/nvpair.h
deleted file mode 100644
index 306e30f..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/nvpair.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_NVPAIR_H
-#define _SYS_NVPAIR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/errno.h>
-
-#if defined(_KERNEL) && !defined(_BOOT)
-#include <sys/kmem.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
- DATA_TYPE_UNKNOWN = 0,
- DATA_TYPE_BOOLEAN,
- DATA_TYPE_BYTE,
- DATA_TYPE_INT16,
- DATA_TYPE_UINT16,
- DATA_TYPE_INT32,
- DATA_TYPE_UINT32,
- DATA_TYPE_INT64,
- DATA_TYPE_UINT64,
- DATA_TYPE_STRING,
- DATA_TYPE_BYTE_ARRAY,
- DATA_TYPE_INT16_ARRAY,
- DATA_TYPE_UINT16_ARRAY,
- DATA_TYPE_INT32_ARRAY,
- DATA_TYPE_UINT32_ARRAY,
- DATA_TYPE_INT64_ARRAY,
- DATA_TYPE_UINT64_ARRAY,
- DATA_TYPE_STRING_ARRAY,
- DATA_TYPE_HRTIME,
- DATA_TYPE_NVLIST,
- DATA_TYPE_NVLIST_ARRAY,
- DATA_TYPE_BOOLEAN_VALUE,
- DATA_TYPE_INT8,
- DATA_TYPE_UINT8,
- DATA_TYPE_BOOLEAN_ARRAY,
- DATA_TYPE_INT8_ARRAY,
- DATA_TYPE_UINT8_ARRAY
-} data_type_t;
-
-typedef struct nvpair {
- int32_t nvp_size; /* size of this nvpair */
- int16_t nvp_name_sz; /* length of name string */
- int16_t nvp_reserve; /* not used */
- int32_t nvp_value_elem; /* number of elements for array types */
- data_type_t nvp_type; /* type of value */
- /* name string */
- /* aligned ptr array for string arrays */
- /* aligned array of data for value */
-} nvpair_t;
-
-/* nvlist header */
-typedef struct nvlist {
- int32_t nvl_version;
- uint32_t nvl_nvflag; /* persistent flags */
- uint64_t nvl_priv; /* ptr to private data if not packed */
- uint32_t nvl_flag;
- int32_t nvl_pad; /* currently not used, for alignment */
-} nvlist_t;
-
-/* nvp implementation version */
-#define NV_VERSION 0
-
-/* nvlist pack encoding */
-#define NV_ENCODE_NATIVE 0
-#define NV_ENCODE_XDR 1
-
-/* nvlist persistent unique name flags, stored in nvl_nvflag */
-#define NV_UNIQUE_NAME 0x1
-#define NV_UNIQUE_NAME_TYPE 0x2
-
-/* nvlist lookup pairs related flags */
-#define NV_FLAG_NOENTOK 0x1
-
-/* convenience macros */
-#define NV_ALIGN(x) (((ulong_t)(x) + 7ul) & ~7ul)
-#define NV_ALIGN4(x) (((x) + 3) & ~3)
-
-#define NVP_SIZE(nvp) ((nvp)->nvp_size)
-#define NVP_NAME(nvp) ((char *)(nvp) + sizeof (nvpair_t))
-#define NVP_TYPE(nvp) ((nvp)->nvp_type)
-#define NVP_NELEM(nvp) ((nvp)->nvp_value_elem)
-#define NVP_VALUE(nvp) ((char *)(nvp) + NV_ALIGN(sizeof (nvpair_t) \
- + (nvp)->nvp_name_sz))
-
-#define NVL_VERSION(nvl) ((nvl)->nvl_version)
-#define NVL_SIZE(nvl) ((nvl)->nvl_size) /* NOTE(review): nvlist_t declares no nvl_size member */
-#define NVL_FLAG(nvl) ((nvl)->nvl_flag)
-
-/* NV allocator framework */
-typedef struct nv_alloc_ops nv_alloc_ops_t;
-
-typedef struct nv_alloc {
- const nv_alloc_ops_t *nva_ops;
- void *nva_arg;
-} nv_alloc_t;
-
-struct nv_alloc_ops {
- int (*nv_ao_init)(nv_alloc_t *, __va_list);
- void (*nv_ao_fini)(nv_alloc_t *);
- void *(*nv_ao_alloc)(nv_alloc_t *, size_t);
- void (*nv_ao_free)(nv_alloc_t *, void *, size_t);
- void (*nv_ao_reset)(nv_alloc_t *);
-};
-
-extern const nv_alloc_ops_t *nv_fixed_ops;
-extern nv_alloc_t *nv_alloc_nosleep;
-
-#if defined(_KERNEL) && !defined(_BOOT)
-extern nv_alloc_t *nv_alloc_sleep;
-#endif
-
-int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...);
-void nv_alloc_reset(nv_alloc_t *);
-void nv_alloc_fini(nv_alloc_t *);
-
-/* list management */
-int nvlist_alloc(nvlist_t **, uint_t, int);
-void nvlist_free(nvlist_t *);
-int nvlist_size(nvlist_t *, size_t *, int);
-int nvlist_pack(nvlist_t *, char **, size_t *, int, int);
-int nvlist_unpack(char *, size_t, nvlist_t **, int);
-int nvlist_dup(nvlist_t *, nvlist_t **, int);
-int nvlist_merge(nvlist_t *, nvlist_t *, int);
-
-int nvlist_xalloc(nvlist_t **, uint_t, nv_alloc_t *);
-int nvlist_xpack(nvlist_t *, char **, size_t *, int, nv_alloc_t *);
-int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *);
-int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *);
-nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *);
-
-int nvlist_add_nvpair(nvlist_t *, nvpair_t *);
-int nvlist_add_boolean(nvlist_t *, const char *);
-int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
-int nvlist_add_byte(nvlist_t *, const char *, uchar_t);
-int nvlist_add_int8(nvlist_t *, const char *, int8_t);
-int nvlist_add_uint8(nvlist_t *, const char *, uint8_t);
-int nvlist_add_int16(nvlist_t *, const char *, int16_t);
-int nvlist_add_uint16(nvlist_t *, const char *, uint16_t);
-int nvlist_add_int32(nvlist_t *, const char *, int32_t);
-int nvlist_add_uint32(nvlist_t *, const char *, uint32_t);
-int nvlist_add_int64(nvlist_t *, const char *, int64_t);
-int nvlist_add_uint64(nvlist_t *, const char *, uint64_t);
-int nvlist_add_string(nvlist_t *, const char *, const char *);
-int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
-int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
-int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
-int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
-int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
-int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
-int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
-int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
-int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
-int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
-int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
-int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t);
-int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
-int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t);
-
-int nvlist_remove(nvlist_t *, const char *, data_type_t);
-int nvlist_remove_all(nvlist_t *, const char *);
-
-int nvlist_lookup_boolean(nvlist_t *, const char *);
-int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *);
-int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *);
-int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *);
-int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *);
-int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *);
-int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *);
-int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *);
-int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *);
-int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *);
-int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *);
-int nvlist_lookup_string(nvlist_t *, const char *, char **);
-int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **);
-int nvlist_lookup_boolean_array(nvlist_t *, const char *,
- boolean_t **, uint_t *);
-int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **, uint_t *);
-int nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **, uint_t *);
-int nvlist_lookup_uint8_array(nvlist_t *, const char *, uint8_t **, uint_t *);
-int nvlist_lookup_int16_array(nvlist_t *, const char *, int16_t **, uint_t *);
-int nvlist_lookup_uint16_array(nvlist_t *, const char *, uint16_t **, uint_t *);
-int nvlist_lookup_int32_array(nvlist_t *, const char *, int32_t **, uint_t *);
-int nvlist_lookup_uint32_array(nvlist_t *, const char *, uint32_t **, uint_t *);
-int nvlist_lookup_int64_array(nvlist_t *, const char *, int64_t **, uint_t *);
-int nvlist_lookup_uint64_array(nvlist_t *, const char *, uint64_t **, uint_t *);
-int nvlist_lookup_string_array(nvlist_t *, const char *, char ***, uint_t *);
-int nvlist_lookup_nvlist_array(nvlist_t *, const char *,
- nvlist_t ***, uint_t *);
-int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *);
-int nvlist_lookup_pairs(nvlist_t *nvl, int, ...);
-
-/* processing nvpair */
-nvpair_t *nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *);
-char *nvpair_name(nvpair_t *);
-data_type_t nvpair_type(nvpair_t *);
-int nvpair_value_boolean_value(nvpair_t *, boolean_t *);
-int nvpair_value_byte(nvpair_t *, uchar_t *);
-int nvpair_value_int8(nvpair_t *, int8_t *);
-int nvpair_value_uint8(nvpair_t *, uint8_t *);
-int nvpair_value_int16(nvpair_t *, int16_t *);
-int nvpair_value_uint16(nvpair_t *, uint16_t *);
-int nvpair_value_int32(nvpair_t *, int32_t *);
-int nvpair_value_uint32(nvpair_t *, uint32_t *);
-int nvpair_value_int64(nvpair_t *, int64_t *);
-int nvpair_value_uint64(nvpair_t *, uint64_t *);
-int nvpair_value_string(nvpair_t *, char **);
-int nvpair_value_nvlist(nvpair_t *, nvlist_t **);
-int nvpair_value_boolean_array(nvpair_t *, boolean_t **, uint_t *);
-int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *);
-int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *);
-int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *);
-int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *);
-int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *);
-int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *);
-int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *);
-int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *);
-int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *);
-int nvpair_value_string_array(nvpair_t *, char ***, uint_t *);
-int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *);
-int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_NVPAIR_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/nvpair_impl.h b/sys/contrib/opensolaris/uts/common/sys/nvpair_impl.h
deleted file mode 100644
index f12dbbf..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/nvpair_impl.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _NVPAIR_IMPL_H
-#define _NVPAIR_IMPL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/nvpair.h>
-
-/*
- * The structures here are provided for information and debugging purposes
- * only and may be changed in the future.
- */
-
-/*
- * implementation linked list for pre-packed data
- */
-typedef struct i_nvp i_nvp_t;
-
-struct i_nvp {
- union {
- uint64_t _nvi_align; /* ensure alignment */
- struct {
- i_nvp_t *_nvi_next; /* pointer to next nvpair */
- i_nvp_t *_nvi_prev; /* pointer to prev nvpair */
- } _nvi;
- } _nvi_un;
- nvpair_t nvi_nvp; /* nvpair */
-};
-#define nvi_next _nvi_un._nvi._nvi_next
-#define nvi_prev _nvi_un._nvi._nvi_prev
-
-typedef struct {
- i_nvp_t *nvp_list; /* linked list of nvpairs */
- i_nvp_t *nvp_last; /* last nvpair */
- i_nvp_t *nvp_curr; /* current walker nvpair */
- nv_alloc_t *nvp_nva; /* pluggable allocator */
- uint32_t nvp_stat; /* internal state */
-} nvpriv_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NVPAIR_IMPL_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/processor.h b/sys/contrib/opensolaris/uts/common/sys/processor.h
deleted file mode 100644
index 063f7dacb..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/processor.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
- * All Rights Reserved
- *
- */
-
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_PROCESSOR_H
-#define _SYS_PROCESSOR_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/procset.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Definitions for p_online, processor_info & lgrp system calls.
- */
-
-/*
- * Type for an lgrpid
- */
-typedef uint16_t lgrpid_t;
-
-/*
- * Type for processor name (CPU number).
- */
-typedef int processorid_t;
-typedef int chipid_t;
-
-/*
- * Flags and return values for p_online(2), and pi_state for processor_info(2).
- * These flags are *not* for in-kernel examination of CPU states.
- * See <sys/cpuvar.h> for appropriate informational functions.
- */
-#define P_OFFLINE 0x0001 /* processor is offline, as quiet as possible */
-#define P_ONLINE 0x0002 /* processor is online */
-#define P_STATUS 0x0003 /* value passed to p_online to request status */
-#define P_FAULTED 0x0004 /* processor is offline, in faulted state */
-#define P_POWEROFF 0x0005 /* processor is powered off */
-#define P_NOINTR 0x0006 /* processor is online, but no I/O interrupts */
-#define P_SPARE 0x0007 /* processor is offline, can be reactivated */
-#define P_BAD P_FAULTED /* unused but defined by USL */
-#define P_FORCED 0x10000000 /* force processor offline */
-
-/*
- * String names for processor states defined above.
- */
-#define PS_OFFLINE "off-line"
-#define PS_ONLINE "on-line"
-#define PS_FAULTED "faulted"
-#define PS_POWEROFF "powered-off"
-#define PS_NOINTR "no-intr"
-#define PS_SPARE "spare"
-
-/*
- * Structure filled in by processor_info(2).
- *
- * The string fields are guaranteed to contain a NUL terminator.
- *
- * The pi_fputypes field contains a (possibly empty) comma-separated
- * list of floating point identifier strings.
- */
-#define PI_TYPELEN 16 /* max size of CPU type string */
-#define PI_FPUTYPE 32 /* max size of FPU types string */
-
-typedef struct {
- int pi_state; /* processor state, see above */
- char pi_processor_type[PI_TYPELEN]; /* ASCII CPU type */
- char pi_fputypes[PI_FPUTYPE]; /* ASCII FPU types */
- int pi_clock; /* CPU clock freq in MHz */
-} processor_info_t;
-
-/*
- * Binding values for processor_bind(2)
- */
-#define PBIND_NONE -1 /* LWP/thread is not bound */
-#define PBIND_QUERY -2 /* don't set, just return the binding */
-
-/*
- * User-level system call interface prototypes
- */
-#ifndef _KERNEL
-#ifdef __STDC__
-
-extern int p_online(processorid_t processorid, int flag);
-extern int processor_info(processorid_t processorid,
- processor_info_t *infop);
-extern int processor_bind(idtype_t idtype, id_t id,
- processorid_t processorid, processorid_t *obind);
-extern processorid_t getcpuid(void);
-extern lgrpid_t gethomelgroup(void);
-
-#else
-
-extern int p_online();
-extern int processor_info();
-extern int processor_bind();
-extern processorid_t getcpuid();
-extern lgrpid_t gethomelgroup();
-
-#endif /* __STDC__ */
-
-#else /* _KERNEL */
-
-/*
- * Internal interface prototypes
- */
-extern int p_online_internal(processorid_t, int, int *);
-
-#endif /* !_KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_PROCESSOR_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/procset.h b/sys/contrib/opensolaris/uts/common/sys/procset.h
deleted file mode 100644
index c367c93..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/procset.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-#ifndef _SYS_PROCSET_H
-#define _SYS_PROCSET_H
-
-#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.6 */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/feature_tests.h>
-#include <sys/types.h>
-#include <sys/signal.h>
-
-/*
- * This file defines the data needed to specify a set of
- * processes. These types are used by the sigsend, sigsendset,
- * priocntl, priocntlset, waitid, evexit, and evexitset system
- * calls.
- */
-#define P_INITPID 1
-#define P_INITUID 0
-#define P_INITPGID 0
-
-
-/*
- * The following defines the values for an identifier type. It
- * specifies the interpretation of an id value. An idtype and
- * id together define a simple set of processes.
- */
-typedef enum
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
- idtype /* pollutes XPG4.2 namespace */
-#endif
- {
- P_PID, /* A process identifier. */
- P_PPID, /* A parent process identifier. */
- P_PGID, /* A process group (job control group) */
- /* identifier. */
- P_SID, /* A session identifier. */
- P_CID, /* A scheduling class identifier. */
- P_UID, /* A user identifier. */
- P_GID, /* A group identifier. */
- P_ALL, /* All processes. */
- P_LWPID, /* An LWP identifier. */
- P_TASKID, /* A task identifier. */
- P_PROJID, /* A project identifier. */
- P_POOLID, /* A pool identifier. */
- P_ZONEID, /* A zone identifier. */
- P_CTID, /* A (process) contract identifier. */
- P_CPUID, /* CPU identifier. */
- P_PSETID /* Processor set identifier */
-} idtype_t;
-
-
-/*
- * The following defines the operations which can be performed to
- * combine two simple sets of processes to form another set of
- * processes.
- */
-#if !defined(_XPG4_2) || defined(__EXTENSIONS__)
-typedef enum idop {
- POP_DIFF, /* Set difference. The processes which */
- /* are in the left operand set and not */
- /* in the right operand set. */
- POP_AND, /* Set intersection. The processes */
- /* which are in both the left and right */
- /* operand sets. */
- POP_OR, /* Set union. The processes */
- /* which are in either the left or the */
- /* right operand sets (or both). */
- POP_XOR /* Set exclusive or. The processes */
- /* which are in either the left or */
- /* right operand sets but not in both. */
-} idop_t;
-
-
-/*
- * The following structure is used to define a set of processes.
- * The set is defined in terms of two simple sets of processes
- * and an operator which operates on these two operand sets.
- */
-typedef struct procset {
- idop_t p_op; /* The operator connecting the */
- /* following two operands each */
- /* of which is a simple set of */
- /* processes. */
-
- idtype_t p_lidtype;
- /* The type of the left operand */
- /* simple set. */
- id_t p_lid; /* The id of the left operand. */
-
- idtype_t p_ridtype;
- /* The type of the right */
- /* operand simple set. */
- id_t p_rid; /* The id of the right operand. */
-} procset_t;
-
-/*
- * The following macro can be used to initialize a procset_t
- * structure.
- */
-#define setprocset(psp, op, ltype, lid, rtype, rid) \
- (psp)->p_op = (op); \
- (psp)->p_lidtype = (ltype); \
- (psp)->p_lid = (lid); \
- (psp)->p_ridtype = (rtype); \
- (psp)->p_rid = (rid);
-
-#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
-
-#if defined(sun)
-#ifdef _KERNEL
-
-struct proc;
-
-extern int dotoprocs(procset_t *, int (*)(), char *);
-extern int dotolwp(procset_t *, int (*)(), char *);
-extern int procinset(struct proc *, procset_t *);
-extern int sigsendproc(struct proc *, sigsend_t *);
-extern int sigsendset(procset_t *, sigsend_t *);
-extern boolean_t cur_inset_only(procset_t *);
-extern id_t getmyid(idtype_t);
-
-#endif /* _KERNEL */
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_PROCSET_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/sdt.h b/sys/contrib/opensolaris/uts/common/sys/sdt.h
deleted file mode 100644
index da695c9..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/sdt.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SDT_H
-#define _SYS_SDT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef _KERNEL
-
-#define DTRACE_PROBE(provider, name) { \
- extern void __dtrace_##provider##___##name(void); \
- __dtrace_##provider##___##name(); \
-}
-
-#define DTRACE_PROBE1(provider, name, arg1) { \
- extern void __dtrace_##provider##___##name(unsigned long); \
- __dtrace_##provider##___##name((unsigned long)arg1); \
-}
-
-#define DTRACE_PROBE2(provider, name, arg1, arg2) { \
- extern void __dtrace_##provider##___##name(unsigned long, \
- unsigned long); \
- __dtrace_##provider##___##name((unsigned long)arg1, \
- (unsigned long)arg2); \
-}
-
-#define DTRACE_PROBE3(provider, name, arg1, arg2, arg3) { \
- extern void __dtrace_##provider##___##name(unsigned long, \
- unsigned long, unsigned long); \
- __dtrace_##provider##___##name((unsigned long)arg1, \
- (unsigned long)arg2, (unsigned long)arg3); \
-}
-
-#define DTRACE_PROBE4(provider, name, arg1, arg2, arg3, arg4) { \
- extern void __dtrace_##provider##___##name(unsigned long, \
- unsigned long, unsigned long, unsigned long); \
- __dtrace_##provider##___##name((unsigned long)arg1, \
- (unsigned long)arg2, (unsigned long)arg3, \
- (unsigned long)arg4); \
-}
-
-#define DTRACE_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) { \
- extern void __dtrace_##provider##___##name(unsigned long, \
- unsigned long, unsigned long, unsigned long, unsigned long);\
- __dtrace_##provider##___##name((unsigned long)arg1, \
- (unsigned long)arg2, (unsigned long)arg3, \
- (unsigned long)arg4, (unsigned long)arg5); \
-}
-
-#else /* _KERNEL */
-
-#define DTRACE_PROBE(name) { \
- extern void __dtrace_probe_##name(void); \
- __dtrace_probe_##name(); \
-}
-
-#define DTRACE_PROBE1(name, type1, arg1) { \
- extern void __dtrace_probe_##name(uintptr_t); \
- __dtrace_probe_##name((uintptr_t)(arg1)); \
-}
-
-#define DTRACE_PROBE2(name, type1, arg1, type2, arg2) { \
- extern void __dtrace_probe_##name(uintptr_t, uintptr_t); \
- __dtrace_probe_##name((uintptr_t)(arg1), (uintptr_t)(arg2)); \
-}
-
-#define DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3) { \
- extern void __dtrace_probe_##name(uintptr_t, uintptr_t, uintptr_t); \
- __dtrace_probe_##name((uintptr_t)(arg1), (uintptr_t)(arg2), \
- (uintptr_t)(arg3)); \
-}
-
-#define DTRACE_PROBE4(name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4) { \
- extern void __dtrace_probe_##name(uintptr_t, uintptr_t, \
- uintptr_t, uintptr_t); \
- __dtrace_probe_##name((uintptr_t)(arg1), (uintptr_t)(arg2), \
- (uintptr_t)(arg3), (uintptr_t)(arg4)); \
-}
-
-#define DTRACE_SCHED(name) \
- DTRACE_PROBE(__sched_##name);
-
-#define DTRACE_SCHED1(name, type1, arg1) \
- DTRACE_PROBE1(__sched_##name, type1, arg1);
-
-#define DTRACE_SCHED2(name, type1, arg1, type2, arg2) \
- DTRACE_PROBE2(__sched_##name, type1, arg1, type2, arg2);
-
-#define DTRACE_SCHED3(name, type1, arg1, type2, arg2, type3, arg3) \
- DTRACE_PROBE3(__sched_##name, type1, arg1, type2, arg2, type3, arg3);
-
-#define DTRACE_SCHED4(name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4) \
- DTRACE_PROBE4(__sched_##name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4);
-
-#define DTRACE_PROC(name) \
- DTRACE_PROBE(__proc_##name);
-
-#define DTRACE_PROC1(name, type1, arg1) \
- DTRACE_PROBE1(__proc_##name, type1, arg1);
-
-#define DTRACE_PROC2(name, type1, arg1, type2, arg2) \
- DTRACE_PROBE2(__proc_##name, type1, arg1, type2, arg2);
-
-#define DTRACE_PROC3(name, type1, arg1, type2, arg2, type3, arg3) \
- DTRACE_PROBE3(__proc_##name, type1, arg1, type2, arg2, type3, arg3);
-
-#define DTRACE_PROC4(name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4) \
- DTRACE_PROBE4(__proc_##name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4);
-
-#define DTRACE_IO(name) \
- DTRACE_PROBE(__io_##name);
-
-#define DTRACE_IO1(name, type1, arg1) \
- DTRACE_PROBE1(__io_##name, type1, arg1);
-
-#define DTRACE_IO2(name, type1, arg1, type2, arg2) \
- DTRACE_PROBE2(__io_##name, type1, arg1, type2, arg2);
-
-#define DTRACE_IO3(name, type1, arg1, type2, arg2, type3, arg3) \
- DTRACE_PROBE3(__io_##name, type1, arg1, type2, arg2, type3, arg3);
-
-#define DTRACE_IO4(name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4) \
- DTRACE_PROBE4(__io_##name, type1, arg1, type2, arg2, \
- type3, arg3, type4, arg4);
-
-#define DTRACE_SYSEVENT2(name, type1, arg1, type2, arg2) \
- DTRACE_PROBE2(__sysevent_##name, type1, arg1, type2, arg2);
-
-#endif /* _KERNEL */
-
-extern const char *sdt_prefix;
-
-typedef struct sdt_probedesc {
- char *sdpd_name; /* name of this probe */
- unsigned long sdpd_offset; /* offset of call in text */
- struct sdt_probedesc *sdpd_next; /* next static probe */
-} sdt_probedesc_t;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SDT_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/synch.h b/sys/contrib/opensolaris/uts/common/sys/synch.h
deleted file mode 100644
index 8f52d72..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/synch.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SYNCH_H
-#define _SYS_SYNCH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef _ASM
-#include <sys/types.h>
-#include <sys/int_types.h>
-#endif /* _ASM */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef _ASM
-/*
- * Thread and LWP mutexes have the same type
- * definitions.
- *
- * NOTE:
- *
- * POSIX requires that <pthread.h> define the structures pthread_mutex_t
- * and pthread_cond_t. Although these structures are identical to mutex_t
- * (lwp_mutex_t) and cond_t (lwp_cond_t), defined here, a typedef of these
- * types would require including <synch.h> in <pthread.h>, pulling in
- * non-posix symbols/constants, violating POSIX namespace restrictions. Hence,
- * pthread_mutex_t/pthread_cond_t have been redefined (in <sys/types.h>).
- * Any modifications done to mutex_t/lwp_mutex_t or cond_t/lwp_cond_t must
- * also be done to pthread_mutex_t/pthread_cond_t.
- */
-typedef struct _lwp_mutex {
- struct {
- uint16_t flag1;
- uint8_t flag2;
- uint8_t ceiling;
- union {
- uint16_t bcptype;
- struct {
- uint8_t count_type1;
- uint8_t count_type2;
- } mtype_rcount;
- } mbcp_type_un;
- uint16_t magic;
- } flags;
- union {
- struct {
- uint8_t pad[8];
- } lock64;
- struct {
- uint32_t ownerpid;
- uint32_t lockword;
- } lock32;
- upad64_t owner64;
- } lock;
- upad64_t data;
-} lwp_mutex_t;
-
-/*
- * Thread and LWP condition variables have the same
- * type definition.
- * NOTE:
- * The layout of the following structure should be kept in sync with the
- * layout of pthread_cond_t in sys/types.h. See NOTE above for lwp_mutex_t.
- */
-typedef struct _lwp_cond {
- struct {
- uint8_t flag[4];
- uint16_t type;
- uint16_t magic;
- } flags;
- upad64_t data;
-} lwp_cond_t;
-
-/*
- * LWP semaphores
- */
-typedef struct _lwp_sema {
- uint32_t count; /* semaphore count */
- uint16_t type;
- uint16_t magic;
- uint8_t flags[8]; /* last byte reserved for waiters */
- upad64_t data; /* optional data */
-} lwp_sema_t;
-
-/*
- * Thread and LWP rwlocks have the same type definition.
- * NOTE: The layout of this structure should be kept in sync with the layout
- * of the corresponding structure of pthread_rwlock_t in sys/types.h.
- * Also, because we have to deal with C++, there is an identical structure
- * for rwlock_t in head/sync.h that we cannot change.
- */
-typedef struct _lwp_rwlock {
- int32_t readers; /* -1 == writer else # of readers */
- uint16_t type;
- uint16_t magic;
- lwp_mutex_t mutex; /* used to indicate ownership */
- lwp_cond_t readercv; /* unused */
- lwp_cond_t writercv; /* unused */
-} lwp_rwlock_t;
-
-#endif /* _ASM */
-/*
- * Definitions of synchronization types.
- */
-#define USYNC_THREAD 0x00 /* private to a process */
-#define USYNC_PROCESS 0x01 /* shared by processes */
-
-/* Keep the following 3 fields in sync with pthread.h */
-#define LOCK_NORMAL 0x00 /* same as USYNC_THREAD */
-#define LOCK_ERRORCHECK 0x02 /* error check lock */
-#define LOCK_RECURSIVE 0x04 /* recursive lock */
-
-#define USYNC_PROCESS_ROBUST 0x08 /* shared by processes robustly */
-
-/* Keep the following 5 fields in sync with pthread.h */
-
-#define LOCK_PRIO_NONE 0x00
-#define LOCK_PRIO_INHERIT 0x10
-#define LOCK_PRIO_PROTECT 0x20
-#define LOCK_STALL_NP 0x00
-#define LOCK_ROBUST_NP 0x40
-
-/*
- * lwp_mutex_t flags
- */
-#define LOCK_OWNERDEAD 0x1
-#define LOCK_NOTRECOVERABLE 0x2
-#define LOCK_INITED 0x4
-#define LOCK_UNMAPPED 0x8
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SYNCH_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/sysevent.h b/sys/contrib/opensolaris/uts/common/sys/sysevent.h
deleted file mode 100644
index 0a61e41..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/sysevent.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SYSEVENT_H
-#define _SYS_SYSEVENT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/nvpair.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef NULL
-#if defined(_LP64) && !defined(__cplusplus)
-#define NULL 0L
-#else
-#define NULL 0
-#endif
-#endif
-
-/* Internal registration class and subclass */
-#define EC_ALL "register_all_classes"
-#define EC_SUB_ALL "register_all_subclasses"
-
-/*
- * Event allocation/enqueuing sleep/nosleep flags
- */
-#define SE_SLEEP 0
-#define SE_NOSLEEP 1
-
-/* Framework error codes */
-#define SE_EINVAL 1 /* Invalid argument */
-#define SE_ENOMEM 2 /* Unable to allocate memory */
-#define SE_EQSIZE 3 /* Maximum event q size exceeded */
-#define SE_EFAULT 4 /* Copy fault */
-#define SE_NOTFOUND 5 /* Attribute not found */
-#define SE_NO_TRANSPORT 6 /* sysevent transport down */
-
-/* Internal data types */
-
-#define SE_DATA_TYPE_BYTE DATA_TYPE_BYTE
-#define SE_DATA_TYPE_INT16 DATA_TYPE_INT16
-#define SE_DATA_TYPE_UINT16 DATA_TYPE_UINT16
-#define SE_DATA_TYPE_INT32 DATA_TYPE_INT32
-#define SE_DATA_TYPE_UINT32 DATA_TYPE_UINT32
-#define SE_DATA_TYPE_INT64 DATA_TYPE_INT64
-#define SE_DATA_TYPE_UINT64 DATA_TYPE_UINT64
-#define SE_DATA_TYPE_STRING DATA_TYPE_STRING
-#define SE_DATA_TYPE_BYTES DATA_TYPE_BYTE_ARRAY
-#define SE_DATA_TYPE_TIME DATA_TYPE_HRTIME
-
-#define SE_KERN_PID 0
-
-#define SUNW_VENDOR "SUNW"
-#define SE_USR_PUB "usr:"
-#define SE_KERN_PUB "kern:"
-#define SUNW_KERN_PUB SUNW_VENDOR":"SE_KERN_PUB
-#define SUNW_USR_PUB SUNW_VENDOR":"SE_USR_PUB
-
-/*
- * Event header and attribute value limits
- */
-#define MAX_ATTR_NAME 1024
-#define MAX_STRING_SZ 1024
-#define MAX_BYTE_ARRAY 1024
-
-#define MAX_CLASS_LEN 64
-#define MAX_SUBCLASS_LEN 64
-#define MAX_PUB_LEN 128
-#define MAX_CHNAME_LEN 128
-#define MAX_SUBID_LEN 16
-
-/*
- * Limit for the event payload size
- */
-#define MAX_EV_SIZE_LEN (SHRT_MAX/4)
-
-/* Opaque sysevent_t data type */
-typedef void *sysevent_t;
-
-/* Opaque channel bind data type */
-typedef void evchan_t;
-
-/* sysevent attribute list */
-typedef nvlist_t sysevent_attr_list_t;
-
-/* sysevent attribute name-value pair */
-typedef nvpair_t sysevent_attr_t;
-
-/* Unique event identifier */
-typedef struct sysevent_id {
- uint64_t eid_seq;
- hrtime_t eid_ts;
-} sysevent_id_t;
-
-/* Event attribute value structures */
-typedef struct sysevent_bytes {
- int32_t size;
- uchar_t *data;
-} sysevent_bytes_t;
-
-typedef struct sysevent_value {
- int32_t value_type; /* data type */
- union {
- uchar_t sv_byte;
- int16_t sv_int16;
- uint16_t sv_uint16;
- int32_t sv_int32;
- uint32_t sv_uint32;
- int64_t sv_int64;
- uint64_t sv_uint64;
- hrtime_t sv_time;
- char *sv_string;
- sysevent_bytes_t sv_bytes;
- } value;
-} sysevent_value_t;
-
-/*
- * The following flags determine the memory allocation semantics to use for
- * kernel event buffer allocation by userland and kernel versions of
- * sysevent_evc_publish().
- *
- * EVCH_SLEEP and EVCH_NOSLEEP respectively map to KM_SLEEP and KM_NOSLEEP.
- * EVCH_TRYHARD is a kernel-only publish flag that allows event allocation
- * routines to use alternate kmem caches in situations where free memory
- * may be low. Kernel callers of sysevent_evc_publish() must set flags to
- * one of EVCH_SLEEP, EVCH_NOSLEEP or EVCH_TRYHARD. Userland callers of
- * sysevent_evc_publish() must set flags to one of EVCH_SLEEP or EVCH_NOSLEEP.
- *
- * EVCH_QWAIT determines whether or not we should wait for slots in the event
- * queue at publication time. EVCH_QWAIT may be used by kernel and userland
- * publishers and must be used in conjunction with any one of EVCH_SLEEP,
- * EVCH_NOSLEEP or EVCH_TRYHARD (kernel-only).
- */
-
-#define EVCH_NOSLEEP 0x0001 /* No sleep on kmem_alloc() */
-#define EVCH_SLEEP 0x0002 /* Sleep on kmem_alloc() */
-#define EVCH_TRYHARD 0x0004 /* May use alternate kmem cache for alloc */
-#define EVCH_QWAIT 0x0008 /* Wait for slot in event queue */
-
-/*
- * Meaning of flags for subscribe/unsubscribe. Bits 0 to 7 are dedicated to
- * the consolidation private interface.
- */
-#define EVCH_SUB_KEEP 0x0001
-#define EVCH_ALLSUB "all_subs"
-
-/*
- * Meaning of flags parameter of channel bind function
- */
-#define EVCH_CREAT 0x0001 /* Create a channel if not present */
-#define EVCH_HOLD_PEND 0x0002
-#define EVCH_B_FLAGS 0x0003 /* All valid bits */
-
-/*
- * Meaning of commands of evc_control function
- */
-#define EVCH_GET_CHAN_LEN_MAX 1 /* Get event queue length limit */
-#define EVCH_GET_CHAN_LEN 2 /* Get event queue length */
-#define EVCH_SET_CHAN_LEN 3 /* Set event queue length */
-#define EVCH_CMD_LAST EVCH_SET_CHAN_LEN /* Last command */
-
-/*
- * Event channel interface definitions
- */
-int sysevent_evc_bind(const char *, evchan_t **, uint32_t);
-void sysevent_evc_unbind(evchan_t *);
-int sysevent_evc_subscribe(evchan_t *, const char *, const char *,
- int (*)(sysevent_t *, void *), void *, uint32_t);
-void sysevent_evc_unsubscribe(evchan_t *, const char *);
-int sysevent_evc_publish(evchan_t *, const char *, const char *,
- const char *, const char *, nvlist_t *, uint32_t);
-int sysevent_evc_control(evchan_t *, int, ...);
-
-#ifdef _KERNEL
-
-/*
- * Kernel log_event interfaces.
- */
-int log_sysevent(sysevent_t *, int, sysevent_id_t *);
-
-sysevent_t *sysevent_alloc(char *, char *, char *, int);
-void sysevent_free(sysevent_t *);
-int sysevent_add_attr(sysevent_attr_list_t **, char *, sysevent_value_t *, int);
-void sysevent_free_attr(sysevent_attr_list_t *);
-int sysevent_attach_attributes(sysevent_t *, sysevent_attr_list_t *);
-void sysevent_detach_attributes(sysevent_t *);
-char *sysevent_get_class_name(sysevent_t *);
-char *sysevent_get_subclass_name(sysevent_t *);
-uint64_t sysevent_get_seq(sysevent_t *);
-void sysevent_get_time(sysevent_t *, hrtime_t *);
-size_t sysevent_get_size(sysevent_t *);
-char *sysevent_get_pub(sysevent_t *);
-int sysevent_get_attr_list(sysevent_t *, nvlist_t **);
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SYSEVENT_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/sysmacros.h b/sys/contrib/opensolaris/uts/common/sys/sysmacros.h
deleted file mode 100644
index 9f16a07..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/sysmacros.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SYSMACROS_H
-#define _SYS_SYSMACROS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/param.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Some macros for units conversion
- */
-/*
- * Disk blocks (sectors) and bytes.
- */
-#define dtob(DD) ((DD) << DEV_BSHIFT)
-#define btod(BB) (((BB) + DEV_BSIZE - 1) >> DEV_BSHIFT)
-#define btodt(BB) ((BB) >> DEV_BSHIFT)
-#define lbtod(BB) (((offset_t)(BB) + DEV_BSIZE - 1) >> DEV_BSHIFT)
-
-/* common macros */
-#ifndef MIN
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#endif
-#ifndef MAX
-#define MAX(a, b) ((a) < (b) ? (b) : (a))
-#endif
-#ifndef ABS
-#define ABS(a) ((a) < 0 ? -(a) : (a))
-#endif
-
-#ifdef _KERNEL
-
-/*
- * Convert a single byte to/from binary-coded decimal (BCD).
- */
-extern unsigned char byte_to_bcd[256];
-extern unsigned char bcd_to_byte[256];
-
-#define BYTE_TO_BCD(x) byte_to_bcd[(x) & 0xff]
-#define BCD_TO_BYTE(x) bcd_to_byte[(x) & 0xff]
-
-#endif /* _KERNEL */
-
-/*
- * WARNING: The device number macros defined here should not be used by device
- * drivers or user software. Device drivers should use the device functions
- * defined in the DDI/DKI interface (see also ddi.h). Application software
- * should make use of the library routines available in makedev(3). A set of
- * new device macros are provided to operate on the expanded device number
- * format supported in SVR4. Macro versions of the DDI device functions are
- * provided for use by kernel proper routines only. Macro routines bmajor(),
- * major(), minor(), emajor(), eminor(), and makedev() will be removed or
- * their definitions changed at the next major release following SVR4.
- */
-
-#define O_BITSMAJOR 7 /* # of SVR3 major device bits */
-#define O_BITSMINOR 8 /* # of SVR3 minor device bits */
-#define O_MAXMAJ 0x7f /* SVR3 max major value */
-#define O_MAXMIN 0xff /* SVR3 max minor value */
-
-
-#define L_BITSMAJOR32 14 /* # of SVR4 major device bits */
-#define L_BITSMINOR32 18 /* # of SVR4 minor device bits */
-#define L_MAXMAJ32 0x3fff /* SVR4 max major value */
-#define L_MAXMIN32 0x3ffff /* MAX minor for 3b2 software drivers. */
- /* For 3b2 hardware devices the minor is */
- /* restricted to 256 (0-255) */
-
-#ifdef _LP64
-#define L_BITSMAJOR 32 /* # of major device bits in 64-bit Solaris */
-#define L_BITSMINOR 32 /* # of minor device bits in 64-bit Solaris */
-#define L_MAXMAJ 0xfffffffful /* max major value */
-#define L_MAXMIN 0xfffffffful /* max minor value */
-#else
-#define L_BITSMAJOR L_BITSMAJOR32
-#define L_BITSMINOR L_BITSMINOR32
-#define L_MAXMAJ L_MAXMAJ32
-#define L_MAXMIN L_MAXMIN32
-#endif
-
-#if defined(sun)
-#ifdef _KERNEL
-
-/* major part of a device internal to the kernel */
-
-#define major(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ)
-#define bmajor(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ)
-
-/* get internal major part of expanded device number */
-
-#define getmajor(x) (major_t)((((dev_t)(x)) >> L_BITSMINOR) & L_MAXMAJ)
-
-/* minor part of a device internal to the kernel */
-
-#define minor(x) (minor_t)((x) & O_MAXMIN)
-
-/* get internal minor part of expanded device number */
-
-#define getminor(x) (minor_t)((x) & L_MAXMIN)
-
-#else
-
-/* major part of a device external from the kernel (same as emajor below) */
-
-#define major(x) (major_t)((((unsigned)(x)) >> O_BITSMINOR) & O_MAXMAJ)
-
-/* minor part of a device external from the kernel (same as eminor below) */
-
-#define minor(x) (minor_t)((x) & O_MAXMIN)
-
-#endif /* _KERNEL */
-
-/* create old device number */
-
-#define makedev(x, y) (unsigned short)(((x) << O_BITSMINOR) | ((y) & O_MAXMIN))
-
-/* make an new device number */
-
-#define makedevice(x, y) (dev_t)(((dev_t)(x) << L_BITSMINOR) | ((y) & L_MAXMIN))
-
-
-/*
- * emajor() allows kernel/driver code to print external major numbers
- * eminor() allows kernel/driver code to print external minor numbers
- */
-
-#define emajor(x) \
- (major_t)(((unsigned int)(x) >> O_BITSMINOR) > O_MAXMAJ) ? \
- NODEV : (((unsigned int)(x) >> O_BITSMINOR) & O_MAXMAJ)
-
-#define eminor(x) \
- (minor_t)((x) & O_MAXMIN)
-
-/*
- * get external major and minor device
- * components from expanded device number
- */
-#define getemajor(x) (major_t)((((dev_t)(x) >> L_BITSMINOR) > L_MAXMAJ) ? \
- NODEV : (((dev_t)(x) >> L_BITSMINOR) & L_MAXMAJ))
-#define geteminor(x) (minor_t)((x) & L_MAXMIN)
-
-#endif /* sun */
-
-/*
- * These are versions of the kernel routines for compressing and
- * expanding long device numbers that don't return errors.
- */
-#if (L_BITSMAJOR32 == L_BITSMAJOR) && (L_BITSMINOR32 == L_BITSMINOR)
-
-#define DEVCMPL(x) (x)
-#define DEVEXPL(x) (x)
-
-#else
-
-#define DEVCMPL(x) \
- (dev32_t)((((x) >> L_BITSMINOR) > L_MAXMAJ32 || \
- ((x) & L_MAXMIN) > L_MAXMIN32) ? NODEV32 : \
- ((((x) >> L_BITSMINOR) << L_BITSMINOR32) | ((x) & L_MAXMIN32)))
-
-#define DEVEXPL(x) \
- (((x) == NODEV32) ? NODEV : \
- makedevice(((x) >> L_BITSMINOR32) & L_MAXMAJ32, (x) & L_MAXMIN32))
-
-#endif /* L_BITSMAJOR32 ... */
-
-/* convert to old (SVR3.2) dev format */
-
-#define cmpdev(x) \
- (o_dev_t)((((x) >> L_BITSMINOR) > O_MAXMAJ || \
- ((x) & L_MAXMIN) > O_MAXMIN) ? NODEV : \
- ((((x) >> L_BITSMINOR) << O_BITSMINOR) | ((x) & O_MAXMIN)))
-
-/* convert to new (SVR4) dev format */
-
-#define expdev(x) \
- (dev_t)(((dev_t)(((x) >> O_BITSMINOR) & O_MAXMAJ) << L_BITSMINOR) | \
- ((x) & O_MAXMIN))
-
-/*
- * Macro for checking power of 2 address alignment.
- */
-#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
-
-/*
- * Macros for counting and rounding.
- */
-#define howmany(x, y) (((x)+((y)-1))/(y))
-#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
-
-/*
- * Macro to determine if value is a power of 2
- */
-#define ISP2(x) (((x) & ((x) - 1)) == 0)
-
-/*
- * Macros for various sorts of alignment and rounding when the alignment
- * is known to be a power of 2.
- */
-#define P2ALIGN(x, align) ((x) & -(align))
-#define P2PHASE(x, align) ((x) & ((align) - 1))
-#define P2NPHASE(x, align) (-(x) & ((align) - 1))
-#define P2ROUNDUP(x, align) (-(-(x) & -(align)))
-#define P2END(x, align) (-(~(x) & -(align)))
-#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align)))
-#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
-/*
- * Determine whether two numbers have the same high-order bit.
- */
-#define P2SAMEHIGHBIT(x, y) (((x) ^ (y)) < ((x) & (y)))
-
-/*
- * Typed version of the P2* macros. These macros should be used to ensure
- * that the result is correctly calculated based on the data type of (x),
- * which is passed in as the last argument, regardless of the data
- * type of the alignment. For example, if (x) is of type uint64_t,
- * and we want to round it up to a page boundary using "PAGESIZE" as
- * the alignment, we can do either
- * P2ROUNDUP(x, (uint64_t)PAGESIZE)
- * or
- * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
- */
-#define P2ALIGN_TYPED(x, align, type) \
- ((type)(x) & -(type)(align))
-#define P2PHASE_TYPED(x, align, type) \
- ((type)(x) & ((type)(align) - 1))
-#define P2NPHASE_TYPED(x, align, type) \
- (-(type)(x) & ((type)(align) - 1))
-#define P2ROUNDUP_TYPED(x, align, type) \
- (-(-(type)(x) & -(type)(align)))
-#define P2END_TYPED(x, align, type) \
- (-(~(type)(x) & -(type)(align)))
-#define P2PHASEUP_TYPED(x, align, phase, type) \
- ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
-#define P2CROSS_TYPED(x, y, align, type) \
- (((type)(x) ^ (type)(y)) > (type)(align) - 1)
-#define P2SAMEHIGHBIT_TYPED(x, y, type) \
- (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
-
-/*
- * Macros to atomically increment/decrement a variable. mutex and var
- * must be pointers.
- */
-#define INCR_COUNT(var, mutex) mutex_enter(mutex), (*(var))++, mutex_exit(mutex)
-#define DECR_COUNT(var, mutex) mutex_enter(mutex), (*(var))--, mutex_exit(mutex)
-
-#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
-
-/* avoid any possibility of clashing with <stddef.h> version */
-
-#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SYSMACROS_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/vmem.h b/sys/contrib/opensolaris/uts/common/sys/vmem.h
deleted file mode 100644
index f0caec6..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/vmem.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_VMEM_H
-#define _SYS_VMEM_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Per-allocation flags
- */
-#define VM_SLEEP 0x00000000 /* same as KM_SLEEP */
-#define VM_NOSLEEP 0x00000001 /* same as KM_NOSLEEP */
-#define VM_PANIC 0x00000002 /* same as KM_PANIC */
-#define VM_PUSHPAGE 0x00000004 /* same as KM_PUSHPAGE */
-#define VM_KMFLAGS 0x000000ff /* flags that must match KM_* flags */
-
-#define VM_BESTFIT 0x00000100
-#define VM_FIRSTFIT 0x00000200
-#define VM_NEXTFIT 0x00000400
-
-/*
- * The following flags are restricted for use only within the kernel.
- * VM_MEMLOAD is for use by the HAT to avoid infinite recursion.
- * VM_NORELOC is used by the kernel when static VA->PA mappings are required.
- */
-#define VM_MEMLOAD 0x00000800
-#define VM_NORELOC 0x00001000
-/*
- * VM_ABORT requests that vmem_alloc() *ignore* the VM_SLEEP/VM_NOSLEEP flags
- * and forgo reaping if the allocation or attempted import, fails. This
- * flag is a segkmem-specific flag, and should not be used by anyone else.
- */
-#define VM_ABORT 0x00002000
-
-#define VM_FLAGS 0x0000FFFF
-
-/*
- * Arena creation flags
- */
-#define VMC_POPULATOR 0x00010000
-#define VMC_NO_QCACHE 0x00020000 /* cannot use quantum caches */
-#define VMC_IDENTIFIER 0x00040000 /* not backed by memory */
-/*
- * internal use only; the import function uses the vmem_ximport_t interface
- * and may increase the request size if it so desires
- */
-#define VMC_XALLOC 0x00080000
-#define VMC_FLAGS 0xFFFF0000
-
-/*
- * Public segment types
- */
-#define VMEM_ALLOC 0x01
-#define VMEM_FREE 0x02
-
-/*
- * Implementation-private segment types
- */
-#define VMEM_SPAN 0x10
-#define VMEM_ROTOR 0x20
-#define VMEM_WALKER 0x40
-
-/*
- * VMEM_REENTRANT indicates to vmem_walk() that the callback routine may
- * call back into the arena being walked, so vmem_walk() must drop the
- * arena lock before each callback. The caveat is that since the arena
- * isn't locked, its state can change. Therefore it is up to the callback
- * routine to handle cases where the segment isn't of the expected type.
- * For example, we use this to walk heap_arena when generating a crash dump;
- * see segkmem_dump() for sample usage.
- */
-#define VMEM_REENTRANT 0x80000000
-
-typedef struct vmem vmem_t;
-typedef void *(vmem_alloc_t)(vmem_t *, size_t, int);
-typedef void (vmem_free_t)(vmem_t *, void *, size_t);
-
-/*
- * Alternate import style; the requested size is passed in a pointer,
- * which can be increased by the import function if desired.
- */
-typedef void *(vmem_ximport_t)(vmem_t *, size_t *, int);
-
-#ifdef _KERNEL
-extern vmem_t *vmem_init(const char *, void *, size_t, size_t,
- vmem_alloc_t *, vmem_free_t *);
-extern void vmem_update(void *);
-extern int vmem_is_populator(void);
-extern size_t vmem_seg_size;
-#endif
-
-extern vmem_t *vmem_create(const char *, void *, size_t, size_t,
- vmem_alloc_t *, vmem_free_t *, vmem_t *, size_t, int);
-extern vmem_t *vmem_xcreate(const char *, void *, size_t, size_t,
- vmem_ximport_t *, vmem_free_t *, vmem_t *, size_t, int);
-extern void vmem_destroy(vmem_t *);
-extern void *vmem_alloc(vmem_t *, size_t, int);
-extern void *vmem_xalloc(vmem_t *, size_t, size_t, size_t, size_t,
- void *, void *, int);
-extern void vmem_free(vmem_t *, void *, size_t);
-extern void vmem_xfree(vmem_t *, void *, size_t);
-extern void *vmem_add(vmem_t *, void *, size_t, int);
-extern int vmem_contains(vmem_t *, void *, size_t);
-extern void vmem_walk(vmem_t *, int, void (*)(void *, void *, size_t), void *);
-extern size_t vmem_size(vmem_t *, int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_VMEM_H */
diff --git a/sys/contrib/opensolaris/uts/common/sys/zmod.h b/sys/contrib/opensolaris/uts/common/sys/zmod.h
deleted file mode 100644
index ba02672..0000000
--- a/sys/contrib/opensolaris/uts/common/sys/zmod.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZMOD_H
-#define _ZMOD_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * zmod - RFC-1950-compatible decompression routines
- *
- * This file provides the public interfaces to zmod, an in-kernel RFC 1950
- * decompression library. More information about the implementation of these
- * interfaces can be found in the usr/src/uts/common/zmod/ directory.
- */
-
-#define Z_OK 0
-#define Z_STREAM_END 1
-#define Z_NEED_DICT 2
-#define Z_ERRNO (-1)
-#define Z_STREAM_ERROR (-2)
-#define Z_DATA_ERROR (-3)
-#define Z_MEM_ERROR (-4)
-#define Z_BUF_ERROR (-5)
-#define Z_VERSION_ERROR (-6)
-
-#define Z_NO_COMPRESSION 0
-#define Z_BEST_SPEED 1
-#define Z_BEST_COMPRESSION 9
-#define Z_DEFAULT_COMPRESSION (-1)
-
-extern int z_uncompress(void *, size_t *, const void *, size_t);
-extern int z_compress(void *, size_t *, const void *, size_t);
-extern int z_compress_level(void *, size_t *, const void *, size_t, int);
-extern const char *z_strerror(int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZMOD_H */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/adler32.c b/sys/contrib/opensolaris/uts/common/zmod/adler32.c
deleted file mode 100644
index 59d8463..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/adler32.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/* adler32.c -- compute the Adler-32 checksum of a data stream
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#define ZLIB_INTERNAL
-#include "zlib.h"
-
-#define BASE 65521UL /* largest prime smaller than 65536 */
-#define NMAX 5552
-/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-
-#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
-#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
-#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
-#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
-#define DO16(buf) DO8(buf,0); DO8(buf,8);
-
-/* use NO_DIVIDE if your processor does not do division in hardware */
-#ifdef NO_DIVIDE
-# define MOD(a) \
- do { \
- if (a >= (BASE << 16)) a -= (BASE << 16); \
- if (a >= (BASE << 15)) a -= (BASE << 15); \
- if (a >= (BASE << 14)) a -= (BASE << 14); \
- if (a >= (BASE << 13)) a -= (BASE << 13); \
- if (a >= (BASE << 12)) a -= (BASE << 12); \
- if (a >= (BASE << 11)) a -= (BASE << 11); \
- if (a >= (BASE << 10)) a -= (BASE << 10); \
- if (a >= (BASE << 9)) a -= (BASE << 9); \
- if (a >= (BASE << 8)) a -= (BASE << 8); \
- if (a >= (BASE << 7)) a -= (BASE << 7); \
- if (a >= (BASE << 6)) a -= (BASE << 6); \
- if (a >= (BASE << 5)) a -= (BASE << 5); \
- if (a >= (BASE << 4)) a -= (BASE << 4); \
- if (a >= (BASE << 3)) a -= (BASE << 3); \
- if (a >= (BASE << 2)) a -= (BASE << 2); \
- if (a >= (BASE << 1)) a -= (BASE << 1); \
- if (a >= BASE) a -= BASE; \
- } while (0)
-# define MOD4(a) \
- do { \
- if (a >= (BASE << 4)) a -= (BASE << 4); \
- if (a >= (BASE << 3)) a -= (BASE << 3); \
- if (a >= (BASE << 2)) a -= (BASE << 2); \
- if (a >= (BASE << 1)) a -= (BASE << 1); \
- if (a >= BASE) a -= BASE; \
- } while (0)
-#else
-# define MOD(a) a %= BASE
-# define MOD4(a) a %= BASE
-#endif
-
-/* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
- uLong adler;
- const Bytef *buf;
- uInt len;
-{
- unsigned long sum2;
- unsigned n;
-
- /* split Adler-32 into component sums */
- sum2 = (adler >> 16) & 0xffff;
- adler &= 0xffff;
-
- /* in case user likes doing a byte at a time, keep it fast */
- if (len == 1) {
- adler += buf[0];
- if (adler >= BASE)
- adler -= BASE;
- sum2 += adler;
- if (sum2 >= BASE)
- sum2 -= BASE;
- return adler | (sum2 << 16);
- }
-
- /* initial Adler-32 value (deferred check for len == 1 speed) */
- if (buf == Z_NULL)
- return 1L;
-
- /* in case short lengths are provided, keep it somewhat fast */
- if (len < 16) {
- while (len--) {
- adler += *buf++;
- sum2 += adler;
- }
- if (adler >= BASE)
- adler -= BASE;
- MOD4(sum2); /* only added so many BASE's */
- return adler | (sum2 << 16);
- }
-
- /* do length NMAX blocks -- requires just one modulo operation */
- while (len >= NMAX) {
- len -= NMAX;
- n = NMAX / 16; /* NMAX is divisible by 16 */
- do {
- DO16(buf); /* 16 sums unrolled */
- buf += 16;
- } while (--n);
- MOD(adler);
- MOD(sum2);
- }
-
- /* do remaining bytes (less than NMAX, still just one modulo) */
- if (len) { /* avoid modulos if none remaining */
- while (len >= 16) {
- len -= 16;
- DO16(buf);
- buf += 16;
- }
- while (len--) {
- adler += *buf++;
- sum2 += adler;
- }
- MOD(adler);
- MOD(sum2);
- }
-
- /* return recombined sums */
- return adler | (sum2 << 16);
-}
-
-/* ========================================================================= */
-uLong ZEXPORT adler32_combine(adler1, adler2, len2)
- uLong adler1;
- uLong adler2;
- z_off_t len2;
-{
- unsigned long sum1;
- unsigned long sum2;
- unsigned rem;
-
- /* the derivation of this formula is left as an exercise for the reader */
- rem = (unsigned)(len2 % BASE);
- sum1 = adler1 & 0xffff;
- sum2 = rem * sum1;
- MOD(sum2);
- sum1 += (adler2 & 0xffff) + BASE - 1;
- sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
- if (sum1 > BASE) sum1 -= BASE;
- if (sum1 > BASE) sum1 -= BASE;
- if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
- if (sum2 > BASE) sum2 -= BASE;
- return sum1 | (sum2 << 16);
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/crc32.c b/sys/contrib/opensolaris/uts/common/zmod/crc32.c
deleted file mode 100644
index 61ad581..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/crc32.c
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
- * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
- * tables for updating the shift register in one step with three exclusive-ors
- * instead of four steps with four exclusive-ors. This results in about a
- * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
- protection on the static variables used to control the first-use generation
- of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
- first call get_crc_table() to initialize the tables before allowing more than
- one thread to use crc32().
- */
-
-#ifdef MAKECRCH
-# include <stdio.h>
-# ifndef DYNAMIC_CRC_TABLE
-# define DYNAMIC_CRC_TABLE
-# endif /* !DYNAMIC_CRC_TABLE */
-#endif /* MAKECRCH */
-
-#include "zutil.h" /* for STDC and FAR definitions */
-
-#define local static
-
-/* Find a four-byte integer type for crc32_little() and crc32_big(). */
-#ifndef NOBYFOUR
-# ifdef STDC /* need ANSI C limits.h to determine sizes */
-# include <limits.h>
-# define BYFOUR
-# if (UINT_MAX == 0xffffffffUL)
- typedef unsigned int u4;
-# else
-# if (ULONG_MAX == 0xffffffffUL)
- typedef unsigned long u4;
-# else
-# if (USHRT_MAX == 0xffffffffUL)
- typedef unsigned short u4;
-# else
-# undef BYFOUR /* can't find a four-byte integer type! */
-# endif
-# endif
-# endif
-# endif /* STDC */
-#endif /* !NOBYFOUR */
-
-/* Definitions for doing the crc four data bytes at a time. */
-#ifdef BYFOUR
-# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
- (((w)&0xff00)<<8)+(((w)&0xff)<<24))
- local unsigned long crc32_little OF((unsigned long,
- const unsigned char FAR *, unsigned));
- local unsigned long crc32_big OF((unsigned long,
- const unsigned char FAR *, unsigned));
-# define TBLS 8
-#else
-# define TBLS 1
-#endif /* BYFOUR */
-
-/* Local functions for crc concatenation */
-local unsigned long gf2_matrix_times OF((unsigned long *mat,
- unsigned long vec));
-local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
-
-#ifdef DYNAMIC_CRC_TABLE
-
-local volatile int crc_table_empty = 1;
-local unsigned long FAR crc_table[TBLS][256];
-local void make_crc_table OF((void));
-#ifdef MAKECRCH
- local void write_table OF((FILE *, const unsigned long FAR *));
-#endif /* MAKECRCH */
-/*
- Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
- x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
-
- Polynomials over GF(2) are represented in binary, one bit per coefficient,
- with the lowest powers in the most significant bit. Then adding polynomials
- is just exclusive-or, and multiplying a polynomial by x is a right shift by
- one. If we call the above polynomial p, and represent a byte as the
- polynomial q, also with the lowest power in the most significant bit (so the
- byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
- where a mod b means the remainder after dividing a by b.
-
- This calculation is done using the shift-register method of multiplying and
- taking the remainder. The register is initialized to zero, and for each
- incoming bit, x^32 is added mod p to the register if the bit is a one (where
- x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
- x (which is shifting right by one and adding x^32 mod p if the bit shifted
- out is a one). We start with the highest power (least significant bit) of
- q and repeat for all eight bits of q.
-
- The first table is simply the CRC of all possible eight bit values. This is
- all the information needed to generate CRCs on data a byte at a time for all
- combinations of CRC register values and incoming bytes. The remaining tables
- allow for word-at-a-time CRC calculation for both big-endian and little-
- endian machines, where a word is four bytes.
-*/
-local void make_crc_table()
-{
- unsigned long c;
- int n, k;
- unsigned long poly; /* polynomial exclusive-or pattern */
- /* terms of polynomial defining this crc (except x^32): */
- static volatile int first = 1; /* flag to limit concurrent making */
- static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
-
- /* See if another task is already doing this (not thread-safe, but better
- than nothing -- significantly reduces duration of vulnerability in
- case the advice about DYNAMIC_CRC_TABLE is ignored) */
- if (first) {
- first = 0;
-
- /* make exclusive-or pattern from polynomial (0xedb88320UL) */
- poly = 0UL;
- for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
- poly |= 1UL << (31 - p[n]);
-
- /* generate a crc for every 8-bit value */
- for (n = 0; n < 256; n++) {
- c = (unsigned long)n;
- for (k = 0; k < 8; k++)
- c = c & 1 ? poly ^ (c >> 1) : c >> 1;
- crc_table[0][n] = c;
- }
-
-#ifdef BYFOUR
- /* generate crc for each value followed by one, two, and three zeros,
- and then the byte reversal of those as well as the first table */
- for (n = 0; n < 256; n++) {
- c = crc_table[0][n];
- crc_table[4][n] = REV(c);
- for (k = 1; k < 4; k++) {
- c = crc_table[0][c & 0xff] ^ (c >> 8);
- crc_table[k][n] = c;
- crc_table[k + 4][n] = REV(c);
- }
- }
-#endif /* BYFOUR */
-
- crc_table_empty = 0;
- }
- else { /* not first */
- /* wait for the other guy to finish (not efficient, but rare) */
- while (crc_table_empty)
- ;
- }
-
-#ifdef MAKECRCH
- /* write out CRC tables to crc32.h */
- {
- FILE *out;
-
- out = fopen("crc32.h", "w");
- if (out == NULL) return;
- fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
- fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
- fprintf(out, "local const unsigned long FAR ");
- fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
- write_table(out, crc_table[0]);
-# ifdef BYFOUR
- fprintf(out, "#ifdef BYFOUR\n");
- for (k = 1; k < 8; k++) {
- fprintf(out, " },\n {\n");
- write_table(out, crc_table[k]);
- }
- fprintf(out, "#endif\n");
-# endif /* BYFOUR */
- fprintf(out, " }\n};\n");
- fclose(out);
- }
-#endif /* MAKECRCH */
-}
-
-#ifdef MAKECRCH
-local void write_table(out, table)
- FILE *out;
- const unsigned long FAR *table;
-{
- int n;
-
- for (n = 0; n < 256; n++)
- fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n],
- n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
-}
-#endif /* MAKECRCH */
-
-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables of CRC-32s of all single-byte values, made by make_crc_table().
- */
-#include "crc32.h"
-#endif /* DYNAMIC_CRC_TABLE */
-
-/* =========================================================================
- * This function can be used by asm versions of crc32()
- */
-const unsigned long FAR * ZEXPORT get_crc_table()
-{
-#ifdef DYNAMIC_CRC_TABLE
- if (crc_table_empty)
- make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
- return (const unsigned long FAR *)crc_table;
-}
-
-/* ========================================================================= */
-#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
-#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
-
-/* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
- unsigned long crc;
- const unsigned char FAR *buf;
- unsigned len;
-{
- if (buf == Z_NULL) return 0UL;
-
-#ifdef DYNAMIC_CRC_TABLE
- if (crc_table_empty)
- make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
-
-#ifdef BYFOUR
- if (sizeof(void *) == sizeof(ptrdiff_t)) {
- u4 endian;
-
- endian = 1;
- if (*((unsigned char *)(&endian)))
- return crc32_little(crc, buf, len);
- else
- return crc32_big(crc, buf, len);
- }
-#endif /* BYFOUR */
- crc = crc ^ 0xffffffffUL;
- while (len >= 8) {
- DO8;
- len -= 8;
- }
- if (len) do {
- DO1;
- } while (--len);
- return crc ^ 0xffffffffUL;
-}
-
-#ifdef BYFOUR
-
-/* ========================================================================= */
-#define DOLIT4 c ^= *buf4++; \
- c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
- crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
-#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
-
-/* ========================================================================= */
-local unsigned long crc32_little(crc, buf, len)
- unsigned long crc;
- const unsigned char FAR *buf;
- unsigned len;
-{
- register u4 c;
- register const u4 FAR *buf4;
-
- c = (u4)crc;
- c = ~c;
- while (len && ((ptrdiff_t)buf & 3)) {
- c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
- len--;
- }
-
- buf4 = (const u4 FAR *)(const void FAR *)buf;
- while (len >= 32) {
- DOLIT32;
- len -= 32;
- }
- while (len >= 4) {
- DOLIT4;
- len -= 4;
- }
- buf = (const unsigned char FAR *)buf4;
-
- if (len) do {
- c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
- } while (--len);
- c = ~c;
- return (unsigned long)c;
-}
-
-/* ========================================================================= */
-#define DOBIG4 c ^= *++buf4; \
- c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
- crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
-#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
-
-/* ========================================================================= */
-local unsigned long crc32_big(crc, buf, len)
- unsigned long crc;
- const unsigned char FAR *buf;
- unsigned len;
-{
- register u4 c;
- register const u4 FAR *buf4;
-
- c = REV((u4)crc);
- c = ~c;
- while (len && ((ptrdiff_t)buf & 3)) {
- c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
- len--;
- }
-
- buf4 = (const u4 FAR *)(const void FAR *)buf;
- buf4--;
- while (len >= 32) {
- DOBIG32;
- len -= 32;
- }
- while (len >= 4) {
- DOBIG4;
- len -= 4;
- }
- buf4++;
- buf = (const unsigned char FAR *)buf4;
-
- if (len) do {
- c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
- } while (--len);
- c = ~c;
- return (unsigned long)(REV(c));
-}
-
-#endif /* BYFOUR */
-
-#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
-
-/* ========================================================================= */
-local unsigned long gf2_matrix_times(mat, vec)
- unsigned long *mat;
- unsigned long vec;
-{
- unsigned long sum;
-
- sum = 0;
- while (vec) {
- if (vec & 1)
- sum ^= *mat;
- vec >>= 1;
- mat++;
- }
- return sum;
-}
-
-/* ========================================================================= */
-local void gf2_matrix_square(square, mat)
- unsigned long *square;
- unsigned long *mat;
-{
- int n;
-
- for (n = 0; n < GF2_DIM; n++)
- square[n] = gf2_matrix_times(mat, mat[n]);
-}
-
-/* ========================================================================= */
-uLong ZEXPORT crc32_combine(crc1, crc2, len2)
- uLong crc1;
- uLong crc2;
- z_off_t len2;
-{
- int n;
- unsigned long row;
- unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
- unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
-
- /* degenerate case */
- if (len2 == 0)
- return crc1;
-
- /* put operator for one zero bit in odd */
- odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
- row = 1;
- for (n = 1; n < GF2_DIM; n++) {
- odd[n] = row;
- row <<= 1;
- }
-
- /* put operator for two zero bits in even */
- gf2_matrix_square(even, odd);
-
- /* put operator for four zero bits in odd */
- gf2_matrix_square(odd, even);
-
- /* apply len2 zeros to crc1 (first square will put the operator for one
- zero byte, eight zero bits, in even) */
- do {
- /* apply zeros operator for this bit of len2 */
- gf2_matrix_square(even, odd);
- if (len2 & 1)
- crc1 = gf2_matrix_times(even, crc1);
- len2 >>= 1;
-
- /* if no more bits set, then done */
- if (len2 == 0)
- break;
-
- /* another iteration of the loop with odd and even swapped */
- gf2_matrix_square(odd, even);
- if (len2 & 1)
- crc1 = gf2_matrix_times(odd, crc1);
- len2 >>= 1;
-
- /* if no more bits set, then done */
- } while (len2 != 0);
-
- /* return combined crc */
- crc1 ^= crc2;
- return crc1;
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/crc32.h b/sys/contrib/opensolaris/uts/common/zmod/crc32.h
deleted file mode 100644
index 495c83e..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/crc32.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/* crc32.h -- tables for rapid CRC calculation
- * Generated automatically by crc32.c
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-local const unsigned long FAR crc_table[TBLS][256] =
-{
- {
- 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
- 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
- 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
- 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
- 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
- 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
- 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
- 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
- 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
- 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
- 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
- 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
- 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
- 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
- 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
- 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
- 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
- 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
- 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
- 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
- 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
- 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
- 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
- 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
- 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
- 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
- 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
- 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
- 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
- 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
- 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
- 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
- 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
- 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
- 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
- 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
- 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
- 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
- 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
- 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
- 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
- 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
- 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
- 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
- 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
- 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
- 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
- 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
- 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
- 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
- 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
- 0x2d02ef8dUL
-#ifdef BYFOUR
- },
- {
- 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
- 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
- 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
- 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
- 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
- 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
- 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
- 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
- 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
- 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
- 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
- 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
- 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
- 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
- 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
- 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
- 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
- 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
- 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
- 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
- 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
- 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
- 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
- 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
- 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
- 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
- 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
- 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
- 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
- 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
- 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
- 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
- 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
- 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
- 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
- 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
- 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
- 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
- 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
- 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
- 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
- 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
- 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
- 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
- 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
- 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
- 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
- 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
- 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
- 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
- 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
- 0x9324fd72UL
- },
- {
- 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
- 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
- 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
- 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
- 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
- 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
- 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
- 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
- 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
- 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
- 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
- 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
- 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
- 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
- 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
- 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
- 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
- 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
- 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
- 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
- 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
- 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
- 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
- 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
- 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
- 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
- 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
- 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
- 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
- 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
- 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
- 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
- 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
- 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
- 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
- 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
- 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
- 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
- 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
- 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
- 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
- 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
- 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
- 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
- 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
- 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
- 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
- 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
- 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
- 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
- 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
- 0xbe9834edUL
- },
- {
- 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
- 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
- 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
- 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
- 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
- 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
- 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
- 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
- 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
- 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
- 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
- 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
- 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
- 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
- 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
- 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
- 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
- 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
- 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
- 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
- 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
- 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
- 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
- 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
- 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
- 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
- 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
- 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
- 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
- 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
- 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
- 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
- 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
- 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
- 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
- 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
- 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
- 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
- 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
- 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
- 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
- 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
- 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
- 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
- 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
- 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
- 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
- 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
- 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
- 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
- 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
- 0xde0506f1UL
- },
- {
- 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
- 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
- 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
- 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
- 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
- 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
- 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
- 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
- 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
- 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
- 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
- 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
- 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
- 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
- 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
- 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
- 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
- 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
- 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
- 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
- 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
- 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
- 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
- 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
- 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
- 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
- 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
- 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
- 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
- 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
- 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
- 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
- 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
- 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
- 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
- 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
- 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
- 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
- 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
- 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
- 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
- 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
- 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
- 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
- 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
- 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
- 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
- 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
- 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
- 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
- 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
- 0x8def022dUL
- },
- {
- 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
- 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
- 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
- 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
- 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
- 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
- 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
- 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
- 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
- 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
- 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
- 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
- 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
- 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
- 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
- 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
- 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
- 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
- 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
- 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
- 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
- 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
- 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
- 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
- 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
- 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
- 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
- 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
- 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
- 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
- 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
- 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
- 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
- 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
- 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
- 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
- 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
- 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
- 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
- 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
- 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
- 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
- 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
- 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
- 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
- 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
- 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
- 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
- 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
- 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
- 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
- 0x72fd2493UL
- },
- {
- 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
- 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
- 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
- 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
- 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
- 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
- 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
- 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
- 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
- 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
- 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
- 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
- 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
- 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
- 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
- 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
- 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
- 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
- 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
- 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
- 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
- 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
- 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
- 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
- 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
- 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
- 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
- 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
- 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
- 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
- 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
- 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
- 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
- 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
- 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
- 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
- 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
- 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
- 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
- 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
- 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
- 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
- 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
- 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
- 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
- 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
- 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
- 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
- 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
- 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
- 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
- 0xed3498beUL
- },
- {
- 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
- 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
- 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
- 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
- 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
- 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
- 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
- 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
- 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
- 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
- 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
- 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
- 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
- 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
- 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
- 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
- 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
- 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
- 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
- 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
- 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
- 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
- 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
- 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
- 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
- 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
- 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
- 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
- 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
- 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
- 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
- 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
- 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
- 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
- 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
- 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
- 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
- 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
- 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
- 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
- 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
- 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
- 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
- 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
- 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
- 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
- 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
- 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
- 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
- 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
- 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
- 0xf10605deUL
-#endif
- }
-};
diff --git a/sys/contrib/opensolaris/uts/common/zmod/deflate.c b/sys/contrib/opensolaris/uts/common/zmod/deflate.c
deleted file mode 100644
index 7847e40..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/deflate.c
+++ /dev/null
@@ -1,1742 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2005 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ALGORITHM
- *
- * The "deflation" process depends on being able to identify portions
- * of the input text which are identical to earlier input (within a
- * sliding window trailing behind the input currently being processed).
- *
- * The most straightforward technique turns out to be the fastest for
- * most input files: try all possible matches and select the longest.
- * The key feature of this algorithm is that insertions into the string
- * dictionary are very simple and thus fast, and deletions are avoided
- * completely. Insertions are performed at each input character, whereas
- * string matches are performed only when the previous match ends. So it
- * is preferable to spend more time in matches to allow very fast string
- * insertions and avoid deletions. The matching algorithm for small
- * strings is inspired from that of Rabin & Karp. A brute force approach
- * is used to find longer strings when a small match has been found.
- * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
- * (by Leonid Broukhis).
- * A previous version of this file used a more sophisticated algorithm
- * (by Fiala and Greene) which is guaranteed to run in linear amortized
- * time, but has a larger average cost, uses more memory and is patented.
- * However the F&G algorithm may be faster for some highly redundant
- * files if the parameter max_chain_length (described below) is too large.
- *
- * ACKNOWLEDGEMENTS
- *
- * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
- * I found it in 'freeze' written by Leonid Broukhis.
- * Thanks to many people for bug reports and testing.
- *
- * REFERENCES
- *
- * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
- * Available in http://www.ietf.org/rfc/rfc1951.txt
- *
- * A description of the Rabin and Karp algorithm is given in the book
- * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
- *
- * Fiala,E.R., and Greene,D.H.
- * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
- *
- */
-
-#include "deflate.h"
-
-static const char deflate_copyright[] =
- " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly ";
-/*
- If you use the zlib library in a product, an acknowledgment is welcome
- in the documentation of your product. If for some reason you cannot
- include such an acknowledgment, I would appreciate that you keep this
- copyright string in the executable of your product.
- */
-
-/* ===========================================================================
- * Function prototypes.
- */
-typedef enum {
- need_more, /* block not completed, need more input or more output */
- block_done, /* block flush performed */
- finish_started, /* finish started, need only more output at next deflate */
- finish_done /* finish done, accept no more input or output */
-} block_state;
-
-typedef block_state (*compress_func) OF((deflate_state *s, int flush));
-/* Compression function. Returns the block state after the call. */
-
-local void fill_window OF((deflate_state *s));
-local block_state deflate_stored OF((deflate_state *s, int flush));
-local block_state deflate_fast OF((deflate_state *s, int flush));
-#ifndef FASTEST
-local block_state deflate_slow OF((deflate_state *s, int flush));
-#endif
-local void lm_init OF((deflate_state *s));
-local void putShortMSB OF((deflate_state *s, uInt b));
-local void flush_pending OF((z_streamp strm));
-local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
-#ifndef FASTEST
-#ifdef ASMV
- void match_init OF((void)); /* asm code initialization */
- uInt longest_match OF((deflate_state *s, IPos cur_match));
-#else
-local uInt longest_match OF((deflate_state *s, IPos cur_match));
-#endif
-#endif
-local uInt longest_match_fast OF((deflate_state *s, IPos cur_match));
-
-#ifdef DEBUG
-local void check_match OF((deflate_state *s, IPos start, IPos match,
- int length));
-#endif
-
-/* ===========================================================================
- * Local data
- */
-
-#define NIL 0
-/* Tail of hash chains */
-
-#ifndef TOO_FAR
-# define TOO_FAR 4096
-#endif
-/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
-/* Values for max_lazy_match, good_match and max_chain_length, depending on
- * the desired pack level (0..9). The values given below have been tuned to
- * exclude worst case performance for pathological files. Better values may be
- * found for specific files.
- */
-typedef struct config_s {
- ush good_length; /* reduce lazy search above this match length */
- ush max_lazy; /* do not perform lazy search above this match length */
- ush nice_length; /* quit search above this match length */
- ush max_chain;
- compress_func func;
-} config;
-
-#ifdef FASTEST
-local const config configuration_table[2] = {
-/* good lazy nice chain */
-/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
-/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */
-#else
-local const config configuration_table[10] = {
-/* good lazy nice chain */
-/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
-/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */
-/* 2 */ {4, 5, 16, 8, deflate_fast},
-/* 3 */ {4, 6, 32, 32, deflate_fast},
-
-/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
-/* 5 */ {8, 16, 32, 32, deflate_slow},
-/* 6 */ {8, 16, 128, 128, deflate_slow},
-/* 7 */ {8, 32, 128, 256, deflate_slow},
-/* 8 */ {32, 128, 258, 1024, deflate_slow},
-/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
-#endif
-
-/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
- * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
- * meaning.
- */
-
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-#ifndef NO_DUMMY_DECL
-struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
-#endif
-
-/* ===========================================================================
- * Update a hash value with the given input byte
- * IN assertion: all calls to to UPDATE_HASH are made with consecutive
- * input characters, so that a running hash key can be computed from the
- * previous key instead of complete recalculation each time.
- */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
-
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * If this file is compiled with -DFASTEST, the compression level is forced
- * to 1, and no hash chains are maintained.
- * IN assertion: all calls to to INSERT_STRING are made with consecutive
- * input characters and the first MIN_MATCH bytes of str are valid
- * (except for the last MIN_MATCH-1 bytes of the input file).
- */
-#ifdef FASTEST
-#define INSERT_STRING(s, str, match_head) \
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
- match_head = s->head[s->ins_h], \
- s->head[s->ins_h] = (Pos)(str))
-#else
-#define INSERT_STRING(s, str, match_head) \
- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
- match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
- s->head[s->ins_h] = (Pos)(str))
-#endif
-
-/* ===========================================================================
- * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
- * prev[] will be initialized on the fly.
- */
-#define CLEAR_HASH(s) \
- s->head[s->hash_size-1] = NIL; \
- (void) zmemzero((Bytef *)s->head, \
- (unsigned)(s->hash_size-1)*sizeof(*s->head));
-
-/* ========================================================================= */
-int ZEXPORT deflateInit_(strm, level, version, stream_size)
- z_streamp strm;
- int level;
- const char *version;
- int stream_size;
-{
- return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
- Z_DEFAULT_STRATEGY, version, stream_size);
- /* To do: ignore strm->next_in if we use it as window */
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
- version, stream_size)
- z_streamp strm;
- int level;
- int method;
- int windowBits;
- int memLevel;
- int strategy;
- const char *version;
- int stream_size;
-{
- deflate_state *s;
- int wrap = 1;
- static const char my_version[] = ZLIB_VERSION;
-
- ushf *overlay;
- /* We overlay pending_buf and d_buf+l_buf. This works since the average
- * output size for (length,distance) codes is <= 24 bits.
- */
-
- if (version == Z_NULL || version[0] != my_version[0] ||
- stream_size != sizeof(z_stream)) {
- return Z_VERSION_ERROR;
- }
- if (strm == Z_NULL) return Z_STREAM_ERROR;
-
- strm->msg = Z_NULL;
- if (strm->zalloc == (alloc_func)0) {
- strm->zalloc = zcalloc;
- strm->opaque = (voidpf)0;
- }
- if (strm->zfree == (free_func)0) strm->zfree = zcfree;
-
-#ifdef FASTEST
- if (level != 0) level = 1;
-#else
- if (level == Z_DEFAULT_COMPRESSION) level = 6;
-#endif
-
- if (windowBits < 0) { /* suppress zlib wrapper */
- wrap = 0;
- windowBits = -windowBits;
- }
-#ifdef GZIP
- else if (windowBits > 15) {
- wrap = 2; /* write gzip wrapper instead */
- windowBits -= 16;
- }
-#endif
- if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
- windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
- strategy < 0 || strategy > Z_FIXED) {
- return Z_STREAM_ERROR;
- }
- if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */
- s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
- if (s == Z_NULL) return Z_MEM_ERROR;
- strm->state = (struct internal_state FAR *)s;
- s->strm = strm;
-
- s->wrap = wrap;
- s->gzhead = Z_NULL;
- s->w_bits = windowBits;
- s->w_size = 1 << s->w_bits;
- s->w_mask = s->w_size - 1;
-
- s->hash_bits = memLevel + 7;
- s->hash_size = 1 << s->hash_bits;
- s->hash_mask = s->hash_size - 1;
- s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
-
- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
- s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
- s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
-
- s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
-
- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
- s->pending_buf = (uchf *) overlay;
- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
-
- if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
- s->pending_buf == Z_NULL) {
- s->status = FINISH_STATE;
- strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
- (void) deflateEnd (strm);
- return Z_MEM_ERROR;
- }
- s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
-
- s->level = level;
- s->strategy = strategy;
- s->method = (Byte)method;
-
- return deflateReset(strm);
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
- z_streamp strm;
- const Bytef *dictionary;
- uInt dictLength;
-{
- deflate_state *s;
- uInt length = dictLength;
- uInt n;
- IPos hash_head = 0;
-
- if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
- strm->state->wrap == 2 ||
- (strm->state->wrap == 1 && strm->state->status != INIT_STATE))
- return Z_STREAM_ERROR;
-
- s = strm->state;
- if (s->wrap)
- strm->adler = adler32(strm->adler, dictionary, dictLength);
-
- if (length < MIN_MATCH) return Z_OK;
- if (length > MAX_DIST(s)) {
- length = MAX_DIST(s);
- dictionary += dictLength - length; /* use the tail of the dictionary */
- }
- (void) zmemcpy(s->window, dictionary, length);
- s->strstart = length;
- s->block_start = (long)length;
-
- /* Insert all strings in the hash table (except for the last two bytes).
- * s->lookahead stays null, so s->ins_h will be recomputed at the next
- * call of fill_window.
- */
- s->ins_h = s->window[0];
- UPDATE_HASH(s, s->ins_h, s->window[1]);
- for (n = 0; n <= length - MIN_MATCH; n++) {
- INSERT_STRING(s, n, hash_head);
- }
- if (hash_head) hash_head = 0; /* to make compiler happy */
- return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateReset (strm)
- z_streamp strm;
-{
- deflate_state *s;
-
- if (strm == Z_NULL || strm->state == Z_NULL ||
- strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) {
- return Z_STREAM_ERROR;
- }
-
- strm->total_in = strm->total_out = 0;
- strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
- strm->data_type = Z_UNKNOWN;
-
- s = (deflate_state *)strm->state;
- s->pending = 0;
- s->pending_out = s->pending_buf;
-
- if (s->wrap < 0) {
- s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
- }
- s->status = s->wrap ? INIT_STATE : BUSY_STATE;
- strm->adler =
-#ifdef GZIP
- s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
-#endif
- adler32(0L, Z_NULL, 0);
- s->last_flush = Z_NO_FLUSH;
-
- _tr_init(s);
- lm_init(s);
-
- return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateSetHeader (strm, head)
- z_streamp strm;
- gz_headerp head;
-{
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- if (strm->state->wrap != 2) return Z_STREAM_ERROR;
- strm->state->gzhead = head;
- return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflatePrime (strm, bits, value)
- z_streamp strm;
- int bits;
- int value;
-{
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- strm->state->bi_valid = bits;
- strm->state->bi_buf = (ush)(value & ((1 << bits) - 1));
- return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateParams(strm, level, strategy)
- z_streamp strm;
- int level;
- int strategy;
-{
- deflate_state *s;
- compress_func func;
- int err = Z_OK;
-
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- s = strm->state;
-
-#ifdef FASTEST
- if (level != 0) level = 1;
-#else
- if (level == Z_DEFAULT_COMPRESSION) level = 6;
-#endif
- if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) {
- return Z_STREAM_ERROR;
- }
- func = configuration_table[s->level].func;
-
- if (func != configuration_table[level].func && strm->total_in != 0) {
- /* Flush the last buffer: */
- err = deflate(strm, Z_PARTIAL_FLUSH);
- }
- if (s->level != level) {
- s->level = level;
- s->max_lazy_match = configuration_table[level].max_lazy;
- s->good_match = configuration_table[level].good_length;
- s->nice_match = configuration_table[level].nice_length;
- s->max_chain_length = configuration_table[level].max_chain;
- }
- s->strategy = strategy;
- return err;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
- z_streamp strm;
- int good_length;
- int max_lazy;
- int nice_length;
- int max_chain;
-{
- deflate_state *s;
-
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- s = strm->state;
- s->good_match = good_length;
- s->max_lazy_match = max_lazy;
- s->nice_match = nice_length;
- s->max_chain_length = max_chain;
- return Z_OK;
-}
-
-/* =========================================================================
- * For the default windowBits of 15 and memLevel of 8, this function returns
- * a close to exact, as well as small, upper bound on the compressed size.
- * They are coded as constants here for a reason--if the #define's are
- * changed, then this function needs to be changed as well. The return
- * value for 15 and 8 only works for those exact settings.
- *
- * For any setting other than those defaults for windowBits and memLevel,
- * the value returned is a conservative worst case for the maximum expansion
- * resulting from using fixed blocks instead of stored blocks, which deflate
- * can emit on compressed data for some combinations of the parameters.
- *
- * This function could be more sophisticated to provide closer upper bounds
- * for every combination of windowBits and memLevel, as well as wrap.
- * But even the conservative upper bound of about 14% expansion does not
- * seem onerous for output buffer allocation.
- */
-uLong ZEXPORT deflateBound(strm, sourceLen)
- z_streamp strm;
- uLong sourceLen;
-{
- deflate_state *s;
- uLong destLen;
-
- /* conservative upper bound */
- destLen = sourceLen +
- ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11;
-
- /* if can't get parameters, return conservative bound */
- if (strm == Z_NULL || strm->state == Z_NULL)
- return destLen;
-
- /* if not default parameters, return conservative bound */
- s = strm->state;
- if (s->w_bits != 15 || s->hash_bits != 8 + 7)
- return destLen;
-
- /* default settings: return tight bound for that case */
- return compressBound(sourceLen);
-}
-
-/* =========================================================================
- * Put a short in the pending buffer. The 16-bit value is put in MSB order.
- * IN assertion: the stream state is correct and there is enough room in
- * pending_buf.
- */
-local void putShortMSB (s, b)
- deflate_state *s;
- uInt b;
-{
- put_byte(s, (Byte)(b >> 8));
- put_byte(s, (Byte)(b & 0xff));
-}
-
-/* =========================================================================
- * Flush as much pending output as possible. All deflate() output goes
- * through this function so some applications may wish to modify it
- * to avoid allocating a large strm->next_out buffer and copying into it.
- * (See also read_buf()).
- */
-local void flush_pending(strm)
- z_streamp strm;
-{
- unsigned len = strm->state->pending;
-
- if (len > strm->avail_out) len = strm->avail_out;
- if (len == 0) return;
-
- zmemcpy(strm->next_out, strm->state->pending_out, len);
- strm->next_out += len;
- strm->state->pending_out += len;
- strm->total_out += len;
- strm->avail_out -= len;
- strm->state->pending -= len;
- if (strm->state->pending == 0) {
- strm->state->pending_out = strm->state->pending_buf;
- }
-}
-
-/* ========================================================================= */
-int ZEXPORT deflate (strm, flush)
- z_streamp strm;
- int flush;
-{
- int old_flush; /* value of flush param for previous deflate call */
- deflate_state *s;
-
- if (strm == Z_NULL || strm->state == Z_NULL ||
- flush > Z_FINISH || flush < 0) {
- return Z_STREAM_ERROR;
- }
- s = strm->state;
-
- if (strm->next_out == Z_NULL ||
- (strm->next_in == Z_NULL && strm->avail_in != 0) ||
- (s->status == FINISH_STATE && flush != Z_FINISH)) {
- ERR_RETURN(strm, Z_STREAM_ERROR);
- }
- if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
-
- s->strm = strm; /* just in case */
- old_flush = s->last_flush;
- s->last_flush = flush;
-
- /* Write the header */
- if (s->status == INIT_STATE) {
-#ifdef GZIP
- if (s->wrap == 2) {
- strm->adler = crc32(0L, Z_NULL, 0);
- put_byte(s, 31);
- put_byte(s, 139);
- put_byte(s, 8);
- if (s->gzhead == NULL) {
- put_byte(s, 0);
- put_byte(s, 0);
- put_byte(s, 0);
- put_byte(s, 0);
- put_byte(s, 0);
- put_byte(s, s->level == 9 ? 2 :
- (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
- 4 : 0));
- put_byte(s, OS_CODE);
- s->status = BUSY_STATE;
- }
- else {
- put_byte(s, (s->gzhead->text ? 1 : 0) +
- (s->gzhead->hcrc ? 2 : 0) +
- (s->gzhead->extra == Z_NULL ? 0 : 4) +
- (s->gzhead->name == Z_NULL ? 0 : 8) +
- (s->gzhead->comment == Z_NULL ? 0 : 16)
- );
- put_byte(s, (Byte)(s->gzhead->time & 0xff));
- put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
- put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
- put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
- put_byte(s, s->level == 9 ? 2 :
- (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
- 4 : 0));
- put_byte(s, s->gzhead->os & 0xff);
- if (s->gzhead->extra != NULL) {
- put_byte(s, s->gzhead->extra_len & 0xff);
- put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
- }
- if (s->gzhead->hcrc)
- strm->adler = crc32(strm->adler, s->pending_buf,
- s->pending);
- s->gzindex = 0;
- s->status = EXTRA_STATE;
- }
- }
- else
-#endif
- {
- uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
- uInt level_flags;
-
- if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
- level_flags = 0;
- else if (s->level < 6)
- level_flags = 1;
- else if (s->level == 6)
- level_flags = 2;
- else
- level_flags = 3;
- header |= (level_flags << 6);
- if (s->strstart != 0) header |= PRESET_DICT;
- header += 31 - (header % 31);
-
- s->status = BUSY_STATE;
- putShortMSB(s, header);
-
- /* Save the adler32 of the preset dictionary: */
- if (s->strstart != 0) {
- putShortMSB(s, (uInt)(strm->adler >> 16));
- putShortMSB(s, (uInt)(strm->adler & 0xffff));
- }
- strm->adler = adler32(0L, Z_NULL, 0);
- }
- }
-#ifdef GZIP
- if (s->status == EXTRA_STATE) {
- if (s->gzhead->extra != NULL) {
- uInt beg = s->pending; /* start of bytes to update crc */
-
- while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
- if (s->pending == s->pending_buf_size) {
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- flush_pending(strm);
- beg = s->pending;
- if (s->pending == s->pending_buf_size)
- break;
- }
- put_byte(s, s->gzhead->extra[s->gzindex]);
- s->gzindex++;
- }
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- if (s->gzindex == s->gzhead->extra_len) {
- s->gzindex = 0;
- s->status = NAME_STATE;
- }
- }
- else
- s->status = NAME_STATE;
- }
- if (s->status == NAME_STATE) {
- if (s->gzhead->name != NULL) {
- uInt beg = s->pending; /* start of bytes to update crc */
- int val;
-
- do {
- if (s->pending == s->pending_buf_size) {
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- flush_pending(strm);
- beg = s->pending;
- if (s->pending == s->pending_buf_size) {
- val = 1;
- break;
- }
- }
- val = s->gzhead->name[s->gzindex++];
- put_byte(s, val);
- } while (val != 0);
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- if (val == 0) {
- s->gzindex = 0;
- s->status = COMMENT_STATE;
- }
- }
- else
- s->status = COMMENT_STATE;
- }
- if (s->status == COMMENT_STATE) {
- if (s->gzhead->comment != NULL) {
- uInt beg = s->pending; /* start of bytes to update crc */
- int val;
-
- do {
- if (s->pending == s->pending_buf_size) {
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- flush_pending(strm);
- beg = s->pending;
- if (s->pending == s->pending_buf_size) {
- val = 1;
- break;
- }
- }
- val = s->gzhead->comment[s->gzindex++];
- put_byte(s, val);
- } while (val != 0);
- if (s->gzhead->hcrc && s->pending > beg)
- strm->adler = crc32(strm->adler, s->pending_buf + beg,
- s->pending - beg);
- if (val == 0)
- s->status = HCRC_STATE;
- }
- else
- s->status = HCRC_STATE;
- }
- if (s->status == HCRC_STATE) {
- if (s->gzhead->hcrc) {
- if (s->pending + 2 > s->pending_buf_size)
- flush_pending(strm);
- if (s->pending + 2 <= s->pending_buf_size) {
- put_byte(s, (Byte)(strm->adler & 0xff));
- put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
- strm->adler = crc32(0L, Z_NULL, 0);
- s->status = BUSY_STATE;
- }
- }
- else
- s->status = BUSY_STATE;
- }
-#endif
-
- /* Flush as much pending output as possible */
- if (s->pending != 0) {
- flush_pending(strm);
- if (strm->avail_out == 0) {
- /* Since avail_out is 0, deflate will be called again with
- * more output space, but possibly with both pending and
- * avail_in equal to zero. There won't be anything to do,
- * but this is not an error situation so make sure we
- * return OK instead of BUF_ERROR at next call of deflate:
- */
- s->last_flush = -1;
- return Z_OK;
- }
-
- /* Make sure there is something to do and avoid duplicate consecutive
- * flushes. For repeated and useless calls with Z_FINISH, we keep
- * returning Z_STREAM_END instead of Z_BUF_ERROR.
- */
- } else if (strm->avail_in == 0 && flush <= old_flush &&
- flush != Z_FINISH) {
- ERR_RETURN(strm, Z_BUF_ERROR);
- }
-
- /* User must not provide more input after the first FINISH: */
- if (s->status == FINISH_STATE && strm->avail_in != 0) {
- ERR_RETURN(strm, Z_BUF_ERROR);
- }
-
- /* Start a new block or continue the current one.
- */
- if (strm->avail_in != 0 || s->lookahead != 0 ||
- (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
- block_state bstate;
-
- bstate = (*(configuration_table[s->level].func))(s, flush);
-
- if (bstate == finish_started || bstate == finish_done) {
- s->status = FINISH_STATE;
- }
- if (bstate == need_more || bstate == finish_started) {
- if (strm->avail_out == 0) {
- s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
- }
- return Z_OK;
- /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
- * of deflate should use the same flush parameter to make sure
- * that the flush is complete. So we don't have to output an
- * empty block here, this will be done at next call. This also
- * ensures that for a very small output buffer, we emit at most
- * one empty block.
- */
- }
- if (bstate == block_done) {
- if (flush == Z_PARTIAL_FLUSH) {
- _tr_align(s);
- } else { /* FULL_FLUSH or SYNC_FLUSH */
- _tr_stored_block(s, (char*)0, 0L, 0);
- /* For a full flush, this empty block will be recognized
- * as a special marker by inflate_sync().
- */
- if (flush == Z_FULL_FLUSH) {
- CLEAR_HASH(s); /* forget history */
- }
- }
- flush_pending(strm);
- if (strm->avail_out == 0) {
- s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
- return Z_OK;
- }
- }
- }
- Assert(strm->avail_out > 0, "bug2");
-
- if (flush != Z_FINISH) return Z_OK;
- if (s->wrap <= 0) return Z_STREAM_END;
-
- /* Write the trailer */
-#ifdef GZIP
- if (s->wrap == 2) {
- put_byte(s, (Byte)(strm->adler & 0xff));
- put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
- put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
- put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
- put_byte(s, (Byte)(strm->total_in & 0xff));
- put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
- put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
- put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
- }
- else
-#endif
- {
- putShortMSB(s, (uInt)(strm->adler >> 16));
- putShortMSB(s, (uInt)(strm->adler & 0xffff));
- }
- flush_pending(strm);
- /* If avail_out is zero, the application will call deflate again
- * to flush the rest.
- */
- if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
- return s->pending != 0 ? Z_OK : Z_STREAM_END;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateEnd (strm)
- z_streamp strm;
-{
- int status;
-
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-
- status = strm->state->status;
- if (status != INIT_STATE &&
- status != EXTRA_STATE &&
- status != NAME_STATE &&
- status != COMMENT_STATE &&
- status != HCRC_STATE &&
- status != BUSY_STATE &&
- status != FINISH_STATE) {
- return Z_STREAM_ERROR;
- }
-
- /* Deallocate in reverse order of allocations: */
- TRY_FREE(strm, strm->state->pending_buf);
- TRY_FREE(strm, strm->state->head);
- TRY_FREE(strm, strm->state->prev);
- TRY_FREE(strm, strm->state->window);
-
- ZFREE(strm, strm->state);
- strm->state = Z_NULL;
-
- return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
-}
-
-/* =========================================================================
- * Copy the source state to the destination state.
- * To simplify the source, this is not supported for 16-bit MSDOS (which
- * doesn't have enough memory anyway to duplicate compression states).
- */
-int ZEXPORT deflateCopy (dest, source)
- z_streamp dest;
- z_streamp source;
-{
-#ifdef MAXSEG_64K
- return Z_STREAM_ERROR;
-#else
- deflate_state *ds;
- deflate_state *ss;
- ushf *overlay;
-
-
- if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
- return Z_STREAM_ERROR;
- }
-
- ss = source->state;
-
- zmemcpy(dest, source, sizeof(z_stream));
-
- ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
- if (ds == Z_NULL) return Z_MEM_ERROR;
- dest->state = (struct internal_state FAR *) ds;
- zmemcpy(ds, ss, sizeof(deflate_state));
- ds->strm = dest;
-
- ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
- ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
- ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
- ds->pending_buf = (uchf *) overlay;
-
- if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
- ds->pending_buf == Z_NULL) {
- deflateEnd (dest);
- return Z_MEM_ERROR;
- }
- /* following zmemcpy do not work for 16-bit MSDOS */
- zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
- zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
- zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
- zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
- ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
- ds->l_desc.dyn_tree = ds->dyn_ltree;
- ds->d_desc.dyn_tree = ds->dyn_dtree;
- ds->bl_desc.dyn_tree = ds->bl_tree;
-
- return Z_OK;
-#endif /* MAXSEG_64K */
-}
-
-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read. All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-local int read_buf(strm, buf, size)
- z_streamp strm;
- Bytef *buf;
- unsigned size;
-{
- unsigned len = strm->avail_in;
-
- if (len > size) len = size;
- if (len == 0) return 0;
-
- strm->avail_in -= len;
-
- if (strm->state->wrap == 1) {
- strm->adler = adler32(strm->adler, strm->next_in, len);
- }
-#ifdef GZIP
- else if (strm->state->wrap == 2) {
- strm->adler = crc32(strm->adler, strm->next_in, len);
- }
-#endif
- zmemcpy(buf, strm->next_in, len);
- strm->next_in += len;
- strm->total_in += len;
-
- return (int)len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-local void lm_init (s)
- deflate_state *s;
-{
- s->window_size = (ulg)2L*s->w_size;
-
- CLEAR_HASH(s);
-
- /* Set the default configuration parameters:
- */
- s->max_lazy_match = configuration_table[s->level].max_lazy;
- s->good_match = configuration_table[s->level].good_length;
- s->nice_match = configuration_table[s->level].nice_length;
- s->max_chain_length = configuration_table[s->level].max_chain;
-
- s->strstart = 0;
- s->block_start = 0L;
- s->lookahead = 0;
- s->match_length = s->prev_length = MIN_MATCH-1;
- s->match_available = 0;
- s->ins_h = 0;
-#ifndef FASTEST
-#ifdef ASMV
- match_init(); /* initialize the asm code */
-#endif
-#endif
-}
-
-#ifndef FASTEST
-/* ===========================================================================
- * Set match_start to the longest match starting at the given string and
- * return its length. Matches shorter or equal to prev_length are discarded,
- * in which case the result is equal to prev_length and match_start is
- * garbage.
- * IN assertions: cur_match is the head of the hash chain for the current
- * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
- * OUT assertion: the match length is not greater than s->lookahead.
- */
-#ifndef ASMV
-/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
- * match.S. The code will be functionally equivalent.
- */
-local uInt longest_match(s, cur_match)
- deflate_state *s;
- IPos cur_match; /* current match */
-{
- unsigned chain_length = s->max_chain_length;/* max hash chain length */
- register Bytef *scan = s->window + s->strstart; /* current string */
- register Bytef *match; /* matched string */
- register int len; /* length of current match */
- int best_len = s->prev_length; /* best match length so far */
- int nice_match = s->nice_match; /* stop if match long enough */
- IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
- s->strstart - (IPos)MAX_DIST(s) : NIL;
- /* Stop when cur_match becomes <= limit. To simplify the code,
- * we prevent matches with the string of window index 0.
- */
- Posf *prev = s->prev;
- uInt wmask = s->w_mask;
-
-#ifdef UNALIGNED_OK
- /* Compare two bytes at a time. Note: this is not always beneficial.
- * Try with and without -DUNALIGNED_OK to check.
- */
- register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
- register ush scan_start = *(ushf*)scan;
- register ush scan_end = *(ushf*)(scan+best_len-1);
-#else
- register Bytef *strend = s->window + s->strstart + MAX_MATCH;
- register Byte scan_end1 = scan[best_len-1];
- register Byte scan_end = scan[best_len];
-#endif
-
- /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
- * It is easy to get rid of this optimization if necessary.
- */
- Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-
- /* Do not waste too much time if we already have a good match: */
- if (s->prev_length >= s->good_match) {
- chain_length >>= 2;
- }
- /* Do not look for matches beyond the end of the input. This is necessary
- * to make deflate deterministic.
- */
- if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
-
- Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-
- do {
- Assert(cur_match < s->strstart, "no future");
- match = s->window + cur_match;
-
- /* Skip to next match if the match length cannot increase
- * or if the match length is less than 2. Note that the checks below
- * for insufficient lookahead only occur occasionally for performance
- * reasons. Therefore uninitialized memory will be accessed, and
- * conditional jumps will be made that depend on those values.
- * However the length of the match is limited to the lookahead, so
- * the output of deflate is not affected by the uninitialized values.
- */
-#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
- /* This code assumes sizeof(unsigned short) == 2. Do not use
- * UNALIGNED_OK if your compiler uses a different size.
- */
- if (*(ushf*)(match+best_len-1) != scan_end ||
- *(ushf*)match != scan_start) continue;
-
- /* It is not necessary to compare scan[2] and match[2] since they are
- * always equal when the other bytes match, given that the hash keys
- * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
- * strstart+3, +5, ... up to strstart+257. We check for insufficient
- * lookahead only every 4th comparison; the 128th check will be made
- * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
- * necessary to put more guard bytes at the end of the window, or
- * to check more often for insufficient lookahead.
- */
- Assert(scan[2] == match[2], "scan[2]?");
- scan++, match++;
- do {
- } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
- *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
- *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
- *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
- scan < strend);
- /* The funny "do {}" generates better code on most compilers */
-
- /* Here, scan <= window+strstart+257 */
- Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
- if (*scan == *match) scan++;
-
- len = (MAX_MATCH - 1) - (int)(strend-scan);
- scan = strend - (MAX_MATCH-1);
-
-#else /* UNALIGNED_OK */
-
- if (match[best_len] != scan_end ||
- match[best_len-1] != scan_end1 ||
- *match != *scan ||
- *++match != scan[1]) continue;
-
- /* The check at best_len-1 can be removed because it will be made
- * again later. (This heuristic is not always a win.)
- * It is not necessary to compare scan[2] and match[2] since they
- * are always equal when the other bytes match, given that
- * the hash keys are equal and that HASH_BITS >= 8.
- */
- scan += 2, match++;
- Assert(*scan == *match, "match[2]?");
-
- /* We check for insufficient lookahead only every 8th comparison;
- * the 256th check will be made at strstart+258.
- */
- do {
- } while (*++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- scan < strend);
-
- Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-
- len = MAX_MATCH - (int)(strend - scan);
- scan = strend - MAX_MATCH;
-
-#endif /* UNALIGNED_OK */
-
- if (len > best_len) {
- s->match_start = cur_match;
- best_len = len;
- if (len >= nice_match) break;
-#ifdef UNALIGNED_OK
- scan_end = *(ushf*)(scan+best_len-1);
-#else
- scan_end1 = scan[best_len-1];
- scan_end = scan[best_len];
-#endif
- }
- } while ((cur_match = prev[cur_match & wmask]) > limit
- && --chain_length != 0);
-
- if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
- return s->lookahead;
-}
-#endif /* ASMV */
-#endif /* FASTEST */
-
-/* ---------------------------------------------------------------------------
- * Optimized version for level == 1 or strategy == Z_RLE only
- */
-local uInt longest_match_fast(s, cur_match)
- deflate_state *s;
- IPos cur_match; /* current match */
-{
- register Bytef *scan = s->window + s->strstart; /* current string */
- register Bytef *match; /* matched string */
- register int len; /* length of current match */
- register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-
- /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
- * It is easy to get rid of this optimization if necessary.
- */
- Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-
- Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-
- Assert(cur_match < s->strstart, "no future");
-
- match = s->window + cur_match;
-
- /* Return failure if the match length is less than 2:
- */
- if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
-
- /* The check at best_len-1 can be removed because it will be made
- * again later. (This heuristic is not always a win.)
- * It is not necessary to compare scan[2] and match[2] since they
- * are always equal when the other bytes match, given that
- * the hash keys are equal and that HASH_BITS >= 8.
- */
- scan += 2, match += 2;
- Assert(*scan == *match, "match[2]?");
-
- /* We check for insufficient lookahead only every 8th comparison;
- * the 256th check will be made at strstart+258.
- */
- do {
- } while (*++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- *++scan == *++match && *++scan == *++match &&
- scan < strend);
-
- Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-
- len = MAX_MATCH - (int)(strend - scan);
-
- if (len < MIN_MATCH) return MIN_MATCH - 1;
-
- s->match_start = cur_match;
- return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
-}
-
-#ifdef DEBUG
-/* ===========================================================================
- * Check that the match at match_start is indeed a match.
- */
-local void check_match(s, start, match, length)
- deflate_state *s;
- IPos start, match;
- int length;
-{
- /* check that the match is indeed a match */
- if (zmemcmp(s->window + match,
- s->window + start, length) != EQUAL) {
- fprintf(stderr, " start %u, match %u, length %d\n",
- start, match, length);
- do {
- fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
- } while (--length != 0);
- z_error("invalid match");
- }
- if (z_verbose > 1) {
- fprintf(stderr,"\\[%d,%d]", start-match, length);
- do { putc(s->window[start++], stderr); } while (--length != 0);
- }
-}
-#else
-# define check_match(s, start, match, length)
-#endif /* DEBUG */
-
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- * At least one byte has been read, or avail_in == 0; reads are
- * performed for at least two bytes (required for the zip translate_eol
- * option -- not supported here).
- */
-local void fill_window(s)
- deflate_state *s;
-{
- register unsigned n, m;
- register Posf *p;
- unsigned more; /* Amount of free space at the end of the window. */
- uInt wsize = s->w_size;
-
- do {
- more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
- /* Deal with !@#$% 64K limit: */
- if (sizeof(int) <= 2) {
- if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
- more = wsize;
-
- } else if (more == (unsigned)(-1)) {
- /* Very unlikely, but possible on 16 bit machine if
- * strstart == 0 && lookahead == 1 (input done a byte at time)
- */
- more--;
- }
- }
-
- /* If the window is almost full and there is insufficient lookahead,
- * move the upper half to the lower one to make room in the upper half.
- */
- if (s->strstart >= wsize+MAX_DIST(s)) {
-
- zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
- s->match_start -= wsize;
- s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
- s->block_start -= (long) wsize;
-
- /* Slide the hash table (could be avoided with 32 bit values
- at the expense of memory usage). We slide even when level == 0
- to keep the hash table consistent if we switch back to level > 0
- later. (Using level 0 permanently is not an optimal usage of
- zlib, so we don't care about this pathological case.)
- */
- /* %%% avoid this when Z_RLE */
- n = s->hash_size;
- p = &s->head[n];
- do {
- m = *--p;
- *p = (Pos)(m >= wsize ? m-wsize : NIL);
- } while (--n);
-
- n = wsize;
-#ifndef FASTEST
- p = &s->prev[n];
- do {
- m = *--p;
- *p = (Pos)(m >= wsize ? m-wsize : NIL);
- /* If n is not on any hash chain, prev[n] is garbage but
- * its value will never be used.
- */
- } while (--n);
-#endif
- more += wsize;
- }
- if (s->strm->avail_in == 0) return;
-
- /* If there was no sliding:
- * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
- * more == window_size - lookahead - strstart
- * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
- * => more >= window_size - 2*WSIZE + 2
- * In the BIG_MEM or MMAP case (not yet supported),
- * window_size == input_size + MIN_LOOKAHEAD &&
- * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
- * Otherwise, window_size == 2*WSIZE so more >= 2.
- * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
- */
- Assert(more >= 2, "more < 2");
-
- n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
- s->lookahead += n;
-
- /* Initialize the hash value now that we have some input: */
- if (s->lookahead >= MIN_MATCH) {
- s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
- Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
- }
- /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
- * but this is not important since only literal bytes will be emitted.
- */
-
- } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-}
-
-/* ===========================================================================
- * Flush the current block, with given end-of-file flag.
- * IN assertion: strstart is set to the end of the current match.
- */
-#define FLUSH_BLOCK_ONLY(s, eof) { \
- _tr_flush_block(s, (s->block_start >= 0L ? \
- (charf *)&s->window[(unsigned)s->block_start] : \
- (charf *)Z_NULL), \
- (ulg)((long)s->strstart - s->block_start), \
- (eof)); \
- s->block_start = s->strstart; \
- flush_pending(s->strm); \
- Tracev((stderr,"[FLUSH]")); \
-}
-
-/* Same but force premature exit if necessary. */
-#define FLUSH_BLOCK(s, eof) { \
- FLUSH_BLOCK_ONLY(s, eof); \
- if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
-}
-
-/* ===========================================================================
- * Copy without compression as much as possible from the input stream, return
- * the current block state.
- * This function does not insert new strings in the dictionary since
- * uncompressible data is probably not useful. This function is used
- * only for the level=0 compression option.
- * NOTE: this function should be optimized to avoid extra copying from
- * window to pending_buf.
- */
-local block_state deflate_stored(s, flush)
- deflate_state *s;
- int flush;
-{
- /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
- * to pending_buf_size, and each stored block has a 5 byte header:
- */
- ulg max_block_size = 0xffff;
- ulg max_start;
-
- if (max_block_size > s->pending_buf_size - 5) {
- max_block_size = s->pending_buf_size - 5;
- }
-
- /* Copy as much as possible from input to output: */
- for (;;) {
- /* Fill the window as much as possible: */
- if (s->lookahead <= 1) {
-
- Assert(s->strstart < s->w_size+MAX_DIST(s) ||
- s->block_start >= (long)s->w_size, "slide too late");
-
- fill_window(s);
- if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
-
- if (s->lookahead == 0) break; /* flush the current block */
- }
- Assert(s->block_start >= 0L, "block gone");
-
- s->strstart += s->lookahead;
- s->lookahead = 0;
-
- /* Emit a stored block if pending_buf will be full: */
- max_start = s->block_start + max_block_size;
- if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
- /* strstart == 0 is possible when wraparound on 16-bit machine */
- s->lookahead = (uInt)(s->strstart - max_start);
- s->strstart = (uInt)max_start;
- FLUSH_BLOCK(s, 0);
- }
- /* Flush if we may have to slide, otherwise block_start may become
- * negative and the data will be gone:
- */
- if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
- FLUSH_BLOCK(s, 0);
- }
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
-
-/* ===========================================================================
- * Compress as much as possible from the input stream, return the current
- * block state.
- * This function does not perform lazy evaluation of matches and inserts
- * new strings in the dictionary only for unmatched strings or for short
- * matches. It is used only for the fast compression options.
- */
-local block_state deflate_fast(s, flush)
- deflate_state *s;
- int flush;
-{
- IPos hash_head = NIL; /* head of the hash chain */
- int bflush; /* set if current block must be flushed */
-
- for (;;) {
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the next match, plus MIN_MATCH bytes to insert the
- * string following the next match.
- */
- if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
- if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
- return need_more;
- }
- if (s->lookahead == 0) break; /* flush the current block */
- }
-
- /* Insert the string window[strstart .. strstart+2] in the
- * dictionary, and set hash_head to the head of the hash chain:
- */
- if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
-
- /* Find the longest match, discarding those <= prev_length.
- * At this point we have always match_length < MIN_MATCH
- */
- if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
- /* To simplify the code, we prevent matches with the string
- * of window index 0 (in particular we have to avoid a match
- * of the string with itself at the start of the input file).
- */
-#ifdef FASTEST
- if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) ||
- (s->strategy == Z_RLE && s->strstart - hash_head == 1)) {
- s->match_length = longest_match_fast (s, hash_head);
- }
-#else
- if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
- s->match_length = longest_match (s, hash_head);
- } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
- s->match_length = longest_match_fast (s, hash_head);
- }
-#endif
- /* longest_match() or longest_match_fast() sets match_start */
- }
- if (s->match_length >= MIN_MATCH) {
- check_match(s, s->strstart, s->match_start, s->match_length);
-
- _tr_tally_dist(s, s->strstart - s->match_start,
- s->match_length - MIN_MATCH, bflush);
-
- s->lookahead -= s->match_length;
-
- /* Insert new strings in the hash table only if the match length
- * is not too large. This saves time but degrades compression.
- */
-#ifndef FASTEST
- if (s->match_length <= s->max_insert_length &&
- s->lookahead >= MIN_MATCH) {
- s->match_length--; /* string at strstart already in table */
- do {
- s->strstart++;
- INSERT_STRING(s, s->strstart, hash_head);
- /* strstart never exceeds WSIZE-MAX_MATCH, so there are
- * always MIN_MATCH bytes ahead.
- */
- } while (--s->match_length != 0);
- s->strstart++;
- } else
-#endif
- {
- s->strstart += s->match_length;
- s->match_length = 0;
- s->ins_h = s->window[s->strstart];
- UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
- Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
- /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
- * matter since it will be recomputed at next deflate call.
- */
- }
- } else {
- /* No match, output a literal byte */
- Tracevv((stderr,"%c", s->window[s->strstart]));
- _tr_tally_lit (s, s->window[s->strstart], bflush);
- s->lookahead--;
- s->strstart++;
- }
- if (bflush) FLUSH_BLOCK(s, 0);
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
-
-#ifndef FASTEST
-/* ===========================================================================
- * Same as above, but achieves better compression. We use a lazy
- * evaluation for matches: a match is finally adopted only if there is
- * no better match at the next window position.
- */
-local block_state deflate_slow(s, flush)
- deflate_state *s;
- int flush;
-{
- IPos hash_head = NIL; /* head of hash chain */
- int bflush; /* set if current block must be flushed */
-
- /* Process the input block. */
- for (;;) {
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the next match, plus MIN_MATCH bytes to insert the
- * string following the next match.
- */
- if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
- if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
- return need_more;
- }
- if (s->lookahead == 0) break; /* flush the current block */
- }
-
- /* Insert the string window[strstart .. strstart+2] in the
- * dictionary, and set hash_head to the head of the hash chain:
- */
- if (s->lookahead >= MIN_MATCH) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
-
- /* Find the longest match, discarding those <= prev_length.
- */
- s->prev_length = s->match_length, s->prev_match = s->match_start;
- s->match_length = MIN_MATCH-1;
-
- if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
- s->strstart - hash_head <= MAX_DIST(s)) {
- /* To simplify the code, we prevent matches with the string
- * of window index 0 (in particular we have to avoid a match
- * of the string with itself at the start of the input file).
- */
- if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
- s->match_length = longest_match (s, hash_head);
- } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
- s->match_length = longest_match_fast (s, hash_head);
- }
- /* longest_match() or longest_match_fast() sets match_start */
-
- if (s->match_length <= 5 && (s->strategy == Z_FILTERED
-#if TOO_FAR <= 32767
- || (s->match_length == MIN_MATCH &&
- s->strstart - s->match_start > TOO_FAR)
-#endif
- )) {
-
- /* If prev_match is also MIN_MATCH, match_start is garbage
- * but we will ignore the current match anyway.
- */
- s->match_length = MIN_MATCH-1;
- }
- }
- /* If there was a match at the previous step and the current
- * match is not better, output the previous match:
- */
- if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
- uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
- /* Do not insert strings in hash table beyond this. */
-
- check_match(s, s->strstart-1, s->prev_match, s->prev_length);
-
- _tr_tally_dist(s, s->strstart -1 - s->prev_match,
- s->prev_length - MIN_MATCH, bflush);
-
- /* Insert in hash table all strings up to the end of the match.
- * strstart-1 and strstart are already inserted. If there is not
- * enough lookahead, the last two strings are not inserted in
- * the hash table.
- */
- s->lookahead -= s->prev_length-1;
- s->prev_length -= 2;
- do {
- if (++s->strstart <= max_insert) {
- INSERT_STRING(s, s->strstart, hash_head);
- }
- } while (--s->prev_length != 0);
- s->match_available = 0;
- s->match_length = MIN_MATCH-1;
- s->strstart++;
-
- if (bflush) FLUSH_BLOCK(s, 0);
-
- } else if (s->match_available) {
- /* If there was no match at the previous position, output a
- * single literal. If there was a match but the current match
- * is longer, truncate the previous match to a single literal.
- */
- Tracevv((stderr,"%c", s->window[s->strstart-1]));
- _tr_tally_lit(s, s->window[s->strstart-1], bflush);
- if (bflush) {
- FLUSH_BLOCK_ONLY(s, 0);
- }
- s->strstart++;
- s->lookahead--;
- if (s->strm->avail_out == 0) return need_more;
- } else {
- /* There is no previous match to compare with, wait for
- * the next step to decide.
- */
- s->match_available = 1;
- s->strstart++;
- s->lookahead--;
- }
- }
- Assert (flush != Z_NO_FLUSH, "no flush?");
- if (s->match_available) {
- Tracevv((stderr,"%c", s->window[s->strstart-1]));
- _tr_tally_lit(s, s->window[s->strstart-1], bflush);
- s->match_available = 0;
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
-#endif /* FASTEST */
-
-#if 0
-/* ===========================================================================
- * For Z_RLE, simply look for runs of bytes, generate matches only of distance
- * one. Do not maintain a hash table. (It will be regenerated if this run of
- * deflate switches away from Z_RLE.)
- */
-local block_state deflate_rle(s, flush)
- deflate_state *s;
- int flush;
-{
- int bflush; /* set if current block must be flushed */
- uInt run; /* length of run */
- uInt max; /* maximum length of run */
- uInt prev; /* byte at distance one to match */
- Bytef *scan; /* scan for end of run */
-
- for (;;) {
- /* Make sure that we always have enough lookahead, except
- * at the end of the input file. We need MAX_MATCH bytes
- * for the longest encodable run.
- */
- if (s->lookahead < MAX_MATCH) {
- fill_window(s);
- if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) {
- return need_more;
- }
- if (s->lookahead == 0) break; /* flush the current block */
- }
-
- /* See how many times the previous byte repeats */
- run = 0;
- if (s->strstart > 0) { /* if there is a previous byte, that is */
- max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH;
- scan = s->window + s->strstart - 1;
- prev = *scan++;
- do {
- if (*scan++ != prev)
- break;
- } while (++run < max);
- }
-
- /* Emit match if have run of MIN_MATCH or longer, else emit literal */
- if (run >= MIN_MATCH) {
- check_match(s, s->strstart, s->strstart - 1, run);
- _tr_tally_dist(s, 1, run - MIN_MATCH, bflush);
- s->lookahead -= run;
- s->strstart += run;
- } else {
- /* No match, output a literal byte */
- Tracevv((stderr,"%c", s->window[s->strstart]));
- _tr_tally_lit (s, s->window[s->strstart], bflush);
- s->lookahead--;
- s->strstart++;
- }
- if (bflush) FLUSH_BLOCK(s, 0);
- }
- FLUSH_BLOCK(s, flush == Z_FINISH);
- return flush == Z_FINISH ? finish_done : block_done;
-}
-#endif
diff --git a/sys/contrib/opensolaris/uts/common/zmod/deflate.h b/sys/contrib/opensolaris/uts/common/zmod/deflate.h
deleted file mode 100644
index d01a3c1..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/deflate.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/* deflate.h -- internal compression state
- * Copyright (C) 1995-2004 Jean-loup Gailly
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-#ifndef _DEFLATE_H
-#define _DEFLATE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "zutil.h"
-
-/* define NO_GZIP when compiling if you want to disable gzip header and
- trailer creation by deflate(). NO_GZIP would be used to avoid linking in
- the crc code when it is not needed. For shared libraries, gzip encoding
- should be left enabled. */
-#ifndef NO_GZIP
-# define GZIP
-#endif
-
-/* ===========================================================================
- * Internal compression state.
- */
-
-#define LENGTH_CODES 29
-/* number of length codes, not counting the special END_BLOCK code */
-
-#define LITERALS 256
-/* number of literal bytes 0..255 */
-
-#define L_CODES (LITERALS+1+LENGTH_CODES)
-/* number of Literal or Length codes, including the END_BLOCK code */
-
-#define D_CODES 30
-/* number of distance codes */
-
-#define BL_CODES 19
-/* number of codes used to transfer the bit lengths */
-
-#define HEAP_SIZE (2*L_CODES+1)
-/* maximum heap size */
-
-#define MAX_BITS 15
-/* All codes must not exceed MAX_BITS bits */
-
-#define INIT_STATE 42
-#define EXTRA_STATE 69
-#define NAME_STATE 73
-#define COMMENT_STATE 91
-#define HCRC_STATE 103
-#define BUSY_STATE 113
-#define FINISH_STATE 666
-/* Stream status */
-
-
-/* Data structure describing a single value and its code string. */
-typedef struct ct_data_s {
- union {
- ush freq; /* frequency count */
- ush code; /* bit string */
- } fc;
- union {
- ush dad; /* father node in Huffman tree */
- ush len; /* length of bit string */
- } dl;
-} FAR ct_data;
-
-#define Freq fc.freq
-#define Code fc.code
-#define Dad dl.dad
-#define Len dl.len
-
-typedef struct static_tree_desc_s static_tree_desc;
-
-typedef struct tree_desc_s {
- ct_data *dyn_tree; /* the dynamic tree */
- int max_code; /* largest code with non zero frequency */
- static_tree_desc *stat_desc; /* the corresponding static tree */
-} FAR tree_desc;
-
-typedef ush Pos;
-typedef Pos FAR Posf;
-typedef unsigned IPos;
-
-/* A Pos is an index in the character window. We use short instead of int to
- * save space in the various tables. IPos is used only for parameter passing.
- */
-
-typedef struct internal_state {
- z_streamp strm; /* pointer back to this zlib stream */
- int status; /* as the name implies */
- Bytef *pending_buf; /* output still pending */
- ulg pending_buf_size; /* size of pending_buf */
- Bytef *pending_out; /* next pending byte to output to the stream */
- uInt pending; /* nb of bytes in the pending buffer */
- int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
- gz_headerp gzhead; /* gzip header information to write */
- uInt gzindex; /* where in extra, name, or comment */
- Byte method; /* STORED (for zip only) or DEFLATED */
- int last_flush; /* value of flush param for previous deflate call */
-
- /* used by deflate.c: */
-
- uInt w_size; /* LZ77 window size (32K by default) */
- uInt w_bits; /* log2(w_size) (8..16) */
- uInt w_mask; /* w_size - 1 */
-
- Bytef *window;
- /* Sliding window. Input bytes are read into the second half of the window,
- * and move to the first half later to keep a dictionary of at least wSize
- * bytes. With this organization, matches are limited to a distance of
- * wSize-MAX_MATCH bytes, but this ensures that IO is always
- * performed with a length multiple of the block size. Also, it limits
- * the window size to 64K, which is quite useful on MSDOS.
- * To do: use the user input buffer as sliding window.
- */
-
- ulg window_size;
- /* Actual size of window: 2*wSize, except when the user input buffer
- * is directly used as sliding window.
- */
-
- Posf *prev;
- /* Link to older string with same hash index. To limit the size of this
- * array to 64K, this link is maintained only for the last 32K strings.
- * An index in this array is thus a window index modulo 32K.
- */
-
- Posf *head; /* Heads of the hash chains or NIL. */
-
- uInt ins_h; /* hash index of string to be inserted */
- uInt hash_size; /* number of elements in hash table */
- uInt hash_bits; /* log2(hash_size) */
- uInt hash_mask; /* hash_size-1 */
-
- uInt hash_shift;
- /* Number of bits by which ins_h must be shifted at each input
- * step. It must be such that after MIN_MATCH steps, the oldest
- * byte no longer takes part in the hash key, that is:
- * hash_shift * MIN_MATCH >= hash_bits
- */
-
- long block_start;
- /* Window position at the beginning of the current output block. Gets
- * negative when the window is moved backwards.
- */
-
- uInt match_length; /* length of best match */
- IPos prev_match; /* previous match */
- int match_available; /* set if previous match exists */
- uInt strstart; /* start of string to insert */
- uInt match_start; /* start of matching string */
- uInt lookahead; /* number of valid bytes ahead in window */
-
- uInt prev_length;
- /* Length of the best match at previous step. Matches not greater than this
- * are discarded. This is used in the lazy match evaluation.
- */
-
- uInt max_chain_length;
- /* To speed up deflation, hash chains are never searched beyond this
- * length. A higher limit improves compression ratio but degrades the
- * speed.
- */
-
- uInt max_lazy_match;
- /* Attempt to find a better match only when the current match is strictly
- * smaller than this value. This mechanism is used only for compression
- * levels >= 4.
- */
-# define max_insert_length max_lazy_match
- /* Insert new strings in the hash table only if the match length is not
- * greater than this length. This saves time but degrades compression.
- * max_insert_length is used only for compression levels <= 3.
- */
-
- int level; /* compression level (1..9) */
- int strategy; /* favor or force Huffman coding*/
-
- uInt good_match;
- /* Use a faster search when the previous match is longer than this */
-
- int nice_match; /* Stop searching when current match exceeds this */
-
- /* used by trees.c: */
- /* Didn't use ct_data typedef below to supress compiler warning */
- struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
- struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
- struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
-
- struct tree_desc_s l_desc; /* desc. for literal tree */
- struct tree_desc_s d_desc; /* desc. for distance tree */
- struct tree_desc_s bl_desc; /* desc. for bit length tree */
-
- ush bl_count[MAX_BITS+1];
- /* number of codes at each bit length for an optimal tree */
-
- int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
- int heap_len; /* number of elements in the heap */
- int heap_max; /* element of largest frequency */
- /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
- * The same heap array is used to build all trees.
- */
-
- uch depth[2*L_CODES+1];
- /* Depth of each subtree used as tie breaker for trees of equal frequency
- */
-
- uchf *l_buf; /* buffer for literals or lengths */
-
- uInt lit_bufsize;
- /* Size of match buffer for literals/lengths. There are 4 reasons for
- * limiting lit_bufsize to 64K:
- * - frequencies can be kept in 16 bit counters
- * - if compression is not successful for the first block, all input
- * data is still in the window so we can still emit a stored block even
- * when input comes from standard input. (This can also be done for
- * all blocks if lit_bufsize is not greater than 32K.)
- * - if compression is not successful for a file smaller than 64K, we can
- * even emit a stored file instead of a stored block (saving 5 bytes).
- * This is applicable only for zip (not gzip or zlib).
- * - creating new Huffman trees less frequently may not provide fast
- * adaptation to changes in the input data statistics. (Take for
- * example a binary file with poorly compressible code followed by
- * a highly compressible string table.) Smaller buffer sizes give
- * fast adaptation but have of course the overhead of transmitting
- * trees more frequently.
- * - I can't count above 4
- */
-
- uInt last_lit; /* running index in l_buf */
-
- ushf *d_buf;
- /* Buffer for distances. To simplify the code, d_buf and l_buf have
- * the same number of elements. To use different lengths, an extra flag
- * array would be necessary.
- */
-
- ulg opt_len; /* bit length of current block with optimal trees */
- ulg static_len; /* bit length of current block with static trees */
- uInt matches; /* number of string matches in current block */
- int last_eob_len; /* bit length of EOB code for last block */
-
-#ifdef DEBUG
- ulg compressed_len; /* total bit length of compressed file mod 2^32 */
- ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
-#endif
-
- ush bi_buf;
- /* Output buffer. bits are inserted starting at the bottom (least
- * significant bits).
- */
- int bi_valid;
- /* Number of valid bits in bi_buf. All bits above the last valid bit
- * are always zero.
- */
-
-} FAR deflate_state;
-
-/* Output a byte on the stream.
- * IN assertion: there is enough room in pending_buf.
- */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
-
-
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
-#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
-/* In order to simplify the code, particularly on 16 bit machines, match
- * distances are limited to MAX_DIST instead of WSIZE.
- */
-
- /* in trees.c */
-void _tr_init OF((deflate_state *s));
-int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
-void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
- int eof));
-void _tr_align OF((deflate_state *s));
-void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
- int eof));
-
-#define d_code(dist) \
- ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
-/* Mapping from a distance to a distance code. dist is the distance - 1 and
- * must not have side effects. _dist_code[256] and _dist_code[257] are never
- * used.
- */
-
-#ifndef DEBUG
-/* Inline versions of _tr_tally for speed: */
-
-#if defined(GEN_TREES_H) || !defined(STDC)
- extern uch _length_code[];
- extern uch _dist_code[];
-#else
- extern const uch _length_code[];
- extern const uch _dist_code[];
-#endif
-
-# define _tr_tally_lit(s, c, flush) \
- { uch cc = (c); \
- s->d_buf[s->last_lit] = 0; \
- s->l_buf[s->last_lit++] = cc; \
- s->dyn_ltree[cc].Freq++; \
- flush = (s->last_lit == s->lit_bufsize-1); \
- }
-# define _tr_tally_dist(s, distance, length, flush) \
- { uch len = (length); \
- ush dist = (distance); \
- s->d_buf[s->last_lit] = dist; \
- s->l_buf[s->last_lit++] = len; \
- dist--; \
- s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
- s->dyn_dtree[d_code(dist)].Freq++; \
- flush = (s->last_lit == s->lit_bufsize-1); \
- }
-#else
-# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
-# define _tr_tally_dist(s, distance, length, flush) \
- flush = _tr_tally(s, distance, length)
-#endif
-
-#endif /* _DEFLATE_H */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inffast.c b/sys/contrib/opensolaris/uts/common/zmod/inffast.c
deleted file mode 100644
index a6dcf3f..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inffast.c
+++ /dev/null
@@ -1,320 +0,0 @@
-/* inffast.c -- fast decoding
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-#ifndef ASMINF
-
-/* Allow machine dependent optimization for post-increment or pre-increment.
- Based on testing to date,
- Pre-increment preferred for:
- - PowerPC G3 (Adler)
- - MIPS R5000 (Randers-Pehrson)
- Post-increment preferred for:
- - none
- No measurable difference:
- - Pentium III (Anderson)
- - M68060 (Nikl)
- */
-#ifdef POSTINC
-# define OFF 0
-# define PUP(a) *(a)++
-#else
-# define OFF 1
-# define PUP(a) *++(a)
-#endif
-
-/*
- Decode literal, length, and distance codes and write out the resulting
- literal and match bytes until either not enough input or output is
- available, an end-of-block is encountered, or a data error is encountered.
- When large enough input and output buffers are supplied to inflate(), for
- example, a 16K input buffer and a 64K output buffer, more than 95% of the
- inflate execution time is spent in this routine.
-
- Entry assumptions:
-
- state->mode == LEN
- strm->avail_in >= 6
- strm->avail_out >= 258
- start >= strm->avail_out
- state->bits < 8
-
- On return, state->mode is one of:
-
- LEN -- ran out of enough output space or enough available input
- TYPE -- reached end of block code, inflate() to interpret next block
- BAD -- error in block data
-
- Notes:
-
- - The maximum input bits used by a length/distance pair is 15 bits for the
- length code, 5 bits for the length extra, 15 bits for the distance code,
- and 13 bits for the distance extra. This totals 48 bits, or six bytes.
- Therefore if strm->avail_in >= 6, then there is enough input to avoid
- checking for available input while decoding.
-
- - The maximum bytes that a single length/distance pair can output is 258
- bytes, which is the maximum length that can be coded. inflate_fast()
- requires strm->avail_out >= 258 for each loop to avoid checking for
- output space.
- */
-void inflate_fast(strm, start)
-z_streamp strm;
-unsigned start; /* inflate()'s starting value for strm->avail_out */
-{
- struct inflate_state FAR *state;
- unsigned char FAR *in; /* local strm->next_in */
- unsigned char FAR *last; /* while in < last, enough input available */
- unsigned char FAR *out; /* local strm->next_out */
- unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
- unsigned char FAR *end; /* while out < end, enough space available */
-#ifdef INFLATE_STRICT
- unsigned dmax; /* maximum distance from zlib header */
-#endif
- unsigned wsize; /* window size or zero if not using window */
- unsigned whave; /* valid bytes in the window */
- unsigned write; /* window write index */
- unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
- unsigned long hold; /* local strm->hold */
- unsigned bits; /* local strm->bits */
- code const FAR *lcode; /* local strm->lencode */
- code const FAR *dcode; /* local strm->distcode */
- unsigned lmask; /* mask for first level of length codes */
- unsigned dmask; /* mask for first level of distance codes */
- code this; /* retrieved table entry */
- unsigned op; /* code bits, operation, extra bits, or */
- /* window position, window bytes to copy */
- unsigned len; /* match length, unused bytes */
- unsigned dist; /* match distance */
- unsigned char FAR *from; /* where to copy match from */
-
- /* copy state to local variables */
- state = (struct inflate_state FAR *)strm->state;
- in = strm->next_in - OFF;
- last = in + (strm->avail_in - 5);
- out = strm->next_out - OFF;
- beg = out - (start - strm->avail_out);
- end = out + (strm->avail_out - 257);
-#ifdef INFLATE_STRICT
- dmax = state->dmax;
-#endif
- wsize = state->wsize;
- whave = state->whave;
- write = state->write;
- window = state->window;
- hold = state->hold;
- bits = state->bits;
- lcode = state->lencode;
- dcode = state->distcode;
- lmask = (1U << state->lenbits) - 1;
- dmask = (1U << state->distbits) - 1;
-
- /* decode literals and length/distances until end-of-block or not enough
- input data or output space */
- do {
- if (bits < 15) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- this = lcode[hold & lmask];
- dolen:
- op = (unsigned)(this.bits);
- hold >>= op;
- bits -= op;
- op = (unsigned)(this.op);
- if (op == 0) { /* literal */
- Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
- "inflate: literal '%c'\n" :
- "inflate: literal 0x%02x\n", this.val));
- PUP(out) = (unsigned char)(this.val);
- }
- else if (op & 16) { /* length base */
- len = (unsigned)(this.val);
- op &= 15; /* number of extra bits */
- if (op) {
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- len += (unsigned)hold & ((1U << op) - 1);
- hold >>= op;
- bits -= op;
- }
- Tracevv((stderr, "inflate: length %u\n", len));
- if (bits < 15) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- this = dcode[hold & dmask];
- dodist:
- op = (unsigned)(this.bits);
- hold >>= op;
- bits -= op;
- op = (unsigned)(this.op);
- if (op & 16) { /* distance base */
- dist = (unsigned)(this.val);
- op &= 15; /* number of extra bits */
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- if (bits < op) {
- hold += (unsigned long)(PUP(in)) << bits;
- bits += 8;
- }
- }
- dist += (unsigned)hold & ((1U << op) - 1);
-#ifdef INFLATE_STRICT
- if (dist > dmax) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
-#endif
- hold >>= op;
- bits -= op;
- Tracevv((stderr, "inflate: distance %u\n", dist));
- op = (unsigned)(out - beg); /* max distance in output */
- if (dist > op) { /* see if copy from window */
- op = dist - op; /* distance back in window */
- if (op > whave) {
- strm->msg = (char *)"invalid distance too far back";
- state->mode = BAD;
- break;
- }
- from = window - OFF;
- if (write == 0) { /* very common case */
- from += wsize - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- else if (write < op) { /* wrap around window */
- from += wsize + write - op;
- op -= write;
- if (op < len) { /* some from end of window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = window - OFF;
- if (write < len) { /* some from start of window */
- op = write;
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- }
- else { /* contiguous in window */
- from += write - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- PUP(out) = PUP(from);
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- while (len > 2) {
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- len -= 3;
- }
- if (len) {
- PUP(out) = PUP(from);
- if (len > 1)
- PUP(out) = PUP(from);
- }
- }
- else {
- from = out - dist; /* copy direct from output */
- do { /* minimum length is three */
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- PUP(out) = PUP(from);
- len -= 3;
- } while (len > 2);
- if (len) {
- PUP(out) = PUP(from);
- if (len > 1)
- PUP(out) = PUP(from);
- }
- }
- }
- else if ((op & 64) == 0) { /* 2nd level distance code */
- this = dcode[this.val + (hold & ((1U << op) - 1))];
- goto dodist;
- }
- else {
- strm->msg = (char *)"invalid distance code";
- state->mode = BAD;
- break;
- }
- }
- else if ((op & 64) == 0) { /* 2nd level length code */
- this = lcode[this.val + (hold & ((1U << op) - 1))];
- goto dolen;
- }
- else if (op & 32) { /* end-of-block */
- Tracevv((stderr, "inflate: end of block\n"));
- state->mode = TYPE;
- break;
- }
- else {
- strm->msg = (char *)"invalid literal/length code";
- state->mode = BAD;
- break;
- }
- } while (in < last && out < end);
-
- /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
- len = bits >> 3;
- in -= len;
- bits -= len << 3;
- hold &= (1U << bits) - 1;
-
- /* update state and return */
- strm->next_in = in + OFF;
- strm->next_out = out + OFF;
- strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
- strm->avail_out = (unsigned)(out < end ?
- 257 + (end - out) : 257 - (out - end));
- state->hold = hold;
- state->bits = bits;
- return;
-}
-
-/*
- inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
- - Using bit fields for code structure
- - Different op definition to avoid & for extra bits (do & for table bits)
- - Three separate decoding do-loops for direct, window, and write == 0
- - Special case for distance > 1 copies to do overlapped load and store copy
- - Explicit branch predictions (based on measured branch probabilities)
- - Deferring match copy and interspersing it with decoding subsequent codes
- - Swapping literal/length else
- - Swapping window/direct else
- - Larger unrolled copy loops (three is about right)
- - Moving len -= 3 statement into middle of loop
- */
-
-#endif /* !ASMINF */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inffast.h b/sys/contrib/opensolaris/uts/common/zmod/inffast.h
deleted file mode 100644
index 2d214ef..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inffast.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* inffast.h -- header to use inffast.c
- * Copyright (C) 1995-2003 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-void inflate_fast OF((z_streamp strm, unsigned start));
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inffixed.h b/sys/contrib/opensolaris/uts/common/zmod/inffixed.h
deleted file mode 100644
index ed55df8..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inffixed.h
+++ /dev/null
@@ -1,96 +0,0 @@
- /* inffixed.h -- table for decoding fixed codes
- * Generated automatically by makefixed().
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
- /* WARNING: this file should *not* be used by applications. It
- is part of the implementation of the compression library and
- is subject to change. Applications should only use zlib.h.
- */
-
- static const code lenfix[512] = {
- {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
- {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
- {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
- {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
- {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
- {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
- {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
- {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
- {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
- {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
- {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
- {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
- {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
- {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
- {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
- {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
- {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
- {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
- {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
- {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
- {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
- {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
- {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
- {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
- {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
- {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
- {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
- {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
- {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
- {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
- {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
- {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
- {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
- {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
- {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
- {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
- {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
- {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
- {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
- {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
- {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
- {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
- {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
- {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
- {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
- {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
- {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
- {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
- {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
- {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
- {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
- {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
- {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
- {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
- {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
- {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
- {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
- {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
- {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
- {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
- {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
- {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
- {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
- {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
- {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
- {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
- {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
- {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
- {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
- {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
- {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
- {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
- {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
- {0,9,255}
- };
-
- static const code distfix[32] = {
- {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
- {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
- {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
- {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
- {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
- {22,5,193},{64,5,0}
- };
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inflate.c b/sys/contrib/opensolaris/uts/common/zmod/inflate.c
deleted file mode 100644
index 023e7a1..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inflate.c
+++ /dev/null
@@ -1,1395 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* inflate.c -- zlib decompression
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Change history:
- *
- * 1.2.beta0 24 Nov 2002
- * - First version -- complete rewrite of inflate to simplify code, avoid
- * creation of window when not needed, minimize use of window when it is
- * needed, make inffast.c even faster, implement gzip decoding, and to
- * improve code readability and style over the previous zlib inflate code
- *
- * 1.2.beta1 25 Nov 2002
- * - Use pointers for available input and output checking in inffast.c
- * - Remove input and output counters in inffast.c
- * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
- * - Remove unnecessary second byte pull from length extra in inffast.c
- * - Unroll direct copy to three copies per loop in inffast.c
- *
- * 1.2.beta2 4 Dec 2002
- * - Change external routine names to reduce potential conflicts
- * - Correct filename to inffixed.h for fixed tables in inflate.c
- * - Make hbuf[] unsigned char to match parameter type in inflate.c
- * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
- * to avoid negation problem on Alphas (64 bit) in inflate.c
- *
- * 1.2.beta3 22 Dec 2002
- * - Add comments on state->bits assertion in inffast.c
- * - Add comments on op field in inftrees.h
- * - Fix bug in reuse of allocated window after inflateReset()
- * - Remove bit fields--back to byte structure for speed
- * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
- * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
- * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
- * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
- * - Use local copies of stream next and avail values, as well as local bit
- * buffer and bit count in inflate()--for speed when inflate_fast() not used
- *
- * 1.2.beta4 1 Jan 2003
- * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
- * - Move a comment on output buffer sizes from inffast.c to inflate.c
- * - Add comments in inffast.c to introduce the inflate_fast() routine
- * - Rearrange window copies in inflate_fast() for speed and simplification
- * - Unroll last copy for window match in inflate_fast()
- * - Use local copies of window variables in inflate_fast() for speed
- * - Pull out common write == 0 case for speed in inflate_fast()
- * - Make op and len in inflate_fast() unsigned for consistency
- * - Add FAR to lcode and dcode declarations in inflate_fast()
- * - Simplified bad distance check in inflate_fast()
- * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
- * source file infback.c to provide a call-back interface to inflate for
- * programs like gzip and unzip -- uses window as output buffer to avoid
- * window copying
- *
- * 1.2.beta5 1 Jan 2003
- * - Improved inflateBack() interface to allow the caller to provide initial
- * input in strm.
- * - Fixed stored blocks bug in inflateBack()
- *
- * 1.2.beta6 4 Jan 2003
- * - Added comments in inffast.c on effectiveness of POSTINC
- * - Typecasting all around to reduce compiler warnings
- * - Changed loops from while (1) or do {} while (1) to for (;;), again to
- * make compilers happy
- * - Changed type of window in inflateBackInit() to unsigned char *
- *
- * 1.2.beta7 27 Jan 2003
- * - Changed many types to unsigned or unsigned short to avoid warnings
- * - Added inflateCopy() function
- *
- * 1.2.0 9 Mar 2003
- * - Changed inflateBack() interface to provide separate opaque descriptors
- * for the in() and out() functions
- * - Changed inflateBack() argument and in_func typedef to swap the length
- * and buffer address return values for the input function
- * - Check next_in and next_out for Z_NULL on entry to inflate()
- *
- * The history for versions after 1.2.0 is in ChangeLog in the zlib distribution.
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-#ifdef MAKEFIXED
-# ifndef BUILDFIXED
-# define BUILDFIXED
-# endif
-#endif
-
-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, unsigned out));
-#ifdef BUILDFIXED
- void makefixed OF((void));
-#endif
-local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf,
- unsigned len));
-
-int ZEXPORT inflateReset(strm)
-z_streamp strm;
-{
- struct inflate_state FAR *state;
-
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state FAR *)strm->state;
- strm->total_in = strm->total_out = state->total = 0;
- strm->msg = Z_NULL;
- strm->adler = 1; /* to support ill-conceived Java test suite */
- state->mode = HEAD;
- state->last = 0;
- state->havedict = 0;
- state->dmax = 32768U;
- state->head = Z_NULL;
- state->wsize = 0;
- state->whave = 0;
- state->write = 0;
- state->hold = 0;
- state->bits = 0;
- state->lencode = state->distcode = state->next = state->codes;
- Tracev((stderr, "inflate: reset\n"));
- return Z_OK;
-}
-
-int ZEXPORT inflatePrime(strm, bits, value)
-z_streamp strm;
-int bits;
-int value;
-{
- struct inflate_state FAR *state;
-
- if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state FAR *)strm->state;
- if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
- value &= (1L << bits) - 1;
- state->hold += value << state->bits;
- state->bits += bits;
- return Z_OK;
-}
-
-int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
-z_streamp strm;
-int windowBits;
-const char *version;
-int stream_size;
-{
- struct inflate_state FAR *state;
-
- if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
- stream_size != (int)(sizeof(z_stream)))
- return Z_VERSION_ERROR;
- if (strm == Z_NULL) return Z_STREAM_ERROR;
- strm->msg = Z_NULL; /* in case we return an error */
- if (strm->zalloc == (alloc_func)0) {
- strm->zalloc = zcalloc;
- strm->opaque = (voidpf)0;
- }
- if (strm->zfree == (free_func)0) strm->zfree = zcfree;
- state = (struct inflate_state FAR *)
- ZALLOC(strm, 1, sizeof(struct inflate_state));
- if (state == Z_NULL) return Z_MEM_ERROR;
- Tracev((stderr, "inflate: allocated\n"));
- strm->state = (struct internal_state FAR *)state;
- if (windowBits < 0) {
- state->wrap = 0;
- windowBits = -windowBits;
- }
- else {
- state->wrap = (windowBits >> 4) + 1;
-#ifdef GUNZIP
- if (windowBits < 48) windowBits &= 15;
-#endif
- }
- if (windowBits < 8 || windowBits > 15) {
- ZFREE(strm, state);
- strm->state = Z_NULL;
- return Z_STREAM_ERROR;
- }
- state->wbits = (unsigned)windowBits;
- state->window = Z_NULL;
- return inflateReset(strm);
-}
-
-int ZEXPORT inflateInit_(strm, version, stream_size)
-z_streamp strm;
-const char *version;
-int stream_size;
-{
- return inflateInit2_(strm, DEF_WBITS, version, stream_size);
-}
-
-/*
- Return state with length and distance decoding tables and index sizes set to
- fixed code decoding. Normally this returns fixed tables from inffixed.h.
- If BUILDFIXED is defined, then instead this routine builds the tables the
- first time it's called, and returns those tables the first time and
- thereafter. This reduces the size of the code by about 2K bytes, in
- exchange for a little execution time. However, BUILDFIXED should not be
- used for threaded applications, since the rewriting of the tables and virgin
- may not be thread-safe.
- */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
-#ifdef BUILDFIXED
- static int virgin = 1;
- static code *lenfix, *distfix;
- static code fixed[544];
-
- /* build fixed huffman tables if first call (may not be thread safe) */
- if (virgin) {
- unsigned sym, bits;
- static code *next;
-
- /* literal/length table */
- sym = 0;
- while (sym < 144) state->lens[sym++] = 8;
- while (sym < 256) state->lens[sym++] = 9;
- while (sym < 280) state->lens[sym++] = 7;
- while (sym < 288) state->lens[sym++] = 8;
- next = fixed;
- lenfix = next;
- bits = 9;
- inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
-
- /* distance table */
- sym = 0;
- while (sym < 32) state->lens[sym++] = 5;
- distfix = next;
- bits = 5;
- inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
-
- /* do this just once */
- virgin = 0;
- }
-#else /* !BUILDFIXED */
-# include "inffixed.h"
-#endif /* BUILDFIXED */
- state->lencode = lenfix;
- state->lenbits = 9;
- state->distcode = distfix;
- state->distbits = 5;
-}
-
-#ifdef MAKEFIXED
-#include <stdio.h>
-
-/*
- Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
- defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
- those tables to stdout, which would be piped to inffixed.h. A small program
- can simply call makefixed to do this:
-
- void makefixed(void);
-
- int main(void)
- {
- makefixed();
- return 0;
- }
-
- Then that can be linked with zlib built with MAKEFIXED defined and run:
-
- a.out > inffixed.h
- */
-void makefixed()
-{
- unsigned low, size;
- struct inflate_state state;
-
- fixedtables(&state);
- puts(" /* inffixed.h -- table for decoding fixed codes");
- puts(" * Generated automatically by makefixed().");
- puts(" */");
- puts("");
- puts(" /* WARNING: this file should *not* be used by applications.");
- puts(" It is part of the implementation of this library and is");
- puts(" subject to change. Applications should only use zlib.h.");
- puts(" */");
- puts("");
- size = 1U << 9;
- printf(" static const code lenfix[%u] = {", size);
- low = 0;
- for (;;) {
- if ((low % 7) == 0) printf("\n ");
- printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits,
- state.lencode[low].val);
- if (++low == size) break;
- putchar(',');
- }
- puts("\n };");
- size = 1U << 5;
- printf("\n static const code distfix[%u] = {", size);
- low = 0;
- for (;;) {
- if ((low % 6) == 0) printf("\n ");
- printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
- state.distcode[low].val);
- if (++low == size) break;
- putchar(',');
- }
- puts("\n };");
-}
-#endif /* MAKEFIXED */
-
-/*
-   Refresh the sliding window so that it ends with the most recent output,
-   at most wsize (normally 32K) bytes of it.  The window is allocated here on
-   first use.  inflate() calls this on its way out when a window is already
-   in use, or when output was produced during the call and the end of the
-   deflate stream has not been reached; inflateSetDictionary() calls it to
-   obtain a window for dictionary data.
-
-   strm supplies next_out/avail_out describing the output just written, and
-   out is the value avail_out had before that output was produced.  Returns
-   0 on success, or 1 if the window could not be allocated.
-
-   Giving inflate() output buffers larger than 32K should be faster: only
-   the final 32K of output is copied into the window on return, and every
-   distance after the first 32K of output then falls inside the output data,
-   which makes match copies simpler.  The benefit may depend on the size of
-   the processor's data caches.
- */
-local int updatewindow(strm, out)
-z_streamp strm;
-unsigned out;
-{
-    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
-    unsigned fresh;     /* output bytes produced since out was recorded */
-    unsigned room;      /* bytes placed in the space after the write index */
-
-    /* allocate the window the first time it is needed */
-    if (state->window == Z_NULL) {
-        state->window = (unsigned char FAR *)
-                        ZALLOC(strm, 1U << state->wbits,
-                               sizeof(unsigned char));
-        if (state->window == Z_NULL)
-            return 1;
-    }
-
-    /* a window that has never been used starts out empty */
-    if (state->wsize == 0) {
-        state->whave = 0;
-        state->write = 0;
-        state->wsize = 1U << state->wbits;
-    }
-
-    fresh = out - strm->avail_out;
-
-    /* a whole window's worth (or more) of new output replaces everything */
-    if (fresh >= state->wsize) {
-        zmemcpy(state->window, strm->next_out - state->wsize, state->wsize);
-        state->whave = state->wsize;
-        state->write = 0;
-        return 0;
-    }
-
-    /* otherwise append at the write index, wrapping to the front if needed */
-    room = state->wsize - state->write;
-    if (room > fresh)
-        room = fresh;
-    zmemcpy(state->window + state->write, strm->next_out - fresh, room);
-    fresh -= room;
-    if (fresh != 0) {
-        /* wrapped around: the rest goes to the front, window is now full */
-        zmemcpy(state->window, strm->next_out - fresh, fresh);
-        state->whave = state->wsize;
-        state->write = fresh;
-        return 0;
-    }
-
-    /* no wrap: advance the write index and the count of valid bytes */
-    state->write += room;
-    if (state->write == state->wsize)
-        state->write = 0;
-    if (state->whave < state->wsize)
-        state->whave += room;
-    return 0;
-}
-
-/* Macros for inflate(): */
-/* These macros refer to inflate()'s local variables by name (state, strm,
-   next, have, put, left, hold, bits and, under GUNZIP, hbuf), and PULLBYTE()
-   jumps to inflate()'s inf_leave label, so they can only be expanded inside
-   that function. */
-
-/* check function to use adler32() for zlib or crc32() for gzip */
-/* (state->flags is set to zero when a zlib header is expected, so a nonzero
-   value selects crc32()) */
-#ifdef GUNZIP
-# define UPDATE(check, buf, len) \
- (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
-#else
-# define UPDATE(check, buf, len) adler32(check, buf, len)
-#endif
-
-/* check macros for header crc */
-/* CRC2 and CRC4 store the low 2 or 4 bytes of word in hbuf, least
-   significant byte first, and fold them into check with crc32() */
-#ifdef GUNZIP
-# define CRC2(check, word) \
- do { \
- hbuf[0] = (unsigned char)(word); \
- hbuf[1] = (unsigned char)((word) >> 8); \
- check = crc32(check, hbuf, 2); \
- } while (0)
-
-# define CRC4(check, word) \
- do { \
- hbuf[0] = (unsigned char)(word); \
- hbuf[1] = (unsigned char)((word) >> 8); \
- hbuf[2] = (unsigned char)((word) >> 16); \
- hbuf[3] = (unsigned char)((word) >> 24); \
- check = crc32(check, hbuf, 4); \
- } while (0)
-#endif
-
-/* Load registers with state in inflate() for speed */
-/* (copies the stream's in/out pointers and counts and the saved bit
-   accumulator into the locals put, left, next, have, hold and bits) */
-#define LOAD() \
- do { \
- put = strm->next_out; \
- left = strm->avail_out; \
- next = strm->next_in; \
- have = strm->avail_in; \
- hold = state->hold; \
- bits = state->bits; \
- } while (0)
-
-/* Restore state from registers in inflate() */
-/* (the exact inverse of LOAD(): writes the locals back to strm and state) */
-#define RESTORE() \
- do { \
- strm->next_out = put; \
- strm->avail_out = left; \
- strm->next_in = next; \
- strm->avail_in = have; \
- state->hold = hold; \
- state->bits = bits; \
- } while (0)
-
-/* Clear the input bit accumulator */
-#define INITBITS() \
- do { \
- hold = 0; \
- bits = 0; \
- } while (0)
-
-/* Get a byte of input into the bit accumulator, or return from inflate()
- if there is no input available. */
-/* (the new byte is added above the bits already held, so the oldest input
-   bits are always the low bits of hold; "return" is a goto inf_leave) */
-#define PULLBYTE() \
- do { \
- if (have == 0) goto inf_leave; \
- have--; \
- hold += (unsigned long)(*next++) << bits; \
- bits += 8; \
- } while (0)
-
-/* Assure that there are at least n bits in the bit accumulator. If there is
- not enough available input to do that, then return from inflate(). */
-#define NEEDBITS(n) \
- do { \
- while (bits < (unsigned)(n)) \
- PULLBYTE(); \
- } while (0)
-
-/* Return the low n bits of the bit accumulator (n < 16) */
-#define BITS(n) \
- ((unsigned)hold & ((1U << (n)) - 1))
-
-/* Remove n bits from the bit accumulator */
-#define DROPBITS(n) \
- do { \
- hold >>= (n); \
- bits -= (unsigned)(n); \
- } while (0)
-
-/* Remove zero to seven bits as needed to go to a byte boundary */
-#define BYTEBITS() \
- do { \
- hold >>= bits & 7; \
- bits -= bits & 7; \
- } while (0)
-
-/* Reverse the bytes in a 32-bit value */
-/* (q is evaluated four times, so it must not have side effects) */
-#define REVERSE(q) \
- ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
- (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
-
-/*
- inflate() uses a state machine to process as much input data and generate as
- much output data as possible before returning. The state machine is
- structured roughly as follows:
-
- for (;;) switch (state) {
- ...
- case STATEn:
- if (not enough input data or output space to make progress)
- return;
- ... make progress ...
- state = STATEm;
- break;
- ...
- }
-
- so when inflate() is called again, the same case is attempted again, and
- if the appropriate resources are provided, the machine proceeds to the
- next state. The NEEDBITS() macro is usually the way the state evaluates
- whether it can proceed or should return. NEEDBITS() does the return if
- the requested bits are not available. The typical use of the BITS macros
- is:
-
- NEEDBITS(n);
- ... do something with BITS(n) ...
- DROPBITS(n);
-
- where NEEDBITS(n) either returns from inflate() if there isn't enough
- input left to load n bits into the accumulator, or it continues. BITS(n)
- gives the low n bits in the accumulator. When done, DROPBITS(n) drops
- the low n bits off the accumulator. INITBITS() clears the accumulator
- and sets the number of available bits to zero. BYTEBITS() discards just
- enough bits to put the accumulator on a byte boundary. After BYTEBITS()
- and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
-
- NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
- if there is no input available. The decoding of variable length codes uses
- PULLBYTE() directly in order to pull just enough bytes to decode the next
- code, and no more.
-
- Some states loop until they get enough input, making sure that enough
- state information is maintained to continue the loop where it left off
- if NEEDBITS() returns in the loop. For example, want, need, and keep
- would all have to actually be part of the saved state in case NEEDBITS()
- returns:
-
- case STATEw:
- while (want < need) {
- NEEDBITS(n);
- keep[want++] = BITS(n);
- DROPBITS(n);
- }
- state = STATEx;
- case STATEx:
-
- As shown above, if the next state is also the next case, then the break
- is omitted.
-
- A state may also return if there is not enough output space available to
- complete that state. Those states are copying stored data, writing a
- literal byte, and copying a matching string.
-
- When returning, a "goto inf_leave" is used to update the total counters,
- update the check value, and determine whether any progress has been made
- during that inflate() call in order to return the proper return code.
- Progress is defined as a change in either strm->avail_in or strm->avail_out.
- When there is a window, goto inf_leave will update the window with the last
- output written. If a goto inf_leave occurs in the middle of decompression
- and there is no window currently, goto inf_leave will create one and copy
- output to the window for the next call of inflate().
-
- In this implementation, the flush parameter of inflate() only affects the
- return code (per zlib.h). inflate() always writes as much as possible to
- strm->next_out, given the space available and the provided input--the effect
- documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
- the allocation of and copying into a sliding window until necessary, which
- provides the effect documented in zlib.h for Z_FINISH when the entire input
- stream is available. So the only thing the flush parameter actually does is:
- when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
- will return Z_BUF_ERROR if it has not reached the end of the stream.
- */
-
-/*
-   Decompress as much of strm->next_in into strm->next_out as the available
-   input and output space allow, resuming from the mode saved in the stream
-   state.
-
-   flush: Z_BLOCK makes inflate() return when it is about to read a block
-   header (mode TYPE); Z_FINISH turns a would-be Z_OK into Z_BUF_ERROR.
-
-   Returns Z_OK when progress was made, Z_STREAM_END once the trailer has
-   been verified, Z_NEED_DICT when a preset dictionary must be supplied with
-   inflateSetDictionary(), Z_DATA_ERROR for corrupt input (strm->msg
-   describes it), Z_MEM_ERROR if the sliding window cannot be allocated,
-   Z_BUF_ERROR when no progress was possible, and Z_STREAM_ERROR for a null
-   or inconsistent stream.
- */
-int ZEXPORT inflate(strm, flush)
-z_streamp strm;
-int flush;
-{
-    struct inflate_state FAR *state;
-    unsigned char FAR *next;    /* next input */
-    unsigned char FAR *put;     /* next output */
-    unsigned have, left;        /* available input and output */
-    unsigned long hold;         /* bit buffer */
-    unsigned bits;              /* bits in bit buffer */
-    unsigned in, out;           /* save starting available input and output */
-    unsigned copy;              /* number of stored or match bytes to copy */
-    unsigned char FAR *from;    /* where to copy match bytes from */
-    code this;                  /* current decoding table entry */
-    code last;                  /* parent table entry */
-    unsigned len;               /* length to copy for repeats, bits to drop */
-    int ret;                    /* return code */
-#ifdef GUNZIP
-    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
-#endif
-    static const unsigned short order[19] = /* permutation of code lengths */
-        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL ||
-        (strm->next_in == Z_NULL && strm->avail_in != 0))
-        return Z_STREAM_ERROR;
-
-    state = (struct inflate_state FAR *)strm->state;
-    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
-    LOAD();
-    in = have;
-    out = left;
-    ret = Z_OK;
-    for (;;)
-        switch (state->mode) {
-        case HEAD:
-            if (state->wrap == 0) {
-                state->mode = TYPEDO;
-                break;
-            }
-            NEEDBITS(16);
-#ifdef GUNZIP
-            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
-                state->check = crc32(0L, Z_NULL, 0);
-                CRC2(state->check, hold);
-                INITBITS();
-                state->mode = FLAGS;
-                break;
-            }
-            state->flags = 0;           /* expect zlib header */
-            if (state->head != Z_NULL)
-                state->head->done = -1;
-            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
-#else
-            if (
-#endif
-                ((BITS(8) << 8) + (hold >> 8)) % 31) {
-                strm->msg = (char *)"incorrect header check";
-                state->mode = BAD;
-                break;
-            }
-            if (BITS(4) != Z_DEFLATED) {
-                strm->msg = (char *)"unknown compression method";
-                state->mode = BAD;
-                break;
-            }
-            DROPBITS(4);
-            len = BITS(4) + 8;
-            if (len > state->wbits) {
-                strm->msg = (char *)"invalid window size";
-                state->mode = BAD;
-                break;
-            }
-            state->dmax = 1U << len;
-            Tracev((stderr, "inflate:   zlib header ok\n"));
-            strm->adler = state->check = adler32(0L, Z_NULL, 0);
-            state->mode = hold & 0x200 ? DICTID : TYPE;
-            INITBITS();
-            break;
-#ifdef GUNZIP
-        case FLAGS:
-            NEEDBITS(16);
-            state->flags = (int)(hold);
-            if ((state->flags & 0xff) != Z_DEFLATED) {
-                strm->msg = (char *)"unknown compression method";
-                state->mode = BAD;
-                break;
-            }
-            if (state->flags & 0xe000) {
-                strm->msg = (char *)"unknown header flags set";
-                state->mode = BAD;
-                break;
-            }
-            if (state->head != Z_NULL)
-                state->head->text = (int)((hold >> 8) & 1);
-            if (state->flags & 0x0200) CRC2(state->check, hold);
-            INITBITS();
-            state->mode = TIME;
-            /*FALLTHRU*/
-        case TIME:
-            NEEDBITS(32);
-            if (state->head != Z_NULL)
-                state->head->time = hold;
-            if (state->flags & 0x0200) CRC4(state->check, hold);
-            INITBITS();
-            state->mode = OS;
-            /*FALLTHRU*/
-        case OS:
-            NEEDBITS(16);
-            if (state->head != Z_NULL) {
-                state->head->xflags = (int)(hold & 0xff);
-                state->head->os = (int)(hold >> 8);
-            }
-            if (state->flags & 0x0200) CRC2(state->check, hold);
-            INITBITS();
-            state->mode = EXLEN;
-            /*FALLTHRU*/
-        case EXLEN:
-            if (state->flags & 0x0400) {
-                NEEDBITS(16);
-                state->length = (unsigned)(hold);
-                if (state->head != Z_NULL)
-                    state->head->extra_len = (unsigned)hold;
-                if (state->flags & 0x0200) CRC2(state->check, hold);
-                INITBITS();
-            }
-            else if (state->head != Z_NULL)
-                state->head->extra = Z_NULL;
-            state->mode = EXTRA;
-            /*FALLTHRU*/
-        case EXTRA:
-            if (state->flags & 0x0400) {
-                copy = state->length;
-                if (copy > have) copy = have;
-                if (copy) {
-                    if (state->head != Z_NULL &&
-                        state->head->extra != Z_NULL) {
-                        /* len is how much of the extra field has already
-                           been consumed, i.e. the offset to store at */
-                        len = state->head->extra_len - state->length;
-                        /* Store only while that offset is still inside the
-                           caller's buffer.  Without this check an extra
-                           field longer than extra_max makes
-                           extra_max - len wrap around, and zmemcpy() would
-                           write far beyond the end of head->extra. */
-                        if (len < state->head->extra_max)
-                            zmemcpy(state->head->extra + len, next,
-                                    len + copy > state->head->extra_max ?
-                                    state->head->extra_max - len : copy);
-                    }
-                    if (state->flags & 0x0200)
-                        state->check = crc32(state->check, next, copy);
-                    have -= copy;
-                    next += copy;
-                    state->length -= copy;
-                }
-                if (state->length) goto inf_leave;
-            }
-            state->length = 0;
-            state->mode = NAME;
-            /*FALLTHRU*/
-        case NAME:
-            if (state->flags & 0x0800) {
-                if (have == 0) goto inf_leave;
-                copy = 0;
-                do {
-                    len = (unsigned)(next[copy++]);
-                    if (state->head != Z_NULL &&
-                            state->head->name != Z_NULL &&
-                            state->length < state->head->name_max)
-                        state->head->name[state->length++] = len;
-                } while (len && copy < have);
-                if (state->flags & 0x0200)
-                    state->check = crc32(state->check, next, copy);
-                have -= copy;
-                next += copy;
-                if (len) goto inf_leave;
-            }
-            else if (state->head != Z_NULL)
-                state->head->name = Z_NULL;
-            state->length = 0;
-            state->mode = COMMENT;
-            /*FALLTHRU*/
-        case COMMENT:
-            if (state->flags & 0x1000) {
-                if (have == 0) goto inf_leave;
-                copy = 0;
-                do {
-                    len = (unsigned)(next[copy++]);
-                    if (state->head != Z_NULL &&
-                            state->head->comment != Z_NULL &&
-                            state->length < state->head->comm_max)
-                        state->head->comment[state->length++] = len;
-                } while (len && copy < have);
-                if (state->flags & 0x0200)
-                    state->check = crc32(state->check, next, copy);
-                have -= copy;
-                next += copy;
-                if (len) goto inf_leave;
-            }
-            else if (state->head != Z_NULL)
-                state->head->comment = Z_NULL;
-            state->mode = HCRC;
-            /*FALLTHRU*/
-        case HCRC:
-            if (state->flags & 0x0200) {
-                NEEDBITS(16);
-                if (hold != (state->check & 0xffff)) {
-                    strm->msg = (char *)"header crc mismatch";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-            }
-            if (state->head != Z_NULL) {
-                state->head->hcrc = (int)((state->flags >> 9) & 1);
-                state->head->done = 1;
-            }
-            strm->adler = state->check = crc32(0L, Z_NULL, 0);
-            state->mode = TYPE;
-            break;
-#endif
-        case DICTID:
-            NEEDBITS(32);
-            strm->adler = state->check = REVERSE(hold);
-            INITBITS();
-            state->mode = DICT;
-            /*FALLTHRU*/
-        case DICT:
-            if (state->havedict == 0) {
-                RESTORE();
-                return Z_NEED_DICT;
-            }
-            strm->adler = state->check = adler32(0L, Z_NULL, 0);
-            state->mode = TYPE;
-            /*FALLTHRU*/
-        case TYPE:
-            if (flush == Z_BLOCK) goto inf_leave;
-            /*FALLTHRU*/
-        case TYPEDO:
-            if (state->last) {
-                BYTEBITS();
-                state->mode = CHECK;
-                break;
-            }
-            NEEDBITS(3);
-            state->last = BITS(1);
-            DROPBITS(1);
-            switch (BITS(2)) {
-            case 0:                             /* stored block */
-                Tracev((stderr, "inflate:     stored block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = STORED;
-                break;
-            case 1:                             /* fixed block */
-                fixedtables(state);
-                Tracev((stderr, "inflate:     fixed codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = LEN;              /* decode codes */
-                break;
-            case 2:                             /* dynamic block */
-                Tracev((stderr, "inflate:     dynamic codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = TABLE;
-                break;
-            case 3:
-                strm->msg = (char *)"invalid block type";
-                state->mode = BAD;
-            }
-            DROPBITS(2);
-            break;
-        case STORED:
-            BYTEBITS();                         /* go to byte boundary */
-            NEEDBITS(32);
-            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
-                strm->msg = (char *)"invalid stored block lengths";
-                state->mode = BAD;
-                break;
-            }
-            state->length = (unsigned)hold & 0xffff;
-            Tracev((stderr, "inflate:       stored length %u\n",
-                    state->length));
-            INITBITS();
-            state->mode = COPY;
-            /*FALLTHRU*/
-        case COPY:
-            copy = state->length;
-            if (copy) {
-                if (copy > have) copy = have;
-                if (copy > left) copy = left;
-                if (copy == 0) goto inf_leave;
-                zmemcpy(put, next, copy);
-                have -= copy;
-                next += copy;
-                left -= copy;
-                put += copy;
-                state->length -= copy;
-                break;
-            }
-            Tracev((stderr, "inflate:       stored end\n"));
-            state->mode = TYPE;
-            break;
-        case TABLE:
-            NEEDBITS(14);
-            state->nlen = BITS(5) + 257;
-            DROPBITS(5);
-            state->ndist = BITS(5) + 1;
-            DROPBITS(5);
-            state->ncode = BITS(4) + 4;
-            DROPBITS(4);
-#ifndef PKZIP_BUG_WORKAROUND
-            if (state->nlen > 286 || state->ndist > 30) {
-                strm->msg = (char *)"too many length or distance symbols";
-                state->mode = BAD;
-                break;
-            }
-#endif
-            Tracev((stderr, "inflate:       table sizes ok\n"));
-            state->have = 0;
-            state->mode = LENLENS;
-            /*FALLTHRU*/
-        case LENLENS:
-            while (state->have < state->ncode) {
-                NEEDBITS(3);
-                state->lens[order[state->have++]] = (unsigned short)BITS(3);
-                DROPBITS(3);
-            }
-            while (state->have < 19)
-                state->lens[order[state->have++]] = 0;
-            state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
-            state->lenbits = 7;
-            ret = inflate_table(CODES, state->lens, 19, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid code lengths set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       code lengths ok\n"));
-            state->have = 0;
-            state->mode = CODELENS;
-            /*FALLTHRU*/
-        case CODELENS:
-            while (state->have < state->nlen + state->ndist) {
-                for (;;) {
-                    this = state->lencode[BITS(state->lenbits)];
-                    if ((unsigned)(this.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                if (this.val < 16) {
-                    NEEDBITS(this.bits);
-                    DROPBITS(this.bits);
-                    state->lens[state->have++] = this.val;
-                }
-                else {
-                    if (this.val == 16) {
-                        NEEDBITS(this.bits + 2);
-                        DROPBITS(this.bits);
-                        if (state->have == 0) {
-                            strm->msg = (char *)"invalid bit length repeat";
-                            state->mode = BAD;
-                            break;
-                        }
-                        len = state->lens[state->have - 1];
-                        copy = 3 + BITS(2);
-                        DROPBITS(2);
-                    }
-                    else if (this.val == 17) {
-                        NEEDBITS(this.bits + 3);
-                        DROPBITS(this.bits);
-                        len = 0;
-                        copy = 3 + BITS(3);
-                        DROPBITS(3);
-                    }
-                    else {
-                        NEEDBITS(this.bits + 7);
-                        DROPBITS(this.bits);
-                        len = 0;
-                        copy = 11 + BITS(7);
-                        DROPBITS(7);
-                    }
-                    if (state->have + copy > state->nlen + state->ndist) {
-                        strm->msg = (char *)"invalid bit length repeat";
-                        state->mode = BAD;
-                        break;
-                    }
-                    while (copy--)
-                        state->lens[state->have++] = (unsigned short)len;
-                }
-            }
-
-            /* handle error breaks in while */
-            if (state->mode == BAD) break;
-
-            /* build code tables */
-            state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
-            state->lenbits = 9;
-            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid literal/lengths set";
-                state->mode = BAD;
-                break;
-            }
-            state->distcode = (code const FAR *)(state->next);
-            state->distbits = 6;
-            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
-                            &(state->next), &(state->distbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid distances set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       codes ok\n"));
-            state->mode = LEN;
-            /*FALLTHRU*/
-        case LEN:
-            if (have >= 6 && left >= 258) {
-                RESTORE();
-                inflate_fast(strm, out);
-                LOAD();
-                break;
-            }
-            for (;;) {
-                this = state->lencode[BITS(state->lenbits)];
-                if ((unsigned)(this.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if (this.op && (this.op & 0xf0) == 0) {
-                last = this;
-                for (;;) {
-                    this = state->lencode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-            }
-            DROPBITS(this.bits);
-            state->length = (unsigned)this.val;
-            if ((int)(this.op) == 0) {
-                Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
-                        "inflate:         literal '%c'\n" :
-                        "inflate:         literal 0x%02x\n", this.val));
-                state->mode = LIT;
-                break;
-            }
-            if (this.op & 32) {
-                Tracevv((stderr, "inflate:         end of block\n"));
-                state->mode = TYPE;
-                break;
-            }
-            if (this.op & 64) {
-                strm->msg = (char *)"invalid literal/length code";
-                state->mode = BAD;
-                break;
-            }
-            state->extra = (unsigned)(this.op) & 15;
-            state->mode = LENEXT;
-            /*FALLTHRU*/
-        case LENEXT:
-            if (state->extra) {
-                NEEDBITS(state->extra);
-                state->length += BITS(state->extra);
-                DROPBITS(state->extra);
-            }
-            Tracevv((stderr, "inflate:         length %u\n", state->length));
-            state->mode = DIST;
-            /*FALLTHRU*/
-        case DIST:
-            for (;;) {
-                this = state->distcode[BITS(state->distbits)];
-                if ((unsigned)(this.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if ((this.op & 0xf0) == 0) {
-                last = this;
-                for (;;) {
-                    this = state->distcode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-            }
-            DROPBITS(this.bits);
-            if (this.op & 64) {
-                strm->msg = (char *)"invalid distance code";
-                state->mode = BAD;
-                break;
-            }
-            state->offset = (unsigned)this.val;
-            state->extra = (unsigned)(this.op) & 15;
-            state->mode = DISTEXT;
-            /*FALLTHRU*/
-        case DISTEXT:
-            if (state->extra) {
-                NEEDBITS(state->extra);
-                state->offset += BITS(state->extra);
-                DROPBITS(state->extra);
-            }
-#ifdef INFLATE_STRICT
-            if (state->offset > state->dmax) {
-                strm->msg = (char *)"invalid distance too far back";
-                state->mode = BAD;
-                break;
-            }
-#endif
-            if (state->offset > state->whave + out - left) {
-                strm->msg = (char *)"invalid distance too far back";
-                state->mode = BAD;
-                break;
-            }
-            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
-            state->mode = MATCH;
-            /*FALLTHRU*/
-        case MATCH:
-            if (left == 0) goto inf_leave;
-            copy = out - left;
-            if (state->offset > copy) {         /* copy from window */
-                copy = state->offset - copy;
-                if (copy > state->write) {
-                    copy -= state->write;
-                    from = state->window + (state->wsize - copy);
-                }
-                else
-                    from = state->window + (state->write - copy);
-                if (copy > state->length) copy = state->length;
-            }
-            else {                              /* copy from output */
-                from = put - state->offset;
-                copy = state->length;
-            }
-            if (copy > left) copy = left;
-            left -= copy;
-            state->length -= copy;
-            do {
-                *put++ = *from++;
-            } while (--copy);
-            if (state->length == 0) state->mode = LEN;
-            break;
-        case LIT:
-            if (left == 0) goto inf_leave;
-            *put++ = (unsigned char)(state->length);
-            left--;
-            state->mode = LEN;
-            break;
-        case CHECK:
-            if (state->wrap) {
-                NEEDBITS(32);
-                out -= left;
-                strm->total_out += out;
-                state->total += out;
-                if (out)
-                    strm->adler = state->check =
-                        UPDATE(state->check, put - out, out);
-                out = left;
-                if ((
-#ifdef GUNZIP
-                     state->flags ? hold :
-#endif
-                     REVERSE(hold)) != state->check) {
-                    strm->msg = (char *)"incorrect data check";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-                Tracev((stderr, "inflate:   check matches trailer\n"));
-            }
-#ifdef GUNZIP
-            state->mode = LENGTH;
-            /*FALLTHRU*/
-        case LENGTH:
-            if (state->wrap && state->flags) {
-                NEEDBITS(32);
-                if (hold != (state->total & 0xffffffffUL)) {
-                    strm->msg = (char *)"incorrect length check";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-                Tracev((stderr, "inflate:   length matches trailer\n"));
-            }
-#endif
-            state->mode = DONE;
-            /*FALLTHRU*/
-        case DONE:
-            ret = Z_STREAM_END;
-            goto inf_leave;
-        case BAD:
-            ret = Z_DATA_ERROR;
-            goto inf_leave;
-        case MEM:
-            return Z_MEM_ERROR;
-        case SYNC:
-        default:
-            return Z_STREAM_ERROR;
-        }
-
-    /*
-       Return from inflate(), updating the total counts and the check value.
-       If there was no progress during the inflate() call, return a buffer
-       error.  Call updatewindow() to create and/or update the window state.
-       Note: a memory error from inflate() is non-recoverable.
-     */
-  inf_leave:
-    RESTORE();
-    if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
-        if (updatewindow(strm, out)) {
-            state->mode = MEM;
-            return Z_MEM_ERROR;
-        }
-    in -= strm->avail_in;
-    out -= strm->avail_out;
-    strm->total_in += in;
-    strm->total_out += out;
-    state->total += out;
-    if (state->wrap && out)
-        strm->adler = state->check =
-            UPDATE(state->check, strm->next_out - out, out);
-    strm->data_type = state->bits + (state->last ? 64 : 0) +
-                      (state->mode == TYPE ? 128 : 0);
-    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
-        ret = Z_BUF_ERROR;
-    return ret;
-}
-
-/*
-   Release everything inflate allocated for strm: the sliding window, if one
-   was created, and the state structure itself.  strm->state is set to
-   Z_NULL afterwards.  Returns Z_STREAM_ERROR if strm is null, has no state,
-   or has no zfree function; otherwise Z_OK.
- */
-int ZEXPORT inflateEnd(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *st;
-
-    if (strm == Z_NULL)
-        return Z_STREAM_ERROR;
-    if (strm->state == Z_NULL || strm->zfree == (free_func)0)
-        return Z_STREAM_ERROR;
-
-    /* the window is optional; the state block always exists at this point */
-    st = (struct inflate_state FAR *)strm->state;
-    if (st->window != Z_NULL) {
-        ZFREE(strm, st->window);
-    }
-    ZFREE(strm, strm->state);
-    strm->state = Z_NULL;
-
-    Tracev((stderr, "inflate: end\n"));
-    return Z_OK;
-}
-
-/*
-   Preload the sliding window with a dictionary.  For a wrapped stream this
-   is accepted only while inflate() is waiting in the DICT mode, and the
-   Adler-32 of the dictionary must equal the check value saved in the state;
-   for a raw stream (wrap == 0) it is accepted without that comparison.
-   Only the last wsize bytes of a longer dictionary are kept, placed at the
-   end of the window.
-
-   Returns Z_OK on success, Z_STREAM_ERROR for a null stream or the wrong
-   mode, Z_DATA_ERROR if the dictionary id does not match, and Z_MEM_ERROR
-   if the window cannot be allocated.
- */
-int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-const Bytef *dictionary;
-uInt dictLength;
-{
-    struct inflate_state FAR *st;
-    unsigned long sum;          /* Adler-32 of the supplied dictionary */
-    unsigned keep;              /* dictionary bytes that fit in the window */
-
-    /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL)
-        return Z_STREAM_ERROR;
-    st = (struct inflate_state FAR *)strm->state;
-    if (st->mode != DICT && st->wrap != 0)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary id */
-    if (st->mode == DICT) {
-        sum = adler32(adler32(0L, Z_NULL, 0), dictionary, dictLength);
-        if (sum != st->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* make sure a window exists (no new output is copied by this call) */
-    if (updatewindow(strm, strm->avail_out)) {
-        st->mode = MEM;
-        return Z_MEM_ERROR;
-    }
-
-    /* copy the tail of the dictionary so that it ends at the window's end */
-    keep = dictLength > st->wsize ? st->wsize : dictLength;
-    zmemcpy(st->window + st->wsize - keep, dictionary + dictLength - keep,
-            keep);
-    st->whave = keep;
-
-    st->havedict = 1;
-    Tracev((stderr, "inflate:   dictionary set\n"));
-    return Z_OK;
-}
-
-/*
-   Register head as the structure inflate() fills in while it parses a gzip
-   header.  head->done is cleared here; inflate() sets it to 1 once the gzip
-   header has been read, or to -1 when it expects a zlib header instead.
-   Returns Z_STREAM_ERROR for a null stream or one not set up for gzip
-   decoding (wrap bit 1 clear), otherwise Z_OK.
- */
-int ZEXPORT inflateGetHeader(strm, head)
-z_streamp strm;
-gz_headerp head;
-{
-    struct inflate_state FAR *st;
-
-    /* a stream with state is required */
-    if (strm == Z_NULL || strm->state == Z_NULL)
-        return Z_STREAM_ERROR;
-
-    /* and it must accept gzip wrapping */
-    st = (struct inflate_state FAR *)strm->state;
-    if (!(st->wrap & 2))
-        return Z_STREAM_ERROR;
-
-    /* mark the caller's structure as not yet filled in, then remember it */
-    head->done = 0;
-    st->head = head;
-    return Z_OK;
-}
-
-/*
-   Scan buf[0..len-1] for the four-byte marker 0, 0, 0xff, 0xff, stopping at
-   the marker or at the end of the buffer, whichever comes first.
-
-   *have carries the progress between calls: on entry it is the number of
-   marker bytes already matched in order (0..3; start with 0 for the first
-   call), and on return it holds the updated count.  When *have comes back
-   as four the marker was found and the return value is the number of bytes
-   consumed, including the marker's last byte.  When *have is less than four
-   the whole buffer was consumed, the return value is len, and the search
-   can be resumed by calling again with more data and the same *have.
- */
-local unsigned syncsearch(have, buf, len)
-unsigned FAR *have;
-unsigned char FAR *buf;
-unsigned len;
-{
-    unsigned matched = *have;   /* marker bytes matched so far */
-    unsigned pos;               /* index of the byte being examined */
-    int want;                   /* byte value that would extend the match */
-
-    for (pos = 0; pos < len && matched < 4; pos++) {
-        want = matched < 2 ? 0 : 0xff;
-        if ((int)(buf[pos]) == want) {
-            matched++;
-        }
-        else if (buf[pos] != 0) {
-            /* a nonzero mismatch cannot start the marker: start over */
-            matched = 0;
-        }
-        else {
-            /* a zero byte that did not extend the match */
-            matched = 4 - matched;
-        }
-    }
-
-    *have = matched;
-    return pos;
-}
-
-/*
-   Skip input until the marker 0, 0, 0xff, 0xff is found, then set inflate
-   up to resume at the block that follows it.  The search starts with any
-   whole bytes still held in the bit accumulator and continues through
-   strm->next_in; it may be spread over several calls (the stream stays in
-   the SYNC mode in between).
-
-   Returns Z_STREAM_ERROR for a null stream, Z_BUF_ERROR if there is no
-   input and less than a byte in the bit accumulator, Z_DATA_ERROR if the
-   marker has not been found yet, and Z_OK once it has; in that case
-   total_in and total_out are preserved across the internal inflateReset().
- */
-int ZEXPORT inflateSync(strm)
-z_streamp strm;
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state FAR *state;
-
-    /* check parameters */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        /* Discard the leftover bits of a partly consumed byte.  The oldest
-           input bits are the low bits of hold, so they are dropped with a
-           right shift, exactly as BYTEBITS() does in inflate().  A left
-           shift here would misalign every byte extracted below. */
-        state->hold >>= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        (void) syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    (void) inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-
-/*
-   Report whether inflate is sitting at the end of a block produced by
-   Z_SYNC_FLUSH or Z_FULL_FLUSH: the result is nonzero when the stream is in
-   the STORED mode with an empty bit accumulator, and zero otherwise.  A
-   null stream or missing state yields Z_STREAM_ERROR.
-
-   One PPP implementation uses this as an extra safety check.  PPP emits
-   Z_SYNC_FLUSH but strips the length bytes of the resulting empty stored
-   block, so when decompressing it verifies that, at the end of an input
-   packet, inflate is waiting for exactly those length bytes.
- */
-int ZEXPORT inflateSyncPoint(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *st;
-
-    if (strm == Z_NULL)
-        return Z_STREAM_ERROR;
-    st = (struct inflate_state FAR *)strm->state;
-    if (st == Z_NULL)
-        return Z_STREAM_ERROR;
-
-    if (st->mode != STORED)
-        return 0;
-    return st->bits == 0;
-}
-
-/*
-   Duplicate an inflate stream.  dest receives a copy of the z_stream
-   structure of source together with its own copy of the internal state and,
-   if source has one, of the sliding window; both are obtained with the
-   allocator of source.  Pointers into the state's own code table are
-   rebased so that they refer to the copy.
-
-   Returns Z_OK on success, Z_STREAM_ERROR if either stream is null or
-   source lacks state, zalloc or zfree, and Z_MEM_ERROR if an allocation
-   fails (nothing is left allocated and dest is untouched in that case).
- */
-int ZEXPORT inflateCopy(dest, source)
-z_streamp dest;
-z_streamp source;
-{
-    struct inflate_state FAR *from;     /* state being duplicated */
-    struct inflate_state FAR *dup;      /* the new state */
-    unsigned char FAR *win = Z_NULL;    /* the new window, if one is needed */
-
-    /* check input */
-    if (dest == Z_NULL || source == Z_NULL)
-        return Z_STREAM_ERROR;
-    if (source->state == Z_NULL ||
-        source->zalloc == (alloc_func)0 || source->zfree == (free_func)0)
-        return Z_STREAM_ERROR;
-    from = (struct inflate_state FAR *)source->state;
-
-    /* allocate space: the state first, then a window only if one exists */
-    dup = (struct inflate_state FAR *)
-          ZALLOC(source, 1, sizeof(struct inflate_state));
-    if (dup == Z_NULL)
-        return Z_MEM_ERROR;
-    if (from->window != Z_NULL) {
-        win = (unsigned char FAR *)
-              ZALLOC(source, 1U << from->wbits, sizeof(unsigned char));
-        if (win == Z_NULL) {
-            ZFREE(source, dup);
-            return Z_MEM_ERROR;
-        }
-    }
-
-    /* copy state */
-    zmemcpy(dest, source, sizeof(z_stream));
-    zmemcpy(dup, from, sizeof(struct inflate_state));
-
-    /* table pointers that point into from->codes must point into dup->codes;
-       pointers to tables outside the state are left as copied */
-    if (from->lencode >= from->codes &&
-        from->lencode < from->codes + ENOUGH) {
-        dup->lencode = dup->codes + (from->lencode - from->codes);
-        dup->distcode = dup->codes + (from->distcode - from->codes);
-    }
-    dup->next = dup->codes + (from->next - from->codes);
-
-    if (win != Z_NULL)
-        zmemcpy(win, from->window, 1U << from->wbits);
-    dup->window = win;
-    dest->state = (struct internal_state FAR *)dup;
-    return Z_OK;
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inflate.h b/sys/contrib/opensolaris/uts/common/zmod/inflate.h
deleted file mode 100644
index 4d28b22..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inflate.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* inflate.h -- internal inflate state definition
- * Copyright (C) 1995-2004 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-/* define NO_GZIP when compiling if you want to disable gzip header and
- trailer decoding by inflate(). NO_GZIP would be used to avoid linking in
- the crc code when it is not needed. For shared libraries, gzip decoding
- should be left enabled. */
-/* GUNZIP is the symbol the inflate code actually tests (#ifdef GUNZIP); it is
- defined here unless NO_GZIP was requested. */
-#ifndef NO_GZIP
-# define GUNZIP
-#endif
-
-/* Possible inflate modes between inflate() calls */
-/* In the per-mode comments, "i:" marks a mode that waits for input, "o:" one
- that waits for output space, and "i/o:" one that may wait for either.
- The numeric order of the constants is significant: inflate() compares modes
- with "<" (state->mode < CHECK) when deciding whether to update the window,
- so new modes must not be inserted or reordered casually. */
-typedef enum {
- HEAD, /* i: waiting for magic header */
- FLAGS, /* i: waiting for method and flags (gzip) */
- TIME, /* i: waiting for modification time (gzip) */
- OS, /* i: waiting for extra flags and operating system (gzip) */
- EXLEN, /* i: waiting for extra length (gzip) */
- EXTRA, /* i: waiting for extra bytes (gzip) */
- NAME, /* i: waiting for end of file name (gzip) */
- COMMENT, /* i: waiting for end of comment (gzip) */
- HCRC, /* i: waiting for header crc (gzip) */
- DICTID, /* i: waiting for dictionary check value */
- DICT, /* waiting for inflateSetDictionary() call */
- TYPE, /* i: waiting for type bits, including last-flag bit */
- TYPEDO, /* i: same, but skip check to exit inflate on new block */
- STORED, /* i: waiting for stored size (length and complement) */
- COPY, /* i/o: waiting for input or output to copy stored block */
- TABLE, /* i: waiting for dynamic block table lengths */
- LENLENS, /* i: waiting for code length code lengths */
- CODELENS, /* i: waiting for length/lit and distance code lengths */
- LEN, /* i: waiting for length/lit code */
- LENEXT, /* i: waiting for length extra bits */
- DIST, /* i: waiting for distance code */
- DISTEXT, /* i: waiting for distance extra bits */
- MATCH, /* o: waiting for output space to copy string */
- LIT, /* o: waiting for output space to write literal */
- CHECK, /* i: waiting for 32-bit check value */
- LENGTH, /* i: waiting for 32-bit length (gzip) */
- DONE, /* finished check, done -- remain here until reset */
- BAD, /* got a data error -- remain here until reset */
- MEM, /* got an inflate() memory error -- remain here until reset */
- SYNC /* looking for synchronization bytes to restart inflate() */
-} inflate_mode;
-
-/*
- State transitions between above modes -
-
- (most modes can go to the BAD or MEM mode -- not shown for clarity)
-
- Process header:
- HEAD -> (gzip) or (zlib)
- (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
- NAME -> COMMENT -> HCRC -> TYPE
- (zlib) -> DICTID or TYPE
- DICTID -> DICT -> TYPE
- Read deflate blocks:
- TYPE -> STORED or TABLE or LEN or CHECK
- STORED -> COPY -> TYPE
- TABLE -> LENLENS -> CODELENS -> LEN
- Read deflate codes:
- LEN -> LENEXT or LIT or TYPE
- LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
- LIT -> LEN
- Process trailer:
- CHECK -> LENGTH -> DONE
- */
-
-/* state maintained between inflate() calls. Approximately 7K bytes. */
-/* inflate() copies hold and bits into local variables on entry and writes
- them back before returning, so these fields are only current between
- calls. */
-struct inflate_state {
- inflate_mode mode; /* current inflate mode */
- int last; /* true if processing last block */
- int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
- int havedict; /* true if dictionary provided */
- int flags; /* gzip header method and flags (0 if zlib) */
- unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */
- unsigned long check; /* protected copy of check value */
- unsigned long total; /* protected copy of output count */
- gz_headerp head; /* where to save gzip header information */
- /* sliding window */
- unsigned wbits; /* log base 2 of requested window size */
- unsigned wsize; /* window size or zero if not using window */
- unsigned whave; /* valid bytes in the window */
- unsigned write; /* window write index */
- unsigned char FAR *window; /* allocated sliding window, if needed */
- /* bit accumulator */
- unsigned long hold; /* input bit accumulator */
- unsigned bits; /* number of bits in hold */
- /* for string and stored block copying */
- unsigned length; /* literal or length of data to copy */
- unsigned offset; /* distance back to copy string from */
- /* for table and code decoding */
- unsigned extra; /* extra bits needed */
- /* fixed and dynamic code tables */
- code const FAR *lencode; /* starting table for length/literal codes */
- code const FAR *distcode; /* starting table for distance codes */
- unsigned lenbits; /* index bits for lencode */
- unsigned distbits; /* index bits for distcode */
- /* dynamic table building */
- unsigned ncode; /* number of code length code lengths */
- unsigned nlen; /* number of length code lengths */
- unsigned ndist; /* number of distance code lengths */
- unsigned have; /* number of code lengths in lens[]; inflateSync() also
- uses it to count sync marker bytes matched so far */
- code FAR *next; /* next available space in codes[] */
- unsigned short lens[320]; /* temporary storage for code lengths */
- unsigned short work[288]; /* work area for code table building */
- code codes[ENOUGH]; /* space for code tables */
-};
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inftrees.c b/sys/contrib/opensolaris/uts/common/zmod/inftrees.c
deleted file mode 100644
index 2d37167..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inftrees.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "zutil.h"
-#include "inftrees.h"
-
-#define MAXBITS 15
-
-static const char inflate_copyright[] =
- " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
-/*
- If you use the zlib library in a product, an acknowledgment is welcome
- in the documentation of your product. If for some reason you cannot
- include such an acknowledgment, I would appreciate that you keep this
- copyright string in the executable of your product.
- */
-
-/*
- Build a set of tables to decode the provided canonical Huffman code.
- The code lengths are lens[0..codes-1]. The result starts at *table,
- whose indices are 0..2^bits-1. work is a writable array of at least
- codes shorts, which is used as a work area. type is the type of code
- to be generated, CODES, LENS, or DISTS. On return, zero is success,
- -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
- on return points to the next available entry's address. bits is the
- requested root table index bits, and on return it is the actual root
- table index bits. It will differ if the request is greater than the
- longest code or if it is less than the shortest code.
- */
-int inflate_table(type, lens, codes, table, bits, work)
-codetype type;
-unsigned short FAR *lens;
-unsigned codes;
-code FAR * FAR *table;
-unsigned FAR *bits;
-unsigned short FAR *work;
-{
-    unsigned len;               /* a code's length in bits */
-    unsigned sym;               /* index of code symbols */
-    unsigned min, max;          /* minimum and maximum code lengths */
-    unsigned root;              /* number of index bits for root table */
-    unsigned curr;              /* number of index bits for current table */
-    unsigned drop;              /* code bits to drop for sub-table */
-    int left;                   /* number of prefix codes available */
-    unsigned used;              /* code entries in table used */
-    unsigned huff;              /* Huffman code */
-    unsigned incr;              /* for incrementing code, index */
-    unsigned fill;              /* index for replicating entries */
-    unsigned low;               /* low bits for current root entry */
-    unsigned mask;              /* mask for low root bits */
-    code this;                  /* table entry for duplication */
-    code FAR *next;             /* next available space in table */
-    const unsigned short FAR *base;     /* base value table to use */
-    const unsigned short FAR *extra;    /* extra bits table to use */
-    unsigned origin;            /* symbol value held in base[0], extra[0] */
-    int end;                    /* use base and extra for symbol > end */
-    unsigned short count[MAXBITS+1];    /* number of codes of each length */
-    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
-    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
-        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
-        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
-    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
-        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
-    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
-        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
-        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
-        8193, 12289, 16385, 24577, 0, 0};
-    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
-        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
-        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
-        28, 28, 29, 29, 64, 64};
-
-    /*
-       Process a set of code lengths to create a canonical Huffman code.  The
-       code lengths are lens[0..codes-1].  Each length corresponds to the
-       symbols 0..codes-1.  The Huffman code is generated by first sorting the
-       symbols by length from short to long, and retaining the symbol order
-       for codes with equal lengths.  Then the code starts with all zero bits
-       for the first code of the shortest length, and the codes are integer
-       increments for the same length, and zeros are appended as the length
-       increases.  For the deflate format, these bits are stored backwards
-       from their more natural integer increment ordering, and so when the
-       decoding tables are built in the large loop below, the integer codes
-       are incremented backwards.
-
-       This routine assumes, but does not check, that all of the entries in
-       lens[] are in the range 0..MAXBITS.  The caller must assure this.
-       1..MAXBITS is interpreted as that code length.  zero means that that
-       symbol does not occur in this code.
-
-       The codes are sorted by computing a count of codes for each length,
-       creating from that a table of starting indices for each length in the
-       sorted table, and then entering the symbols in order in the sorted
-       table.  The sorted table is work[], with that space being provided by
-       the caller.
-
-       The length counts are used for other purposes as well, i.e. finding
-       the minimum and maximum length codes, determining if there are any
-       codes at all, checking for a valid set of lengths, and looking ahead
-       at length counts to determine sub-table sizes when building the
-       decoding tables.
-     */
-
-    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
-    for (len = 0; len <= MAXBITS; len++)
-        count[len] = 0;
-    for (sym = 0; sym < codes; sym++)
-        count[lens[sym]]++;
-
-    /* bound code lengths, force root to be within code lengths */
-    root = *bits;
-    for (max = MAXBITS; max >= 1; max--)
-        if (count[max] != 0) break;
-    if (root > max) root = max;
-    if (max == 0) {                     /* no symbols to code at all */
-        this.op = (unsigned char)64;    /* invalid code marker */
-        this.bits = (unsigned char)1;
-        this.val = (unsigned short)0;
-        *(*table)++ = this;             /* make a table to force an error */
-        *(*table)++ = this;
-        *bits = 1;
-        return 0;     /* no symbols, but wait for decoding to report error */
-    }
-    for (min = 1; min <= MAXBITS; min++)
-        if (count[min] != 0) break;
-    if (root < min) root = min;
-
-    /* check for an over-subscribed or incomplete set of lengths */
-    left = 1;
-    for (len = 1; len <= MAXBITS; len++) {
-        left <<= 1;
-        left -= count[len];
-        if (left < 0) return -1;        /* over-subscribed */
-    }
-    if (left > 0 && (type == CODES || max != 1))
-        return -1;                      /* incomplete set */
-
-    /* generate offsets into symbol table for each length for sorting */
-    offs[1] = 0;
-    for (len = 1; len < MAXBITS; len++)
-        offs[len + 1] = offs[len] + count[len];
-
-    /* sort symbols by length, by symbol order within each length */
-    for (sym = 0; sym < codes; sym++)
-        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
-
-    /*
-       Create and fill in decoding tables.  In this loop, the table being
-       filled is at next and has curr index bits.  The code being used is huff
-       with length len.  That code is converted to an index by dropping drop
-       bits off of the bottom.  For codes where len is less than drop + curr,
-       those top drop + curr - len bits are incremented through all values to
-       fill the table with replicated entries.
-
-       root is the number of index bits for the root table.  When len exceeds
-       root, sub-tables are created pointed to by the root entry with an index
-       of the low root bits of huff.  This is saved in low to check for when a
-       new sub-table should be started.  drop is zero when the root table is
-       being filled, and drop is root when sub-tables are being filled.
-
-       When a new sub-table is needed, it is necessary to look ahead in the
-       code lengths to determine what size sub-table is needed.  The length
-       counts are used for this, and so count[] is decremented as codes are
-       entered in the tables.
-
-       used keeps track of how many table entries have been allocated from the
-       provided *table space.  It is checked when a LENS table is being made
-       against the space in *table, ENOUGH, minus the maximum space needed by
-       the worst case distance code, MAXD.  This should never happen, but the
-       sufficiency of ENOUGH has not been proven exhaustively, hence the check.
-       This assumes that when type == LENS, bits == 9.
-
-       sym increments through all symbols, and the loop terminates when
-       all codes of length max, i.e. all codes, have been processed.  This
-       routine permits incomplete codes, so another loop after this one fills
-       in the rest of the decoding tables with invalid code markers.
-     */
-
-    /*
-       set up for code type.  base and extra always point at element zero of
-       their table; origin is the symbol value that element describes and is
-       subtracted when indexing.  (Biasing the pointers themselves by -257
-       would form pointers in front of the arrays, which is undefined.)
-     */
-    switch (type) {
-    case CODES:
-        base = extra = work;    /* dummy value--not used */
-        origin = 0;
-        end = 19;
-        break;
-    case LENS:
-        base = lbase;
-        extra = lext;
-        origin = 257;           /* lbase[0], lext[0] describe symbol 257 */
-        end = 256;
-        break;
-    default:            /* DISTS */
-        base = dbase;
-        extra = dext;
-        origin = 0;
-        end = -1;
-        break;
-    }
-
-    /* initialize state for loop */
-    huff = 0;                   /* starting code */
-    sym = 0;                    /* starting code symbol */
-    len = min;                  /* starting code length */
-    next = *table;              /* current table to fill in */
-    curr = root;                /* current table index bits */
-    drop = 0;                   /* current bits to drop from code for index */
-    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
-    used = 1U << root;          /* use root table entries */
-    mask = used - 1;            /* mask for comparing low */
-
-    /* check available table space */
-    if (type == LENS && used >= ENOUGH - MAXD)
-        return 1;
-
-    /* process all codes and make table entries */
-    for (;;) {
-        /* create table entry */
-        this.bits = (unsigned char)(len - drop);
-        if ((int)(work[sym]) < end) {
-            this.op = (unsigned char)0;
-            this.val = work[sym];
-        }
-        else if ((int)(work[sym]) > end) {
-            /* work[sym] > end >= origin - 1 here, so the index cannot wrap */
-            this.op = (unsigned char)(extra[work[sym] - origin]);
-            this.val = base[work[sym] - origin];
-        }
-        else {
-            this.op = (unsigned char)(32 + 64);         /* end of block */
-            this.val = 0;
-        }
-
-        /* replicate for those indices with low len bits equal to huff */
-        incr = 1U << (len - drop);
-        fill = 1U << curr;
-        min = fill;                 /* save offset to next table */
-        do {
-            fill -= incr;
-            next[(huff >> drop) + fill] = this;
-        } while (fill != 0);
-
-        /* backwards increment the len-bit code huff */
-        incr = 1U << (len - 1);
-        while (huff & incr)
-            incr >>= 1;
-        if (incr != 0) {
-            huff &= incr - 1;
-            huff += incr;
-        }
-        else
-            huff = 0;
-
-        /* go to next symbol, update count, len */
-        sym++;
-        if (--(count[len]) == 0) {
-            if (len == max) break;
-            len = lens[work[sym]];
-        }
-
-        /* create new sub-table if needed */
-        if (len > root && (huff & mask) != low) {
-            /* if first time, transition to sub-tables */
-            if (drop == 0)
-                drop = root;
-
-            /* increment past last table */
-            next += min;            /* here min is 1 << curr */
-
-            /* determine length of next table */
-            curr = len - drop;
-            left = (int)(1 << curr);
-            while (curr + drop < max) {
-                left -= count[curr + drop];
-                if (left <= 0) break;
-                curr++;
-                left <<= 1;
-            }
-
-            /* check for enough space */
-            used += 1U << curr;
-            if (type == LENS && used >= ENOUGH - MAXD)
-                return 1;
-
-            /* point entry in root table to sub-table */
-            low = huff & mask;
-            (*table)[low].op = (unsigned char)curr;
-            (*table)[low].bits = (unsigned char)root;
-            (*table)[low].val = (unsigned short)(next - *table);
-        }
-    }
-
-    /*
-       Fill in rest of table for incomplete codes.  This loop is similar to the
-       loop above in incrementing huff for table indices.  It is assumed that
-       len is equal to curr + drop, so there is no loop needed to increment
-       through high index bits.  When the current sub-table is filled, the loop
-       drops back to the root table to fill in any remaining entries there.
-     */
-    this.op = (unsigned char)64;                /* invalid code marker */
-    this.bits = (unsigned char)(len - drop);
-    this.val = (unsigned short)0;
-    while (huff != 0) {
-        /* when done with sub-table, drop back to root table */
-        if (drop != 0 && (huff & mask) != low) {
-            drop = 0;
-            len = root;
-            next = *table;
-            this.bits = (unsigned char)len;
-        }
-
-        /* put invalid code marker in table */
-        next[huff >> drop] = this;
-
-        /* backwards increment the len-bit code huff */
-        incr = 1U << (len - 1);
-        while (huff & incr)
-            incr >>= 1;
-        if (incr != 0) {
-            huff &= incr - 1;
-            huff += incr;
-        }
-        else
-            huff = 0;
-    }
-
-    /* set return parameters */
-    *table += used;
-    *bits = root;
-    return 0;
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/inftrees.h b/sys/contrib/opensolaris/uts/common/zmod/inftrees.h
deleted file mode 100644
index 546e8c0..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/inftrees.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* inftrees.h -- header to use inftrees.c
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-/* Structure for decoding tables. Each entry provides either the
- information needed to do the operation requested by the code that
- indexed that table entry, or it provides a pointer to another
- table that indexes more bits of the code. op indicates whether
- the entry is a pointer to another table, a literal, a length or
- distance, an end-of-block, or an invalid code. For a table
- pointer, the low four bits of op is the number of index bits of
- that table. For a length or distance, the low four bits of op
- is the number of extra bits to get after the code. bits is
- the number of bits in this code or part of the code to drop off
- of the bit buffer. val is the actual byte to output in the case
- of a literal, the base length or distance, or the offset from
- the current table to the next table. Each entry is four bytes. */
-typedef struct {
- unsigned char op; /* operation, extra bits, table bits (encodings are listed in the "op values" comment that follows) */
- unsigned char bits; /* bits in this part of the code, i.e. how many to drop off of the bit buffer */
- unsigned short val; /* literal byte, base length or distance, or offset in table to the next table */
-} code;
-
-/* op values as set by inflate_table():
- 00000000 - literal
- 0000tttt - table link, tttt != 0 is the number of table index bits
- 0001eeee - length or distance, eeee is the number of extra bits
- 01100000 - end of block
- 01000000 - invalid code
- */
-
-/* Maximum size of dynamic tree. The maximum found in a long but non-
- exhaustive search was 1444 code structures (852 for length/literals
- and 592 for distances, the latter actually the result of an
- exhaustive search). The true maximum is not known, but the value
- below is more than safe. */
-#define ENOUGH 2048
-#define MAXD 592
-
-/* Type of code to build for inflate_table() */
-typedef enum {
- CODES, /* presumably the code-length code; inflate_table() uses no base/extra table for it */
- LENS, /* literal/length code: symbols above 256 take the length base/extra tables, 256 is end of block */
- DISTS /* distance code: every symbol takes the distance base/extra tables */
-} codetype;
-
-extern int inflate_table OF((codetype type, unsigned short FAR *lens,
- unsigned codes, code FAR * FAR *table,
- unsigned FAR *bits, unsigned short FAR *work));
diff --git a/sys/contrib/opensolaris/uts/common/zmod/trees.c b/sys/contrib/opensolaris/uts/common/zmod/trees.c
deleted file mode 100644
index ce0cebc..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/trees.c
+++ /dev/null
@@ -1,1219 +0,0 @@
-/* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-2005 Jean-loup Gailly
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * ALGORITHM
- *
- * The "deflation" process uses several Huffman trees. The more
- * common source values are represented by shorter bit sequences.
- *
- * Each code tree is stored in a compressed form which is itself
- * a Huffman encoding of the lengths of all the code strings (in
- * ascending order by source values). The actual code strings are
- * reconstructed from the lengths in the inflate process, as described
- * in the deflate specification.
- *
- * REFERENCES
- *
- * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
- * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
- *
- * Storer, James A.
- * Data Compression: Methods and Theory, pp. 49-50.
- * Computer Science Press, 1988. ISBN 0-7167-8156-5.
- *
- * Sedgewick, R.
- * Algorithms, p290.
- * Addison-Wesley, 1983. ISBN 0-201-06672-6.
- */
-
-/* #define GEN_TREES_H */
-
-#include "deflate.h"
-
-#ifdef DEBUG
-# include <ctype.h>
-#endif
-
-/* ===========================================================================
- * Constants
- */
-
-#define MAX_BL_BITS 7
-/* Bit length codes must not exceed MAX_BL_BITS bits */
-
-#define END_BLOCK 256
-/* end of block literal code */
-
-#define REP_3_6 16
-/* repeat previous bit length 3-6 times (2 bits of repeat count) */
-
-#define REPZ_3_10 17
-/* repeat a zero length 3-10 times (3 bits of repeat count) */
-
-#define REPZ_11_138 18
-/* repeat a zero length 11-138 times (7 bits of repeat count) */
-
-local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
- = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
-
-local const int extra_dbits[D_CODES] /* extra bits for each distance code */
- = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-
-local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
- = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
-
-local const uch bl_order[BL_CODES]
- = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
-/* The lengths of the bit length codes are sent in order of decreasing
- * probability, to avoid transmitting the lengths for unused bit length codes.
- */
-
-#define Buf_size (8 * 2*sizeof(char))
-/* Number of bits used within bi_buf. (bi_buf might be implemented on
- * more than 16 bits on some systems.)
- */
-
-/* ===========================================================================
- * Local data. These are initialized only once.
- */
-
-#define DIST_CODE_LEN 512 /* see definition of array dist_code below */
-
-#if defined(GEN_TREES_H) || !defined(STDC)
-/* non ANSI compilers may not accept trees.h */
-
-local ct_data static_ltree[L_CODES+2];
-/* The static literal tree. Since the bit lengths are imposed, there is no
- * need for the L_CODES extra codes used during heap construction. However
- * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
- * below).
- */
-
-local ct_data static_dtree[D_CODES];
-/* The static distance tree. (Actually a trivial tree since all codes use
- * 5 bits.)
- */
-
-uch _dist_code[DIST_CODE_LEN];
-/* Distance codes. The first 256 values correspond to the distances
- * 3 .. 258, the last 256 values correspond to the top 8 bits of
- * the 15 bit distances.
- */
-
-uch _length_code[MAX_MATCH-MIN_MATCH+1];
-/* length code for each normalized match length (0 == MIN_MATCH) */
-
-local int base_length[LENGTH_CODES];
-/* First normalized length for each code (0 = MIN_MATCH) */
-
-local int base_dist[D_CODES];
-/* First normalized distance for each code (0 = distance of 1) */
-
-#else
-# include "trees.h"
-#endif /* GEN_TREES_H */
-
-struct static_tree_desc_s {
- const ct_data *static_tree; /* static tree or NULL (the bit length descriptor has none) */
- const intf *extra_bits; /* extra bits for each code or NULL */
- int extra_base; /* base index for extra_bits: code n uses extra_bits[n - extra_base] when n >= extra_base */
- int elems; /* max number of elements in the tree */
- int max_length; /* max bit length for the codes; gen_bitlen() clamps longer lengths to this */
-};
-
-local static_tree_desc static_l_desc =
-{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
-
-local static_tree_desc static_d_desc =
-{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
-
-local static_tree_desc static_bl_desc =
-{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
-
-/* ===========================================================================
- * Local (static) routines in this file.
- */
-
-local void tr_static_init OF((void));
-local void init_block OF((deflate_state *s));
-local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
-local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
-local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
-local void build_tree OF((deflate_state *s, tree_desc *desc));
-local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
-local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
-local int build_bl_tree OF((deflate_state *s));
-local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
- int blcodes));
-local void compress_block OF((deflate_state *s, ct_data *ltree,
- ct_data *dtree));
-local void set_data_type OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned value, int length));
-local void bi_windup OF((deflate_state *s));
-local void bi_flush OF((deflate_state *s));
-local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
- int header));
-
-#ifdef GEN_TREES_H
-local void gen_trees_header OF((void));
-#endif
-
-#ifndef DEBUG
-# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
- /* Send a code of the given tree. c and tree must not have side effects */
-
-#else /* DEBUG */
-# define send_code(s, c, tree) \
- { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
- send_bits(s, tree[c].Code, tree[c].Len); }
-#endif
-
-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
- put_byte(s, (uch)((w) & 0xff)); \
- put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
-/* ===========================================================================
- * Send a value on a given number of bits.
- * IN assertion: length <= 16 and value fits in length bits.
- */
-#ifdef DEBUG
-local void send_bits OF((deflate_state *s, int value, int length));
-
-local void send_bits(s, value, length)
- deflate_state *s;
- int value; /* value to send; must fit in length bits */
- int length; /* number of bits; the Assert below requires 1..15 */
-{
- Tracevv((stderr," l %2d v %4x ", length, value));
- Assert(length > 0 && length <= 15, "invalid length");
- s->bits_sent += (ulg)length; /* running count of bits emitted (DEBUG builds only) */
-
- /* If not enough room in bi_buf, use (valid) bits from bi_buf and
- * (16 - bi_valid) bits from value, leaving (length - (16-bi_valid))
- * unused bits in value. The full 16-bit buffer is then flushed with
- * put_short() and the leftover high bits of value start the next one.
- */
- if (s->bi_valid > (int)Buf_size - length) {
- s->bi_buf |= (value << s->bi_valid);
- put_short(s, s->bi_buf);
- s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); /* keep only the bits that did not fit */
- s->bi_valid += length - Buf_size;
- } else {
- s->bi_buf |= value << s->bi_valid; /* value fits: append above the bits already held */
- s->bi_valid += length;
- }
-}
-#else /* !DEBUG */
-
-#define send_bits(s, value, length) \
-{ int len = length;\
- if (s->bi_valid > (int)Buf_size - len) {\
- int val = value;\
- s->bi_buf |= (val << s->bi_valid);\
- put_short(s, s->bi_buf);\
- s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
- s->bi_valid += len - Buf_size;\
- } else {\
- s->bi_buf |= (value) << s->bi_valid;\
- s->bi_valid += len;\
- }\
-}
-#endif /* DEBUG */
-
-
-/* the arguments must not have side effects */
-
-/* ===========================================================================
- * Initialize the various 'constant' tables.
- */
-local void tr_static_init()
-{ /* Builds static_ltree, static_dtree, _dist_code, _length_code,
- * base_length and base_dist at run time. The body is compiled only when
- * GEN_TREES_H is defined or STDC is not defined; otherwise the function
- * is empty.
- */
-#if defined(GEN_TREES_H) || !defined(STDC)
- static int static_init_done = 0; /* set to 1 once the tables have been built */
- int n; /* iterates over tree elements */
- int bits; /* bit counter */
- int length; /* length value */
- int code; /* code value */
- int dist; /* distance index */
- ush bl_count[MAX_BITS+1];
- /* number of codes at each bit length for an optimal tree */
-
- if (static_init_done) return; /* an earlier call already built the tables */
-
- /* For some embedded targets, global variables are not initialized: */
- static_l_desc.static_tree = static_ltree;
- static_l_desc.extra_bits = extra_lbits;
- static_d_desc.static_tree = static_dtree;
- static_d_desc.extra_bits = extra_dbits;
- static_bl_desc.extra_bits = extra_blbits;
-
- /* Initialize the mapping length (0..255) -> length code (0..28).
- * Each code covers 1 << extra_lbits[code] consecutive lengths; the last
- * code (LENGTH_CODES-1) is handled separately after the loop.
- */
- length = 0;
- for (code = 0; code < LENGTH_CODES-1; code++) {
- base_length[code] = length;
- for (n = 0; n < (1<<extra_lbits[code]); n++) {
- _length_code[length++] = (uch)code;
- }
- }
- Assert (length == 256, "tr_static_init: length != 256");
- /* Note that the length 255 (match length 258) can be represented
- * in two different ways: code 284 + 5 bits or code 285, so we
- * overwrite length_code[255] to use the best encoding:
- */
- _length_code[length-1] = (uch)code;
-
- /* Initialize the mapping dist (0..32K) -> dist code (0..29).
- * The first 16 codes are indexed directly by distance (0..255); the
- * remaining codes are indexed by distance >> 7 in the upper half of
- * _dist_code[].
- */
- dist = 0;
- for (code = 0 ; code < 16; code++) {
- base_dist[code] = dist;
- for (n = 0; n < (1<<extra_dbits[code]); n++) {
- _dist_code[dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: dist != 256");
- dist >>= 7; /* from now on, all distances are divided by 128 */
- for ( ; code < D_CODES; code++) {
- base_dist[code] = dist << 7;
- for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
- _dist_code[256 + dist++] = (uch)code;
- }
- }
- Assert (dist == 256, "tr_static_init: 256+dist != 512");
-
- /* Construct the codes of the static literal tree: fixed lengths of
- * 8, 9, 7 and 8 bits for the symbol ranges 0..143, 144..255, 256..279
- * and 280..287, counted per length in bl_count[] for gen_codes().
- */
- for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
- n = 0;
- while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
- while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
- while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
- while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
- /* Codes 286 and 287 do not exist, but we must include them in the
- * tree construction to get a canonical Huffman tree (longest code
- * all ones)
- */
- gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
-
- /* The static distance tree is trivial: every code is 5 bits long and
- * its Code field is the bit-reversed code number.
- */
- for (n = 0; n < D_CODES; n++) {
- static_dtree[n].Len = 5;
- static_dtree[n].Code = bi_reverse((unsigned)n, 5);
- }
- static_init_done = 1;
-
-# ifdef GEN_TREES_H
- gen_trees_header();
-# endif
-#endif /* defined(GEN_TREES_H) || !defined(STDC) */
-}
-
-/* ===========================================================================
- * Generate the file trees.h describing the static trees.
- */
-#ifdef GEN_TREES_H
-# ifndef DEBUG
-# include <stdio.h>
-# endif
-
-# define SEPARATOR(i, last, width) \
- ((i) == (last)? "\n};\n\n" : \
- ((i) % (width) == (width)-1 ? ",\n" : ", "))
-
-void gen_trees_header()
-{
-    /* Writes the tables built by tr_static_init() to "trees.h" in the
-     * current directory as C initializers.  Returns without writing
-     * anything if the file cannot be opened.
-     */
-    FILE *header = fopen("trees.h", "w");
-    int i;
-
-    Assert (header != NULL, "Can't open trees.h");
-    /* Assert() is assumed to expand to nothing outside DEBUG builds, so
-     * guard explicitly: a null stream must never reach fprintf()/fclose().
-     */
-    if (header == NULL) return;
-
-    fprintf(header,
-            "/* header created automatically with -DGEN_TREES_H */\n\n");
-
-    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
-    for (i = 0; i < L_CODES+2; i++) {
-        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
-                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
-    }
-
-    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
-    for (i = 0; i < D_CODES; i++) {
-        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
-                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
-    }
-
-    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
-    for (i = 0; i < DIST_CODE_LEN; i++) {
-        fprintf(header, "%2u%s", _dist_code[i],
-                SEPARATOR(i, DIST_CODE_LEN-1, 20));
-    }
-
-    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
-    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
-        fprintf(header, "%2u%s", _length_code[i],
-                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
-    }
-
-    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
-    for (i = 0; i < LENGTH_CODES; i++) {
-        fprintf(header, "%1u%s", base_length[i],
-                SEPARATOR(i, LENGTH_CODES-1, 20));
-    }
-
-    fprintf(header, "local const int base_dist[D_CODES] = {\n");
-    for (i = 0; i < D_CODES; i++) {
-        fprintf(header, "%5u%s", base_dist[i],
-                SEPARATOR(i, D_CODES-1, 10));
-    }
-
-    fclose(header);
-}
-#endif /* GEN_TREES_H */
-
-/* ===========================================================================
- * Initialize the tree data structures for a new zlib stream.
- */
-void _tr_init(s)
-    deflate_state *s;
-{
-    /* The static descriptors referenced below must be populated first. */
-    tr_static_init();
-
-    /* Start with an empty bit buffer. */
-    s->bi_valid = 0;
-    s->bi_buf = 0;
-    s->last_eob_len = 8; /* enough lookahead for inflate */
-#ifdef DEBUG
-    s->bits_sent = 0L;
-    s->compressed_len = 0L;
-#endif
-
-    /* Pair each per-stream dynamic tree with its static descriptor:
-     * literal/length, distance, then bit length.
-     */
-    s->l_desc.stat_desc = &static_l_desc;
-    s->l_desc.dyn_tree = s->dyn_ltree;
-
-    s->d_desc.stat_desc = &static_d_desc;
-    s->d_desc.dyn_tree = s->dyn_dtree;
-
-    s->bl_desc.stat_desc = &static_bl_desc;
-    s->bl_desc.dyn_tree = s->bl_tree;
-
-    /* Initialize the first block of the first file: */
-    init_block(s);
-}
-
-/* ===========================================================================
- * Initialize a new block.
- */
-local void init_block(s)
-    deflate_state *s;
-{
-    int i; /* index into the tree being cleared */
-
-    /* Zero the frequency of every literal/length, distance and
-     * bit length code.
-     */
-    i = L_CODES;
-    while (i-- > 0) {
-        s->dyn_ltree[i].Freq = 0;
-    }
-    i = D_CODES;
-    while (i-- > 0) {
-        s->dyn_dtree[i].Freq = 0;
-    }
-    i = BL_CODES;
-    while (i-- > 0) {
-        s->bl_tree[i].Freq = 0;
-    }
-
-    /* The end-of-block code is counted once up front. */
-    s->dyn_ltree[END_BLOCK].Freq = 1;
-
-    /* Reset the per-block length totals and symbol counters. */
-    s->static_len = 0L;
-    s->opt_len = 0L;
-    s->matches = 0;
-    s->last_lit = 0;
-}
-
-#define SMALLEST 1
-/* Index within the heap array of least frequent node in the Huffman tree */
-
-
-/* ===========================================================================
- * Remove the smallest element from the heap and recreate the heap with
- * one less element. Updates heap and heap_len.
- */
-#define pqremove(s, tree, top) \
-{\
- top = s->heap[SMALLEST]; \
- s->heap[SMALLEST] = s->heap[s->heap_len--]; \
- pqdownheap(s, tree, SMALLEST); \
-}
-
-/* ===========================================================================
- * Compares two subtrees, using the tree depth as tie breaker when
- * the subtrees have equal frequency. This minimizes the worst case length.
- */
-#define smaller(tree, n, m, depth) \
- (tree[n].Freq < tree[m].Freq || \
- (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
-
-/* ===========================================================================
- * Restore the heap property by moving down the tree starting at node k,
- * exchanging a node with the smallest of its two sons if necessary, stopping
- * when the heap property is re-established (each father smaller than its
- * two sons).
- */
-local void pqdownheap(s, tree, k)
-    deflate_state *s;
-    ct_data *tree;  /* the tree to restore */
-    int k;          /* node to move down */
-{
-    int moving = s->heap[k];    /* element being sifted toward the leaves */
-    int child;                  /* heap index of the son under consideration */
-
-    /* Walk down from the left son of k, doubling the index each level. */
-    for (child = k << 1; child <= s->heap_len; child <<= 1) {
-        /* Prefer the right son when it exists and is the smaller one. */
-        if (child < s->heap_len &&
-            smaller(tree, s->heap[child+1], s->heap[child], s->depth)) {
-            child++;
-        }
-
-        /* Heap property holds once the moving element beats both sons. */
-        if (smaller(tree, moving, s->heap[child], s->depth)) break;
-
-        /* Otherwise pull the smaller son up into the hole and descend. */
-        s->heap[k] = s->heap[child];
-        k = child;
-    }
-    s->heap[k] = moving;
-}
-
-/* ===========================================================================
- * Compute the optimal bit lengths for a tree and update the total bit length
- * for the current block.
- * IN assertion: the fields freq and dad are set, heap[heap_max] and
- * above are the tree nodes sorted by increasing frequency.
- * OUT assertions: the field len is set to the optimal bit length, the
- * array bl_count contains the frequencies for each bit length.
- * The length opt_len is updated; static_len is also updated if stree is
- * not null.
- */
-local void gen_bitlen(s, desc)
- deflate_state *s;
- tree_desc *desc; /* the tree descriptor */
-{
- ct_data *tree = desc->dyn_tree;
- int max_code = desc->max_code; /* nodes with a larger index are not leaves */
- const ct_data *stree = desc->stat_desc->static_tree; /* may be NULL */
- const intf *extra = desc->stat_desc->extra_bits;
- int base = desc->stat_desc->extra_base; /* first code with an entry in extra[] */
- int max_length = desc->stat_desc->max_length; /* longest bit length allowed for this tree */
- int h; /* heap index */
- int n, m; /* iterate over the tree elements */
- int bits; /* bit length */
- int xbits; /* extra bits */
- ush f; /* frequency */
- int overflow = 0; /* number of elements with bit length too large */
-
- for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
-
- /* In a first pass, compute the optimal bit lengths (which may
- * overflow in the case of the bit length tree). Each node's length is
- * its father's length plus one, clamped to max_length.
- */
- tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
-
- for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
- n = s->heap[h];
- bits = tree[tree[n].Dad].Len + 1;
- if (bits > max_length) bits = max_length, overflow++;
- tree[n].Len = (ush)bits;
- /* We overwrite tree[n].Dad which is no longer needed */
-
- if (n > max_code) continue; /* not a leaf node */
-
- s->bl_count[bits]++;
- xbits = 0;
- if (n >= base) xbits = extra[n-base];
- f = tree[n].Freq;
- s->opt_len += (ulg)f * (bits + xbits); /* cost of this symbol with the dynamic tree */
- if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); /* cost with the static tree */
- }
- if (overflow == 0) return; /* no length was clamped: the lengths are final */
-
- Trace((stderr,"\nbit length overflow\n"));
- /* This happens for example on obj2 and pic of the Calgary corpus */
-
- /* Find the first bit length which could increase: */
- do {
- bits = max_length-1;
- while (s->bl_count[bits] == 0) bits--;
- s->bl_count[bits]--; /* move one leaf down the tree */
- s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
- s->bl_count[max_length]--;
- /* The brother of the overflow item also moves one step up,
- * but this does not affect bl_count[max_length]
- */
- overflow -= 2;
- } while (overflow > 0);
-
- /* Now recompute all bit lengths, scanning in increasing frequency.
- * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
- * lengths instead of fixing only the wrong ones. This idea is taken
- * from 'ar' written by Haruhiko Okumura.) Starting from the longest
- * length, bl_count[bits] leaves are taken from the top of the heap
- * array downwards and given that length; non-leaf entries are skipped.
- */
- for (bits = max_length; bits != 0; bits--) {
- n = s->bl_count[bits];
- while (n != 0) {
- m = s->heap[--h];
- if (m > max_code) continue; /* not a leaf: does not use up one of the n slots */
- if ((unsigned) tree[m].Len != (unsigned) bits) {
- Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
- s->opt_len += ((long)bits - (long)tree[m].Len)
- *(long)tree[m].Freq; /* adjust the total by the change in this leaf's cost */
- tree[m].Len = (ush)bits;
- }
- n--;
- }
- }
-}
-
-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- * zero code length.
- */
-local void gen_codes (tree, max_code, bl_count)
- ct_data *tree; /* the tree to decorate */
- int max_code; /* largest code with non zero frequency */
- ushf *bl_count; /* number of codes at each bit length */
-{
- ush next_code[MAX_BITS+1]; /* next code value for each bit length */
- ush code = 0; /* running code value */
- int bits; /* bit index */
- int n; /* code index */
-
- /* The distribution counts are first used to generate the code values
- * without bit reversal.
- */
- for (bits = 1; bits <= MAX_BITS; bits++) {
- next_code[bits] = code = (code + bl_count[bits-1]) << 1;
- }
- /* Check that the bit counts in bl_count are consistent. The last code
- * must be all ones.
- */
- Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
- "inconsistent bit counts");
- Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
- for (n = 0; n <= max_code; n++) {
- int len = tree[n].Len;
- if (len == 0) continue;
- /* Now reverse the bits */
- tree[n].Code = bi_reverse(next_code[len]++, len);
-
- Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
- n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
- }
-}
-
-/* ===========================================================================
- * Construct one Huffman tree and assigns the code bit strings and lengths.
- * Update the total bit length for the current block.
- * IN assertion: the field freq is set for all tree elements.
- * OUT assertions: the fields len and code are set to the optimal bit length
- * and corresponding code. The length opt_len is updated; static_len is
- * also updated if stree is not null. The field max_code is set.
- */
-local void build_tree(s, desc)
- deflate_state *s;
- tree_desc *desc; /* the tree descriptor */
-{
- ct_data *tree = desc->dyn_tree;
- const ct_data *stree = desc->stat_desc->static_tree;
- int elems = desc->stat_desc->elems;
- int n, m; /* iterate over heap elements */
- int max_code = -1; /* largest code with non zero frequency */
- int node; /* new node being created */
-
- /* Construct the initial heap, with least frequent element in
- * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
- * heap[0] is not used.
- */
- s->heap_len = 0, s->heap_max = HEAP_SIZE;
-
- for (n = 0; n < elems; n++) {
- if (tree[n].Freq != 0) {
- s->heap[++(s->heap_len)] = max_code = n;
- s->depth[n] = 0;
- } else {
- tree[n].Len = 0;
- }
- }
-
- /* The pkzip format requires that at least one distance code exists,
- * and that at least one bit should be sent even if there is only one
- * possible code. So to avoid special checks later on we force at least
- * two codes of non zero frequency.
- */
- while (s->heap_len < 2) {
- node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
- tree[node].Freq = 1;
- s->depth[node] = 0;
- s->opt_len--; if (stree) s->static_len -= stree[node].Len;
- /* node is 0 or 1 so it does not have extra bits */
- }
- desc->max_code = max_code;
-
- /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
- * establish sub-heaps of increasing lengths:
- */
- for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
-
- /* Construct the Huffman tree by repeatedly combining the least two
- * frequent nodes.
- */
- node = elems; /* next internal node of the tree */
- do {
- pqremove(s, tree, n); /* n = node of least frequency */
- m = s->heap[SMALLEST]; /* m = node of next least frequency */
-
- s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
- s->heap[--(s->heap_max)] = m;
-
- /* Create a new node father of n and m */
- tree[node].Freq = tree[n].Freq + tree[m].Freq;
- s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
- s->depth[n] : s->depth[m]) + 1);
- tree[n].Dad = tree[m].Dad = (ush)node;
-#ifdef DUMP_BL_TREE
- if (tree == s->bl_tree) {
- fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
- node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
- }
-#endif
- /* and insert the new node in the heap */
- s->heap[SMALLEST] = node++;
- pqdownheap(s, tree, SMALLEST);
-
- } while (s->heap_len >= 2);
-
- s->heap[--(s->heap_max)] = s->heap[SMALLEST];
-
- /* At this point, the fields freq and dad are set. We can now
- * generate the bit lengths.
- */
- gen_bitlen(s, (tree_desc *)desc);
-
- /* The field len is now set, we can generate the bit codes */
- gen_codes ((ct_data *)tree, max_code, s->bl_count);
-}
-
-/* ===========================================================================
- * Scan a literal or distance tree to determine the frequencies of the codes
- * in the bit length tree.
- */
-local void scan_tree (s, tree, max_code)
- deflate_state *s;
- ct_data *tree; /* the tree to be scanned */
- int max_code; /* and its largest code of non zero frequency */
-{
- int n; /* iterates over all tree elements */
- int prevlen = -1; /* last emitted length */
- int curlen; /* length of current code */
- int nextlen = tree[0].Len; /* length of next code */
- int count = 0; /* repeat count of the current code */
- int max_count = 7; /* max repeat count */
- int min_count = 4; /* min repeat count */
-
- if (nextlen == 0) max_count = 138, min_count = 3;
- tree[max_code+1].Len = (ush)0xffff; /* guard */
-
- for (n = 0; n <= max_code; n++) {
- curlen = nextlen; nextlen = tree[n+1].Len;
- if (++count < max_count && curlen == nextlen) {
- continue;
- } else if (count < min_count) {
- s->bl_tree[curlen].Freq += count;
- } else if (curlen != 0) {
- if (curlen != prevlen) s->bl_tree[curlen].Freq++;
- s->bl_tree[REP_3_6].Freq++;
- } else if (count <= 10) {
- s->bl_tree[REPZ_3_10].Freq++;
- } else {
- s->bl_tree[REPZ_11_138].Freq++;
- }
- count = 0; prevlen = curlen;
- if (nextlen == 0) {
- max_count = 138, min_count = 3;
- } else if (curlen == nextlen) {
- max_count = 6, min_count = 3;
- } else {
- max_count = 7, min_count = 4;
- }
- }
-}
-
-/* ===========================================================================
- * Send a literal or distance tree in compressed form, using the codes in
- * bl_tree.
- */
-local void send_tree (s, tree, max_code)
- deflate_state *s;
- ct_data *tree; /* the tree to be scanned */
- int max_code; /* and its largest code of non zero frequency */
-{
- int n; /* iterates over all tree elements */
- int prevlen = -1; /* last emitted length */
- int curlen; /* length of current code */
- int nextlen = tree[0].Len; /* length of next code */
- int count = 0; /* repeat count of the current code */
- int max_count = 7; /* max repeat count */
- int min_count = 4; /* min repeat count */
-
- /* tree[max_code+1].Len = -1; */ /* guard already set */
- if (nextlen == 0) max_count = 138, min_count = 3;
-
- for (n = 0; n <= max_code; n++) {
- curlen = nextlen; nextlen = tree[n+1].Len;
- if (++count < max_count && curlen == nextlen) {
- continue;
- } else if (count < min_count) {
- do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
-
- } else if (curlen != 0) {
- if (curlen != prevlen) {
- send_code(s, curlen, s->bl_tree); count--;
- }
- Assert(count >= 3 && count <= 6, " 3_6?");
- send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
-
- } else if (count <= 10) {
- send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
-
- } else {
- send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
- }
- count = 0; prevlen = curlen;
- if (nextlen == 0) {
- max_count = 138, min_count = 3;
- } else if (curlen == nextlen) {
- max_count = 6, min_count = 3;
- } else {
- max_count = 7, min_count = 4;
- }
- }
-}
-
-/* ===========================================================================
- * Construct the Huffman tree for the bit lengths and return the index in
- * bl_order of the last bit length code to send.
- */
-local int build_bl_tree(s)
- deflate_state *s;
-{
- int max_blindex; /* index of last bit length code of non zero freq */
-
- /* Determine the bit length frequencies for literal and distance trees */
- scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
- scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
-
- /* Build the bit length tree: */
- build_tree(s, (tree_desc *)(&(s->bl_desc)));
- /* opt_len now includes the length of the tree representations, except
- * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
- */
-
- /* Determine the number of bit length codes to send. The pkzip format
- * requires that at least 4 bit length codes be sent. (appnote.txt says
- * 3 but the actual value used is 4.)
- */
- for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
- if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
- }
- /* Update opt_len to include the bit length tree and counts */
- s->opt_len += 3*(max_blindex+1) + 5+5+4;
- Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
- s->opt_len, s->static_len));
-
- return max_blindex;
-}
-
-/* ===========================================================================
- * Send the header for a block using dynamic Huffman trees: the counts, the
- * lengths of the bit length codes, the literal tree and the distance tree.
- * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
- */
-local void send_all_trees(s, lcodes, dcodes, blcodes)
- deflate_state *s;
- int lcodes, dcodes, blcodes; /* number of codes for each tree */
-{
- int rank; /* index in bl_order */
-
- Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
- Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
- "too many codes");
- Tracev((stderr, "\nbl counts: "));
- send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
- send_bits(s, dcodes-1, 5);
- send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
- for (rank = 0; rank < blcodes; rank++) {
- Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
- send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
- }
- Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
-
- send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
- Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
-
- send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
- Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
-}
-
-/* ===========================================================================
- * Send a stored block
- */
-void _tr_stored_block(s, buf, stored_len, eof)
- deflate_state *s;
- charf *buf; /* input block */
- ulg stored_len; /* length of input block */
- int eof; /* true if this is the last block for a file */
-{
- send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
-#ifdef DEBUG
- s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
- s->compressed_len += (stored_len + 4) << 3;
-#endif
- copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
-}
-
-/* ===========================================================================
- * Send one empty static block to give enough lookahead for inflate.
- * This takes 10 bits, of which 7 may remain in the bit buffer.
- * The current inflate code requires 9 bits of lookahead. If the
- * last two codes for the previous block (real code plus EOB) were coded
- * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
- * the last real code. In this case we send two empty static blocks instead
- * of one. (There are no problems if the previous block is stored or fixed.)
- * To simplify the code, we assume the worst case of last real code encoded
- * on one bit only.
- */
-void _tr_align(s)
- deflate_state *s;
-{
- send_bits(s, STATIC_TREES<<1, 3);
- send_code(s, END_BLOCK, static_ltree);
-#ifdef DEBUG
- s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
-#endif
- bi_flush(s);
- /* Of the 10 bits for the empty block, we have already sent
- * (10 - bi_valid) bits. The lookahead for the last real code (before
- * the EOB of the previous block) was thus at least one plus the length
- * of the EOB plus what we have just sent of the empty static block.
- */
- if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
- send_bits(s, STATIC_TREES<<1, 3);
- send_code(s, END_BLOCK, static_ltree);
-#ifdef DEBUG
- s->compressed_len += 10L;
-#endif
- bi_flush(s);
- }
- s->last_eob_len = 7;
-}
-
-/* ===========================================================================
- * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and output the encoded block to the zip file.
- */
-void _tr_flush_block(s, buf, stored_len, eof)
- deflate_state *s;
- charf *buf; /* input block, or NULL if too old */
- ulg stored_len; /* length of input block */
- int eof; /* true if this is the last block for a file */
-{
- ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
- int max_blindex = 0; /* index of last bit length code of non zero freq */
-
- /* Build the Huffman trees unless a stored block is forced */
- if (s->level > 0) {
-
- /* Check if the file is binary or text */
- if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
- set_data_type(s);
-
- /* Construct the literal and distance trees */
- build_tree(s, (tree_desc *)(&(s->l_desc)));
- Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
- s->static_len));
-
- build_tree(s, (tree_desc *)(&(s->d_desc)));
- Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
- s->static_len));
- /* At this point, opt_len and static_len are the total bit lengths of
- * the compressed block data, excluding the tree representations.
- */
-
- /* Build the bit length tree for the above two trees, and get the index
- * in bl_order of the last bit length code to send.
- */
- max_blindex = build_bl_tree(s);
-
- /* Determine the best encoding. Compute the block lengths in bytes. */
- opt_lenb = (s->opt_len+3+7)>>3;
- static_lenb = (s->static_len+3+7)>>3;
-
- Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
- opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
- s->last_lit));
-
- if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
-
- } else {
- Assert(buf != (char*)0, "lost buf");
- opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
- }
-
-#ifdef FORCE_STORED
- if (buf != (char*)0) { /* force stored block */
-#else
- if (stored_len+4 <= opt_lenb && buf != (char*)0) {
- /* 4: two words for the lengths */
-#endif
- /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
- * Otherwise we can't have processed more than WSIZE input bytes since
- * the last block flush, because compression would have been
- * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
- * transform a block into a stored block.
- */
- _tr_stored_block(s, buf, stored_len, eof);
-
-#ifdef FORCE_STATIC
- } else if (static_lenb >= 0) { /* force static trees */
-#else
- } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
-#endif
- send_bits(s, (STATIC_TREES<<1)+eof, 3);
- compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
-#ifdef DEBUG
- s->compressed_len += 3 + s->static_len;
-#endif
- } else {
- send_bits(s, (DYN_TREES<<1)+eof, 3);
- send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
- max_blindex+1);
- compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
-#ifdef DEBUG
- s->compressed_len += 3 + s->opt_len;
-#endif
- }
- Assert (s->compressed_len == s->bits_sent, "bad compressed size");
- /* The above check is made mod 2^32, for files larger than 512 MB
- * and uLong implemented on 32 bits.
- */
- init_block(s);
-
- if (eof) {
- bi_windup(s);
-#ifdef DEBUG
- s->compressed_len += 7; /* align on byte boundary */
-#endif
- }
- Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
- s->compressed_len-7*eof));
-}
-
-/* ===========================================================================
- * Save the match info and tally the frequency counts. Return true if
- * the current block must be flushed.
- */
-int _tr_tally (s, dist, lc)
- deflate_state *s;
- unsigned dist; /* distance of matched string */
- unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
-{
- s->d_buf[s->last_lit] = (ush)dist;
- s->l_buf[s->last_lit++] = (uch)lc;
- if (dist == 0) {
- /* lc is the unmatched char */
- s->dyn_ltree[lc].Freq++;
- } else {
- s->matches++;
- /* Here, lc is the match length - MIN_MATCH */
- dist--; /* dist = match distance - 1 */
- Assert((ush)dist < (ush)MAX_DIST(s) &&
- (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
- (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
-
- s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
- s->dyn_dtree[d_code(dist)].Freq++;
- }
-
-#ifdef TRUNCATE_BLOCK
- /* Try to guess if it is profitable to stop the current block here */
- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
- /* Compute an upper bound for the compressed length */
- ulg out_length = (ulg)s->last_lit*8L;
- ulg in_length = (ulg)((long)s->strstart - s->block_start);
- int dcode;
- for (dcode = 0; dcode < D_CODES; dcode++) {
- out_length += (ulg)s->dyn_dtree[dcode].Freq *
- (5L+extra_dbits[dcode]);
- }
- out_length >>= 3;
- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
- s->last_lit, in_length, out_length,
- 100L - out_length*100L/in_length));
- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
- }
-#endif
- return (s->last_lit == s->lit_bufsize-1);
- /* We avoid equality with lit_bufsize because of wraparound at 64K
- * on 16 bit machines and because stored blocks are restricted to
- * 64K-1 bytes.
- */
-}
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-local void compress_block(s, ltree, dtree)
- deflate_state *s;
- ct_data *ltree; /* literal tree */
- ct_data *dtree; /* distance tree */
-{
- unsigned dist; /* distance of matched string */
- int lc; /* match length or unmatched char (if dist == 0) */
- unsigned lx = 0; /* running index in l_buf */
- unsigned code; /* the code to send */
- int extra; /* number of extra bits to send */
-
- if (s->last_lit != 0) do {
- dist = s->d_buf[lx];
- lc = s->l_buf[lx++];
- if (dist == 0) {
- send_code(s, lc, ltree); /* send a literal byte */
- Tracecv(isgraph(lc), (stderr," '%c' ", lc));
- } else {
- /* Here, lc is the match length - MIN_MATCH */
- code = _length_code[lc];
- send_code(s, code+LITERALS+1, ltree); /* send the length code */
- extra = extra_lbits[code];
- if (extra != 0) {
- lc -= base_length[code];
- send_bits(s, lc, extra); /* send the extra length bits */
- }
- dist--; /* dist is now the match distance - 1 */
- code = d_code(dist);
- Assert (code < D_CODES, "bad d_code");
-
- send_code(s, code, dtree); /* send the distance code */
- extra = extra_dbits[code];
- if (extra != 0) {
- dist -= base_dist[code];
- send_bits(s, dist, extra); /* send the extra distance bits */
- }
- } /* literal or match pair ? */
-
- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
- "pendingBuf overflow");
-
- } while (lx < s->last_lit);
-
- send_code(s, END_BLOCK, ltree);
- s->last_eob_len = ltree[END_BLOCK].Len;
-}
-
-/* ===========================================================================
- * Set the data type to BINARY or TEXT, using a crude approximation:
- * set it to Z_TEXT if all symbols are either printable characters (33 to 255)
- * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
- * IN assertion: the fields Freq of dyn_ltree are set.
- */
-local void set_data_type(s)
- deflate_state *s;
-{
- int n;
-
- for (n = 0; n < 9; n++)
- if (s->dyn_ltree[n].Freq != 0)
- break;
- if (n == 9)
- for (n = 14; n < 32; n++)
- if (s->dyn_ltree[n].Freq != 0)
- break;
- s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY;
-}
-
-/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-local unsigned bi_reverse(code, len)
- unsigned code; /* the value to invert */
- int len; /* its bit length */
-{
- register unsigned res = 0;
- do {
- res |= code & 1;
- code >>= 1, res <<= 1;
- } while (--len > 0);
- return res >> 1;
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-local void bi_flush(s)
- deflate_state *s;
-{
- if (s->bi_valid == 16) {
- put_short(s, s->bi_buf);
- s->bi_buf = 0;
- s->bi_valid = 0;
- } else if (s->bi_valid >= 8) {
- put_byte(s, (Byte)s->bi_buf);
- s->bi_buf >>= 8;
- s->bi_valid -= 8;
- }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-local void bi_windup(s)
- deflate_state *s;
-{
- if (s->bi_valid > 8) {
- put_short(s, s->bi_buf);
- } else if (s->bi_valid > 0) {
- put_byte(s, (Byte)s->bi_buf);
- }
- s->bi_buf = 0;
- s->bi_valid = 0;
-#ifdef DEBUG
- s->bits_sent = (s->bits_sent+7) & ~7;
-#endif
-}
-
-/* ===========================================================================
- * Copy a stored block, storing first the length and its
- * one's complement if requested.
- */
-local void copy_block(s, buf, len, header)
- deflate_state *s;
- charf *buf; /* the input data */
- unsigned len; /* its length */
- int header; /* true if block header must be written */
-{
- bi_windup(s); /* align on byte boundary */
- s->last_eob_len = 8; /* enough lookahead for inflate */
-
- if (header) {
- put_short(s, (ush)len);
- put_short(s, (ush)~len);
-#ifdef DEBUG
- s->bits_sent += 2*16;
-#endif
- }
-#ifdef DEBUG
- s->bits_sent += (ulg)len<<3;
-#endif
- while (len--) {
- put_byte(s, *buf++);
- }
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zconf.h b/sys/contrib/opensolaris/uts/common/zmod/zconf.h
deleted file mode 100644
index ccce7b2..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zconf.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ZCONF_H
-#define _ZCONF_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * We don't want to turn on zlib's debugging.
- */
-#undef DEBUG
-
-/*
- * We define our own memory allocation and deallocation routines that use kmem.
- */
-#define MY_ZCALLOC
-
-/*
- * We don't define HAVE_MEMCPY here, but do in zutil.c, and implement our
- * our versions of zmemcpy(), zmemzero(), and zmemcmp().
- */
-
-/*
- * We have a sufficiently capable compiler as to not need zlib's compiler hack.
- */
-#define NO_DUMMY_DECL
-
-#define compressBound(len) (len + (len >> 12) + (len >> 14) + 11)
-
-#define z_off_t off_t
-#define OF(p) p
-#define ZEXTERN extern
-#define ZEXPORT
-#define ZEXPORTVA
-#define FAR
-
-#define deflateInit_ z_deflateInit_
-#define deflate z_deflate
-#define deflateEnd z_deflateEnd
-#define inflateInit_ z_inflateInit_
-#define inflate z_inflate
-#define inflateEnd z_inflateEnd
-#define deflateInit2_ z_deflateInit2_
-#define deflateSetDictionary z_deflateSetDictionary
-#define deflateCopy z_deflateCopy
-#define deflateReset z_deflateReset
-#define deflateParams z_deflateParams
-#define deflateBound z_deflateBound
-#define deflatePrime z_deflatePrime
-#define inflateInit2_ z_inflateInit2_
-#define inflateSetDictionary z_inflateSetDictionary
-#define inflateSync z_inflateSync
-#define inflateSyncPoint z_inflateSyncPoint
-#define inflateCopy z_inflateCopy
-#define inflateReset z_inflateReset
-#define inflateBack z_inflateBack
-#define inflateBackEnd z_inflateBackEnd
-#define compress zz_compress
-#define compress2 zz_compress2
-#define uncompress zz_uncompress
-#define adler32 z_adler32
-#define crc32 z_crc32
-#define get_crc_table z_get_crc_table
-#define zError z_zError
-
-#define MAX_MEM_LEVEL 9
-#define MAX_WBITS 15
-
-typedef unsigned char Byte;
-typedef unsigned int uInt;
-typedef unsigned long uLong;
-typedef Byte Bytef;
-typedef char charf;
-typedef int intf;
-typedef uInt uIntf;
-typedef uLong uLongf;
-typedef void *voidpc;
-typedef void *voidpf;
-typedef void *voidp;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZCONF_H */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zlib.h b/sys/contrib/opensolaris/uts/common/zmod/zlib.h
deleted file mode 100644
index 9b971a0..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zlib.h
+++ /dev/null
@@ -1,1359 +0,0 @@
-/* zlib.h -- interface of the 'zlib' general purpose compression library
- version 1.2.3, July 18th, 2005
-
- Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
-
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
-
- Jean-loup Gailly Mark Adler
- jloup@gzip.org madler@alumni.caltech.edu
-
-
- The data format used by the zlib library is described by RFCs (Request for
- Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
- (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
-*/
-
-#ifndef _ZLIB_H
-#define _ZLIB_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "zconf.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZLIB_VERSION "1.2.3"
-#define ZLIB_VERNUM 0x1230
-
-/*
- The 'zlib' compression library provides in-memory compression and
- decompression functions, including integrity checks of the uncompressed
- data. This version of the library supports only one compression method
- (deflation) but other algorithms will be added later and will have the same
- stream interface.
-
- Compression can be done in a single step if the buffers are large
- enough (for example if an input file is mmap'ed), or can be done by
- repeated calls of the compression function. In the latter case, the
- application must provide more input and/or consume the output
- (providing more output space) before each call.
-
- The compressed data format used by default by the in-memory functions is
- the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
- around a deflate stream, which is itself documented in RFC 1951.
-
- The library also supports reading and writing files in gzip (.gz) format
- with an interface similar to that of stdio using the functions that start
- with "gz". The gzip format is different from the zlib format. gzip is a
- gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
-
- This library can optionally read and write gzip streams in memory as well.
-
- The zlib format was designed to be compact and fast for use in memory
- and on communications channels. The gzip format was designed for single-
- file compression on file systems, has a larger header than zlib to maintain
- directory information, and uses a different, slower check method than zlib.
-
- The library does not install any signal handler. The decoder checks
- the consistency of the compressed data, so the library should never
- crash even in case of corrupted input.
-*/
-
-typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-typedef void (*free_func) OF((voidpf opaque, voidpf address));
-
-struct internal_state;
-
-typedef struct z_stream_s {
- Bytef *next_in; /* next input byte */
- uInt avail_in; /* number of bytes available at next_in */
- uLong total_in; /* total nb of input bytes read so far */
-
- Bytef *next_out; /* next output byte should be put there */
- uInt avail_out; /* remaining free space at next_out */
- uLong total_out; /* total nb of bytes output so far */
-
- char *msg; /* last error message, NULL if no error */
- struct internal_state FAR *state; /* not visible by applications */
-
- alloc_func zalloc; /* used to allocate the internal state */
- free_func zfree; /* used to free the internal state */
- voidpf opaque; /* private data object passed to zalloc and zfree */
-
- int data_type; /* best guess about the data type: binary or text */
- uLong adler; /* adler32 value of the uncompressed data */
- uLong reserved; /* reserved for future use */
-} z_stream;
-
-typedef z_stream FAR *z_streamp;
-
-/*
- gzip header information passed to and from zlib routines. See RFC 1952
- for more details on the meanings of these fields.
-*/
-typedef struct gz_header_s {
- int text; /* true if compressed data believed to be text */
- uLong time; /* modification time */
- int xflags; /* extra flags (not used when writing a gzip file) */
- int os; /* operating system */
- Bytef *extra; /* pointer to extra field or Z_NULL if none */
- uInt extra_len; /* extra field length (valid if extra != Z_NULL) */
- uInt extra_max; /* space at extra (only when reading header) */
- Bytef *name; /* pointer to zero-terminated file name or Z_NULL */
- uInt name_max; /* space at name (only when reading header) */
- Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */
- uInt comm_max; /* space at comment (only when reading header) */
- int hcrc; /* true if there was or will be a header crc */
- int done; /* true when done reading gzip header (not used
- when writing a gzip file) */
-} gz_header;
-
-typedef gz_header FAR *gz_headerp;
-
-/*
- The application must update next_in and avail_in when avail_in has
- dropped to zero. It must update next_out and avail_out when avail_out
- has dropped to zero. The application must initialize zalloc, zfree and
- opaque before calling the init function. All other fields are set by the
- compression library and must not be updated by the application.
-
- The opaque value provided by the application will be passed as the first
- parameter for calls of zalloc and zfree. This can be useful for custom
- memory management. The compression library attaches no meaning to the
- opaque value.
-
- zalloc must return Z_NULL if there is not enough memory for the object.
- If zlib is used in a multi-threaded application, zalloc and zfree must be
- thread safe.
-
- On 16-bit systems, the functions zalloc and zfree must be able to allocate
- exactly 65536 bytes, but will not be required to allocate more than this
- if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
- pointers returned by zalloc for objects of exactly 65536 bytes *must*
- have their offset normalized to zero. The default allocation function
- provided by this library ensures this (see zutil.c). To reduce memory
- requirements and avoid any allocation of 64K objects, at the expense of
- compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
-
- The fields total_in and total_out can be used for statistics or
- progress reports. After compression, total_in holds the total size of
- the uncompressed data and may be saved for use in the decompressor
- (particularly if the decompressor wants to decompress everything in
- a single step).
-*/
-
- /* constants */
-
-#define Z_NO_FLUSH 0
-#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
-#define Z_SYNC_FLUSH 2
-#define Z_FULL_FLUSH 3
-#define Z_FINISH 4
-#define Z_BLOCK 5
-/* Allowed flush values; see deflate() and inflate() below for details */
-
-#define Z_OK 0
-#define Z_STREAM_END 1
-#define Z_NEED_DICT 2
-#define Z_ERRNO (-1)
-#define Z_STREAM_ERROR (-2)
-#define Z_DATA_ERROR (-3)
-#define Z_MEM_ERROR (-4)
-#define Z_BUF_ERROR (-5)
-#define Z_VERSION_ERROR (-6)
-/* Return codes for the compression/decompression functions. Negative
- * values are errors, positive values are used for special but normal events.
- */
-
-#define Z_NO_COMPRESSION 0
-#define Z_BEST_SPEED 1
-#define Z_BEST_COMPRESSION 9
-#define Z_DEFAULT_COMPRESSION (-1)
-/* compression levels */
-
-#define Z_FILTERED 1
-#define Z_HUFFMAN_ONLY 2
-#define Z_RLE 3
-#define Z_FIXED 4
-#define Z_DEFAULT_STRATEGY 0
-/* compression strategy; see deflateInit2() below for details */
-
-#define Z_BINARY 0
-#define Z_TEXT 1
-#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */
-#define Z_UNKNOWN 2
-/* Possible values of the data_type field (though see inflate()) */
-
-#define Z_DEFLATED 8
-/* The deflate compression method (the only one supported in this version) */
-
-#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
-
-#define zlib_version zlibVersion()
-/* for compatibility with versions < 1.0.2 */
-
- /* basic functions */
-
-ZEXTERN const char * ZEXPORT zlibVersion OF((void));
-/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
- If the first character differs, the library code actually used is
- not compatible with the zlib.h header file used by the application.
- This check is automatically made by deflateInit and inflateInit.
- */
-
-/*
-ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
-
- Initializes the internal stream state for compression. The fields
- zalloc, zfree and opaque must be initialized before by the caller.
- If zalloc and zfree are set to Z_NULL, deflateInit updates them to
- use default allocation functions.
-
- The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
- 1 gives best speed, 9 gives best compression, 0 gives no compression at
- all (the input data is simply copied a block at a time).
- Z_DEFAULT_COMPRESSION requests a default compromise between speed and
- compression (currently equivalent to level 6).
-
- deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if level is not a valid compression level,
- Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
- with the version assumed by the caller (ZLIB_VERSION).
- msg is set to null if there is no error message. deflateInit does not
- perform any compression: this will be done by deflate().
-*/
-
-
-ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
-/*
- deflate compresses as much data as possible, and stops when the input
- buffer becomes empty or the output buffer becomes full. It may introduce some
- output latency (reading input without producing any output) except when
- forced to flush.
-
- The detailed semantics are as follows. deflate performs one or both of the
- following actions:
-
- - Compress more input starting at next_in and update next_in and avail_in
- accordingly. If not all input can be processed (because there is not
- enough room in the output buffer), next_in and avail_in are updated and
- processing will resume at this point for the next call of deflate().
-
- - Provide more output starting at next_out and update next_out and avail_out
- accordingly. This action is forced if the parameter flush is non zero.
- Forcing flush frequently degrades the compression ratio, so this parameter
- should be set only when necessary (in interactive applications).
- Some output may be provided even if flush is not set.
-
- Before the call of deflate(), the application should ensure that at least
- one of the actions is possible, by providing more input and/or consuming
- more output, and updating avail_in or avail_out accordingly; avail_out
- should never be zero before the call. The application can consume the
- compressed output when it wants, for example when the output buffer is full
- (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
- and with zero avail_out, it must be called again after making room in the
- output buffer because there might be more output pending.
-
- Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
- decide how much data to accumulate before producing output, in order to
- maximize compression.
-
- If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
- flushed to the output buffer and the output is aligned on a byte boundary, so
- that the decompressor can get all input data available so far. (In particular
- avail_in is zero after the call if enough output space has been provided
- before the call.) Flushing may degrade compression for some compression
- algorithms and so it should be used only when necessary.
-
- If flush is set to Z_FULL_FLUSH, all output is flushed as with
- Z_SYNC_FLUSH, and the compression state is reset so that decompression can
- restart from this point if previous compressed data has been damaged or if
- random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
- compression.
-
- If deflate returns with avail_out == 0, this function must be called again
- with the same value of the flush parameter and more output space (updated
- avail_out), until the flush is complete (deflate returns with non-zero
- avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
- avail_out is greater than six to avoid repeated flush markers due to
- avail_out == 0 on return.
-
- If the parameter flush is set to Z_FINISH, pending input is processed,
- pending output is flushed and deflate returns with Z_STREAM_END if there
- was enough output space; if deflate returns with Z_OK, this function must be
- called again with Z_FINISH and more output space (updated avail_out) but no
- more input data, until it returns with Z_STREAM_END or an error. After
- deflate has returned Z_STREAM_END, the only possible operations on the
- stream are deflateReset or deflateEnd.
-
- Z_FINISH can be used immediately after deflateInit if all the compression
- is to be done in a single step. In this case, avail_out must be at least
- the value returned by deflateBound (see below). If deflate does not return
- Z_STREAM_END, then it must be called again as described above.
-
- deflate() sets strm->adler to the adler32 checksum of all input read
- so far (that is, total_in bytes).
-
- deflate() may update strm->data_type if it can make a good guess about
- the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
- binary. This field is only for information purposes and does not affect
- the compression algorithm in any manner.
-
- deflate() returns Z_OK if some progress has been made (more input
- processed or more output produced), Z_STREAM_END if all input has been
- consumed and all output has been produced (only when flush is set to
- Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
- if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
- (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
- fatal, and deflate() can be called again with more input and more output
- space to continue compressing.
-*/
-
-
-ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
-/*
- All dynamically allocated data structures for this stream are freed.
- This function discards any unprocessed input and does not flush any
- pending output.
-
- deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
- stream state was inconsistent, Z_DATA_ERROR if the stream was freed
- prematurely (some input or output was discarded). In the error case,
- msg may be set but then points to a static string (which must not be
- deallocated).
-*/
-
-
-/*
-ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
-
- Initializes the internal stream state for decompression. The fields
- next_in, avail_in, zalloc, zfree and opaque must be initialized before by
- the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
- value depends on the compression method), inflateInit determines the
- compression method from the zlib header and allocates all data structures
- accordingly; otherwise the allocation will be deferred to the first call of
- inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
- use default allocation functions.
-
- inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
- version assumed by the caller. msg is set to null if there is no error
- message. inflateInit does not perform any decompression apart from reading
- the zlib header if present: this will be done by inflate(). (So next_in and
- avail_in may be modified, but next_out and avail_out are unchanged.)
-*/
-
-
-ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
-/*
- inflate decompresses as much data as possible, and stops when the input
- buffer becomes empty or the output buffer becomes full. It may introduce
- some output latency (reading input without producing any output) except when
- forced to flush.
-
- The detailed semantics are as follows. inflate performs one or both of the
- following actions:
-
- - Decompress more input starting at next_in and update next_in and avail_in
- accordingly. If not all input can be processed (because there is not
- enough room in the output buffer), next_in and avail_in are updated and
- processing will resume at this point for the next call of inflate().
-
- - Provide more output starting at next_out and update next_out and avail_out
- accordingly. inflate() provides as much output as possible, until there
- is no more input data or no more space in the output buffer (see below
- about the flush parameter).
-
- Before the call of inflate(), the application should ensure that at least
- one of the actions is possible, by providing more input and/or consuming
- more output, and updating the next_* and avail_* values accordingly.
- The application can consume the uncompressed output when it wants, for
- example when the output buffer is full (avail_out == 0), or after each
- call of inflate(). If inflate returns Z_OK and with zero avail_out, it
- must be called again after making room in the output buffer because there
- might be more output pending.
-
- The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH,
- Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much
- output as possible to the output buffer. Z_BLOCK requests that inflate() stop
- if and when it gets to the next deflate block boundary. When decoding the
- zlib or gzip format, this will cause inflate() to return immediately after
- the header and before the first block. When doing a raw inflate, inflate()
- will go ahead and process the first block, and will return when it gets to
- the end of that block, or when it runs out of data.
-
- The Z_BLOCK option assists in appending to or combining deflate streams.
- Also to assist in this, on return inflate() will set strm->data_type to the
- number of unused bits in the last byte taken from strm->next_in, plus 64
- if inflate() is currently decoding the last block in the deflate stream,
- plus 128 if inflate() returned immediately after decoding an end-of-block
- code or decoding the complete header up to just before the first byte of the
- deflate stream. The end-of-block will not be indicated until all of the
- uncompressed data from that block has been written to strm->next_out. The
- number of unused bits may in general be greater than seven, except when
- bit 7 of data_type is set, in which case the number of unused bits will be
- less than eight.
-
- inflate() should normally be called until it returns Z_STREAM_END or an
- error. However if all decompression is to be performed in a single step
- (a single call of inflate), the parameter flush should be set to
- Z_FINISH. In this case all pending input is processed and all pending
- output is flushed; avail_out must be large enough to hold all the
- uncompressed data. (The size of the uncompressed data may have been saved
- by the compressor for this purpose.) The next operation on this stream must
- be inflateEnd to deallocate the decompression state. The use of Z_FINISH
- is never required, but can be used to inform inflate that a faster approach
- may be used for the single inflate() call.
-
- In this implementation, inflate() always flushes as much output as
- possible to the output buffer, and always uses the faster approach on the
- first call. So the only effect of the flush parameter in this implementation
- is on the return value of inflate(), as noted below, or when it returns early
- because Z_BLOCK is used.
-
- If a preset dictionary is needed after this call (see inflateSetDictionary
- below), inflate sets strm->adler to the adler32 checksum of the dictionary
- chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
- strm->adler to the adler32 checksum of all output produced so far (that is,
- total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
- below. At the end of the stream, inflate() checks that its computed adler32
- checksum is equal to that saved by the compressor and returns Z_STREAM_END
- only if the checksum is correct.
-
- inflate() will decompress and check either zlib-wrapped or gzip-wrapped
- deflate data. The header type is detected automatically. Any information
- contained in the gzip header is not retained, so applications that need that
- information should instead use raw inflate, see inflateInit2() below, or
- inflateBack() and perform their own processing of the gzip header and
- trailer.
-
- inflate() returns Z_OK if some progress has been made (more input processed
- or more output produced), Z_STREAM_END if the end of the compressed data has
- been reached and all uncompressed output has been produced, Z_NEED_DICT if a
- preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
- corrupted (input stream not conforming to the zlib format or incorrect check
- value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
- if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
- Z_BUF_ERROR if no progress is possible or if there was not enough room in the
- output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
- inflate() can be called again with more input and more output space to
- continue decompressing. If Z_DATA_ERROR is returned, the application may then
- call inflateSync() to look for a good compression block if a partial recovery
- of the data is desired.
-*/
-
-
-ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
-/*
- All dynamically allocated data structures for this stream are freed.
- This function discards any unprocessed input and does not flush any
- pending output.
-
- inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
- was inconsistent. In the error case, msg may be set but then points to a
- static string (which must not be deallocated).
-*/
-
- /* Advanced functions */
-
-/*
- The following functions are needed only in some special applications.
-*/
-
-/*
-ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
- int level,
- int method,
- int windowBits,
- int memLevel,
- int strategy));
-
- This is another version of deflateInit with more compression options. The
- fields next_in, zalloc, zfree and opaque must be initialized before by
- the caller.
-
- The method parameter is the compression method. It must be Z_DEFLATED in
- this version of the library.
-
- The windowBits parameter is the base two logarithm of the window size
- (the size of the history buffer). It should be in the range 8..15 for this
- version of the library. Larger values of this parameter result in better
- compression at the expense of memory usage. The default value is 15 if
- deflateInit is used instead.
-
- windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
- determines the window size. deflate() will then generate raw deflate data
- with no zlib header or trailer, and will not compute an adler32 check value.
-
- windowBits can also be greater than 15 for optional gzip encoding. Add
- 16 to windowBits to write a simple gzip header and trailer around the
- compressed data instead of a zlib wrapper. The gzip header will have no
- file name, no extra data, no comment, no modification time (set to zero),
- no header crc, and the operating system will be set to 255 (unknown). If a
- gzip stream is being written, strm->adler is a crc32 instead of an adler32.
-
- The memLevel parameter specifies how much memory should be allocated
- for the internal compression state. memLevel=1 uses minimum memory but
- is slow and reduces compression ratio; memLevel=9 uses maximum memory
- for optimal speed. The default value is 8. See zconf.h for total memory
- usage as a function of windowBits and memLevel.
-
- The strategy parameter is used to tune the compression algorithm. Use the
- value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
- filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
- string match), or Z_RLE to limit match distances to one (run-length
- encoding). Filtered data consists mostly of small values with a somewhat
- random distribution. In this case, the compression algorithm is tuned to
- compress them better. The effect of Z_FILTERED is to force more Huffman
- coding and less string matching; it is somewhat intermediate between
- Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as
- Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy
- parameter only affects the compression ratio but not the correctness of the
- compressed output even if it is not set appropriately. Z_FIXED prevents the
- use of dynamic Huffman codes, allowing for a simpler decoder for special
- applications.
-
- deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
- method). msg is set to null if there is no error message. deflateInit2 does
- not perform any compression: this will be done by deflate().
-*/
-
-ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
- const Bytef *dictionary,
- uInt dictLength));
-/*
- Initializes the compression dictionary from the given byte sequence
- without producing any compressed output. This function must be called
- immediately after deflateInit, deflateInit2 or deflateReset, before any
- call of deflate. The compressor and decompressor must use exactly the same
- dictionary (see inflateSetDictionary).
-
- The dictionary should consist of strings (byte sequences) that are likely
- to be encountered later in the data to be compressed, with the most commonly
- used strings preferably put towards the end of the dictionary. Using a
- dictionary is most useful when the data to be compressed is short and can be
- predicted with good accuracy; the data can then be compressed better than
- with the default empty dictionary.
-
- Depending on the size of the compression data structures selected by
- deflateInit or deflateInit2, a part of the dictionary may in effect be
- discarded, for example if the dictionary is larger than the window size
- selected by deflateInit or deflateInit2. Thus the strings most likely to be
- useful should be put at the end of the dictionary, not at the front. In
- addition, the current implementation of deflate will use at most the window
- size minus 262 bytes of the provided dictionary.
-
- Upon return of this function, strm->adler is set to the adler32 value
- of the dictionary; the decompressor may later use this value to determine
- which dictionary has been used by the compressor. (The adler32 value
- applies to the whole dictionary even if only a subset of the dictionary is
- actually used by the compressor.) If a raw deflate was requested, then the
- adler32 value is not computed and strm->adler is not set.
-
- deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent (for example if deflate has already been called for this stream
- or if the compression method is bsort). deflateSetDictionary does not
- perform any compression: this will be done by deflate().
-*/
-
-ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
- z_streamp source));
-/*
- Sets the destination stream as a complete copy of the source stream.
-
- This function can be useful when several compression strategies will be
- tried, for example when there are several ways of pre-processing the input
- data with a filter. The streams that will be discarded should then be freed
- by calling deflateEnd. Note that deflateCopy duplicates the internal
- compression state which can be quite large, so this strategy is slow and
- can consume lots of memory.
-
- deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
- (such as zalloc being NULL). msg is left unchanged in both source and
- destination.
-*/
-
-ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
-/*
- This function is equivalent to deflateEnd followed by deflateInit,
- but does not free and reallocate all the internal compression state.
- The stream will keep the same compression level and any other attributes
- that may have been set by deflateInit2.
-
- deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent (such as zalloc or state being NULL).
-*/
-
-ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
- int level,
- int strategy));
-/*
- Dynamically update the compression level and compression strategy. The
- interpretation of level and strategy is as in deflateInit2. This can be
- used to switch between compression and straight copy of the input data, or
- to switch to a different kind of input data requiring a different
- strategy. If the compression level is changed, the input available so far
- is compressed with the old level (and may be flushed); the new level will
- take effect only at the next call of deflate().
-
- Before the call of deflateParams, the stream state must be set as for
- a call of deflate(), since the currently available input may have to
- be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
- deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
- stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
- if strm->avail_out was zero.
-*/
-
-ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
- int good_length,
- int max_lazy,
- int nice_length,
- int max_chain));
-/*
- Fine tune deflate's internal compression parameters. This should only be
- used by someone who understands the algorithm used by zlib's deflate for
- searching for the best matching string, and even then only by the most
- fanatic optimizer trying to squeeze out the last compressed bit for their
- specific input data. Read the deflate.c source code for the meaning of the
- max_lazy, good_length, nice_length, and max_chain parameters.
-
- deflateTune() can be called after deflateInit() or deflateInit2(), and
- returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
- */
-
-ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
- uLong sourceLen));
-/*
- deflateBound() returns an upper bound on the compressed size after
- deflation of sourceLen bytes. It must be called after deflateInit()
- or deflateInit2(). This would be used to allocate an output buffer
- for deflation in a single pass, and so would be called before deflate().
-*/
-
-ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
- int bits,
- int value));
-/*
- deflatePrime() inserts bits in the deflate output stream. The intent
- is that this function is used to start off the deflate output with the
- bits leftover from a previous deflate stream when appending to it. As such,
- this function can only be used for raw deflate, and must be used before the
- first deflate() call after a deflateInit2() or deflateReset(). bits must be
- less than or equal to 16, and that many of the least significant bits of
- value will be inserted in the output.
-
- deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent.
-*/
-
-ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
- gz_headerp head));
-/*
- deflateSetHeader() provides gzip header information for when a gzip
- stream is requested by deflateInit2(). deflateSetHeader() may be called
- after deflateInit2() or deflateReset() and before the first call of
- deflate(). The text, time, os, extra field, name, and comment information
- in the provided gz_header structure are written to the gzip header (xflag is
- ignored -- the extra flags are set according to the compression level). The
- caller must assure that, if not Z_NULL, name and comment are terminated with
- a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
- available there. If hcrc is true, a gzip header crc is included. Note that
- the current versions of the command-line version of gzip (up through version
- 1.3.x) do not support header crc's, and will report that it is a "multi-part
- gzip file" and give up.
-
- If deflateSetHeader is not used, the default gzip header has text false,
- the time set to zero, and os set to 255, with no extra, name, or comment
- fields. The gzip header is returned to the default state by deflateReset().
-
- deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent.
-*/
-
-/*
-ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
- int windowBits));
-
- This is another version of inflateInit with an extra parameter. The
- fields next_in, avail_in, zalloc, zfree and opaque must be initialized
- before by the caller.
-
- The windowBits parameter is the base two logarithm of the maximum window
- size (the size of the history buffer). It should be in the range 8..15 for
- this version of the library. The default value is 15 if inflateInit is used
- instead. windowBits must be greater than or equal to the windowBits value
- provided to deflateInit2() while compressing, or it must be equal to 15 if
- deflateInit2() was not used. If a compressed stream with a larger window
- size is given as input, inflate() will return with the error code
- Z_DATA_ERROR instead of trying to allocate a larger window.
-
- windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
- determines the window size. inflate() will then process raw deflate data,
- not looking for a zlib or gzip header, not generating a check value, and not
- looking for any check values for comparison at the end of the stream. This
- is for use with other formats that use the deflate compressed data format
- such as zip. Those formats provide their own check values. If a custom
- format is developed using the raw deflate format for compressed data, it is
- recommended that a check value such as an adler32 or a crc32 be applied to
- the uncompressed data as is done in the zlib, gzip, and zip formats. For
- most applications, the zlib format should be used as is. Note that comments
- above on the use in deflateInit2() applies to the magnitude of windowBits.
-
- windowBits can also be greater than 15 for optional gzip decoding. Add
- 32 to windowBits to enable zlib and gzip decoding with automatic header
- detection, or add 16 to decode only the gzip format (the zlib format will
- return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is
- a crc32 instead of an adler32.
-
- inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg
- is set to null if there is no error message. inflateInit2 does not perform
- any decompression apart from reading the zlib header if present: this will
- be done by inflate(). (So next_in and avail_in may be modified, but next_out
- and avail_out are unchanged.)
-*/
-
-ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
- const Bytef *dictionary,
- uInt dictLength));
-/*
- Initializes the decompression dictionary from the given uncompressed byte
- sequence. This function must be called immediately after a call of inflate,
- if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
- can be determined from the adler32 value returned by that call of inflate.
- The compressor and decompressor must use exactly the same dictionary (see
- deflateSetDictionary). For raw inflate, this function can be called
- immediately after inflateInit2() or inflateReset() and before any call of
- inflate() to set the dictionary. The application must insure that the
- dictionary that was used for compression is provided.
-
- inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
- expected one (incorrect adler32 value). inflateSetDictionary does not
- perform any decompression: this will be done by subsequent calls of
- inflate().
-*/
-
-ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
-/*
- Skips invalid compressed data until a full flush point (see above the
- description of deflate with Z_FULL_FLUSH) can be found, or until all
- available input is skipped. No output is provided.
-
- inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
- if no more input was provided, Z_DATA_ERROR if no flush point has been found,
- or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
- case, the application may save the current value of total_in which
- indicates where valid compressed data was found. In the error case, the
- application may repeatedly call inflateSync, providing more input each time,
- until success or end of the input data.
-*/
-
-ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
- z_streamp source));
-/*
- Sets the destination stream as a complete copy of the source stream.
-
- This function can be useful when randomly accessing a large stream. The
- first pass through the stream can periodically record the inflate state,
- allowing restarting inflate at those points when randomly accessing the
- stream.
-
- inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
- (such as zalloc being NULL). msg is left unchanged in both source and
- destination.
-*/
-
-ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
-/*
- This function is equivalent to inflateEnd followed by inflateInit,
- but does not free and reallocate all the internal decompression state.
- The stream will keep attributes that may have been set by inflateInit2.
-
- inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent (such as zalloc or state being NULL).
-*/
-
-ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
- int bits,
- int value));
-/*
- This function inserts bits in the inflate input stream. The intent is
- that this function is used to start inflating at a bit position in the
- middle of a byte. The provided bits will be used before any bytes are used
- from next_in. This function should only be used with raw inflate, and
- should be used before the first inflate() call after inflateInit2() or
- inflateReset(). bits must be less than or equal to 16, and that many of the
- least significant bits of value will be inserted in the input.
-
- inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent.
-*/
-
-ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
- gz_headerp head));
-/*
- inflateGetHeader() requests that gzip header information be stored in the
- provided gz_header structure. inflateGetHeader() may be called after
- inflateInit2() or inflateReset(), and before the first call of inflate().
- As inflate() processes the gzip stream, head->done is zero until the header
- is completed, at which time head->done is set to one. If a zlib stream is
- being decoded, then head->done is set to -1 to indicate that there will be
- no gzip header information forthcoming. Note that Z_BLOCK can be used to
- force inflate() to return immediately after header processing is complete
- and before any actual data is decompressed.
-
- The text, time, xflags, and os fields are filled in with the gzip header
- contents. hcrc is set to true if there is a header CRC. (The header CRC
- was valid if done is set to one.) If extra is not Z_NULL, then extra_max
- contains the maximum number of bytes to write to extra. Once done is true,
- extra_len contains the actual extra field length, and extra contains the
- extra field, or that field truncated if extra_max is less than extra_len.
- If name is not Z_NULL, then up to name_max characters are written there,
- terminated with a zero unless the length is greater than name_max. If
- comment is not Z_NULL, then up to comm_max characters are written there,
- terminated with a zero unless the length is greater than comm_max. When
- any of extra, name, or comment are not Z_NULL and the respective field is
- not present in the header, then that field is set to Z_NULL to signal its
- absence. This allows the use of deflateSetHeader() with the returned
- structure to duplicate the header. However if those fields are set to
- allocated memory, then the application will need to save those pointers
- elsewhere so that they can be eventually freed.
-
- If inflateGetHeader is not used, then the header information is simply
- discarded. The header is always checked for validity, including the header
- CRC if present. inflateReset() will reset the process to discard the header
- information. The application would need to call inflateGetHeader() again to
- retrieve the header from the next gzip stream.
-
- inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
- stream state was inconsistent.
-*/
-
-/*
-ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
- unsigned char FAR *window));
-
- Initialize the internal stream state for decompression using inflateBack()
- calls. The fields zalloc, zfree and opaque in strm must be initialized
- before the call. If zalloc and zfree are Z_NULL, then the default library-
- derived memory allocation routines are used. windowBits is the base two
- logarithm of the window size, in the range 8..15. window is a caller
- supplied buffer of that size. Except for special applications where it is
- assured that deflate was used with small window sizes, windowBits must be 15
- and a 32K byte window must be supplied to be able to decompress general
- deflate streams.
-
- See inflateBack() for the usage of these routines.
-
- inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
- the parameters are invalid, Z_MEM_ERROR if the internal state could not
- be allocated, or Z_VERSION_ERROR if the version of the library does not
- match the version of the header file.
-*/
-
-typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *));
-typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
-
-ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
- in_func in, void FAR *in_desc,
- out_func out, void FAR *out_desc));
-/*
- inflateBack() does a raw inflate with a single call using a call-back
- interface for input and output. This is more efficient than inflate() for
- file i/o applications in that it avoids copying between the output and the
- sliding window by simply making the window itself the output buffer. This
- function trusts the application to not change the output buffer passed by
- the output function, at least until inflateBack() returns.
-
- inflateBackInit() must be called first to allocate the internal state
- and to initialize the state with the user-provided window buffer.
- inflateBack() may then be used multiple times to inflate a complete, raw
- deflate stream with each call. inflateBackEnd() is then called to free
- the allocated state.
-
- A raw deflate stream is one with no zlib or gzip header or trailer.
- This routine would normally be used in a utility that reads zip or gzip
- files and writes out uncompressed files. The utility would decode the
- header and process the trailer on its own, hence this routine expects
- only the raw deflate stream to decompress. This is different from the
- normal behavior of inflate(), which expects either a zlib or gzip header and
- trailer around the deflate stream.
-
- inflateBack() uses two subroutines supplied by the caller that are then
- called by inflateBack() for input and output. inflateBack() calls those
- routines until it reads a complete deflate stream and writes out all of the
- uncompressed data, or until it encounters an error. The function's
- parameters and return types are defined above in the in_func and out_func
- typedefs. inflateBack() will call in(in_desc, &buf) which should return the
- number of bytes of provided input, and a pointer to that input in buf. If
- there is no input available, in() must return zero--buf is ignored in that
- case--and inflateBack() will return a buffer error. inflateBack() will call
- out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out()
- should return zero on success, or non-zero on failure. If out() returns
- non-zero, inflateBack() will return with an error. Neither in() nor out()
- are permitted to change the contents of the window provided to
- inflateBackInit(), which is also the buffer that out() uses to write from.
- The length written by out() will be at most the window size. Any non-zero
- amount of input may be provided by in().
-
- For convenience, inflateBack() can be provided input on the first call by
- setting strm->next_in and strm->avail_in. If that input is exhausted, then
- in() will be called. Therefore strm->next_in must be initialized before
- calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called
- immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in
- must also be initialized, and then if strm->avail_in is not zero, input will
- initially be taken from strm->next_in[0 .. strm->avail_in - 1].
-
- The in_desc and out_desc parameters of inflateBack() are passed as the
- first parameter of in() and out() respectively when they are called. These
- descriptors can be optionally used to pass any information that the caller-
- supplied in() and out() functions need to do their job.
-
- On return, inflateBack() will set strm->next_in and strm->avail_in to
- pass back any unused input that was provided by the last in() call. The
- return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
- if in() or out() returned an error, Z_DATA_ERROR if there was a format
- error in the deflate stream (in which case strm->msg is set to indicate the
- nature of the error), or Z_STREAM_ERROR if the stream was not properly
- initialized. In the case of Z_BUF_ERROR, an input or output error can be
- distinguished using strm->next_in which will be Z_NULL only if in() returned
- an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to
- out() returning non-zero. (in() will always be called before out(), so
- strm->next_in is assured to be defined if out() returns non-zero.) Note
- that inflateBack() cannot return Z_OK.
-*/
-
-ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
-/*
- All memory allocated by inflateBackInit() is freed.
-
- inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
- state was inconsistent.
-*/
-
-ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
-/* Return flags indicating compile-time options.
-
- Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
- 1.0: size of uInt
- 3.2: size of uLong
- 5.4: size of voidpf (pointer)
- 7.6: size of z_off_t
-
- Compiler, assembler, and debug options:
- 8: DEBUG
- 9: ASMV or ASMINF -- use ASM code
- 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
- 11: 0 (reserved)
-
- One-time table building (smaller code, but not thread-safe if true):
- 12: BUILDFIXED -- build static block decoding tables when needed
- 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
- 14,15: 0 (reserved)
-
- Library content (indicates missing functionality):
- 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
- deflate code when not needed)
- 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
- and decode gzip streams (to avoid linking crc code)
- 18-19: 0 (reserved)
-
- Operation variations (changes in library functionality):
- 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
- 21: FASTEST -- deflate algorithm with only one, lowest compression level
- 22,23: 0 (reserved)
-
- The sprintf variant used by gzprintf (zero is best):
- 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
- 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
- 26: 0 = returns value, 1 = void -- 1 means inferred string length returned
-
- Remainder:
- 27-31: 0 (reserved)
- */
-
-
- /* utility functions */
-
-/*
- The following utility functions are implemented on top of the
- basic stream-oriented functions. To simplify the interface, some
- default options are assumed (compression level and memory usage,
- standard memory allocation functions). The source code of these
- utility functions can easily be modified if you need special options.
-*/
-
-ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
- const Bytef *source, uLong sourceLen));
-/*
- Compresses the source buffer into the destination buffer. sourceLen is
- the byte length of the source buffer. Upon entry, destLen is the total
- size of the destination buffer, which must be at least the value returned
- by compressBound(sourceLen). Upon exit, destLen is the actual size of the
- compressed buffer.
- This function can be used to compress a whole file at once if the
- input file is mmap'ed.
- compress returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_BUF_ERROR if there was not enough room in the output
- buffer.
-*/
-
-ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen,
- const Bytef *source, uLong sourceLen,
- int level));
-/*
- Compresses the source buffer into the destination buffer. The level
- parameter has the same meaning as in deflateInit. sourceLen is the byte
- length of the source buffer. Upon entry, destLen is the total size of the
- destination buffer, which must be at least the value returned by
- compressBound(sourceLen). Upon exit, destLen is the actual size of the
- compressed buffer.
-
- compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
- memory, Z_BUF_ERROR if there was not enough room in the output buffer,
- Z_STREAM_ERROR if the level parameter is invalid.
-*/
-
-ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
-/*
- compressBound() returns an upper bound on the compressed size after
- compress() or compress2() on sourceLen bytes. It would be used before
- a compress() or compress2() call to allocate the destination buffer.
-*/
-
-ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
- const Bytef *source, uLong sourceLen));
-/*
- Decompresses the source buffer into the destination buffer. sourceLen is
- the byte length of the source buffer. Upon entry, destLen is the total
- size of the destination buffer, which must be large enough to hold the
- entire uncompressed data. (The size of the uncompressed data must have
- been saved previously by the compressor and transmitted to the decompressor
- by some mechanism outside the scope of this compression library.)
- Upon exit, destLen is the actual size of the uncompressed buffer.
- This function can be used to decompress a whole file at once if the
- input file is mmap'ed.
-
- uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_BUF_ERROR if there was not enough room in the output
- buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.
-*/
-
-
-typedef voidp gzFile;
-
-ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
-/*
- Opens a gzip (.gz) file for reading or writing. The mode parameter
- is as in fopen ("rb" or "wb") but can also include a compression level
- ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
- Huffman only compression as in "wb1h", or 'R' for run-length encoding
- as in "wb1R". (See the description of deflateInit2 for more information
- about the strategy parameter.)
-
- gzopen can be used to read a file which is not in gzip format; in this
- case gzread will directly read from the file without decompression.
-
- gzopen returns NULL if the file could not be opened or if there was
- insufficient memory to allocate the (de)compression state; errno
- can be checked to distinguish the two cases (if errno is zero, the
- zlib error is Z_MEM_ERROR). */
-
-ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
-/*
- gzdopen() associates a gzFile with the file descriptor fd. File
- descriptors are obtained from calls like open, dup, creat, pipe or
- fileno (if the file has been previously opened with fopen).
- The mode parameter is as in gzopen.
- The next call of gzclose on the returned gzFile will also close the
- file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
- descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
- gzdopen returns NULL if there was insufficient memory to allocate
- the (de)compression state.
-*/
-
-ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
-/*
- Dynamically update the compression level or strategy. See the description
- of deflateInit2 for the meaning of these parameters.
- gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
- opened for writing.
-*/
-
-ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
-/*
- Reads the given number of uncompressed bytes from the compressed file.
- If the input file was not in gzip format, gzread copies the given number
- of bytes into the buffer.
- gzread returns the number of uncompressed bytes actually read (0 for
- end of file, -1 for error). */
-
-ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
- voidpc buf, unsigned len));
-/*
- Writes the given number of uncompressed bytes into the compressed file.
- gzwrite returns the number of uncompressed bytes actually written
- (0 in case of error).
-*/
-
-ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...));
-/*
- Converts, formats, and writes the args to the compressed file under
- control of the format string, as in fprintf. gzprintf returns the number of
- uncompressed bytes actually written (0 in case of error). The number of
- uncompressed bytes written is limited to 4095. The caller should assure that
- this limit is not exceeded. If it is exceeded, then gzprintf() will return
- an error (0) with nothing written. In this case, there may also be a
- buffer overflow with unpredictable consequences, which is possible only if
- zlib was compiled with the insecure functions sprintf() or vsprintf()
- because the secure snprintf() or vsnprintf() functions were not available.
-*/
-
-ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
-/*
- Writes the given null-terminated string to the compressed file, excluding
- the terminating null character.
- gzputs returns the number of characters written, or -1 in case of error.
-*/
-
-ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
-/*
- Reads bytes from the compressed file until len-1 characters are read, or
- a newline character is read and transferred to buf, or an end-of-file
- condition is encountered. The string is then terminated with a null
- character.
- gzgets returns buf, or Z_NULL in case of error.
-*/
-
-ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
-/*
- Writes c, converted to an unsigned char, into the compressed file.
- gzputc returns the value that was written, or -1 in case of error.
-*/
-
-ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
-/*
- Reads one byte from the compressed file. gzgetc returns this byte
- or -1 in case of end of file or error.
-*/
-
-ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
-/*
- Push one character back onto the stream to be read again later.
- Only one character of push-back is allowed. gzungetc() returns the
- character pushed, or -1 on failure. gzungetc() will fail if a
- character has been pushed but not read yet, or if c is -1. The pushed
- character will be discarded if the stream is repositioned with gzseek()
- or gzrewind().
-*/
-
-ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
-/*
- Flushes all pending output into the compressed file. The parameter
- flush is as in the deflate() function. The return value is the zlib
- error number (see function gzerror below). gzflush returns Z_OK if
- the flush parameter is Z_FINISH and all output could be flushed.
- gzflush should be called only when strictly necessary because it can
- degrade compression.
-*/
-
-ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
- z_off_t offset, int whence));
-/*
- Sets the starting position for the next gzread or gzwrite on the
- given compressed file. The offset represents a number of bytes in the
- uncompressed data stream. The whence parameter is defined as in lseek(2);
- the value SEEK_END is not supported.
- If the file is opened for reading, this function is emulated but can be
- extremely slow. If the file is opened for writing, only forward seeks are
- supported; gzseek then compresses a sequence of zeroes up to the new
- starting position.
-
- gzseek returns the resulting offset location as measured in bytes from
- the beginning of the uncompressed stream, or -1 in case of error, in
- particular if the file is opened for writing and the new starting position
- would be before the current position.
-*/
-
-ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
-/*
- Rewinds the given file. This function is supported only for reading.
-
- gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
-*/
-
-ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
-/*
- Returns the starting position for the next gzread or gzwrite on the
- given compressed file. This position represents a number of bytes in the
- uncompressed data stream.
-
- gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
-*/
-
-ZEXTERN int ZEXPORT gzeof OF((gzFile file));
-/*
- Returns 1 when EOF has previously been detected reading the given
- input stream, otherwise zero.
-*/
-
-ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
-/*
- Returns 1 if file is being read directly without decompression, otherwise
- zero.
-*/
-
-ZEXTERN int ZEXPORT gzclose OF((gzFile file));
-/*
- Flushes all pending output if necessary, closes the compressed file
- and deallocates all the (de)compression state. The return value is the zlib
- error number (see function gzerror below).
-*/
-
-ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
-/*
- Returns the error message for the last error which occurred on the
- given compressed file. errnum is set to zlib error number. If an
- error occurred in the file system and not in the compression library,
- errnum is set to Z_ERRNO and the application may consult errno
- to get the exact error code.
-*/
-
-ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
-/*
- Clears the error and end-of-file flags for file. This is analogous to the
- clearerr() function in stdio. This is useful for continuing to read a gzip
- file that is being written concurrently.
-*/
-
- /* checksum functions */
-
-/*
- These functions are not related to compression but are exported
- anyway because they might be useful in applications using the
- compression library.
-*/
-
-ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
-/*
- Update a running Adler-32 checksum with the bytes buf[0..len-1] and
- return the updated checksum. If buf is NULL, this function returns
- the required initial value for the checksum.
- An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
- much faster. Usage example:
-
- uLong adler = adler32(0L, Z_NULL, 0);
-
- while (read_buffer(buffer, length) != EOF) {
- adler = adler32(adler, buffer, length);
- }
- if (adler != original_adler) error();
-*/
-
-ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
- z_off_t len2));
-/*
- Combine two Adler-32 checksums into one. For two sequences of bytes, seq1
- and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
- each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of
- seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.
-*/
-
-ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
-/*
- Update a running CRC-32 with the bytes buf[0..len-1] and return the
- updated CRC-32. If buf is NULL, this function returns the required initial
- value for the crc. Pre- and post-conditioning (one's complement) is
- performed within this function so it shouldn't be done by the application.
- Usage example:
-
- uLong crc = crc32(0L, Z_NULL, 0);
-
- while (read_buffer(buffer, length) != EOF) {
- crc = crc32(crc, buffer, length);
- }
- if (crc != original_crc) error();
-*/
-
-ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
-
-/*
- Combine two CRC-32 check values into one. For two sequences of bytes,
- seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
- calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32
- check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
- len2.
-*/
-
-
- /* various hacks, don't look :) */
-
-/* deflateInit and inflateInit are macros to allow checking the zlib version
- * and the compiler's view of z_stream:
- */
-ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
- const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
- const char *version, int stream_size));
-ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
- int windowBits, int memLevel,
- int strategy, const char *version,
- int stream_size));
-ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
- const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
- unsigned char FAR *window,
- const char *version,
- int stream_size));
-#define deflateInit(strm, level) \
- deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream))
-#define inflateInit(strm) \
- inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream))
-#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
- deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
- (strategy), ZLIB_VERSION, sizeof(z_stream))
-#define inflateInit2(strm, windowBits) \
- inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
-#define inflateBackInit(strm, windowBits, window) \
- inflateBackInit_((strm), (windowBits), (window), \
- ZLIB_VERSION, sizeof(z_stream))
-
-
-#if !defined(_ZUTIL_H) && !defined(NO_DUMMY_DECL)
- struct internal_state {int dummy;}; /* hack for buggy compilers */
-#endif
-
-ZEXTERN const char * ZEXPORT zError OF((int));
-ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z));
-ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void));
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZLIB_H */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zmod.c b/sys/contrib/opensolaris/uts/common/zmod/zmod.c
deleted file mode 100644
index 2627239..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zmod.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/zmod.h>
-
-#include "zlib.h"
-
-/*
- * Uncompress the buffer 'src' into the buffer 'dst'. The caller must store
- * the expected decompressed data size externally so it can be passed in.
- * The resulting decompressed size is then returned through dstlen. This
- * function return Z_OK on success, or another error code on failure.
- */
-int
-z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
-{
- z_stream zs;
- int err;
-
- bzero(&zs, sizeof (zs));
- zs.next_in = (uchar_t *)src;
- zs.avail_in = srclen;
- zs.next_out = dst;
- zs.avail_out = *dstlen;
-
- if ((err = inflateInit(&zs)) != Z_OK)
- return (err);
-
- if ((err = inflate(&zs, Z_FINISH)) != Z_STREAM_END) {
- (void) inflateEnd(&zs);
- return (err == Z_OK ? Z_BUF_ERROR : err);
- }
-
- *dstlen = zs.total_out;
- return (inflateEnd(&zs));
-}
-
-int
-z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
- int level)
-{
-
- z_stream zs;
- int err;
-
- bzero(&zs, sizeof (zs));
- zs.next_in = (uchar_t *)src;
- zs.avail_in = srclen;
- zs.next_out = dst;
- zs.avail_out = *dstlen;
-
- if ((err = deflateInit(&zs, level)) != Z_OK)
- return (err);
-
- if ((err = deflate(&zs, Z_FINISH)) != Z_STREAM_END) {
- (void) deflateEnd(&zs);
- return (err == Z_OK ? Z_BUF_ERROR : err);
- }
-
- *dstlen = zs.total_out;
- return (deflateEnd(&zs));
-}
-
-int
-z_compress(void *dst, size_t *dstlen, const void *src, size_t srclen)
-{
- return (z_compress_level(dst, dstlen, src, srclen,
- Z_DEFAULT_COMPRESSION));
-}
-
-/*
- * Convert a zlib error code into a string error message.
- */
-const char *
-z_strerror(int err)
-{
- int i = Z_NEED_DICT - err;
-
- if (i < 0 || i > Z_NEED_DICT - Z_VERSION_ERROR)
- return ("unknown error");
-
- return (zError(err));
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zmod_subr.c b/sys/contrib/opensolaris/uts/common/zmod/zmod_subr.c
deleted file mode 100644
index 0542712..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zmod_subr.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/systm.h>
-#include <sys/cmn_err.h>
-#include <sys/kobj.h>
-
-struct zchdr {
- uint_t zch_magic;
- uint_t zch_size;
-};
-
-#define ZCH_MAGIC 0x3cc13cc1
-
-/*ARGSUSED*/
-void *
-zcalloc(void *opaque, uint_t items, uint_t size)
-{
- size_t nbytes = sizeof (struct zchdr) + items * size;
- struct zchdr *z = kobj_zalloc(nbytes, KM_NOWAIT|KM_TMP);
-
- if (z == NULL)
- return (NULL);
-
- z->zch_magic = ZCH_MAGIC;
- z->zch_size = nbytes;
-
- return (z + 1);
-}
-
-/*ARGSUSED*/
-void
-zcfree(void *opaque, void *ptr)
-{
- struct zchdr *z = ((struct zchdr *)ptr) - 1;
-
- if (z->zch_magic != ZCH_MAGIC)
- panic("zcfree region corrupt: hdr=%p ptr=%p", (void *)z, ptr);
-
- kobj_free(z, z->zch_size);
-}
-
-void
-zmemcpy(void *dest, const void *source, uint_t len)
-{
- bcopy(source, dest, len);
-}
-
-int
-zmemcmp(const void *s1, const void *s2, uint_t len)
-{
- return (bcmp(s1, s2, len));
-}
-
-void
-zmemzero(void *dest, uint_t len)
-{
- bzero(dest, len);
-}
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zutil.c b/sys/contrib/opensolaris/uts/common/zmod/zutil.c
deleted file mode 100644
index 7d46e30..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zutil.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* zutil.c -- target dependent utility functions for the compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "zutil.h"
-
-#ifndef NO_DUMMY_DECL
-struct internal_state {int dummy;}; /* for buggy compilers */
-#endif
-
-const char * const z_errmsg[10] = {
-"need dictionary", /* Z_NEED_DICT 2 */
-"stream end", /* Z_STREAM_END 1 */
-"", /* Z_OK 0 */
-"file error", /* Z_ERRNO (-1) */
-"stream error", /* Z_STREAM_ERROR (-2) */
-"data error", /* Z_DATA_ERROR (-3) */
-"insufficient memory", /* Z_MEM_ERROR (-4) */
-"buffer error", /* Z_BUF_ERROR (-5) */
-"incompatible version",/* Z_VERSION_ERROR (-6) */
-""};
-
-
-const char * ZEXPORT zlibVersion()
-{
- return ZLIB_VERSION;
-}
-
-uLong ZEXPORT zlibCompileFlags()
-{
- uLong flags;
-
- flags = 0;
- switch (sizeof(uInt)) {
- case 2: break;
- case 4: flags += 1; break;
- case 8: flags += 2; break;
- default: flags += 3;
- }
- switch (sizeof(uLong)) {
- case 2: break;
- case 4: flags += 1 << 2; break;
- case 8: flags += 2 << 2; break;
- default: flags += 3 << 2;
- }
- switch (sizeof(voidpf)) {
- case 2: break;
- case 4: flags += 1 << 4; break;
- case 8: flags += 2 << 4; break;
- default: flags += 3 << 4;
- }
- switch (sizeof(z_off_t)) {
- case 2: break;
- case 4: flags += 1 << 6; break;
- case 8: flags += 2 << 6; break;
- default: flags += 3 << 6;
- }
-#ifdef DEBUG
- flags += 1 << 8;
-#endif
-#if defined(ASMV) || defined(ASMINF)
- flags += 1 << 9;
-#endif
-#ifdef ZLIB_WINAPI
- flags += 1 << 10;
-#endif
-#ifdef BUILDFIXED
- flags += 1 << 12;
-#endif
-#ifdef DYNAMIC_CRC_TABLE
- flags += 1 << 13;
-#endif
-#ifdef NO_GZCOMPRESS
- flags += 1L << 16;
-#endif
-#ifdef NO_GZIP
- flags += 1L << 17;
-#endif
-#ifdef PKZIP_BUG_WORKAROUND
- flags += 1L << 20;
-#endif
-#ifdef FASTEST
- flags += 1L << 21;
-#endif
-#ifdef STDC
-# ifdef NO_vsnprintf
- flags += 1L << 25;
-# ifdef HAS_vsprintf_void
- flags += 1L << 26;
-# endif
-# else
-# ifdef HAS_vsnprintf_void
- flags += 1L << 26;
-# endif
-# endif
-#else
- flags += 1L << 24;
-# ifdef NO_snprintf
- flags += 1L << 25;
-# ifdef HAS_sprintf_void
- flags += 1L << 26;
-# endif
-# else
-# ifdef HAS_snprintf_void
- flags += 1L << 26;
-# endif
-# endif
-#endif
- return flags;
-}
-
-#ifdef DEBUG
-
-# ifndef verbose
-# define verbose 0
-# endif
-int z_verbose = verbose;
-
-void z_error (m)
- char *m;
-{
- fprintf(stderr, "%s\n", m);
- exit(1);
-}
-#endif
-
-/* exported to allow conversion of error code to string for compress() and
- * uncompress()
- */
-const char * ZEXPORT zError(err)
- int err;
-{
- return ERR_MSG(err);
-}
-
-#if defined(_WIN32_WCE)
- /* The Microsoft C Run-Time Library for Windows CE doesn't have
- * errno. We define it as a global variable to simplify porting.
- * Its value is always 0 and should not be used.
- */
- int errno = 0;
-#endif
-
-#define HAVE_MEMCPY
-#ifndef HAVE_MEMCPY
-
-void zmemcpy(dest, source, len)
- Bytef* dest;
- const Bytef* source;
- uInt len;
-{
- if (len == 0) return;
- do {
- *dest++ = *source++; /* ??? to be unrolled */
- } while (--len != 0);
-}
-
-int zmemcmp(s1, s2, len)
- const Bytef* s1;
- const Bytef* s2;
- uInt len;
-{
- uInt j;
-
- for (j = 0; j < len; j++) {
- if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
- }
- return 0;
-}
-
-void zmemzero(dest, len)
- Bytef* dest;
- uInt len;
-{
- if (len == 0) return;
- do {
- *dest++ = 0; /* ??? to be unrolled */
- } while (--len != 0);
-}
-#endif
-
-
-#ifdef SYS16BIT
-
-#ifdef __TURBOC__
-/* Turbo C in 16-bit mode */
-
-# define MY_ZCALLOC
-
-/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
- * and farmalloc(64K) returns a pointer with an offset of 8, so we
- * must fix the pointer. Warning: the pointer must be put back to its
- * original form in order to free it, use zcfree().
- */
-
-#define MAX_PTR 10
-/* 10*64K = 640K */
-
-local int next_ptr = 0;
-
-typedef struct ptr_table_s {
- voidpf org_ptr;
- voidpf new_ptr;
-} ptr_table;
-
-local ptr_table table[MAX_PTR];
-/* This table is used to remember the original form of pointers
- * to large buffers (64K). Such pointers are normalized with a zero offset.
- * Since MSDOS is not a preemptive multitasking OS, this table is not
- * protected from concurrent access. This hack doesn't work anyway on
- * a protected system like OS/2. Use Microsoft C instead.
- */
-
-voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-{
- voidpf buf = opaque; /* just to make some compilers happy */
- ulg bsize = (ulg)items*size;
-
- /* If we allocate less than 65520 bytes, we assume that farmalloc
- * will return a usable pointer which doesn't have to be normalized.
- */
- if (bsize < 65520L) {
- buf = farmalloc(bsize);
- if (*(ush*)&buf != 0) return buf;
- } else {
- buf = farmalloc(bsize + 16L);
- }
- if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
- table[next_ptr].org_ptr = buf;
-
- /* Normalize the pointer to seg:0 */
- *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
- *(ush*)&buf = 0;
- table[next_ptr++].new_ptr = buf;
- return buf;
-}
-
-void zcfree (voidpf opaque, voidpf ptr)
-{
- int n;
- if (*(ush*)&ptr != 0) { /* object < 64K */
- farfree(ptr);
- return;
- }
- /* Find the original pointer */
- for (n = 0; n < next_ptr; n++) {
- if (ptr != table[n].new_ptr) continue;
-
- farfree(table[n].org_ptr);
- while (++n < next_ptr) {
- table[n-1] = table[n];
- }
- next_ptr--;
- return;
- }
- ptr = opaque; /* just to make some compilers happy */
- Assert(0, "zcfree: ptr not found");
-}
-
-#endif /* __TURBOC__ */
-
-
-#ifdef M_I86
-/* Microsoft C in 16-bit mode */
-
-# define MY_ZCALLOC
-
-#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
-# define _halloc halloc
-# define _hfree hfree
-#endif
-
-voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-{
- if (opaque) opaque = 0; /* to make compiler happy */
- return _halloc((long)items, size);
-}
-
-void zcfree (voidpf opaque, voidpf ptr)
-{
- if (opaque) opaque = 0; /* to make compiler happy */
- _hfree(ptr);
-}
-
-#endif /* M_I86 */
-
-#endif /* SYS16BIT */
-
-
-#ifndef MY_ZCALLOC /* Any system without a special alloc function */
-
-#ifndef STDC
-extern voidp malloc OF((uInt size));
-extern voidp calloc OF((uInt items, uInt size));
-extern void free OF((voidpf ptr));
-#endif
-
-voidpf zcalloc (opaque, items, size)
- voidpf opaque;
- unsigned items;
- unsigned size;
-{
- if (opaque) items += size - size; /* make compiler happy */
- return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
- (voidpf)calloc(items, size);
-}
-
-void zcfree (opaque, ptr)
- voidpf opaque;
- voidpf ptr;
-{
- free(ptr);
- if (opaque) return; /* make compiler happy */
-}
-
-#endif /* MY_ZCALLOC */
diff --git a/sys/contrib/opensolaris/uts/common/zmod/zutil.h b/sys/contrib/opensolaris/uts/common/zmod/zutil.h
deleted file mode 100644
index 1d02c1d..0000000
--- a/sys/contrib/opensolaris/uts/common/zmod/zutil.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
- part of the implementation of the compression library and is
- subject to change. Applications should only use zlib.h.
- */
-
-#ifndef _ZUTIL_H
-#define _ZUTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#define ZLIB_INTERNAL
-#include "zlib.h"
-
-#ifdef STDC
-# ifndef _WIN32_WCE
-# include <stddef.h>
-# endif
-# include <string.h>
-# include <stdlib.h>
-#endif
-#ifdef NO_ERRNO_H
-# ifdef _WIN32_WCE
- /* The Microsoft C Run-Time Library for Windows CE doesn't have
- * errno. We define it as a global variable to simplify porting.
- * Its value is always 0 and should not be used. We rename it to
- * avoid conflict with other libraries that use the same workaround.
- */
-# define errno z_errno
-# endif
- extern int errno;
-#else
-# ifndef _WIN32_WCE
-# include <sys/errno.h>
-# endif
-#endif
-
-#ifndef local
-# define local static
-#endif
-/* compile with -Dlocal if your debugger can't find static symbols */
-
-typedef unsigned char uch;
-typedef uch FAR uchf;
-typedef unsigned short ush;
-typedef ush FAR ushf;
-typedef unsigned long ulg;
-
-extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
-/* (size given to avoid silly warnings with Visual C++) */
-
-#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
-
-#define ERR_RETURN(strm,err) \
- return (strm->msg = (char*)ERR_MSG(err), (err))
-/* To be used only when the state is known to be valid */
-
- /* common constants */
-
-#ifndef DEF_WBITS
-# define DEF_WBITS MAX_WBITS
-#endif
-/* default windowBits for decompression. MAX_WBITS is for compression only */
-
-#if MAX_MEM_LEVEL >= 8
-# define DEF_MEM_LEVEL 8
-#else
-# define DEF_MEM_LEVEL MAX_MEM_LEVEL
-#endif
-/* default memLevel */
-
-#define STORED_BLOCK 0
-#define STATIC_TREES 1
-#define DYN_TREES 2
-/* The three kinds of block type */
-
-#define MIN_MATCH 3
-#define MAX_MATCH 258
-/* The minimum and maximum match lengths */
-
-#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
-
- /* target dependencies */
-
-#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
-# define OS_CODE 0x00
-# if defined(__TURBOC__) || defined(__BORLANDC__)
-# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
- /* Allow compilation with ANSI keywords only enabled */
- void _Cdecl farfree( void *block );
- void *_Cdecl farmalloc( unsigned long nbytes );
-# else
-# include <alloc.h>
-# endif
-# else /* MSC or DJGPP */
-# include <malloc.h>
-# endif
-#endif
-
-#ifdef AMIGA
-# define OS_CODE 0x01
-#endif
-
-#if defined(VAXC) || defined(VMS)
-# define OS_CODE 0x02
-# define F_OPEN(name, mode) \
- fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
-#endif
-
-#if defined(ATARI) || defined(atarist)
-# define OS_CODE 0x05
-#endif
-
-#ifdef OS2
-# define OS_CODE 0x06
-# ifdef M_I86
- #include <malloc.h>
-# endif
-#endif
-
-#if defined(MACOS) || defined(TARGET_OS_MAC)
-# define OS_CODE 0x07
-# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-# include <unix.h> /* for fdopen */
-# else
-# ifndef fdopen
-# define fdopen(fd,mode) NULL /* No fdopen() */
-# endif
-# endif
-#endif
-
-#ifdef TOPS20
-# define OS_CODE 0x0a
-#endif
-
-#ifdef WIN32
-# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */
-# define OS_CODE 0x0b
-# endif
-#endif
-
-#ifdef __50SERIES /* Prime/PRIMOS */
-# define OS_CODE 0x0f
-#endif
-
-#if defined(_BEOS_) || defined(RISCOS)
-# define fdopen(fd,mode) NULL /* No fdopen() */
-#endif
-
-#if (defined(_MSC_VER) && (_MSC_VER > 600))
-# if defined(_WIN32_WCE)
-# define fdopen(fd,mode) NULL /* No fdopen() */
-# ifndef _PTRDIFF_T_DEFINED
- typedef int ptrdiff_t;
-# define _PTRDIFF_T_DEFINED
-# endif
-# else
-# define fdopen(fd,type) _fdopen(fd,type)
-# endif
-#endif
-
- /* common defaults */
-
-#ifndef OS_CODE
-# define OS_CODE 0x03 /* assume Unix */
-#endif
-
-#ifndef F_OPEN
-# define F_OPEN(name, mode) fopen((name), (mode))
-#endif
-
- /* functions */
-
-#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
-# ifndef HAVE_VSNPRINTF
-# define HAVE_VSNPRINTF
-# endif
-#endif
-#if defined(__CYGWIN__)
-# ifndef HAVE_VSNPRINTF
-# define HAVE_VSNPRINTF
-# endif
-#endif
-#ifndef HAVE_VSNPRINTF
-# ifdef MSDOS
- /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
- but for now we just assume it doesn't. */
-# define NO_vsnprintf
-# endif
-# ifdef __TURBOC__
-# define NO_vsnprintf
-# endif
-# ifdef WIN32
- /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
-# if !defined(vsnprintf) && !defined(NO_vsnprintf)
-# define vsnprintf _vsnprintf
-# endif
-# endif
-# ifdef __SASC
-# define NO_vsnprintf
-# endif
-#endif
-#ifdef VMS
-# define NO_vsnprintf
-#endif
-
-#if defined(pyr)
-# define NO_MEMCPY
-#endif
-#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
- /* Use our own functions for small and medium model with MSC <= 5.0.
- * You may have to use the same strategy for Borland C (untested).
- * The __SC__ check is for Symantec.
- */
-# define NO_MEMCPY
-#endif
-#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
-# define HAVE_MEMCPY
-#endif
-#ifdef HAVE_MEMCPY
-# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
-# define zmemcpy _fmemcpy
-# define zmemcmp _fmemcmp
-# define zmemzero(dest, len) _fmemset(dest, 0, len)
-# else
-# define zmemcpy memcpy
-# define zmemcmp memcmp
-# define zmemzero(dest, len) memset(dest, 0, len)
-# endif
-#else
- extern void zmemcpy OF((void* dest, const void* source, uInt len));
- extern int zmemcmp OF((const void* s1, const void* s2, uInt len));
- extern void zmemzero OF((void* dest, uInt len));
-#endif
-
-/* Diagnostic functions */
-#ifdef DEBUG
-# include <stdio.h>
- extern int z_verbose;
- extern void z_error OF((char *m));
-# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
-# define Trace(x) {if (z_verbose>=0) fprintf x ;}
-# define Tracev(x) {if (z_verbose>0) fprintf x ;}
-# define Tracevv(x) {if (z_verbose>1) fprintf x ;}
-# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
-# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
-#else
-# define Assert(cond,msg)
-# define Trace(x)
-# define Tracev(x)
-# define Tracevv(x)
-# define Tracec(c,x)
-# define Tracecv(c,x)
-#endif
-
-
-voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
-void zcfree OF((voidpf opaque, voidpf ptr));
-
-#define ZALLOC(strm, items, size) \
- (*((strm)->zalloc))((strm)->opaque, (items), (size))
-#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
-#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
-
-#endif /* _ZUTIL_H */
OpenPOWER on IntegriCloud