author     Renato Botelho <renato@netgate.com>  2015-10-07 18:07:27 -0300
committer  Renato Botelho <renato@netgate.com>  2015-10-07 18:07:27 -0300
commit     945ed01c4bae06169f63978e43029c04d4abd731
tree       5cd3ae372187bd25416aeec230f31242c3729be7
parent     8aae621ec1e941b8f27411df1bace42778e61b99
parent     36b47c3278c31b909b37616c58ccf4f148a9e47c
Merge branch 'stable/10' into devel
-rw-r--r--  ObsoleteFiles.inc | 8
-rw-r--r--  cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c | 2
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.c | 157
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 44
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 35
-rw-r--r--  cddl/contrib/opensolaris/cmd/zhack/zhack.c | 20
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_main.c | 5
-rw-r--r--  cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c | 56
-rw-r--r--  cddl/contrib/opensolaris/cmd/ztest/ztest.c | 9
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h | 4
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c | 4
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c | 198
-rw-r--r--  cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c | 22
-rw-r--r--  cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h | 19
-rw-r--r--  contrib/binutils/gas/dwarf2dbg.c | 18
-rw-r--r--  contrib/gcclibs/libcpp/files.c | 4
-rw-r--r--  contrib/libcxxrt/libelftc_dem_gnu3.c | 502
-rw-r--r--  etc/Makefile | 11
-rw-r--r--  games/grdc/grdc.c | 10
-rw-r--r--  gnu/usr.bin/binutils/libbfd/Makefile.i386 | 2
-rw-r--r--  gnu/usr.bin/groff/src/utils/indxbib/Makefile | 6
-rw-r--r--  kerberos5/libexec/kdigest/Makefile | 2
-rw-r--r--  kerberos5/usr.bin/hxtool/Makefile | 2
-rw-r--r--  kerberos5/usr.bin/kadmin/Makefile | 2
-rw-r--r--  kerberos5/usr.bin/kcc/Makefile | 2
-rw-r--r--  kerberos5/usr.sbin/iprop-log/Makefile | 2
-rw-r--r--  kerberos5/usr.sbin/ktutil/Makefile | 2
-rw-r--r--  lib/clang/include/Makefile | 2
-rw-r--r--  lib/libc/stdio/open_memstream.3 | 6
-rw-r--r--  lib/libc/sys/Makefile.inc | 2
-rw-r--r--  lib/libc/sys/madvise.2 | 8
-rw-r--r--  lib/libc/tests/gen/posix_spawn/Makefile | 2
-rw-r--r--  lib/libusb/Makefile | 11
-rw-r--r--  lib/libutil/Makefile | 6
-rw-r--r--  lib/libutil/kinfo_getvmobject.3 | 74
-rw-r--r--  lib/libutil/kinfo_getvmobject.c | 94
-rw-r--r--  lib/libutil/libutil.h | 2
-rw-r--r--  lib/libz/Makefile | 7
-rw-r--r--  lib/libz/inflate.c | 2
-rw-r--r--  release/arm/BEAGLEBONE.conf | 2
-rw-r--r--  release/arm/CUBOX-HUMMINGBOARD.conf | 2
-rw-r--r--  release/arm/GUMSTIX.conf | 2
-rw-r--r--  release/arm/PANDABOARD.conf | 2
-rw-r--r--  release/arm/RPI-B.conf | 2
-rw-r--r--  release/arm/RPI2.conf | 2
-rw-r--r--  release/arm/WANDBOARD.conf | 2
-rw-r--r--  release/doc/en_US.ISO8859-1/relnotes/article.xml | 60
-rw-r--r--  release/doc/share/xml/security.xml | 7
-rw-r--r--  release/doc/share/xml/sponsor.ent | 3
-rwxr-xr-x  release/release.sh | 12
-rw-r--r--  release/tools/vmimage.subr | 2
-rw-r--r--  sbin/dmesg/dmesg.c | 3
-rw-r--r--  sbin/ipfw/ipfw2.c | 8
-rw-r--r--  secure/lib/libcrypto/Makefile | 8
-rw-r--r--  share/man/man4/ctl.4 | 63
-rw-r--r--  share/man/man4/ng_pppoe.4 | 18
-rw-r--r--  share/man/man7/hier.7 | 4
-rw-r--r--  share/man/man9/VOP_ADVISE.9 | 5
-rw-r--r--  share/man/man9/atomic.9 | 129
-rw-r--r--  share/man/man9/printf.9 | 3
-rw-r--r--  share/mk/bsd.lib.mk | 2
-rw-r--r--  sys/amd64/amd64/elf_machdep.c | 17
-rw-r--r--  sys/arm/arm/elf_machdep.c | 9
-rw-r--r--  sys/cam/ata/ata_da.c | 27
-rw-r--r--  sys/cam/ata/ata_xpt.c | 3
-rw-r--r--  sys/cam/cam_periph.c | 9
-rw-r--r--  sys/cam/cam_periph.h | 3
-rw-r--r--  sys/cam/cam_xpt.c | 16
-rw-r--r--  sys/cam/ctl/README.ctl.txt | 64
-rw-r--r--  sys/cam/ctl/ctl.c | 6143
-rw-r--r--  sys/cam/ctl/ctl.h | 54
-rw-r--r--  sys/cam/ctl/ctl_backend.c | 56
-rw-r--r--  sys/cam/ctl/ctl_backend.h | 47
-rw-r--r--  sys/cam/ctl/ctl_backend_block.c | 1223
-rw-r--r--  sys/cam/ctl/ctl_backend_block.h | 72
-rw-r--r--  sys/cam/ctl/ctl_backend_ramdisk.c | 360
-rw-r--r--  sys/cam/ctl/ctl_cmd_table.c | 381
-rw-r--r--  sys/cam/ctl/ctl_error.c | 279
-rw-r--r--  sys/cam/ctl/ctl_error.h | 11
-rw-r--r--  sys/cam/ctl/ctl_frontend.c | 109
-rw-r--r--  sys/cam/ctl/ctl_frontend.h | 5
-rw-r--r--  sys/cam/ctl/ctl_frontend_cam_sim.c | 66
-rw-r--r--  sys/cam/ctl/ctl_frontend_internal.c | 1612
-rw-r--r--  sys/cam/ctl/ctl_frontend_internal.h | 154
-rw-r--r--  sys/cam/ctl/ctl_frontend_ioctl.c | 439
-rw-r--r--  sys/cam/ctl/ctl_frontend_iscsi.c | 209
-rw-r--r--  sys/cam/ctl/ctl_ha.c | 1029
-rw-r--r--  sys/cam/ctl/ctl_ha.h | 200
-rw-r--r--  sys/cam/ctl/ctl_io.h | 204
-rw-r--r--  sys/cam/ctl/ctl_ioctl.h | 78
-rw-r--r--  sys/cam/ctl/ctl_private.h | 165
-rw-r--r--  sys/cam/ctl/ctl_scsi_all.c | 2
-rw-r--r--  sys/cam/ctl/ctl_ser_table.c | 2
-rw-r--r--  sys/cam/ctl/ctl_tpc.c | 125
-rw-r--r--  sys/cam/ctl/ctl_tpc_local.c | 56
-rw-r--r--  sys/cam/ctl/ctl_util.c | 34
-rw-r--r--  sys/cam/ctl/ctl_util.h | 4
-rw-r--r--  sys/cam/ctl/scsi_ctl.c | 72
-rw-r--r--  sys/cam/scsi/scsi_all.c | 210
-rw-r--r--  sys/cam/scsi/scsi_all.h | 45
-rw-r--r--  sys/cam/scsi/scsi_cd.c | 3
-rw-r--r--  sys/cam/scsi/scsi_cd.h | 130
-rw-r--r--  sys/cam/scsi/scsi_ch.c | 12
-rw-r--r--  sys/cam/scsi/scsi_da.c | 15
-rw-r--r--  sys/cam/scsi/scsi_low.c | 9
-rw-r--r--  sys/cam/scsi/scsi_pass.c | 10
-rw-r--r--  sys/cam/scsi/scsi_pt.c | 3
-rw-r--r--  sys/cam/scsi/scsi_sa.c | 3
-rw-r--r--  sys/cam/scsi/scsi_sg.c | 10
-rw-r--r--  sys/cam/scsi/scsi_target.c | 9
-rw-r--r--  sys/cam/scsi/scsi_xpt.c | 11
-rw-r--r--  sys/cddl/compat/opensolaris/sys/kstat.h | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/common/avl/avl.c | 10
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c | 54
-rw-r--r--  sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h | 18
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/Makefile.files | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 3973
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c | 111
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c | 567
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c | 195
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c | 343
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c | 1208
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c | 49
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c | 735
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c | 111
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c | 79
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c | 287
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c | 32
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c | 38
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c | 29
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c | 59
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c | 11
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c | 366
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c | 25
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c | 58
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c | 33
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c | 48
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h | 37
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bpobj.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h | 54
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h | 20
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h | 169
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h | 16
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h | 37
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h | 35
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h | 106
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h | 31
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h | 18
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h | 42
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h | 9
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h | 41
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c | 20
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c | 86
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c | 48
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c | 15
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c | 15
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 57
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 54
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c | 25
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c | 143
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c | 23
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c | 27
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c | 6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h | 11
-rw-r--r--  sys/compat/linprocfs/linprocfs.c | 12
-rw-r--r--  sys/conf/files | 6
-rw-r--r--  sys/contrib/ipfilter/netinet/ip_state.c | 3
-rw-r--r--  sys/dev/ahci/ahci_pci.c | 5
-rw-r--r--  sys/dev/bxe/ecore_hsi.h | 6
-rw-r--r--  sys/dev/hwpmc/hwpmc_core.c | 2
-rw-r--r--  sys/dev/hwpmc/hwpmc_mod.c | 6
-rw-r--r--  sys/dev/isp/isp.c | 267
-rw-r--r--  sys/dev/isp/isp_freebsd.c | 545
-rw-r--r--  sys/dev/isp/isp_freebsd.h | 4
-rw-r--r--  sys/dev/isp/isp_library.c | 281
-rw-r--r--  sys/dev/isp/isp_library.h | 5
-rw-r--r--  sys/dev/isp/isp_target.h | 2
-rw-r--r--  sys/dev/isp/ispmbox.h | 4
-rw-r--r--  sys/dev/isp/ispvar.h | 64
-rw-r--r--  sys/dev/uart/uart_bus_pci.c | 1
-rw-r--r--  sys/fs/fifofs/fifo_vnops.c | 11
-rw-r--r--  sys/fs/procfs/procfs_map.c | 14
-rw-r--r--  sys/gnu/fs/reiserfs/reiserfs_vfsops.c | 1
-rw-r--r--  sys/i386/i386/elf_machdep.c | 13
-rw-r--r--  sys/ia64/ia64/elf_machdep.c | 39
-rw-r--r--  sys/kern/kern_proc.c | 22
-rw-r--r--  sys/kern/kern_rmlock.c | 6
-rw-r--r--  sys/kern/kern_tc.c | 21
-rw-r--r--  sys/kern/kern_umtx.c | 24
-rw-r--r--  sys/kern/link_elf.c | 30
-rw-r--r--  sys/kern/link_elf_obj.c | 51
-rw-r--r--  sys/kern/makesyscalls.sh | 2
-rw-r--r--  sys/kern/sched_4bsd.c | 4
-rw-r--r--  sys/kern/sched_ule.c | 4
-rw-r--r--  sys/kern/subr_param.c | 4
-rw-r--r--  sys/kern/subr_prf.c | 2
-rw-r--r--  sys/kern/subr_syscall.c | 10
-rw-r--r--  sys/kern/sys_process.c | 2
-rw-r--r--  sys/kern/vfs_mountroot.c | 14
-rw-r--r--  sys/kern/vfs_subr.c | 4
-rw-r--r--  sys/libkern/asprintf.c | 77
-rw-r--r--  sys/mips/mips/elf_machdep.c | 33
-rw-r--r--  sys/modules/ctl/Makefile | 3
-rw-r--r--  sys/netgraph/ng_pppoe.c | 45
-rw-r--r--  sys/netgraph/ng_pppoe.h | 13
-rw-r--r--  sys/netinet/tcp_usrreq.c | 2
-rw-r--r--  sys/netinet6/in6.c | 2
-rw-r--r--  sys/powerpc/powerpc/elf32_machdep.c | 33
-rw-r--r--  sys/powerpc/powerpc/elf64_machdep.c | 25
-rw-r--r--  sys/rpc/rpcsec_gss/svc_rpcsec_gss.c | 7
-rw-r--r--  sys/sparc64/sparc64/elf_machdep.c | 5
-rw-r--r--  sys/sys/elf_common.h | 22
-rw-r--r--  sys/sys/linker.h | 2
-rw-r--r--  sys/sys/systm.h | 4
-rw-r--r--  sys/sys/user.h | 21
-rw-r--r--  sys/sys/vnode.h | 5
-rw-r--r--  sys/vm/vm_kern.c | 43
-rw-r--r--  sys/vm/vm_object.c | 149
-rw-r--r--  sys/vm/vm_object.h | 1
-rw-r--r--  sys/vm/vm_page.c | 4
-rw-r--r--  sys/vm/vm_page.h | 1
-rw-r--r--  sys/vm/vm_pageout.c | 75
-rw-r--r--  sys/x86/acpica/madt.c | 97
-rw-r--r--  tests/sys/vm/Makefile | 2
-rw-r--r--  tests/sys/vm/mmap_test.c | 232
-rwxr-xr-x  tools/build/options/makeman | 16
-rw-r--r--  usr.bin/ar/ar.1 | 3
-rw-r--r--  usr.bin/ar/ar.c | 29
-rw-r--r--  usr.bin/bmake/Makefile.inc | 2
-rw-r--r--  usr.bin/ctlstat/ctlstat.8 | 16
-rw-r--r--  usr.bin/ctlstat/ctlstat.c | 126
-rw-r--r--  usr.bin/elfdump/elfdump.c | 2
-rw-r--r--  usr.bin/login/login.c | 1
-rw-r--r--  usr.bin/login/login_fbtab.c | 3
-rw-r--r--  usr.bin/systat/iostat.c | 22
-rw-r--r--  usr.bin/systat/netstat.c | 2
-rw-r--r--  usr.bin/systat/pigs.c | 4
-rw-r--r--  usr.bin/systat/vmstat.c | 9
-rw-r--r--  usr.bin/vmstat/vmstat.8 | 35
-rw-r--r--  usr.bin/vmstat/vmstat.c | 140
-rw-r--r--  usr.bin/vtfontcvt/vtfontcvt.c | 44
-rw-r--r--  usr.bin/w/Makefile | 2
-rw-r--r--  usr.bin/w/w.c | 12
-rw-r--r--  usr.bin/yacc/tests/Makefile | 2
-rw-r--r--  usr.sbin/acpi/acpiconf/acpiconf.8 | 12
-rw-r--r--  usr.sbin/bhyve/acpi.c | 4
-rw-r--r--  usr.sbin/ctladm/ctladm.8 | 223
-rw-r--r--  usr.sbin/ctladm/ctladm.c | 1060
-rw-r--r--  usr.sbin/ctld/ctl.conf.5 | 22
-rw-r--r--  usr.sbin/ctld/ctld.c | 56
-rw-r--r--  usr.sbin/ctld/ctld.h | 10
-rw-r--r--  usr.sbin/ctld/discovery.c | 1
-rw-r--r--  usr.sbin/ctld/isns.c | 6
-rw-r--r--  usr.sbin/ctld/kernel.c | 62
-rw-r--r--  usr.sbin/ctld/keys.c | 21
-rw-r--r--  usr.sbin/ctld/login.c | 7
-rw-r--r--  usr.sbin/ctld/parse.y | 85
-rw-r--r--  usr.sbin/ctld/pdu.c | 2
-rw-r--r--  usr.sbin/ctld/token.l | 5
-rw-r--r--  usr.sbin/etcupdate/etcupdate.8 | 16
-rw-r--r--  usr.sbin/gssd/gssd.c | 4
-rw-r--r--  usr.sbin/gstat/gstat.c | 8
-rw-r--r--  usr.sbin/iscsid/iscsid.h | 1
-rw-r--r--  usr.sbin/iscsid/keys.c | 20
-rw-r--r--  usr.sbin/mergemaster/mergemaster.8 | 13
-rw-r--r--  usr.sbin/ndiscvt/inf.c | 6
-rw-r--r--  usr.sbin/ndiscvt/inf.h | 2
-rw-r--r--  usr.sbin/ntp/ntpdc/Makefile | 3
-rw-r--r--  usr.sbin/pmcstat/pmcstat.8 | 14
-rw-r--r--  usr.sbin/pmcstat/pmcstat.c | 47
-rw-r--r--  usr.sbin/rpcbind/rpcb_svc_com.c | 27
-rw-r--r--  usr.sbin/sesutil/Makefile | 6
-rw-r--r--  usr.sbin/sesutil/sesutil.8 | 73
-rw-r--r--  usr.sbin/sesutil/sesutil.c | 224
-rw-r--r--  usr.sbin/sysrc/Makefile | 4
302 files changed, 16890 insertions(+), 13226 deletions(-)
diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index 113c4fb..18fb323 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -6403,6 +6403,12 @@ OLD_LIBS+=usr/lib/libposix1e.so.2
OLD_LIBS+=usr/lib/libskey.so.2
OLD_LIBS+=usr/lib/libusbhid.so.0
OLD_LIBS+=usr/lib/libvgl.so.2
+# 20030218: OpenSSL 0.9.7 import
+OLD_FILES+=usr/include/des.h
+OLD_FILES+=usr/lib/libdes.a
+OLD_FILES+=usr/lib/libdes.so
+OLD_LIBS+=usr/lib/libdes.so.3
+OLD_FILES+=usr/lib/libdes_p.a
# 200302XX
OLD_LIBS+=usr/lib/libacl.so.3
OLD_LIBS+=usr/lib/libasn1.so.5
@@ -6461,6 +6467,8 @@ OLD_LIBS+=usr/lib/libtermcap.so.2
OLD_LIBS+=usr/lib/libutil.so.2
OLD_LIBS+=usr/lib/libvgl.so.1
OLD_LIBS+=usr/lib/libwrap.so.2
+# 19991216
+OLD_FILES+=usr/sbin/xntpdc
# 199909XX
OLD_LIBS+=usr/lib/libc_r.so.3
# ???
diff --git a/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c b/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
index 9b2e37b..9432161 100644
--- a/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
+++ b/cddl/contrib/opensolaris/cmd/sgs/tools/common/sgsmsg.c
@@ -132,6 +132,8 @@ typedef struct msg_string {
static msg_string *msg_head;
static msg_string *msg_tail;
+int aok;
+
/*
* message_append() is responsible for both inserting strings into
* the master Str_tbl as well as maintaining a list of the
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index 34912cd..114bfaf 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <stdio.h>
@@ -95,6 +95,8 @@ int zopt_objects = 0;
libzfs_handle_t *g_zfs;
uint64_t max_inflight = 1000;
+static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
+
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
@@ -418,6 +420,79 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
zap_cursor_fini(&zc);
}
+static void
+dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ bpobj_phys_t *bpop = data;
+ char bytes[32], comp[32], uncomp[32];
+
+ if (bpop == NULL)
+ return;
+
+ zdb_nicenum(bpop->bpo_bytes, bytes);
+ zdb_nicenum(bpop->bpo_comp, comp);
+ zdb_nicenum(bpop->bpo_uncomp, uncomp);
+
+ (void) printf("\t\tnum_blkptrs = %llu\n",
+ (u_longlong_t)bpop->bpo_num_blkptrs);
+ (void) printf("\t\tbytes = %s\n", bytes);
+ if (size >= BPOBJ_SIZE_V1) {
+ (void) printf("\t\tcomp = %s\n", comp);
+ (void) printf("\t\tuncomp = %s\n", uncomp);
+ }
+ if (size >= sizeof (*bpop)) {
+ (void) printf("\t\tsubobjs = %llu\n",
+ (u_longlong_t)bpop->bpo_subobjs);
+ (void) printf("\t\tnum_subobjs = %llu\n",
+ (u_longlong_t)bpop->bpo_num_subobjs);
+ }
+
+ if (dump_opt['d'] < 5)
+ return;
+
+ for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
+ char blkbuf[BP_SPRINTF_LEN];
+ blkptr_t bp;
+
+ int err = dmu_read(os, object,
+ i * sizeof (bp), sizeof (bp), &bp, 0);
+ if (err != 0) {
+ (void) printf("got error %u from dmu_read\n", err);
+ break;
+ }
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
+ (void) printf("\t%s\n", blkbuf);
+ }
+}
+
+/* ARGSUSED */
+static void
+dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ dmu_object_info_t doi;
+
+ VERIFY0(dmu_object_info(os, object, &doi));
+ uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
+
+ int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
+ if (err != 0) {
+ (void) printf("got error %u from dmu_read\n", err);
+ kmem_free(subobjs, doi.doi_max_offset);
+ return;
+ }
+
+ int64_t last_nonzero = -1;
+ for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
+ if (subobjs[i] != 0)
+ last_nonzero = i;
+ }
+
+ for (int64_t i = 0; i <= last_nonzero; i++) {
+ (void) printf("\t%llu\n", (longlong_t)subobjs[i]);
+ }
+ kmem_free(subobjs, doi.doi_max_offset);
+}
+
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
@@ -1130,7 +1205,9 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
if (BP_IS_HOLE(bp)) {
(void) snprintf(blkbuf + strlen(blkbuf),
- buflen - strlen(blkbuf), "B=%llu",
+ buflen - strlen(blkbuf),
+ "%llxL B=%llu",
+ (u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)bp->blk_birth);
} else {
(void) snprintf(blkbuf + strlen(blkbuf),
@@ -1397,7 +1474,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
}
static void
-dump_bpobj(bpobj_t *bpo, char *name, int indent)
+dump_full_bpobj(bpobj_t *bpo, char *name, int indent)
{
char bytes[32];
char comp[32];
@@ -1411,11 +1488,12 @@ dump_bpobj(bpobj_t *bpo, char *name, int indent)
zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
(void) printf(" %*s: object %llu, %llu local blkptrs, "
- "%llu subobjs, %s (%s/%s comp)\n",
+ "%llu subobjs in object %llu, %s (%s/%s comp)\n",
indent * 8, name,
(u_longlong_t)bpo->bpo_object,
(u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
(u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
+ (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
bytes, comp, uncomp);
for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
@@ -1432,7 +1510,7 @@ dump_bpobj(bpobj_t *bpo, char *name, int indent)
error, (u_longlong_t)subobj);
continue;
}
- dump_bpobj(&subbpo, "subobj", indent + 1);
+ dump_full_bpobj(&subbpo, "subobj", indent + 1);
bpobj_close(&subbpo);
}
} else {
@@ -1466,7 +1544,7 @@ dump_deadlist(dsl_deadlist_t *dl)
return;
if (dl->dl_oldfmt) {
- dump_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
+ dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
return;
}
@@ -1491,8 +1569,7 @@ dump_deadlist(dsl_deadlist_t *dl)
(void) snprintf(buf, sizeof (buf), "mintxg %llu -> ",
(longlong_t)dle->dle_mintxg,
(longlong_t)dle->dle_bpobj.bpo_object);
-
- dump_bpobj(&dle->dle_bpobj, buf, 0);
+ dump_full_bpobj(&dle->dle_bpobj, buf, 0);
} else {
(void) printf("mintxg %llu -> obj %llu\n",
(longlong_t)dle->dle_mintxg,
@@ -1684,8 +1761,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_uint64, /* object array */
dump_none, /* packed nvlist */
dump_packed_nvlist, /* packed nvlist size */
- dump_none, /* bplist */
- dump_none, /* bplist header */
+ dump_none, /* bpobj */
+ dump_bpobj, /* bpobj header */
dump_none, /* SPA space map header */
dump_none, /* SPA space map */
dump_none, /* ZIL intent log */
@@ -1732,7 +1809,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_zap, /* deadlist */
dump_none, /* deadlist hdr */
dump_zap, /* dsl clones */
- dump_none, /* bpobj subobjs */
+ dump_bpobj_subobjs, /* bpobj subobjs */
dump_unknown, /* Unknown type, must be last */
};
@@ -2147,7 +2224,7 @@ dump_label(const char *dev)
(void) close(fd);
}
-static uint64_t num_large_blocks;
+static uint64_t dataset_feature_count[SPA_FEATURES];
/*ARGSUSED*/
static int
@@ -2161,8 +2238,15 @@ dump_one_dir(const char *dsname, void *arg)
(void) printf("Could not open %s, error %d\n", dsname, error);
return (0);
}
- if (dmu_objset_ds(os)->ds_large_blocks)
- num_large_blocks++;
+
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (!dmu_objset_ds(os)->ds_feature_inuse[f])
+ continue;
+ ASSERT(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET);
+ dataset_feature_count[f]++;
+ }
+
dump_dir(os);
dmu_objset_disown(os, FTAG);
fuid_table_destroy();
@@ -2354,6 +2438,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
dmu_object_type_t type;
boolean_t is_metadata;
+ if (bp == NULL)
+ return (0);
+
if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
@@ -2843,7 +2930,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@@ -2958,13 +3045,13 @@ dump_zpool(spa_t *spa)
dump_metaslab_groups(spa);
if (dump_opt['d'] || dump_opt['i']) {
- uint64_t refcount;
dump_dir(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
- dump_bpobj(&spa->spa_deferred_bpobj,
+ dump_full_bpobj(&spa->spa_deferred_bpobj,
"Deferred frees", 0);
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
- dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
+ dump_full_bpobj(
+ &spa->spa_dsl_pool->dp_free_bpobj,
"Pool snapshot frees", 0);
}
@@ -2979,17 +3066,29 @@ dump_zpool(spa_t *spa)
(void) dmu_objset_find(spa_name(spa), dump_one_dir,
NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
- (void) feature_get_refcount(spa,
- &spa_feature_table[SPA_FEATURE_LARGE_BLOCKS], &refcount);
- if (num_large_blocks != refcount) {
- (void) printf("large_blocks feature refcount mismatch: "
- "expected %lld != actual %lld\n",
- (longlong_t)num_large_blocks,
- (longlong_t)refcount);
- rc = 2;
- } else {
- (void) printf("Verified large_blocks feature refcount "
- "is correct (%llu)\n", (longlong_t)refcount);
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ uint64_t refcount;
+
+ if (!(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET)) {
+ ASSERT0(dataset_feature_count[f]);
+ continue;
+ }
+ (void) feature_get_refcount(spa,
+ &spa_feature_table[f], &refcount);
+ if (dataset_feature_count[f] != refcount) {
+ (void) printf("%s feature refcount mismatch: "
+ "%lld datasets != %lld refcount\n",
+ spa_feature_table[f].fi_uname,
+ (longlong_t)dataset_feature_count[f],
+ (longlong_t)refcount);
+ rc = 2;
+ } else {
+ (void) printf("Verified %s feature refcount "
+ "of %llu is correct\n",
+ spa_feature_table[f].fi_uname,
+ (longlong_t)refcount);
+ }
}
}
if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
index b7d9ac0..e534ae9 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd July 30, 2015
+.Dd September 14, 2015
.Dt ZFS 8
.Os
.Sh NAME
@@ -191,11 +191,13 @@
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
.Op Fl d | e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Nm
.Cm allow
@@ -939,7 +941,23 @@ Disabling checksums is
.Em NOT
a recommended practice.
.It Sy compression Ns = Ns Cm on | off | lzjb | gzip | gzip- Ns Ar N | Cm zle | Cm lz4
-Controls the compression algorithm used for this dataset. The
+Controls the compression algorithm used for this dataset.
+Setting compression to
+.Cm on
+indicates that the current default compression algorithm should be used.
+The default balances compression and decompression speed with compression
+ratio, and is expected to work well on a wide variety of workloads.
+Unlike all other settings for this property, on does not select a fixed
+compression type.
+As new compression algorithms are added to ZFS and enabled on a pool, the
+default compression algorithm may change.
+The current default compression algorithm is either
+.Cm lzjb
+or, if the
+.Sy lz4_compress
+feature is enabled,
+.Cm lz4 .
+The
.Cm lzjb
compression algorithm is optimized for performance while providing decent data
compression. Setting compression to
@@ -2126,7 +2144,8 @@ Property name
.It value
Property value
.It source
-Property source. Can either be local, default, temporary, inherited, or none
+Property source. Can either be local, default, temporary, inherited, received,
+or none
(\&-).
.El
.Pp
@@ -2192,8 +2211,11 @@ The default value is all sources.
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot Ns ...
.Xc
.Pp
-Clears the specified property, causing it to be inherited from an ancestor. If
-no ancestor has the property set, then the default value is used. See the
+Clears the specified property, causing it to be inherited from an ancestor,
+restored to default if no ancestor has the property set, or with the
+.Fl S
+option reverted to the received value if one exists.
+See the
.Qq Sx Properties
section for a listing of default values, and details on which properties can be
inherited.
@@ -2201,8 +2223,10 @@ inherited.
.It Fl r
Recursively inherit the given property for all children.
.It Fl S
-For properties with a received value, revert to this value. This flag has no
-effect on properties that do not have a received value.
+Revert the property to the received value if one exists; otherwise operate as
+if the
+.Fl S
+option was not specified.
.El
.It Xo
.Nm
@@ -2689,6 +2713,7 @@ feature.
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Xc
.It Xo
@@ -2696,6 +2721,7 @@ feature.
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
.Op Fl d | e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Xc
.Pp
@@ -2780,6 +2806,10 @@ receive operation.
Do not actually receive the stream. This can be useful in conjunction with the
.Fl v
option to verify the name the receive operation would use.
+.It Fl o Sy origin Ns = Ns Ar snapshot
+Forces the stream to be received as a clone of the given snapshot.
+This is only valid if the stream is an incremental stream whose source
+is the same as the provided origin.
.It Fl F
Force a rollback of the file system to the most recent snapshot before
performing the receive operation. If receiving an incremental replication
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
index 089772f..e35ce01 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -264,8 +264,9 @@ get_usage(zfs_help_t idx)
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive|recv [-vnFu] <filesystem|volume|"
- "snapshot>\n"
- "\treceive|recv [-vnFu] [-d | -e] <filesystem>\n"));
+ "snapshot>\n"
+ "\treceive|recv [-vnFu] [-o origin=<snapshot>] [-d | -e] "
+ "<filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename [-f] <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
@@ -791,7 +792,7 @@ zfs_do_create(int argc, char **argv)
nomem();
break;
case 'o':
- if (parseprop(props, optarg))
+ if (parseprop(props, optarg) != 0)
goto error;
break;
case 's':
@@ -1927,9 +1928,13 @@ zfs_do_inherit(int argc, char **argv)
if (prop == ZFS_PROP_QUOTA ||
prop == ZFS_PROP_RESERVATION ||
prop == ZFS_PROP_REFQUOTA ||
- prop == ZFS_PROP_REFRESERVATION)
+ prop == ZFS_PROP_REFRESERVATION) {
(void) fprintf(stderr, gettext("use 'zfs set "
"%s=none' to clear\n"), propname);
+ (void) fprintf(stderr, gettext("use 'zfs "
+ "inherit -S %s' to revert to received "
+ "value\n"), propname);
+ }
return (1);
}
if (received && (prop == ZFS_PROP_VOLSIZE ||
@@ -3659,7 +3664,7 @@ zfs_do_snapshot(int argc, char **argv)
while ((c = getopt(argc, argv, "ro:")) != -1) {
switch (c) {
case 'o':
- if (parseprop(props, optarg))
+ if (parseprop(props, optarg) != 0)
return (1);
break;
case 'r':
@@ -3918,10 +3923,19 @@ zfs_do_receive(int argc, char **argv)
{
int c, err;
recvflags_t flags = { 0 };
+ nvlist_t *props;
+ nvpair_t *nvp = NULL;
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
- while ((c = getopt(argc, argv, ":denuvF")) != -1) {
+ while ((c = getopt(argc, argv, ":o:denuvF")) != -1) {
switch (c) {
+ case 'o':
+ if (parseprop(props, optarg) != 0)
+ return (1);
+ break;
case 'd':
flags.isprefix = B_TRUE;
break;
@@ -3966,6 +3980,13 @@ zfs_do_receive(int argc, char **argv)
usage(B_FALSE);
}
+ while ((nvp = nvlist_next_nvpair(props, nvp))) {
+ if (strcmp(nvpair_name(nvp), "origin") != 0) {
+ (void) fprintf(stderr, gettext("invalid option"));
+ usage(B_FALSE);
+ }
+ }
+
if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Backup stream can not be read "
@@ -3974,7 +3995,7 @@ zfs_do_receive(int argc, char **argv)
return (1);
}
- err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL);
+ err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL);
return (err != 0);
}
diff --git a/cddl/contrib/opensolaris/cmd/zhack/zhack.c b/cddl/contrib/opensolaris/cmd/zhack/zhack.c
index 6e3f029..f4434a1 100644
--- a/cddl/contrib/opensolaris/cmd/zhack/zhack.c
+++ b/cddl/contrib/opensolaris/cmd/zhack/zhack.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -294,8 +294,8 @@ zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
feature_enable_sync(spa, feature, tx);
spa_history_log_internal(spa, "zhack enable feature", tx,
- "name=%s can_readonly=%u",
- feature->fi_guid, feature->fi_can_readonly);
+ "guid=%s flags=%x",
+ feature->fi_guid, feature->fi_flags);
}
static void
@@ -314,9 +314,7 @@ zhack_do_feature_enable(int argc, char **argv)
*/
desc = NULL;
feature.fi_uname = "zhack";
- feature.fi_mos = B_FALSE;
- feature.fi_can_readonly = B_FALSE;
- feature.fi_activate_on_enable = B_FALSE;
+ feature.fi_flags = 0;
feature.fi_depends = nodeps;
feature.fi_feature = SPA_FEATURE_NONE;
@@ -324,7 +322,7 @@ zhack_do_feature_enable(int argc, char **argv)
while ((c = getopt(argc, argv, "rmd:")) != -1) {
switch (c) {
case 'r':
- feature.fi_can_readonly = B_TRUE;
+ feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
break;
case 'd':
desc = strdup(optarg);
@@ -413,7 +411,7 @@ zhack_do_feature_ref(int argc, char **argv)
* disk later.
*/
feature.fi_uname = "zhack";
- feature.fi_mos = B_FALSE;
+ feature.fi_flags = 0;
feature.fi_desc = NULL;
feature.fi_depends = nodeps;
feature.fi_feature = SPA_FEATURE_NONE;
@@ -422,7 +420,7 @@ zhack_do_feature_ref(int argc, char **argv)
while ((c = getopt(argc, argv, "md")) != -1) {
switch (c) {
case 'm':
- feature.fi_mos = B_TRUE;
+ feature.fi_flags |= ZFEATURE_FLAG_MOS;
break;
case 'd':
decr = B_TRUE;
@@ -455,10 +453,10 @@ zhack_do_feature_ref(int argc, char **argv)
if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
feature.fi_guid)) {
- feature.fi_can_readonly = B_FALSE;
+ feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
feature.fi_guid)) {
- feature.fi_can_readonly = B_TRUE;
+ feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
} else {
fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
}
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
index d8243f3..a3eabd1 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
@@ -4986,7 +4986,8 @@ zpool_do_upgrade(int argc, char **argv)
"---------------\n");
for (i = 0; i < SPA_FEATURES; i++) {
zfeature_info_t *fi = &spa_feature_table[i];
- const char *ro = fi->fi_can_readonly ?
+ const char *ro =
+ (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
" (read-only compatible)" : "";
(void) printf("%-37s%s\n", fi->fi_uname, ro);
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
index d99d801..f6dedc2 100644
--- a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#include <ctype.h>
@@ -34,6 +34,7 @@
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
+#include <stddef.h>
#include <sys/dmu.h>
#include <sys/zfs_ioctl.h>
@@ -83,7 +84,6 @@ safe_malloc(size_t size)
*
* Read while computing incremental checksum
*/
-
static size_t
ssread(void *buf, size_t len, zio_cksum_t *cksum)
{
@@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_t *cksum)
if ((outlen = fread(buf, len, 1, send_stream)) == 0)
return (0);
- if (do_cksum && cksum) {
+ if (do_cksum) {
if (do_byteswap)
fletcher_4_incremental_byteswap(buf, len, cksum);
else
@@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_t *cksum)
return (outlen);
}
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+ if (r == 0)
+ return (0);
+ zio_cksum_t saved_cksum = *cksum;
+ r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), cksum);
+ if (r == 0)
+ return (0);
+ if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+ !ZIO_CHECKSUM_EQUAL(saved_cksum,
+ drr->drr_u.drr_checksum.drr_checksum)) {
+ fprintf(stderr, "invalid checksum\n");
+ (void) printf("Incorrect checksum in record header.\n");
+ (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+ saved_cksum.zc_word[0],
+ saved_cksum.zc_word[1],
+ saved_cksum.zc_word[2],
+ saved_cksum.zc_word[3]);
+ exit(1);
+ }
+ return (sizeof (*drr));
+}
+
/*
* Print part of a block in ASCII characters
*/
@@ -183,8 +211,10 @@ main(int argc, char *argv[])
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+ struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
char c;
boolean_t verbose = B_FALSE;
+ boolean_t very_verbose = B_FALSE;
boolean_t first = B_TRUE;
/*
* dump flag controls whether the contents of any modified data blocks
@@ -202,11 +232,14 @@ main(int argc, char *argv[])
do_cksum = B_FALSE;
break;
case 'v':
+ if (verbose)
+ very_verbose = B_TRUE;
verbose = B_TRUE;
break;
case 'd':
dump = B_TRUE;
verbose = B_TRUE;
+ very_verbose = B_TRUE;
break;
case ':':
(void) fprintf(stderr,
@@ -230,7 +263,7 @@ main(int argc, char *argv[])
send_stream = stdin;
pcksum = zc;
- while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+ while (read_hdr(drr, &zc)) {
/*
* If this is the first DMU record being processed, check for
@@ -432,7 +465,7 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE object = %llu type = %u "
"checksum type = %u\n"
- "offset = %llu length = %llu "
+ " offset = %llu length = %llu "
"props = %llx\n",
(u_longlong_t)drrw->drr_object,
drrw->drr_type,
@@ -476,9 +509,9 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE_BYREF object = %llu "
"checksum type = %u props = %llx\n"
- "offset = %llu length = %llu\n"
+ " offset = %llu length = %llu\n"
"toguid = %llx refguid = %llx\n"
- "refobject = %llu refoffset = %llu\n",
+ " refobject = %llu refoffset = %llu\n",
(u_longlong_t)drrwbr->drr_object,
drrwbr->drr_checksumtype,
(u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -538,7 +571,7 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE_EMBEDDED object = %llu "
"offset = %llu length = %llu\n"
- "toguid = %llx comp = %u etype = %u "
+ " toguid = %llx comp = %u etype = %u "
"lsize = %u psize = %u\n",
(u_longlong_t)drrwe->drr_object,
(u_longlong_t)drrwe->drr_offset,
@@ -553,6 +586,13 @@ main(int argc, char *argv[])
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
}
+ if (drr->drr_type != DRR_BEGIN && very_verbose) {
+ (void) printf(" checksum = %llx/%llx/%llx/%llx\n",
+ (longlong_t)drrc->drr_checksum.zc_word[0],
+ (longlong_t)drrc->drr_checksum.zc_word[1],
+ (longlong_t)drrc->drr_checksum.zc_word[2],
+ (longlong_t)drrc->drr_checksum.zc_word[3]);
+ }
pcksum = zc;
}
free(buf);
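
The read_hdr() routine added above enforces the new per-record checksum
format of the send stream: each replay record now carries, in the trailing
zio_cksum_t of its header, the running fletcher-4 checksum of everything
in the stream that precedes that field. A minimal sketch of the invariant
being checked, reusing the same ZFS headers zstreamdump includes (this
helper is illustrative, not part of the patch):

static boolean_t
record_cksum_ok(const dmu_replay_record_t *drr, const zio_cksum_t *running)
{
	const zio_cksum_t *stored = &drr->drr_u.drr_checksum.drr_checksum;

	/* Records from older streams carry an all-zero checksum field. */
	if (ZIO_CHECKSUM_IS_ZERO(stored))
		return (B_TRUE);
	/* Otherwise the stored value must match the running checksum. */
	return (ZIO_CHECKSUM_EQUAL(*stored, *running));
}
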
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index f76c8ea..7cc8d5f 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -3586,7 +3586,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
- dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
+ dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
+ ZIO_PRIORITY_SYNC_READ);
/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -5705,8 +5706,10 @@ ztest_run(ztest_shared_t *zs)
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
- for (uint64_t object = 1; object < 50; object++)
- dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
+ for (uint64_t object = 1; object < 50; object++) {
+ dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
+ ZIO_PRIORITY_SYNC_READ);
+ }
spa_close(spa, FTAG);
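
Both ztest call sites above reflect the widened dmu_prefetch() interface:
the function now also takes the indirection level to prefetch and the ZIO
priority to issue the prefetch at. A hedged sketch of the new call shape
(kernel/libzpool context; the object, offset, and length values here are
illustrative only):

/* Prefetch 1 MB of level-0 (data) blocks at synchronous-read priority. */
dmu_prefetch(os, object,
    0,				/* level: 0 = leaf data blocks */
    0,				/* byte offset within the object */
    1ULL << 20,			/* length in bytes to prefetch */
    ZIO_PRIORITY_SYNC_READ);	/* priority for the prefetch I/O */
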
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
index 8a707d1..44bd58b 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -668,8 +668,8 @@ typedef struct recvflags {
boolean_t nomount;
} recvflags_t;
-extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *,
- int, avl_tree_t *);
+extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
+ recvflags_t *, int, avl_tree_t *);
typedef enum diff_flags {
ZFS_DIFF_PARSEABLE = 0x1,
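
As the prototype change above shows, zfs_receive() now accepts an nvlist
of receive-time properties; per the zfs_main.c hunks earlier, only the
"origin" pair is honored in this commit, and it forces the stream to be
received as a clone of the named snapshot. A minimal userland sketch of a
caller (not part of the patch; the dataset names are placeholders):

#include <errno.h>
#include <libzfs.h>

static int
recv_as_clone(libzfs_handle_t *hdl, int infd)
{
	recvflags_t flags = { 0 };
	nvlist_t *props;
	int err;

	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
		return (ENOMEM);
	/* Receive the stream as a clone of this existing snapshot. */
	err = nvlist_add_string(props, "origin", "tank/base@snap");
	if (err == 0)
		err = zfs_receive(hdl, "tank/clone", props, &flags,
		    infd, NULL);
	nvlist_free(props);
	return (err);
}
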
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
index 67514b1..c677822 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
@@ -3535,7 +3535,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
}
static int
-zbookmark_compare(const void *a, const void *b)
+zbookmark_mem_compare(const void *a, const void *b)
{
return (memcmp(a, b, sizeof (zbookmark_phys_t)));
}
@@ -3598,7 +3598,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;
- qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare);
+ qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
index 91857b6..e44ccfd 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
@@ -64,8 +64,9 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
#define ENODATA EIDRM
-static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
- int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
+static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
+ recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
+ uint64_t *);
static const zio_cksum_t zero_cksum = { 0 };
@@ -188,10 +189,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
}
static int
-cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+ zio_cksum_t *zc, int outfd)
{
- fletcher_4_incremental_native(buf, len, zc);
- return (write(outfd, buf, len));
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ fletcher_4_incremental_native(drr,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+ if (drr->drr_type != DRR_BEGIN) {
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+ drr_checksum.drr_checksum));
+ drr->drr_u.drr_checksum.drr_checksum = *zc;
+ }
+ fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), zc);
+ if (write(outfd, drr, sizeof (*drr)) == -1)
+ return (errno);
+ if (payload_len != 0) {
+ fletcher_4_incremental_native(payload, payload_len, zc);
+ if (write(outfd, payload, payload_len) == -1)
+ return (errno);
+ }
+ return (0);
}
/*
@@ -218,26 +237,18 @@ cksummer(void *arg)
char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
- struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
- struct drr_end *drre = &thedrr.drr_u.drr_end;
- struct drr_object *drro = &thedrr.drr_u.drr_object;
- struct drr_write *drrw = &thedrr.drr_u.drr_write;
- struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
- struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
FILE *ofp;
int outfd;
- dmu_replay_record_t wbr_drr = {0};
- struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
dedup_table_t ddt;
zio_cksum_t stream_cksum;
uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
uint64_t numbuckets;
ddt.max_ddt_size =
- MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
- SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+ MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
+ SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
- numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+ numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
/*
* numbuckets must be a power of 2. Increase number to
@@ -253,32 +264,29 @@ cksummer(void *arg)
ddt.numhashbits = high_order_bit(numbuckets) - 1;
ddt.ddt_full = B_FALSE;
- /* Initialize the write-by-reference block. */
- wbr_drr.drr_type = DRR_WRITE_BYREF;
- wbr_drr.drr_payloadlen = 0;
-
outfd = dda->outputfd;
ofp = fdopen(dda->inputfd, "r");
- while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+ while (ssread(drr, sizeof (*drr), ofp) != 0) {
switch (drr->drr_type) {
case DRR_BEGIN:
{
- int fflags;
+ struct drr_begin *drrb = &drr->drr_u.drr_begin;
+ int fflags;
+ int sz = 0;
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
/* set the DEDUP feature flag for this stream */
fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
fflags |= (DMU_BACKUP_FEATURE_DEDUP |
DMU_BACKUP_FEATURE_DEDUPPROPS);
DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
- int sz = drr->drr_payloadlen;
+ sz = drr->drr_payloadlen;
if (sz > SPA_MAXBLOCKSIZE) {
buf = zfs_realloc(dda->dedup_hdl, buf,
@@ -287,64 +295,60 @@ cksummer(void *arg)
(void) ssread(buf, sz, ofp);
if (ferror(stdin))
perror("fread");
- if (cksum_and_write(buf, sz, &stream_cksum,
- outfd) == -1)
- goto out;
}
+ if (dump_record(drr, buf, sz, &stream_cksum,
+ outfd) != 0)
+ goto out;
break;
}
case DRR_END:
{
+ struct drr_end *drre = &drr->drr_u.drr_end;
/* use the recalculated checksum */
- ZIO_SET_CHECKSUM(&drre->drr_checksum,
- stream_cksum.zc_word[0], stream_cksum.zc_word[1],
- stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
- if ((write(outfd, drr,
- sizeof (dmu_replay_record_t))) == -1)
+ drre->drr_checksum = stream_cksum;
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
case DRR_OBJECT:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_object *drro = &drr->drr_u.drr_object;
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
ofp);
- if (cksum_and_write(buf,
- P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
- &stream_cksum, outfd) == -1)
- goto out;
}
+ if (dump_record(drr, buf,
+ P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+ &stream_cksum, outfd) != 0)
+ goto out;
break;
}
case DRR_SPILL:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_spill *drrs = &drr->drr_u.drr_spill;
(void) ssread(buf, drrs->drr_length, ofp);
- if (cksum_and_write(buf, drrs->drr_length,
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, buf, drrs->drr_length,
+ &stream_cksum, outfd) != 0)
goto out;
break;
}
case DRR_FREEOBJECTS:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
case DRR_WRITE:
{
+ struct drr_write *drrw = &drr->drr_u.drr_write;
dataref_t dataref;
(void) ssread(buf, drrw->drr_length, ofp);
@@ -382,7 +386,13 @@ cksummer(void *arg)
if (ddt_update(dda->dedup_hdl, &ddt,
&drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
&dataref)) {
+ dmu_replay_record_t wbr_drr = {0};
+ struct drr_write_byref *wbr_drrr =
+ &wbr_drr.drr_u.drr_write_byref;
+
/* block already present in stream */
+ wbr_drr.drr_type = DRR_WRITE_BYREF;
+
wbr_drrr->drr_object = drrw->drr_object;
wbr_drrr->drr_offset = drrw->drr_offset;
wbr_drrr->drr_length = drrw->drr_length;
@@ -402,19 +412,13 @@ cksummer(void *arg)
wbr_drrr->drr_key.ddk_prop =
drrw->drr_key.ddk_prop;
- if (cksum_and_write(&wbr_drr,
- sizeof (dmu_replay_record_t), &stream_cksum,
- outfd) == -1)
+ if (dump_record(&wbr_drr, NULL, 0,
+ &stream_cksum, outfd) != 0)
goto out;
} else {
/* block not previously seen */
- if (cksum_and_write(drr,
- sizeof (dmu_replay_record_t), &stream_cksum,
- outfd) == -1)
- goto out;
- if (cksum_and_write(buf,
- drrw->drr_length,
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, buf, drrw->drr_length,
+ &stream_cksum, outfd) != 0)
goto out;
}
break;
@@ -422,28 +426,27 @@ cksummer(void *arg)
case DRR_WRITE_EMBEDDED:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_write_embedded *drrwe =
+ &drr->drr_u.drr_write_embedded;
(void) ssread(buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
- if (cksum_and_write(buf,
+ if (dump_record(drr, buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
- &stream_cksum, outfd) == -1)
+ &stream_cksum, outfd) != 0)
goto out;
break;
}
case DRR_FREE:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
default:
- (void) printf("INVALID record type 0x%x\n",
+ (void) fprintf(stderr, "INVALID record type 0x%x\n",
drr->drr_type);
/* should never happen, so assert */
assert(B_FALSE);
@@ -1470,18 +1473,11 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sizeof (drr.drr_u.drr_begin.drr_toname),
"%s@%s", zhp->zfs_name, tosnap);
drr.drr_payloadlen = buflen;
- err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
- /* write header nvlist */
- if (err != -1 && packbuf != NULL) {
- err = cksum_and_write(packbuf, buflen, &zc,
- outfd);
- }
+ err = dump_record(&drr, packbuf, buflen, &zc, outfd);
free(packbuf);
- if (err == -1) {
- err = errno;
+ if (err != 0)
goto stderr_out;
- }
/* write end record */
bzero(&drr, sizeof (drr));
@@ -1714,6 +1710,8 @@ recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
int rv;
int len = ilen;
+ assert(ilen <= SPA_MAXBLOCKSIZE);
+
do {
rv = read(fd, cp, len);
cp += rv;
@@ -2501,7 +2499,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
* zfs_receive_one() will take care of it (ie,
* recv_skip() and return 0).
*/
- error = zfs_receive_impl(hdl, destname, flags, fd,
+ error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
action_handlep);
if (error == ENODATA) {
@@ -2634,9 +2632,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
*/
static int
zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
- recvflags_t *flags, dmu_replay_record_t *drr,
- dmu_replay_record_t *drr_noswap, const char *sendfs,
- nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+ const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
+ dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
+ avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
uint64_t *action_handlep)
{
zfs_cmd_t zc = { 0 };
@@ -2801,10 +2799,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}
if (flags->verbose)
(void) printf("found clone origin %s\n", zc.zc_string);
+ } else if (originsnap) {
+ (void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN);
+ if (flags->verbose)
+ (void) printf("using provided clone origin %s\n",
+ zc.zc_string);
}
stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
- (drrb->drr_flags & DRR_FLAG_CLONE));
+ (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap);
if (stream_wantsnewfs) {
/*
@@ -3182,9 +3185,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}
static int
-zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
- int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
- char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
+zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
+ const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
+ nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+ uint64_t *action_handlep)
{
int err;
dmu_replay_record_t drr, drr_noswap;
@@ -3203,6 +3207,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
"(%s) does not exist"), tosnap);
return (zfs_error(hdl, EZFS_NOENT, errbuf));
}
+ if (originsnap &&
+ !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
+ "(%s) does not exist"), originsnap);
+ return (zfs_error(hdl, EZFS_NOENT, errbuf));
+ }
/* read in the BEGIN record */
if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
@@ -3275,14 +3285,14 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
*cp = '\0';
sendfs = nonpackage_sendfs;
}
- return (zfs_receive_one(hdl, infd, tosnap, flags,
- &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
- top_zfs, cleanup_fd, action_handlep));
+ return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
+ &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
+ cleanup_fd, action_handlep));
} else {
assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM);
- return (zfs_receive_package(hdl, infd, tosnap, flags,
- &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
+ return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
+ &zcksum, top_zfs, cleanup_fd, action_handlep));
}
}
@@ -3293,18 +3303,24 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
* (-1 will override -2).
*/
int
-zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
- int infd, avl_tree_t *stream_avl)
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
+ recvflags_t *flags, int infd, avl_tree_t *stream_avl)
{
char *top_zfs = NULL;
int err;
int cleanup_fd;
uint64_t action_handle = 0;
+ char *originsnap = NULL;
+ if (props) {
+ err = nvlist_lookup_string(props, "origin", &originsnap);
+ if (err && err != ENOENT)
+ return (err);
+ }
cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
VERIFY(cleanup_fd >= 0);
- err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
+ err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
stream_avl, &top_zfs, cleanup_fd, &action_handle);
VERIFY(0 == close(cleanup_fd));
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
index 52bd580..5ba660d 100644
--- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -532,18 +532,30 @@ lzc_send(const char *snapname, const char *from, int fd,
}
/*
- * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ * "from" can be NULL, a snapshot, or a bookmark.
+ *
+ * If from is NULL, a full (non-incremental) stream will be estimated. This
+ * is calculated very efficiently.
+ *
+ * If from is a snapshot, lzc_send_space uses the deadlists attached to
+ * each snapshot to efficiently estimate the stream size.
+ *
+ * If from is a bookmark, the indirect blocks in the destination snapshot
+ * are traversed, looking for blocks with a birth time since the creation TXG of
+ * the snapshot this bookmark was created from. This will result in
+ * significantly more I/O and be less efficient than a send space estimation on
+ * an equivalent snapshot.
*/
int
-lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
int err;
args = fnvlist_alloc();
- if (fromsnap != NULL)
- fnvlist_add_string(args, "fromsnap", fromsnap);
+ if (from != NULL)
+ fnvlist_add_string(args, "from", from);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
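
The comment block above distinguishes three estimation modes for
lzc_send_space() depending on what "from" names. A short sketch exercising
all three (illustrative only; the snapshot and bookmark names are
placeholders):

#include <libzfs_core.h>

static void
send_space_examples(void)
{
	uint64_t sz;

	/* Full stream: no incremental source. */
	(void) lzc_send_space("tank/fs@today", NULL, &sz);
	/* Incremental from a snapshot: estimated cheaply via deadlists. */
	(void) lzc_send_space("tank/fs@today", "tank/fs@yesterday", &sz);
	/* Incremental from a bookmark: traverses indirect blocks, more I/O. */
	(void) lzc_send_space("tank/fs@today", "tank/fs#mark", &sz);
}
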
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
index 03027c3..9fabc62 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@@ -135,8 +135,18 @@ extern int aok;
/*
* DTrace SDT probes have different signatures in userland than they do in
- * kernel. If they're being used in kernel code, re-define them out of
+ * the kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
+ *
+ * Here's an example of how to use the set-error probes in userland:
+ * zfs$target:::set-error /arg0 == EBUSY/ {stack();}
+ *
+ * Here's an example of how to use DTRACE_PROBE probes in userland:
+ * If there is a probe declared as follows:
+ * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
+ * Then you can use it as follows:
+ * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
+ * {printf("%u %p\n", arg1, arg2);}
*/
#ifdef DTRACE_PROBE
@@ -645,13 +655,6 @@ extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern zoneid_t getzoneid(void);
/* Random compatibility stuff. */
-#define lbolt (gethrtime() >> 23)
-#define lbolt64 (gethrtime() >> 23)
-
-extern uint64_t physmem;
-
-#define gethrestime_sec() time(NULL)
-
#define pwrite64(d, p, n, o) pwrite(d, p, n, o)
#define readdir64(d) readdir(d)
#define SIGPENDING(td) (0)
diff --git a/contrib/binutils/gas/dwarf2dbg.c b/contrib/binutils/gas/dwarf2dbg.c
index 671a9b0..528f676 100644
--- a/contrib/binutils/gas/dwarf2dbg.c
+++ b/contrib/binutils/gas/dwarf2dbg.c
@@ -1271,6 +1271,7 @@ out_debug_line (segT line_seg)
symbolS *line_end;
struct line_seg *s;
enum dwarf2_format d2f;
+ int sizeof_initial_length;
int sizeof_offset;
subseg_set (line_seg, 0);
@@ -1287,27 +1288,24 @@ out_debug_line (segT line_seg)
d2f = DWARF2_FORMAT ();
if (d2f == dwarf2_format_32bit)
{
- expr.X_add_number = -4;
- emit_expr (&expr, 4);
- sizeof_offset = 4;
+ sizeof_initial_length = sizeof_offset = 4;
}
else if (d2f == dwarf2_format_64bit)
{
- expr.X_add_number = -12;
- out_four (-1);
- emit_expr (&expr, 8);
+ sizeof_initial_length = 12;
sizeof_offset = 8;
+ out_four (-1);
}
else if (d2f == dwarf2_format_64bit_irix)
{
- expr.X_add_number = -8;
- emit_expr (&expr, 8);
- sizeof_offset = 8;
+ sizeof_initial_length = sizeof_offset = 8;
}
else
{
as_fatal (_("internal error: unknown dwarf2 format"));
}
+ expr.X_add_number = -sizeof_initial_length;
+ emit_expr (&expr, sizeof_offset);
/* Version. */
out_two (2);
@@ -1316,7 +1314,7 @@ out_debug_line (segT line_seg)
expr.X_op = O_subtract;
expr.X_add_symbol = prologue_end;
expr.X_op_symbol = line_start;
- expr.X_add_number = - (4 + 2 + 4);
+ expr.X_add_number = - (sizeof_initial_length + 2 + sizeof_offset);
emit_expr (&expr, sizeof_offset);
/* Parameters of the state machine. */
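
The rewrite works because the DWARF initial-length field has a fixed size per
format: 4 bytes for 32-bit DWARF, 12 for 64-bit (a 0xffffffff escape plus an
8-byte length), and 8 for the IRIX variant; the prologue-length fixup can then
subtract sizeof_initial_length + 2 + sizeof_offset instead of the hard-coded
4 + 2 + 4. A standalone sketch of the two common encodings (illustration
only, not gas internals):

#include <stdint.h>
#include <string.h>

/* Write a DWARF initial-length field; returns the bytes consumed. */
static size_t
emit_initial_length(unsigned char *p, uint64_t unit_len, int dwarf64)
{
	uint32_t esc = 0xffffffff;
	uint32_t len32 = (uint32_t)unit_len;

	if (!dwarf64) {
		memcpy(p, &len32, 4);	/* 32-bit DWARF: plain 4 bytes */
		return (4);
	}
	memcpy(p, &esc, 4);		/* 64-bit DWARF: escape marker */
	memcpy(p + 4, &unit_len, 8);	/* followed by 8-byte length */
	return (12);			/* == sizeof_initial_length */
}
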
diff --git a/contrib/gcclibs/libcpp/files.c b/contrib/gcclibs/libcpp/files.c
index 366d30a..95cda0e 100644
--- a/contrib/gcclibs/libcpp/files.c
+++ b/contrib/gcclibs/libcpp/files.c
@@ -567,7 +567,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
SSIZE_MAX to be much smaller than the actual range of the
type. Use INTTYPE_MAXIMUM unconditionally to ensure this
does not bite us. */
- if (file->st.st_size > INTTYPE_MAXIMUM (ssize_t))
+ if (file->st.st_size > SSIZE_MAX)
{
cpp_error (pfile, CPP_DL_ERROR, "%s is too large", file->path);
return false;
@@ -581,7 +581,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
file->path);
return false;
}
- else if (offset > INTTYPE_MAXIMUM (ssize_t) || (ssize_t)offset > size)
+ else if (offset > SSIZE_MAX || (ssize_t)offset > size)
{
cpp_error (pfile, CPP_DL_ERROR, "current position of %s is too large",
file->path);
diff --git a/contrib/libcxxrt/libelftc_dem_gnu3.c b/contrib/libcxxrt/libelftc_dem_gnu3.c
index 11c2ca2..70ef3e8 100644
--- a/contrib/libcxxrt/libelftc_dem_gnu3.c
+++ b/contrib/libcxxrt/libelftc_dem_gnu3.c
@@ -59,7 +59,7 @@ struct vector_str {
enum type_qualifier {
TYPE_PTR, TYPE_REF, TYPE_CMX, TYPE_IMG, TYPE_EXT, TYPE_RST, TYPE_VAT,
- TYPE_CST
+ TYPE_CST, TYPE_VEC
};
struct vector_type_qualifier {
@@ -397,6 +397,8 @@ static int cpp_demangle_read_array(struct cpp_demangle_data *);
static int cpp_demangle_read_encoding(struct cpp_demangle_data *);
static int cpp_demangle_read_expr_primary(struct cpp_demangle_data *);
static int cpp_demangle_read_expression(struct cpp_demangle_data *);
+static int cpp_demangle_read_expression_flat(struct cpp_demangle_data *,
+ char **);
static int cpp_demangle_read_expression_binary(struct cpp_demangle_data *,
const char *, size_t);
static int cpp_demangle_read_expression_unary(struct cpp_demangle_data *,
@@ -408,8 +410,12 @@ static int cpp_demangle_read_function(struct cpp_demangle_data *, int *,
static int cpp_demangle_local_source_name(struct cpp_demangle_data *ddata);
static int cpp_demangle_read_local_name(struct cpp_demangle_data *);
static int cpp_demangle_read_name(struct cpp_demangle_data *);
+static int cpp_demangle_read_name_flat(struct cpp_demangle_data *,
+ char**);
static int cpp_demangle_read_nested_name(struct cpp_demangle_data *);
static int cpp_demangle_read_number(struct cpp_demangle_data *, long *);
+static int cpp_demangle_read_number_as_string(struct cpp_demangle_data *,
+ char **);
static int cpp_demangle_read_nv_offset(struct cpp_demangle_data *);
static int cpp_demangle_read_offset(struct cpp_demangle_data *);
static int cpp_demangle_read_offset_number(struct cpp_demangle_data *);
@@ -423,6 +429,8 @@ static int cpp_demangle_read_tmpl_arg(struct cpp_demangle_data *);
static int cpp_demangle_read_tmpl_args(struct cpp_demangle_data *);
static int cpp_demangle_read_tmpl_param(struct cpp_demangle_data *);
static int cpp_demangle_read_type(struct cpp_demangle_data *, int);
+static int cpp_demangle_read_type_flat(struct cpp_demangle_data *,
+ char **);
static int cpp_demangle_read_uqname(struct cpp_demangle_data *);
static int cpp_demangle_read_v_offset(struct cpp_demangle_data *);
static char *decode_fp_to_double(const char *, size_t);
@@ -601,17 +609,18 @@ cpp_demangle_push_fp(struct cpp_demangle_data *ddata,
fp = ddata->cur;
while (*ddata->cur != 'E')
++ddata->cur;
- ++ddata->cur;
if ((f = decoder(fp, ddata->cur - fp)) == NULL)
return (0);
rtn = 0;
if ((len = strlen(f)) > 0)
- rtn = cpp_demangle_push_str(ddata, f, len);
+ rtn = cpp_demangle_push_str(ddata, f, len);
free(f);
+ ++ddata->cur;
+
return (rtn);
}
@@ -695,7 +704,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, "*", 1))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
@@ -706,7 +716,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, "&", 1))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
@@ -717,7 +728,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " complex", 8))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
@@ -726,23 +738,26 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (!cpp_demangle_push_str(ddata, " imaginary", 10))
goto clean;
if (type_str != NULL) {
- if (!vector_str_push(&subst_v, " imaginary", 10))
+ if (!vector_str_push(&subst_v, " imaginary",
+ 10))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
case TYPE_EXT:
- if (e_idx > v->ext_name.size - 1)
+ if (v->ext_name.size == 0 ||
+ e_idx > v->ext_name.size - 1)
goto clean;
- if ((e_len = strlen(v->ext_name.container[e_idx])) == 0)
+ if ((e_len = strlen(v->ext_name.container[e_idx])) ==
+ 0)
goto clean;
- if ((buf = malloc(sizeof(char) * (e_len + 1))) == NULL)
+ if ((buf = malloc(e_len + 2)) == NULL)
goto clean;
-
- memcpy(buf, " ", 1);
- memcpy(buf + 1, v->ext_name.container[e_idx], e_len);
+ snprintf(buf, e_len + 2, " %s",
+ v->ext_name.container[e_idx]);
if (!cpp_demangle_push_str(ddata, buf, e_len + 1)) {
free(buf);
@@ -755,7 +770,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
free(buf);
goto clean;
}
- if (!cpp_demangle_push_subst_v(ddata, &subst_v)) {
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v)) {
free(buf);
goto clean;
}
@@ -770,7 +786,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " restrict", 9))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
@@ -781,7 +798,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " volatile", 9))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
@@ -792,11 +810,42 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " const", 6))
goto clean;
- if (!cpp_demangle_push_subst_v(ddata, &subst_v))
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v))
goto clean;
}
break;
+ case TYPE_VEC:
+ if (v->ext_name.size == 0 ||
+ e_idx > v->ext_name.size - 1)
+ goto clean;
+ if ((e_len = strlen(v->ext_name.container[e_idx])) ==
+ 0)
+ goto clean;
+ if ((buf = malloc(e_len + 12)) == NULL)
+ goto clean;
+ snprintf(buf, e_len + 12, " __vector(%s)",
+ v->ext_name.container[e_idx]);
+ if (!cpp_demangle_push_str(ddata, buf, e_len + 11)) {
+ free(buf);
+ goto clean;
+ }
+ if (type_str != NULL) {
+ if (!vector_str_push(&subst_v, buf,
+ e_len + 11)) {
+ free(buf);
+ goto clean;
+ }
+ if (!cpp_demangle_push_subst_v(ddata,
+ &subst_v)) {
+ free(buf);
+ goto clean;
+ }
+ }
+ free(buf);
+ ++e_idx;
+ break;
};
--idx;
}
@@ -947,10 +996,14 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
switch (*ddata->cur) {
case 'b':
+ if (*(ddata->cur + 2) != 'E')
+ return (0);
switch (*(++ddata->cur)) {
case '0':
+ ddata->cur += 2;
return (cpp_demangle_push_str(ddata, "false", 5));
case '1':
+ ddata->cur += 2;
return (cpp_demangle_push_str(ddata, "true", 4));
default:
return (0);
@@ -999,7 +1052,8 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
++ddata->cur;
}
++ddata->cur;
- return (cpp_demangle_push_str(ddata, num, ddata->cur - num));
+ return (cpp_demangle_push_str(ddata, num,
+ ddata->cur - num - 1));
default:
return (0);
@@ -1291,6 +1345,38 @@ cpp_demangle_read_expression(struct cpp_demangle_data *ddata)
}
static int
+cpp_demangle_read_expression_flat(struct cpp_demangle_data *ddata, char **str)
+{
+ struct vector_str *output;
+ size_t i, p_idx, idx, exp_len;
+ char *exp;
+
+ output = ddata->push_head > 0 ? &ddata->output_tmp :
+ &ddata->output;
+
+ p_idx = output->size;
+
+ if (!cpp_demangle_read_expression(ddata))
+ return (0);
+
+ if ((exp = vector_str_substr(output, p_idx, output->size - 1,
+ &exp_len)) == NULL)
+ return (0);
+
+ idx = output->size;
+ for (i = p_idx; i < idx; ++i) {
+ if (!vector_str_pop(output)) {
+ free(exp);
+ return (0);
+ }
+ }
+
+ *str = exp;
+
+ return (1);
+}
+
+static int
cpp_demangle_read_expression_binary(struct cpp_demangle_data *ddata,
const char *name, size_t len)
{
@@ -1419,12 +1505,65 @@ cpp_demangle_read_function(struct cpp_demangle_data *ddata, int *ext_c,
static int
cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
{
+ char *name, *type, *num_str;
+ long offset;
+ int rtn;
if (ddata == NULL || *ddata->cur == '\0')
return (0);
/* special name */
switch (SIMPLE_HASH(*ddata->cur, *(ddata->cur + 1))) {
+ case SIMPLE_HASH('G', 'A'):
+ if (!cpp_demangle_push_str(ddata, "hidden alias for ", 17))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ return (cpp_demangle_read_encoding(ddata));
+
+ case SIMPLE_HASH('G', 'R'):
+ if (!cpp_demangle_push_str(ddata, "reference temporary #", 21))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ if (!cpp_demangle_read_name_flat(ddata, &name))
+ return (0);
+ rtn = 0;
+ if (!cpp_demangle_read_number_as_string(ddata, &num_str))
+ goto clean1;
+ if (!cpp_demangle_push_str(ddata, num_str, strlen(num_str)))
+ goto clean2;
+ if (!cpp_demangle_push_str(ddata, " for ", 5))
+ goto clean2;
+ if (!cpp_demangle_push_str(ddata, name, strlen(name)))
+ goto clean2;
+ rtn = 1;
+ clean2:
+ free(num_str);
+ clean1:
+ free(name);
+ return (rtn);
+
+ case SIMPLE_HASH('G', 'T'):
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ switch (*ddata->cur) {
+ case 'n':
+ if (!cpp_demangle_push_str(ddata,
+ "non-transaction clone for ", 26))
+ return (0);
+ case 't':
+ default:
+ if (!cpp_demangle_push_str(ddata,
+ "transaction clone for ", 22))
+ return (0);
+ }
+ ++ddata->cur;
+ return (cpp_demangle_read_encoding(ddata));
+
case SIMPLE_HASH('G', 'V'):
/* sentry object for 1 time init */
if (!cpp_demangle_push_str(ddata, "guard variable for ", 20))
@@ -1446,14 +1585,49 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
return (0);
return (cpp_demangle_read_encoding(ddata));
+ case SIMPLE_HASH('T', 'C'):
+ /* construction vtable */
+ if (!cpp_demangle_push_str(ddata, "construction vtable for ",
+ 24))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ if (!cpp_demangle_read_type_flat(ddata, &type))
+ return (0);
+ rtn = 0;
+ if (!cpp_demangle_read_number(ddata, &offset))
+ goto clean3;
+ if (*ddata->cur++ != '_')
+ goto clean3;
+ if (!cpp_demangle_read_type(ddata, 0))
+ goto clean3;
+ if (!cpp_demangle_push_str(ddata, "-in-", 4))
+ goto clean3;
+ if (!cpp_demangle_push_str(ddata, type, strlen(type)))
+ goto clean3;
+ rtn = 1;
+ clean3:
+ free(type);
+ return (rtn);
+
case SIMPLE_HASH('T', 'D'):
/* typeinfo common proxy */
break;
+ case SIMPLE_HASH('T', 'F'):
+ /* typeinfo fn */
+ if (!cpp_demangle_push_str(ddata, "typeinfo fn for ", 16))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ return (cpp_demangle_read_type(ddata, 0));
+
case SIMPLE_HASH('T', 'h'):
/* virtual function non-virtual override thunk */
- if (cpp_demangle_push_str(ddata,
- "virtual function non-virtual override ", 38) == 0)
+ if (!cpp_demangle_push_str(ddata,
+ "virtual function non-virtual override ", 38))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
@@ -1462,24 +1636,51 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
return (0);
return (cpp_demangle_read_encoding(ddata));
+ case SIMPLE_HASH('T', 'H'):
+ /* TLS init function */
+ if (!cpp_demangle_push_str(ddata, "TLS init function for ",
+ 22))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ break;
+
case SIMPLE_HASH('T', 'I'):
/* typeinfo structure */
- /* FALLTHROUGH */
+ if (!cpp_demangle_push_str(ddata, "typeinfo for ", 13))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ return (cpp_demangle_read_type(ddata, 0));
+
+ case SIMPLE_HASH('T', 'J'):
+ /* java class */
+ if (!cpp_demangle_push_str(ddata, "java Class for ", 15))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ return (cpp_demangle_read_type(ddata, 0));
+
case SIMPLE_HASH('T', 'S'):
/* RTTI name (NTBS) */
- if (!cpp_demangle_push_str(ddata, "typeinfo for ", 14))
+ if (!cpp_demangle_push_str(ddata, "typeinfo name for ", 18))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
- return (cpp_demangle_read_type(ddata, 1));
+ return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'T'):
/* VTT table */
if (!cpp_demangle_push_str(ddata, "VTT for ", 8))
return (0);
ddata->cur += 2;
- return (cpp_demangle_read_type(ddata, 1));
+ if (*ddata->cur == '\0')
+ return (0);
+ return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'v'):
/* virtual function virtual override thunk */
@@ -1500,7 +1701,17 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
- return (cpp_demangle_read_type(ddata, 1));
+ return (cpp_demangle_read_type(ddata, 0));
+
+ case SIMPLE_HASH('T', 'W'):
+ /* TLS wrapper function */
+ if (!cpp_demangle_push_str(ddata, "TLS wrapper function for ",
+ 25))
+ return (0);
+ ddata->cur += 2;
+ if (*ddata->cur == '\0')
+ return (0);
+ break;
};
return (cpp_demangle_read_name(ddata));
@@ -1618,6 +1829,38 @@ clean:
}
static int
+cpp_demangle_read_name_flat(struct cpp_demangle_data *ddata, char **str)
+{
+ struct vector_str *output;
+ size_t i, p_idx, idx, name_len;
+ char *name;
+
+ output = ddata->push_head > 0 ? &ddata->output_tmp :
+ &ddata->output;
+
+ p_idx = output->size;
+
+ if (!cpp_demangle_read_name(ddata))
+ return (0);
+
+ if ((name = vector_str_substr(output, p_idx, output->size - 1,
+ &name_len)) == NULL)
+ return (0);
+
+ idx = output->size;
+ for (i = p_idx; i < idx; ++i) {
+ if (!vector_str_pop(output)) {
+ free(name);
+ return (0);
+ }
+ }
+
+ *str = name;
+
+ return (1);
+}
+
+static int
cpp_demangle_read_nested_name(struct cpp_demangle_data *ddata)
{
struct vector_str *output, v;
@@ -1743,6 +1986,24 @@ cpp_demangle_read_number(struct cpp_demangle_data *ddata, long *rtn)
}
static int
+cpp_demangle_read_number_as_string(struct cpp_demangle_data *ddata, char **str)
+{
+ long n;
+
+ if (!cpp_demangle_read_number(ddata, &n)) {
+ *str = NULL;
+ return (0);
+ }
+
+ if (asprintf(str, "%ld", n) < 0) {
+ *str = NULL;
+ return (0);
+ }
+
+ return (1);
+}
+
+static int
cpp_demangle_read_nv_offset(struct cpp_demangle_data *ddata)
{
@@ -1877,11 +2138,11 @@ cpp_demangle_read_sname(struct cpp_demangle_data *ddata)
len <= 0)
return (0);
- if (len == 12 && (memcmp("_GLOBAL__N_1", ddata->cur, 12) == 0))
+ if (len == 12 && (memcmp("_GLOBAL__N_1", ddata->cur, 12) == 0))
err = cpp_demangle_push_str(ddata, "(anonymous namespace)", 21);
else
err = cpp_demangle_push_str(ddata, ddata->cur, len);
-
+
if (err == 0)
return (0);
@@ -2232,7 +2493,7 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
size_t p_idx, type_str_len;
int extern_c, is_builtin;
long len;
- char *type_str;
+ char *type_str, *exp_str, *num_str;
if (ddata == NULL)
return (0);
@@ -2274,7 +2535,7 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
extern_c = 0;
is_builtin = 1;
p_idx = output->size;
- type_str = NULL;
+ type_str = exp_str = num_str = NULL;
again:
/* builtin type */
switch (*ddata->cur) {
@@ -2320,6 +2581,82 @@ again:
++ddata->cur;
goto rtn;
+ case 'D':
+ ++ddata->cur;
+ switch (*ddata->cur) {
+ case 'd':
+ /* IEEE 754r decimal floating point (64 bits) */
+ if (!cpp_demangle_push_str(ddata, "decimal64", 9))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'e':
+ /* IEEE 754r decimal floating point (128 bits) */
+ if (!cpp_demangle_push_str(ddata, "decimal128", 10))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'f':
+ /* IEEE 754r decimal floating point (32 bits) */
+ if (!cpp_demangle_push_str(ddata, "decimal32", 9))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'h':
+ /* IEEE 754r half-precision floating point (16 bits) */
+ if (!cpp_demangle_push_str(ddata, "half", 4))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'i':
+ /* char32_t */
+ if (!cpp_demangle_push_str(ddata, "char32_t", 8))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'n':
+ /* std::nullptr_t (i.e., decltype(nullptr)) */
+ if (!cpp_demangle_push_str(ddata, "decltype(nullptr)",
+ 17))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 's':
+ /* char16_t */
+ if (!cpp_demangle_push_str(ddata, "char16_t", 8))
+ goto clean;
+ ++ddata->cur;
+ break;
+ case 'v':
+ /* gcc vector_size extension. */
+ ++ddata->cur;
+ if (*ddata->cur == '_') {
+ ++ddata->cur;
+ if (!cpp_demangle_read_expression_flat(ddata,
+ &exp_str))
+ goto clean;
+ if (!vector_str_push(&v.ext_name, exp_str,
+ strlen(exp_str)))
+ goto clean;
+ } else {
+ if (!cpp_demangle_read_number_as_string(ddata,
+ &num_str))
+ goto clean;
+ if (!vector_str_push(&v.ext_name, num_str,
+ strlen(num_str)))
+ goto clean;
+ }
+ if (*ddata->cur != '_')
+ goto clean;
+ ++ddata->cur;
+ if (!vector_type_qualifier_push(&v, TYPE_VEC))
+ goto clean;
+ goto again;
+ default:
+ goto clean;
+ }
+ goto rtn;
+
case 'e':
/* long double */
if (!cpp_demangle_push_str(ddata, "long double", 11))
@@ -2414,7 +2751,7 @@ again:
case 'o':
/* unsigned __int128 */
- if (!cpp_demangle_push_str(ddata, "unsigned _;int128", 17))
+ if (!cpp_demangle_push_str(ddata, "unsigned __int128", 17))
goto clean;
++ddata->cur;
goto rtn;
@@ -2485,6 +2822,8 @@ again:
if (!vector_str_push(&v.ext_name, ddata->cur, len))
return (0);
ddata->cur += len;
+ if (!vector_type_qualifier_push(&v, TYPE_EXT))
+ goto clean;
goto again;
case 'v':
@@ -2549,6 +2888,8 @@ rtn:
goto clean;
free(type_str);
+ free(exp_str);
+ free(num_str);
vector_type_qualifier_dest(&v);
if (ddata->push_head > 0) {
@@ -2580,11 +2921,45 @@ rtn:
return (1);
clean:
free(type_str);
+ free(exp_str);
+ free(num_str);
vector_type_qualifier_dest(&v);
return (0);
}
+static int
+cpp_demangle_read_type_flat(struct cpp_demangle_data *ddata, char **str)
+{
+ struct vector_str *output;
+ size_t i, p_idx, idx, type_len;
+ char *type;
+
+ output = ddata->push_head > 0 ? &ddata->output_tmp :
+ &ddata->output;
+
+ p_idx = output->size;
+
+ if (!cpp_demangle_read_type(ddata, 0))
+ return (0);
+
+ if ((type = vector_str_substr(output, p_idx, output->size - 1,
+ &type_len)) == NULL)
+ return (0);
+
+ idx = output->size;
+ for (i = p_idx; i < idx; ++i) {
+ if (!vector_str_pop(output)) {
+ free(type);
+ return (0);
+ }
+ }
+
+ *str = type;
+
+ return (1);
+}
+
/*
* read unqualified-name, unqualified name are operator-name, ctor-dtor-name,
* source-name
@@ -3008,39 +3383,38 @@ cpp_demangle_read_uqname(struct cpp_demangle_data *ddata)
if (ELFTC_ISDIGIT(*ddata->cur) != 0)
return (cpp_demangle_read_sname(ddata));
-
- /* local source name */
- if (*ddata->cur == 'L')
- return (cpp_demangle_local_source_name(ddata));
-
- return (1);
-}
-
-/*
- * Read local source name.
- *
- * References:
- * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
- * http://gcc.gnu.org/viewcvs?view=rev&revision=124467
- */
-static int
-cpp_demangle_local_source_name(struct cpp_demangle_data *ddata)
-{
- /* L */
- if (ddata == NULL || *ddata->cur != 'L')
- return (0);
- ++ddata->cur;
-
- /* source name */
- if (!cpp_demangle_read_sname(ddata))
- return (0);
-
- /* discriminator */
- if (*ddata->cur == '_') {
- ++ddata->cur;
- while (ELFTC_ISDIGIT(*ddata->cur) != 0)
- ++ddata->cur;
- }
+ /* local source name */
+ if (*ddata->cur == 'L')
+ return (cpp_demangle_local_source_name(ddata));
+
+ return (1);
+}
+
+/*
+ * Read local source name.
+ *
+ * References:
+ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
+ * http://gcc.gnu.org/viewcvs?view=rev&revision=124467
+ */
+static int
+cpp_demangle_local_source_name(struct cpp_demangle_data *ddata)
+{
+ /* L */
+ if (ddata == NULL || *ddata->cur != 'L')
+ return (0);
+ ++ddata->cur;
+
+ /* source name */
+ if (!cpp_demangle_read_sname(ddata))
+ return (0);
+
+ /* discriminator */
+ if (*ddata->cur == '_') {
+ ++ddata->cur;
+ while (ELFTC_ISDIGIT(*ddata->cur) != 0)
+ ++ddata->cur;
+ }
return (1);
}
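
The effect of the new encodings is easiest to check through the standard
Itanium C++ ABI entry point that libcxxrt exports; a hedged sketch (the
mangled name is illustrative, and linking against libcxxrt is assumed):

#include <stdio.h>
#include <stdlib.h>

/* Itanium C++ ABI demangling entry point (exported by libcxxrt). */
extern char *__cxa_demangle(const char *, char *, size_t *, int *);

int
main(void)
{
	int status;
	/* Dv4_f is the gcc vector extension handled by TYPE_VEC above. */
	char *s = __cxa_demangle("_Z1fDv4_f", NULL, NULL, &status);

	if (status == 0) {
		printf("%s\n", s);	/* expect: f(float __vector(4)) */
		free(s);
	}
	return (status == 0 ? 0 : 1);
}
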
diff --git a/etc/Makefile b/etc/Makefile
index 2b02717..1927580 100644
--- a/etc/Makefile
+++ b/etc/Makefile
@@ -154,16 +154,13 @@ BIN1+= regdomain.xml
# -rwxr-xr-x root:wheel, for the new cron root:wheel
BIN2= netstart pccard_ether rc.suspend rc.resume
-MTREE= BSD.include.dist BSD.root.dist BSD.usr.dist BSD.var.dist
+MTREE= BSD.debug.dist BSD.include.dist BSD.root.dist BSD.usr.dist BSD.var.dist
.if ${MK_TESTS} != "no"
MTREE+= BSD.tests.dist
.endif
.if ${MK_SENDMAIL} != "no"
MTREE+= BSD.sendmail.dist
.endif
-.if ${MK_DEBUG_FILES} != "no"
-MTREE+= BSD.debug.dist
-.endif
PPPCNF= ppp.conf
@@ -336,10 +333,8 @@ MTREE_CMD?= mtree
MTREES= mtree/BSD.root.dist / \
mtree/BSD.var.dist /var \
mtree/BSD.usr.dist /usr \
- mtree/BSD.include.dist /usr/include
-.if ${MK_DEBUG_FILES} != "no"
-MTREES+= mtree/BSD.debug.dist /usr/lib
-.endif
+ mtree/BSD.include.dist /usr/include \
+ mtree/BSD.debug.dist /usr/lib
.if ${MK_GROFF} != "no"
MTREES+= mtree/BSD.groff.dist /usr
.endif
diff --git a/games/grdc/grdc.c b/games/grdc/grdc.c
index 4632c79..04cc00b 100644
--- a/games/grdc/grdc.c
+++ b/games/grdc/grdc.c
@@ -150,14 +150,14 @@ main(int argc, char *argv[])
set(tm->tm_min/10, 14);
if (t12) {
- if (tm->tm_hour > 12) {
- tm->tm_hour -= 12;
- mvaddstr(YBASE + 5, XBASE + 52, "PM");
- } else {
+ if (tm->tm_hour < 12) {
if (tm->tm_hour == 0)
tm->tm_hour = 12;
-
mvaddstr(YBASE + 5, XBASE + 52, "AM");
+ } else {
+ if (tm->tm_hour > 12)
+ tm->tm_hour -= 12;
+ mvaddstr(YBASE + 5, XBASE + 52, "PM");
}
}
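
The old test classified noon (hour 12) as AM; the fix keys on hour < 12
instead of hour > 12. Restated outside of curses, the corrected mapping is
just this (a throwaway sketch):

#include <stdio.h>

/* Map a 0-23 hour to a 12-hour clock, as the fixed code does. */
static void
to12(int *hour, const char **ampm)
{
	if (*hour < 12) {
		if (*hour == 0)		/* the midnight hour reads 12 AM */
			*hour = 12;
		*ampm = "AM";
	} else {
		if (*hour > 12)		/* 13..23 become 1..11 PM */
			*hour -= 12;
		*ampm = "PM";		/* the noon hour stays 12 PM */
	}
}

int
main(void)
{
	const char *ampm;
	int h;

	for (h = 0; h < 24; h++) {
		int v = h;
		to12(&v, &ampm);
		printf("%2d -> %2d %s\n", h, v, ampm);
	}
	return (0);
}
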
diff --git a/gnu/usr.bin/binutils/libbfd/Makefile.i386 b/gnu/usr.bin/binutils/libbfd/Makefile.i386
index e6d24a9..e41e30d 100644
--- a/gnu/usr.bin/binutils/libbfd/Makefile.i386
+++ b/gnu/usr.bin/binutils/libbfd/Makefile.i386
@@ -17,3 +17,5 @@ VECS= ${DEFAULT_VECTOR} \
peigen.c: peXXigen.c
sed -e s/XX/pe/g ${.ALLSRC} > ${.TARGET}
+
+CLEANFILES+= peigen.c
diff --git a/gnu/usr.bin/groff/src/utils/indxbib/Makefile b/gnu/usr.bin/groff/src/utils/indxbib/Makefile
index b2c59a2..048fed0 100644
--- a/gnu/usr.bin/groff/src/utils/indxbib/Makefile
+++ b/gnu/usr.bin/groff/src/utils/indxbib/Makefile
@@ -5,9 +5,7 @@ SRCS= indxbib.cpp signal.c
DPADD= ${LIBBIB} ${LIBGROFF} ${LIBM}
LDADD= ${LIBBIB} ${LIBGROFF} -lm
CLEANFILES= ${MAN}
-
-beforeinstall:
- ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m ${NOBINMODE} \
- ${DIST_DIR}/eign ${DESTDIR}${SHAREDIR}/dict/
+FILES= ${DIST_DIR}/eign
+FILESDIR= ${SHAREDIR}/dict/
.include <bsd.prog.mk>
diff --git a/kerberos5/libexec/kdigest/Makefile b/kerberos5/libexec/kdigest/Makefile
index c88e3b9..ed77fff 100644
--- a/kerberos5/libexec/kdigest/Makefile
+++ b/kerberos5/libexec/kdigest/Makefile
@@ -13,6 +13,8 @@ SRCS= kdigest.c \
kdigest-commands.c \
kdigest-commands.h
+CLEANFILES= kdigest-commands.h kdigest-commands.c
+
kdigest-commands.h: kdigest-commands.in
${SLC} ${.ALLSRC:M*.in}
diff --git a/kerberos5/usr.bin/hxtool/Makefile b/kerberos5/usr.bin/hxtool/Makefile
index 3946484..a6af319 100644
--- a/kerberos5/usr.bin/hxtool/Makefile
+++ b/kerberos5/usr.bin/hxtool/Makefile
@@ -10,6 +10,8 @@ DPADD= ${LIBHX509} ${LIBROKEN} ${LIBASN1} ${LIBCRYPTO} ${LIBCRYPT} ${LIBSL} ${LI
LDADD= -lhx509 -lroken -lasn1 -lcrypto -lcrypt ${LIBSL} ${LIBVERS} -ledit
SRCS= hxtool.c hxtool-commands.c hxtool-commands.h
+CLEANFILES= hxtool-commands.h hxtool-commands.c
+
hxtool-commands.h: hxtool-commands.in
${SLC} ${.ALLSRC:M*.in}
diff --git a/kerberos5/usr.bin/kadmin/Makefile b/kerberos5/usr.bin/kadmin/Makefile
index c04ad36..894533d 100644
--- a/kerberos5/usr.bin/kadmin/Makefile
+++ b/kerberos5/usr.bin/kadmin/Makefile
@@ -36,6 +36,8 @@ LDADD= -lkadm5clnt -lkadm5srv -lhdb -lkrb5 -lhx509 \
-ledit -lncurses ${LDAPLDADD}
LDFLAGS=${LDAPLDFLAGS}
+CLEANFILES= kadmin-commands.h kadmin-commands.c
+
.include <bsd.prog.mk>
kadmin-commands.h: ${KRB5DIR}/kadmin/kadmin-commands.in
diff --git a/kerberos5/usr.bin/kcc/Makefile b/kerberos5/usr.bin/kcc/Makefile
index 3da43d7..d3f4dc3 100644
--- a/kerberos5/usr.bin/kcc/Makefile
+++ b/kerberos5/usr.bin/kcc/Makefile
@@ -19,6 +19,8 @@ SRCS= kcc.c \
kswitch.c \
copy_cred_cache.c
+CLEANFILES= kcc-commands.h kcc-commands.c
+
kcc-commands.h: kcc-commands.in
${SLC} ${.ALLSRC:M*.in}
diff --git a/kerberos5/usr.sbin/iprop-log/Makefile b/kerberos5/usr.sbin/iprop-log/Makefile
index b5b29e2..48b8232 100644
--- a/kerberos5/usr.sbin/iprop-log/Makefile
+++ b/kerberos5/usr.sbin/iprop-log/Makefile
@@ -14,6 +14,8 @@ LDADD= -lkadm5srv -lhdb -lkrb5 -lasn1 -lcrypto -lcrypt ${LIBSL} -lroken \
${LIBVERS} -ledit
LDFLAGS=${LDAPLDFLAGS}
+CLEANFILES= iprop-commands.h iprop-commands.c
+
iprop-commands.h: iprop-commands.in
${SLC} ${.ALLSRC:M*.in}
diff --git a/kerberos5/usr.sbin/ktutil/Makefile b/kerberos5/usr.sbin/ktutil/Makefile
index f7d340f..85517a7 100644
--- a/kerberos5/usr.sbin/ktutil/Makefile
+++ b/kerberos5/usr.sbin/ktutil/Makefile
@@ -22,6 +22,8 @@ DPADD= ${LIBKADM5CLNT} ${LIBKRB5} ${LIBSL} ${LIBROKEN} ${LIBVERS} \
LDADD= -lkadm5clnt -lkrb5 ${LIBSL} -lroken ${LIBVERS} \
-lasn1 -lcrypto -lcrypt -ledit
+CLEANFILES= ktutil-commands.h ktutil-commands.c
+
.include <bsd.prog.mk>
ktutil-commands.h: ${KRB5DIR}/admin/ktutil-commands.in
diff --git a/lib/clang/include/Makefile b/lib/clang/include/Makefile
index 77b4d99..36eb08e 100644
--- a/lib/clang/include/Makefile
+++ b/lib/clang/include/Makefile
@@ -45,6 +45,6 @@ INCS= __wmmintrin_aes.h \
xopintrin.h \
${GENINCS}
GENINCS= arm_neon.h
-CLEANFILES= ${GENINCS}
+CLEANFILES= ${GENINCS} ${GENINCS:C/\.h$/.d/}
.include <bsd.prog.mk>
diff --git a/lib/libc/stdio/open_memstream.3 b/lib/libc/stdio/open_memstream.3
index e01952b..1a0cb07 100644
--- a/lib/libc/stdio/open_memstream.3
+++ b/lib/libc/stdio/open_memstream.3
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 28, 2014
+.Dd August 1, 2015
.Dt OPEN_MEMSTREAM 3
.Os
.Sh NAME
@@ -86,13 +86,13 @@ will contain the start of the memory buffer and the variable referenced by
will contain the smaller of the current position and the current buffer length.
.Pp
After a successful call to
-.Xr fflush 3,
+.Xr fflush 3 ,
the pointer referenced by
.Fa bufp
and the variable referenced by
.Fa sizep
are only valid until the next write operation or a call to
-.Xr fclose 3.
+.Xr fclose 3 .
.Pp
Once a stream is closed,
the allocated buffer referenced by
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index fbd28cd..3b1f8fd 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -100,8 +100,6 @@ ASM+=$(_asm)
.endif
.endfor
-OBJS+= ${ASM} ${PSEUDO}
-
SASM= ${ASM:S/.o/.S/}
SPSEUDO= ${PSEUDO:S/.o/.S/}
diff --git a/lib/libc/sys/madvise.2 b/lib/libc/sys/madvise.2
index 357e05e..499bf67 100644
--- a/lib/libc/sys/madvise.2
+++ b/lib/libc/sys/madvise.2
@@ -28,7 +28,7 @@
.\" @(#)madvise.2 8.1 (Berkeley) 6/9/93
.\" $FreeBSD$
.\"
-.Dd January 30, 2014
+.Dd July 12, 2015
.Dt MADVISE 2
.Os
.Sh NAME
@@ -79,9 +79,9 @@ pages in from backing store, but quickly map the pages already in memory
into the calling process.
.It Dv MADV_DONTNEED
Allows the VM system to decrease the in-memory priority
-of pages in the specified range.
-Additionally future references to
-this address range will incur a page fault.
+of pages in the specified address range.
+Consequently, future references to this address range are more likely
+to incur a page fault.
.It Dv MADV_FREE
Gives the VM system the freedom to free pages,
and tells the system that information in the specified page range
diff --git a/lib/libc/tests/gen/posix_spawn/Makefile b/lib/libc/tests/gen/posix_spawn/Makefile
index d2dfa54..f20bbd7 100644
--- a/lib/libc/tests/gen/posix_spawn/Makefile
+++ b/lib/libc/tests/gen/posix_spawn/Makefile
@@ -21,6 +21,8 @@ PROGS+= h_spawnattr
SCRIPTS= h_nonexec
SCRIPTS+= h_zero
+CLEANFILES+= h_nonexec
+
.include "../../Makefile.netbsd-tests"
h_zero:
diff --git a/lib/libusb/Makefile b/lib/libusb/Makefile
index 92c0843..15b2533 100644
--- a/lib/libusb/Makefile
+++ b/lib/libusb/Makefile
@@ -36,16 +36,11 @@ SRCS+= libusb10_io.c
.if defined(COMPAT_32BIT)
CFLAGS+= -DCOMPAT_32BIT
+.else
+FILES= libusb-0.1.pc libusb-1.0.pc libusb-2.0.pc
+FILESDIR= ${LIBDATADIR}/pkgconfig
.endif
-beforeinstall:
- ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
- ${.CURDIR}/libusb-0.1.pc ${DESTDIR}${LIBDATADIR}/pkgconfig
- ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
- ${.CURDIR}/libusb-1.0.pc ${DESTDIR}${LIBDATADIR}/pkgconfig
- ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
- ${.CURDIR}/libusb-2.0.pc ${DESTDIR}${LIBDATADIR}/pkgconfig
-
#
# Cross platform support
#
diff --git a/lib/libutil/Makefile b/lib/libutil/Makefile
index 144fb5c..4e10729 100644
--- a/lib/libutil/Makefile
+++ b/lib/libutil/Makefile
@@ -10,7 +10,8 @@ SHLIB_MAJOR= 9
SRCS= _secure_path.c auth.c expand_number.c flopen.c fparseln.c gr_util.c \
hexdump.c humanize_number.c kinfo_getfile.c kinfo_getfile.c \
- kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c kld.c \
+ kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c \
+ kinfo_getvmobject.c kld.c \
login_auth.c login_cap.c \
login_class.c login_crypt.c login_ok.c login_times.c login_tty.c \
pidfile.c property.c pty.c pw_util.c quotafile.c realhostname.c \
@@ -27,7 +28,8 @@ CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../libc/gen/
MAN+= expand_number.3 flopen.3 fparseln.3 hexdump.3 \
humanize_number.3 kinfo_getallproc.3 kinfo_getfile.3 \
- kinfo_getproc.3 kinfo_getvmmap.3 kld.3 login_auth.3 login_cap.3 \
+ kinfo_getproc.3 kinfo_getvmmap.3 kinfo_getvmobject.3 kld.3 \
+ login_auth.3 login_cap.3 \
login_class.3 login_ok.3 login_times.3 login_tty.3 pidfile.3 \
property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \
_secure_path.3 trimdomain.3 uucplock.3 pw_util.3
diff --git a/lib/libutil/kinfo_getvmobject.3 b/lib/libutil/kinfo_getvmobject.3
new file mode 100644
index 0000000..dc0edd2
--- /dev/null
+++ b/lib/libutil/kinfo_getvmobject.3
@@ -0,0 +1,74 @@
+.\"
+.\" Copyright (c) 2015 John Baldwin <jhb@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 27, 2015
+.Dt KINFO_GETVMOBJECT 3
+.Os
+.Sh NAME
+.Nm kinfo_getvmobject
+.Nd function for getting system-wide memory information
+.Sh LIBRARY
+.Lb libutil
+.Sh SYNOPSIS
+.In sys/types.h
+.In sys/user.h
+.In libutil.h
+.Ft struct kinfo_vmobject *
+.Fn kinfo_getvmobject "int *cntp"
+.Sh DESCRIPTION
+This function is used to obtain information about the objects using memory
+in the system.
+.Pp
+The
+.Ar cntp
+argument allows the caller to know how many records are returned.
+.Pp
+This function is a wrapper around the
+.Dq vm.objects
+.Xr sysctl 3
+MIB.
+While the kernel returns a packed structure, this function expands the
+data into a fixed record format.
+.Sh RETURN VALUES
+On success the
+.Fn kinfo_getvmobject
+function returns a pointer to an array of
+.Vt struct kinfo_vmobject
+structures as defined by
+.In sys/user.h .
+The array is allocated by an internal call to
+.Xr malloc 3
+and must be freed by the caller with a call to
+.Xr free 3 .
+On failure the
+.Fn kinfo_getvmobject
+function returns
+.Dv NULL .
+.Sh SEE ALSO
+.Xr free 3 ,
+.Xr kinfo_getvmmap 3 ,
+.Xr malloc 3
diff --git a/lib/libutil/kinfo_getvmobject.c b/lib/libutil/kinfo_getvmobject.c
new file mode 100644
index 0000000..7e031da
--- /dev/null
+++ b/lib/libutil/kinfo_getvmobject.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2013 Hudson River Trading LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/user.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libutil.h"
+
+struct kinfo_vmobject *
+kinfo_getvmobject(int *cntp)
+{
+ char *buf, *bp, *ep;
+ struct kinfo_vmobject *kvo, *list, *kp;
+ size_t len;
+ int cnt, i;
+
+ buf = NULL;
+ for (i = 0; i < 3; i++) {
+ if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) < 0)
+ return (NULL);
+ buf = reallocf(buf, len);
+ if (buf == NULL)
+ return (NULL);
+ if (sysctlbyname("vm.objects", buf, &len, NULL, 0) == 0)
+ goto unpack;
+ if (errno != ENOMEM) {
+ free(buf);
+ return (NULL);
+ }
+ }
+ free(buf);
+ return (NULL);
+
+unpack:
+ /* Count items */
+ cnt = 0;
+ bp = buf;
+ ep = buf + len;
+ while (bp < ep) {
+ kvo = (struct kinfo_vmobject *)(uintptr_t)bp;
+ bp += kvo->kvo_structsize;
+ cnt++;
+ }
+
+ list = calloc(cnt, sizeof(*list));
+ if (list == NULL) {
+ free(buf);
+ return (NULL);
+ }
+
+ /* Unpack */
+ bp = buf;
+ kp = list;
+ while (bp < ep) {
+ kvo = (struct kinfo_vmobject *)(uintptr_t)bp;
+ memcpy(kp, kvo, kvo->kvo_structsize);
+ bp += kvo->kvo_structsize;
+ kp->kvo_structsize = sizeof(*kp);
+ kp++;
+ }
+ free(buf);
+ *cntp = cnt;
+ return (list);
+}
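
A hedged consumer of the new function; the kvo_ref_count and kvo_resident
field names and types are assumed from struct kinfo_vmobject in sys/user.h:

#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct kinfo_vmobject *kvo;
	int cnt, i;

	if ((kvo = kinfo_getvmobject(&cnt)) == NULL) {
		perror("kinfo_getvmobject");
		return (1);
	}
	for (i = 0; i < cnt; i++)
		printf("object %d: %d refs, %ju resident pages\n", i,
		    kvo[i].kvo_ref_count, (uintmax_t)kvo[i].kvo_resident);
	free(kvo);	/* one allocation for the whole array */
	return (0);
}
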
diff --git a/lib/libutil/libutil.h b/lib/libutil/libutil.h
index b8b9836..b20ffa2 100644
--- a/lib/libutil/libutil.h
+++ b/lib/libutil/libutil.h
@@ -102,6 +102,8 @@ struct kinfo_file *
kinfo_getfile(pid_t _pid, int *_cntp);
struct kinfo_vmentry *
kinfo_getvmmap(pid_t _pid, int *_cntp);
+struct kinfo_vmobject *
+ kinfo_getvmobject(int *_cntp);
struct kinfo_proc *
kinfo_getallproc(int *_cntp);
struct kinfo_proc *
diff --git a/lib/libz/Makefile b/lib/libz/Makefile
index dce0ab3..dca7a1b 100644
--- a/lib/libz/Makefile
+++ b/lib/libz/Makefile
@@ -68,9 +68,10 @@ test: example minigzip
(export LD_LIBRARY_PATH=. ; \
echo hello world | ./minigzip | ./minigzip -d )
-beforeinstall:
- ${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
- ${.CURDIR}/zlib.pc ${DESTDIR}${LIBDATADIR}/pkgconfig
+.ifndef COMPAT_32BIT
+FILES= zlib.pc
+FILESDIR= ${LIBDATADIR}/pkgconfig
+.endif
.include <bsd.lib.mk>
diff --git a/lib/libz/inflate.c b/lib/libz/inflate.c
index 870f89b..b51a8a5 100644
--- a/lib/libz/inflate.c
+++ b/lib/libz/inflate.c
@@ -1504,7 +1504,7 @@ z_streamp strm;
{
struct inflate_state FAR *state;
- if (strm == Z_NULL || strm->state == Z_NULL) return -1L << 16;
+ if (strm == Z_NULL || strm->state == Z_NULL) return -(1L << 16);
state = (struct inflate_state FAR *)strm->state;
return ((long)(state->back) << 16) +
(state->mode == COPY ? state->length :
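
The parenthesization matters: -1L << 16 parses as (-1L) << 16, and
left-shifting a negative value is undefined behavior in C, while -(1L << 16)
shifts first and then negates, which is fully defined here. A one-line
illustration:

#include <stdio.h>

int
main(void)
{
	/* Defined: shift a positive value, then negate the result. */
	long sentinel = -(1L << 16);

	printf("%ld\n", sentinel);	/* prints -65536 */
	return (0);
}
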
diff --git a/release/arm/BEAGLEBONE.conf b/release/arm/BEAGLEBONE.conf
index e7ef4f3..5eba7b9 100644
--- a/release/arm/BEAGLEBONE.conf
+++ b/release/arm/BEAGLEBONE.conf
@@ -26,6 +26,8 @@ arm_install_uboot() {
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/MLO ${FATMOUNT}/MLO
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/u-boot.img ${FATMOUNT}/u-boot.img
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
sync
umount_loop ${CHROOTDIR}/${FATMOUNT}
diff --git a/release/arm/CUBOX-HUMMINGBOARD.conf b/release/arm/CUBOX-HUMMINGBOARD.conf
index 5c82d40..987fe23 100644
--- a/release/arm/CUBOX-HUMMINGBOARD.conf
+++ b/release/arm/CUBOX-HUMMINGBOARD.conf
@@ -28,6 +28,8 @@ arm_install_uboot() {
chroot ${CHROOTDIR} mount_msdosfs /dev/${mddev}s1 ${FATMOUNT}
chroot ${CHROOTDIR} mount /dev/${mddev}s2a ${UFSMOUNT}
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
sync
umount_loop ${CHROOTDIR}/${FATMOUNT}
diff --git a/release/arm/GUMSTIX.conf b/release/arm/GUMSTIX.conf
index 92e8476..1f3d8da 100644
--- a/release/arm/GUMSTIX.conf
+++ b/release/arm/GUMSTIX.conf
@@ -26,6 +26,8 @@ arm_install_uboot() {
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/MLO ${FATMOUNT}/MLO
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/u-boot.img ${FATMOUNT}/u-boot.img
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
sync
umount_loop ${CHROOTDIR}/${FATMOUNT}
diff --git a/release/arm/PANDABOARD.conf b/release/arm/PANDABOARD.conf
index 4b7f5eb..341fdeb 100644
--- a/release/arm/PANDABOARD.conf
+++ b/release/arm/PANDABOARD.conf
@@ -26,6 +26,8 @@ arm_install_uboot() {
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/MLO ${FATMOUNT}/MLO
chroot ${CHROOTDIR} cp -p ${UBOOT_DIR}/u-boot.img ${FATMOUNT}/u-boot.img
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
sync
umount_loop ${CHROOTDIR}/${FATMOUNT}
diff --git a/release/arm/RPI-B.conf b/release/arm/RPI-B.conf
index ad44995..0129a2e 100644
--- a/release/arm/RPI-B.conf
+++ b/release/arm/RPI-B.conf
@@ -30,6 +30,8 @@ arm_install_uboot() {
${FATMOUNT}/${_UF}
done
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/dtb/rpi.dtb \
${FATMOUNT}/rpi.dtb
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
diff --git a/release/arm/RPI2.conf b/release/arm/RPI2.conf
index dc13c83..24fb491 100644
--- a/release/arm/RPI2.conf
+++ b/release/arm/RPI2.conf
@@ -31,6 +31,8 @@ arm_install_uboot() {
${FATMOUNT}/${_UF}
done
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/dtb/rpi2.dtb \
${FATMOUNT}/rpi2.dtb
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
diff --git a/release/arm/WANDBOARD.conf b/release/arm/WANDBOARD.conf
index 2cc0995..971aa1c 100644
--- a/release/arm/WANDBOARD.conf
+++ b/release/arm/WANDBOARD.conf
@@ -29,6 +29,8 @@ arm_install_uboot() {
chroot ${CHROOTDIR} mount_msdosfs /dev/${mddev}s1 ${FATMOUNT}
chroot ${CHROOTDIR} mount /dev/${mddev}s2a ${UFSMOUNT}
chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr ${FATMOUNT}/ubldr
+ chroot ${CHROOTDIR} cp -p ${UFSMOUNT}/boot/ubldr.bin \
+ ${FATMOUNT}/ubldr.bin
chroot ${CHROOTDIR} touch ${UFSMOUNT}/firstboot
sync
umount_loop ${CHROOTDIR}/${FATMOUNT}
diff --git a/release/doc/en_US.ISO8859-1/relnotes/article.xml b/release/doc/en_US.ISO8859-1/relnotes/article.xml
index 857428f..825fa10 100644
--- a/release/doc/en_US.ISO8859-1/relnotes/article.xml
+++ b/release/doc/en_US.ISO8859-1/relnotes/article.xml
@@ -166,13 +166,42 @@
<sect2 xml:id="userland-programs">
<title>Userland Application Changes</title>
- <para>&nbsp;</para>
+ <para revision="286064">The &man.jail.8; utility has been
+ updated to include a new flag, <literal>-l</literal>, which
+ ensures a clean environment in the target jail when used.
+ Additionally, &man.jail.8; will run a shell within the target
+        jail when no commands are specified.</para>
+
+ <para revision="287122">The &man.mkimg.1; utility has been
+ updated to include support for <literal>NTFS</literal>
+ filesystems in both <acronym>MBR</acronym> and
+ <acronym>GPT</acronym> partitioning schemes.</para>
+
+ <para revision="288202" contrib="sponsor" sponsor="&ff;">The
+ &man.ar.1; utility now enables deterministic mode
+ (<literal>-D</literal>) by default. This behavior can be
+ disabled by specifying the <literal>-U</literal> flag.</para>
+
+ <para revision="287746">The &man.pciconf.8; utility has been
+ updated to use the PCI ID database from the <filename
+ role="package">misc/pciids</filename> package, if present,
+ falling back to the PCI ID database in the &os; base
+ system.</para>
+
+ <para revision="288710" contrib="sponsor" sponsor="&gandi;">A
+ new utility, &man.sesutil.8;, has been added, which is used
+ to manage &man.ses.4; devices.</para>
</sect2>
<sect2 xml:id="userland-contrib">
<title>Contributed Software</title>
- <para>&nbsp;</para>
+ <para revision="286082">The &man.libarchive.3; library has been
+ updated to properly skip a sparse file entry in a &man.tar.1;
+ file, which would previously produce errors.</para>
+
+ <para revision="286751">Timezone data files have been updated to
+ version 2015f.</para>
</sect2>
<sect2 xml:id="userland-installer">
@@ -218,7 +247,10 @@
<sect2 xml:id="kernel-bugfix">
<title>Kernel Bug Fixes</title>
- <para>&nbsp;</para>
+ <para revision="288167" contrib="sponsor"
+ sponsor="&multiplay;">The &man.kqueue.2; system call has been
+ updated to handle write events to files larger than 2
+ gigabytes.</para>
</sect2>
<sect2 xml:id="kernel-config">
@@ -230,7 +262,10 @@
<sect2 xml:id="kernel-sysctl">
<title>System Tuning and Controls</title>
- <para>&nbsp;</para>
+      <para revision="287037">The &man.uart.4; driver has been
+        updated to allow selecting, at runtime, the CTS line for
+        pulse-per-second (PPS) capture; previously only the DCD
+        line could be used without rebuilding the kernel.</para>
</sect2>
</sect1>
@@ -249,7 +284,12 @@
<sect2 xml:id="drivers-storage">
<title>Storage Drivers</title>
- <para>&nbsp;</para>
+ <para revision="288732" contrib="sponsor" sponsor="&ix;">The
+ <acronym>CTL</acronym> High Availability implementation has
+ been rewritten.</para>
+
+ <para revision="288810">The &man.ctl.4; driver has been updated
+ to support CD-ROM and removable devices.</para>
</sect2>
<sect2 xml:id="drivers-network">
@@ -276,7 +316,9 @@
<sect2 xml:id="hardware-virtualization">
<title>Virtualization Support</title>
- <para>&nbsp;</para>
+ <para revision="287802">The &man.xen.4; driver has been updated
+ to include support for <literal>blkif</literal> indirect
+ segment I/O.</para>
</sect2>
<sect2 xml:id="hardware-arm">
@@ -307,7 +349,11 @@
<sect2 xml:id="storage-zfs">
<title>ZFS</title>
- <para>&nbsp;</para>
+ <para revision="287665" contrib="sponsor"
+ sponsor="&clusterhq;">The &man.zfs.8; <literal>l2arc</literal>
+ code has been updated to take <literal>ashift</literal> into
+ account when gathering buffers to be written to the
+ <literal>l2arc</literal> device.</para>
</sect2>
<sect2 xml:id="storage-geom">
diff --git a/release/doc/share/xml/security.xml b/release/doc/share/xml/security.xml
index d5718ae..3a34520 100644
--- a/release/doc/share/xml/security.xml
+++ b/release/doc/share/xml/security.xml
@@ -32,6 +32,13 @@
<entry>25&nbsp;August&nbsp;2015</entry>
<entry><para>Multiple vulnerabilities</para></entry>
</row>
+
+ <row>
+ <entry><link
+ xlink:href="&security.url;/FreeBSD-SA-15:24.rpcbind.asc">FreeBSD-SA-15:24.rpcbind</link></entry>
+ <entry>29&nbsp;September&nbsp;2015</entry>
+ <entry><para>Remote denial of service</para></entry>
+ </row>
</tbody>
</tgroup>
</informaltable>
diff --git a/release/doc/share/xml/sponsor.ent b/release/doc/share/xml/sponsor.ent
index 27f1410..fa8c866 100644
--- a/release/doc/share/xml/sponsor.ent
+++ b/release/doc/share/xml/sponsor.ent
@@ -13,12 +13,15 @@
<!ENTITY citrix "Citrix Systems">
<!ENTITY citrix.rd "Citrix Systems R&amp;D">
+<!ENTITY clusterhq "ClusterHQ">
+
<!ENTITY darpa "DARPA">
<!ENTITY darpa_afrl "DARPA, AFRL">
<!ENTITY ff "The&nbsp;&os;&nbsp;Foundation">
<!ENTITY ff.url "https://www.FreeBSDFoundation.org/">
+<!ENTITY gandi "Gandi.net">
<!ENTITY google "Google">
<!ENTITY intelcorp "Intel Corporation">
diff --git a/release/release.sh b/release/release.sh
index 94a9bb6..e168422 100755
--- a/release/release.sh
+++ b/release/release.sh
@@ -311,6 +311,18 @@ chroot_build_target() {
# chroot_build_release(): Invoke the 'make release' target.
chroot_build_release() {
load_target_env
+ if [ ! -z "${WITH_VMIMAGES}" ]; then
+ if [ -z "${VMFORMATS}" ]; then
+ VMFORMATS="$(eval chroot ${CHROOTDIR} \
+ make -C /usr/src/release -V VMFORMATS)"
+ fi
+ if [ -z "${VMSIZE}" ]; then
+ VMSIZE="$(eval chroot ${CHROOTDIR} \
+ make -C /usr/src/release -V VMSIZE)"
+ fi
+ RELEASE_RMAKEFLAGS="${RELEASE_RMAKEFLAGS} \
+ VMFORMATS=\"${VMFORMATS}\" VMSIZE=${VMSIZE}"
+ fi
eval chroot ${CHROOTDIR} make -C /usr/src/release \
${RELEASE_RMAKEFLAGS} release
eval chroot ${CHROOTDIR} make -C /usr/src/release \
diff --git a/release/tools/vmimage.subr b/release/tools/vmimage.subr
index dae2ee1..8f058a9 100644
--- a/release/tools/vmimage.subr
+++ b/release/tools/vmimage.subr
@@ -102,7 +102,7 @@ vm_copy_base() {
umount_loop /dev/${mdnew}
rmdir ${DESTDIR}/new
- tunefs -j enable /dev/${mdnew}
+ tunefs -n enable /dev/${mdnew}
mdconfig -d -u ${mdnew}
mv ${VMBASE}.tmp ${VMBASE}
}
diff --git a/sbin/dmesg/dmesg.c b/sbin/dmesg/dmesg.c
index f0fcb81..75b926c 100644
--- a/sbin/dmesg/dmesg.c
+++ b/sbin/dmesg/dmesg.c
@@ -116,6 +116,9 @@ main(int argc, char *argv[])
*/
if (sysctlbyname("kern.msgbuf", NULL, &buflen, NULL, 0) == -1)
err(1, "sysctl kern.msgbuf");
+ /* Allocate extra room for growth between the sysctl calls. */
+ buflen += buflen/8;
+ /* Allocate more than sysctl sees, for room to append \n\0. */
if ((bp = malloc(buflen + 2)) == NULL)
errx(1, "malloc failed");
if (sysctlbyname("kern.msgbuf", bp, &buflen, NULL, 0) == -1)
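
Pulling the patched lines together, the allocation strategy is: probe the
size, pad it by one eighth for growth between the two sysctl calls, leave two
bytes for the appended newline and NUL, then read. A condensed sketch:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdlib.h>

/* Sketch: fetch kern.msgbuf, padding for growth between calls. */
static char *
fetch_msgbuf(size_t *lenp)
{
	size_t buflen;
	char *bp;

	if (sysctlbyname("kern.msgbuf", NULL, &buflen, NULL, 0) == -1)
		err(1, "sysctl kern.msgbuf");
	buflen += buflen / 8;			/* headroom for growth */
	if ((bp = malloc(buflen + 2)) == NULL)	/* room for \n\0 */
		errx(1, "malloc failed");
	if (sysctlbyname("kern.msgbuf", bp, &buflen, NULL, 0) == -1)
		err(1, "sysctl kern.msgbuf");
	*lenp = buflen;
	return (bp);
}
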
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c
index d20a4f6..8cc92e1 100644
--- a/sbin/ipfw/ipfw2.c
+++ b/sbin/ipfw/ipfw2.c
@@ -2279,14 +2279,14 @@ fill_ip(ipfw_insn_ip *cmd, char *av, int cblen)
case '/':
masklen = atoi(p);
if (masklen == 0)
- d[1] = htonl(0); /* mask */
+ d[1] = htonl(0U); /* mask */
else if (masklen > 32)
errx(EX_DATAERR, "bad width ``%s''", p);
else
- d[1] = htonl(~0 << (32 - masklen));
+ d[1] = htonl(~0U << (32 - masklen));
break;
case '{': /* no mask, assume /24 and put back the '{' */
- d[1] = htonl(~0 << (32 - 24));
+ d[1] = htonl(~0U << (32 - 24));
*(--p) = md;
break;
@@ -2295,7 +2295,7 @@ fill_ip(ipfw_insn_ip *cmd, char *av, int cblen)
/* FALLTHROUGH */
case 0: /* initialization value */
default:
- d[1] = htonl(~0); /* force /32 */
+ d[1] = htonl(~0U); /* force /32 */
break;
}
d[0] &= d[1]; /* mask base address with mask */
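
~0 is a signed int, so ~0 << (32 - masklen) can shift into and past the sign
bit, which is undefined; the U suffix keeps the whole expression unsigned.
The same logic as a well-defined helper:

#include <stdint.h>
#include <arpa/inet.h>

/* Build a network-order IPv4 netmask for a prefix length of 0..32. */
static uint32_t
prefix_to_mask(int masklen)
{
	if (masklen <= 0)
		return (htonl(0U));
	if (masklen >= 32)
		return (htonl(~0U));		/* force /32 */
	return (htonl(~0U << (32 - masklen)));
}
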
diff --git a/secure/lib/libcrypto/Makefile b/secure/lib/libcrypto/Makefile
index 330b08f..d0b2120 100644
--- a/secure/lib/libcrypto/Makefile
+++ b/secure/lib/libcrypto/Makefile
@@ -402,14 +402,6 @@ opensslconf.h: opensslconf-${MACHINE_CPUARCH}.h
.endif
cp -f ${.ALLSRC} ${.TARGET}
-OLDSYMLINKS+= libdes.a libdes.so libdes.so.3 libdes_p.a
-afterinstall:
- @${ECHO} "Removing stale symlinks."
- rm -f ${DESTDIR}${INCLUDEDIR}/des.h
-.for symlink in ${OLDSYMLINKS}
- rm -f ${DESTDIR}${LIBDIR}/${symlink}
-.endfor
-
.include <bsd.lib.mk>
.if ${MACHINE_CPUARCH} == "amd64"
diff --git a/share/man/man4/ctl.4 b/share/man/man4/ctl.4
index 31c3737..7e6cb8d 100644
--- a/share/man/man4/ctl.4
+++ b/share/man/man4/ctl.4
@@ -1,4 +1,5 @@
.\" Copyright (c) 2013 Edward Tomasz Napierala
+.\" Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
@@ -23,7 +24,7 @@
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
-.Dd August 9, 2015
+.Dd September 27, 2015
.Dt CTL 4
.Os
.Sh NAME
@@ -52,7 +53,7 @@ It supports features such as:
.Pp
.Bl -bullet -compact
.It
-Disk and processor device emulation
+Disk, processor and CD-ROM device emulation
.It
Tagged queueing
.It
@@ -80,6 +81,8 @@ Mode sense/select support
.It
Error injection support
.It
+High Availability clustering support with ALUA
+.It
All I/O handled in-kernel, no userland context switch overhead
.El
.Pp
@@ -99,9 +102,57 @@ log commands with errors;
.It 2
log all commands;
.It 4
-log received data for commands except READ/WRITE.
+log data for commands other than READ/WRITE.
.El
Defaults to 0.
+.It Va kern.cam.ctl.ha_id
+Specifies the unique position of this node within the High Availability cluster.
+The default is 0 (no HA); values 1 and 2 enable HA at the specified position.
+.It Va kern.cam.ctl.ha_mode
+Specifies the High Availability cluster operation mode:
+.Bl -tag -offset indent -compact
+.It 0
+Active/Standby -- the primary node has backend access and processes requests,
+while the secondary can only do basic LUN discovery and reservation;
+.It 1
+Active/Active -- both nodes have backend access and process requests,
+while the secondary node synchronizes its processing with the primary one;
+.It 2
+Active/Active -- the primary node has backend access and processes requests,
+while the secondary node forwards all requests and data to the primary one;
+.El
+All of the above modes require an established connection between HA cluster nodes.
+If the connection is not configured, the secondary node will report the
+Unavailable state; if configured but not established, the Transitioning state.
+Defaults to 0.
+.It Va kern.cam.ctl.ha_peer
+A string value specifying how to establish the connection to the peer HA node.
+It can be "listen IP:port", "connect IP:port", or empty.
+.It Va kern.cam.ctl.ha_link
+Reports the present state of the connection between HA cluster nodes:
+.Bl -tag -offset indent -compact
+.It 0
+not configured;
+.It 1
+configured but not established;
+.It 2
+established.
+.El
+.It Va kern.cam.ctl.ha_role
+Specifies the default role of this node:
+.Bl -tag -offset indent -compact
+.It 0
+primary;
+.It 1
+secondary.
+.El
+This role can be overridden on a per-LUN basis using the "ha_role" LUN option,
+so that one node is primary for some LUNs while the other is primary for others.
+A role change from primary to secondary for HA modes 0 and 2 closes the
+backends; the opposite change reopens them.
+If there is no primary node (both nodes are secondary, or the secondary node
+has no connection to the primary one), secondary nodes report the
+Transitioning state.
+A state with two primary nodes is illegal (a split-brain condition).
.It Va kern.cam.ctl.iscsi.debug
Verbosity level for log messages from the kernel part of iSCSI target.
Set to 0 to disable logging or 1 to warn about potential problems.
@@ -131,5 +182,7 @@ subsystem first appeared in
.Sh AUTHORS
The
.Nm
-subsystem was written by
-.An Kenneth Merry Aq ken@FreeBSD.org .
+subsystem was originally written by
+.An Kenneth Merry Aq Mt ken@FreeBSD.org .
+Later work was done by
+.An Alexander Motin Aq Mt mav@FreeBSD.org .
diff --git a/share/man/man4/ng_pppoe.4 b/share/man/man4/ng_pppoe.4
index a124d67..563673f 100644
--- a/share/man/man4/ng_pppoe.4
+++ b/share/man/man4/ng_pppoe.4
@@ -35,7 +35,7 @@
.\" $FreeBSD$
.\" $Whistle: ng_pppoe.8,v 1.1 1999/01/25 23:46:27 archie Exp $
.\"
-.Dd November 13, 2012
+.Dd September 15, 2015
.Dt NG_PPPOE 4
.Os
.Sh NAME
@@ -187,7 +187,7 @@ above messages, and reports the Access Concentrator Name.
The four commands above use a common data structure:
.Bd -literal -offset 4n
struct ngpppoe_sts {
- char hook[NG_HOOKSIZ]; /* hook associated with event session */
+ char hook[NG_HOOKSIZ];
};
.Ed
.Bl -tag -width 3n
@@ -244,6 +244,20 @@ hook, or when user wants to override this address with another one.
.Tn ASCII
form of this message is
.Qq Li setenaddr .
+.It Dv NGM_PPPOE_SETMAXP Pq Ic setmaxp
+Set the node PPP-Max-Payload value as described in RFC 4638.
+This message applies only to a client configuration.
+.Tn ASCII
+form of this message is
+.Qq Li setmaxp .
+.Pp
+Data structure returned to client is:
+.Bd -literal -offset 4n
+struct ngpppoe_maxp {
+ char hook[NG_HOOKSIZ];
+ uint16_t data;
+};
+.Ed
.El
.Sh SHUTDOWN
This node shuts down upon receipt of a
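
A hedged sketch of issuing the new control message from userland with
libnetgraph; the node path is hypothetical, and the argument is assumed to
be the 16-bit PPP-Max-Payload value itself:

#include <sys/types.h>
#include <stdint.h>
#include <err.h>
#include <netgraph.h>
#include <netgraph/ng_pppoe.h>

int
main(void)
{
	int cs, ds;
	uint16_t maxp = 1500;	/* value to offer, per RFC 4638 */

	if (NgMkSockNode(NULL, &cs, &ds) < 0)
		err(1, "NgMkSockNode");
	if (NgSendMsg(cs, "fxp0:", NGM_PPPOE_COOKIE,
	    NGM_PPPOE_SETMAXP, &maxp, sizeof(maxp)) < 0)
		err(1, "setmaxp");
	return (0);
}
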
diff --git a/share/man/man7/hier.7 b/share/man/man7/hier.7
index c24769a..f693041 100644
--- a/share/man/man7/hier.7
+++ b/share/man/man7/hier.7
@@ -32,7 +32,7 @@
.\" @(#)hier.7 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd January 14, 2015
+.Dd September 24, 2015
.Dt HIER 7
.Os
.Sh NAME
@@ -391,6 +391,8 @@ shared libraries for compatibility
.It Pa aout/
a.out backward compatibility libraries
.El
+.It Pa debug/
+standalone debug data for the base system libraries and binaries
.It Pa dtrace/
DTrace library scripts
.It Pa engines/
diff --git a/share/man/man9/VOP_ADVISE.9 b/share/man/man9/VOP_ADVISE.9
index 50cd860..7cc2916 100644
--- a/share/man/man9/VOP_ADVISE.9
+++ b/share/man/man9/VOP_ADVISE.9
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 3, 2013
+.Dd September 26, 2015
.Dt VOP_ADVISE 9
.Os
.Sh NAME
@@ -52,6 +52,9 @@ The vnode of the file.
The start of the range of file data.
.It Fa end
The end of the range of file data.
+A value of
+.Dv OFF_MAX
+indicates that the advice is to be applied up to the end of the file.
.It Fa advice
The type of operation to apply to the file data.
Possible values are:
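
For the OFF_MAX convention added above, a minimal kernel-side sketch
(assuming a suitably referenced vnode vp; not part of this change) would be:

    /*
     * Sketch: apply advice from `start' to the end of the file using
     * the OFF_MAX convention documented above.
     */
    #include <sys/param.h>
    #include <sys/limits.h>
    #include <sys/fcntl.h>
    #include <sys/vnode.h>

    static int
    advise_to_eof(struct vnode *vp, off_t start)
    {

            return (VOP_ADVISE(vp, start, OFF_MAX, POSIX_FADV_DONTNEED));
    }
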
diff --git a/share/man/man9/atomic.9 b/share/man/man9/atomic.9
index 727ef47..5939b9c 100644
--- a/share/man/man9/atomic.9
+++ b/share/man/man9/atomic.9
@@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd June 20, 2015
+.Dd August 14, 2015
.Dt ATOMIC 9
.Os
.Sh NAME
@@ -67,8 +67,8 @@
.Ft int
.Fn atomic_testandset_<type> "volatile <type> *p" "u_int v"
.Sh DESCRIPTION
-Each of the atomic operations is guaranteed to be atomic in the presence of
-interrupts.
+Each of the atomic operations is guaranteed to be atomic across multiple
+threads and in the presence of interrupts.
They can be used to implement reference counts or as building blocks for more
advanced synchronization primitives such as mutexes.
.Ss Types
@@ -108,71 +108,94 @@ unsigned 16-bit integer
.El
.Pp
These must not be used in MI code because the instructions to implement them
-efficiently may not be available.
-.Ss Memory Barriers
-Memory barriers are used to guarantee the order of data accesses in
-two ways.
-First, they specify hints to the compiler to not re-order or optimize the
-operations.
-Second, on architectures that do not guarantee ordered data accesses,
-special instructions or special variants of instructions are used to indicate
-to the processor that data accesses need to occur in a certain order.
-As a result, most of the atomic operations have three variants in order to
-include optional memory barriers.
-The first form just performs the operation without any explicit barriers.
-The second form uses a read memory barrier, and the third variant uses a write
-memory barrier.
-.Pp
-The second variant of each operation includes an
+efficiently might not be available.
+.Ss Acquire and Release Operations
+By default, a thread's accesses to different memory locations might not be
+performed in
+.Em program order ,
+that is, the order in which the accesses appear in the source code.
+To optimize the program's execution, both the compiler and processor might
+reorder the thread's accesses.
+However, both ensure that their reordering of the accesses is not visible to
+the thread.
+Otherwise, the traditional memory model that is expected by single-threaded
+programs would be violated.
+Nonetheless, other threads in a multithreaded program, such as the
+.Fx
+kernel, might observe the reordering.
+Moreover, in some cases, such as the implementation of synchronization between
+threads, arbitrary reordering might result in the incorrect execution of the
+program.
+To constrain the reordering that both the compiler and processor might perform
+on a thread's accesses, the thread should use atomic operations with
.Em acquire
-memory barrier.
-This barrier ensures that the effects of this operation are completed before the
-effects of any later data accesses.
-As a result, the operation is said to have acquire semantics as it acquires a
-pseudo-lock requiring further operations to wait until it has completed.
-To denote this, the suffix
+and
+.Em release
+semantics.
+.Pp
+Most of the atomic operations on memory have three variants.
+The first variant performs the operation without imposing any ordering
+constraints on memory accesses to other locations.
+The second variant has acquire semantics, and the third variant has release
+semantics.
+In effect, operations with acquire and release semantics establish one-way
+barriers to reordering.
+.Pp
+When an atomic operation has acquire semantics, the effects of the operation
+must have completed before any subsequent load or store (by program order) is
+performed.
+Conversely, acquire semantics do not require that prior loads or stores have
+completed before the atomic operation is performed.
+To denote acquire semantics, the suffix
.Dq Li _acq
is inserted into the function name immediately prior to the
.Dq Li _ Ns Aq Fa type
suffix.
-For example, to subtract two integers ensuring that any later writes will
-happen after the subtraction is performed, use
+For example, to subtract two integers ensuring that subsequent loads and
+stores happen after the subtraction is performed, use
.Fn atomic_subtract_acq_int .
.Pp
-The third variant of each operation includes a
-.Em release
-memory barrier.
-This ensures that all effects of all previous data accesses are completed
-before this operation takes place.
-As a result, the operation is said to have release semantics as it releases
-any pending data accesses to be completed before its operation is performed.
-To denote this, the suffix
+When an atomic operation has release semantics, the effects of all prior
+loads or stores (by program order) must have completed before the operation
+is performed.
+Conversely, release semantics do not require that the effects of the
+atomic operation must have completed before any subsequent load or store is
+performed.
+To denote release semantics, the suffix
.Dq Li _rel
is inserted into the function name immediately prior to the
.Dq Li _ Ns Aq Fa type
suffix.
-For example, to add two long integers ensuring that all previous
-writes will happen first, use
+For example, to add two long integers ensuring that all prior loads and
+stores happen before the addition, use
.Fn atomic_add_rel_long .
.Pp
-A practical example of using memory barriers is to ensure that data accesses
-that are protected by a lock are all performed while the lock is held.
-To achieve this, one would use a read barrier when acquiring the lock to
-guarantee that the lock is held before any protected operations are performed.
-Finally, one would use a write barrier when releasing the lock to ensure that
-all of the protected operations are completed before the lock is released.
+The one-way barriers provided by acquire and release operations allow the
+implementations of common synchronization primitives to express their
+ordering requirements without also imposing unnecessary ordering.
+For example, for a critical section guarded by a mutex, an acquire operation
+when the mutex is locked and a release operation when the mutex is unlocked
+will prevent any loads or stores from moving outside of the critical
+section.
+However, they will not prevent the compiler or processor from moving loads
+or stores into the critical section, which does not violate the semantics of
+a mutex.
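
A minimal sketch of that mutex pattern, assuming a bare spinlock word
(0 = unlocked, 1 = locked) rather than the real mtx(9) implementation:

    #include <sys/types.h>
    #include <machine/atomic.h>
    #include <machine/cpu.h>

    static volatile u_int lk;

    static void
    spin_lock(void)
    {

            /* Acquire: later loads and stores cannot move before this. */
            while (atomic_cmpset_acq_int(&lk, 0, 1) == 0)
                    cpu_spinwait();
    }

    static void
    spin_unlock(void)
    {

            /* Release: earlier loads and stores complete before this store. */
            atomic_store_rel_int(&lk, 0);
    }
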
.Ss Multiple Processors
-The current set of atomic operations do not necessarily guarantee atomicity
-across multiple processors.
-To guarantee atomicity across processors, not only does the individual
-operation need to be atomic on the processor performing the operation, but
-the result of the operation needs to be pushed out to stable storage and the
-caches of all other processors on the system need to invalidate any cache
-lines that include the affected memory region.
-On the
+In multiprocessor systems, the atomicity of the atomic operations on memory
+depends on support for cache coherence in the underlying architecture.
+In general, cache coherence on the default memory type,
+.Dv VM_MEMATTR_DEFAULT ,
+is guaranteed by all architectures that are supported by
+.Fx .
+For example, cache coherence is guaranteed on write-back memory by the
+.Tn amd64
+and
.Tn i386
-architecture, the cache coherency model requires that the hardware perform
-this task, thus the atomic operations are atomic across multiple processors.
+architectures.
+However, on some architectures, cache coherence might not be enabled on all
+memory types.
+To determine if cache coherence is enabled for a non-default memory type,
+consult the architecture's documentation.
On the
.Tn ia64
architecture, coherency is only guaranteed for pages that are configured to
diff --git a/share/man/man9/printf.9 b/share/man/man9/printf.9
index 571e7e6..9abb125 100644
--- a/share/man/man9/printf.9
+++ b/share/man/man9/printf.9
@@ -67,7 +67,8 @@ The
.Fn log
function sends the message to the kernel logging facility, using
the log level as indicated by
-.Fa pri .
+.Fa pri ,
+and to the console if no process is yet reading the log.
.Pp
Each of these related functions use the
.Fa fmt
diff --git a/share/mk/bsd.lib.mk b/share/mk/bsd.lib.mk
index bda3d47..8f426f4 100644
--- a/share/mk/bsd.lib.mk
+++ b/share/mk/bsd.lib.mk
@@ -258,7 +258,7 @@ ${LINTLIB}: ${LINTOBJS}
all: ${_LIBS}
-.if ${MK_MAN} != "no"
+.if ${MK_MAN} != "no" && !defined(LIBRARIES_ONLY)
all: _manpages
.endif
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index 23fa39b..f9ba30d 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -167,6 +167,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Size rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -202,29 +203,29 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_X86_64_64: /* S + A */
- addr = lookup(lf, symidx, 1);
+ error = lookup(lf, symidx, 1, &addr);
val = addr + addend;
- if (addr == 0)
+ if (error != 0)
return -1;
if (*where != val)
*where = val;
break;
case R_X86_64_PC32: /* S + A - P */
- addr = lookup(lf, symidx, 1);
+ error = lookup(lf, symidx, 1, &addr);
where32 = (Elf32_Addr *)where;
val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where);
- if (addr == 0)
+ if (error != 0)
return -1;
if (*where32 != val32)
*where32 = val32;
break;
case R_X86_64_32S: /* S + A sign extend */
- addr = lookup(lf, symidx, 1);
+ error = lookup(lf, symidx, 1, &addr);
val32 = (Elf32_Addr)(addr + addend);
where32 = (Elf32_Addr *)where;
- if (addr == 0)
+ if (error != 0)
return -1;
if (*where32 != val32)
*where32 = val32;
@@ -241,8 +242,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
case R_X86_64_GLOB_DAT: /* S */
case R_X86_64_JMP_SLOT: /* XXX need addend + offset */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
if (*where != addr)
*where = addr;
diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c
index 8ef9bd4..931a82b 100644
--- a/sys/arm/arm/elf_machdep.c
+++ b/sys/arm/arm/elf_machdep.c
@@ -132,6 +132,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Word rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -167,8 +168,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_ARM_ABS32:
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
*where += addr;
break;
@@ -183,8 +184,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_ARM_JUMP_SLOT:
- addr = lookup(lf, symidx, 1);
- if (addr) {
+ error = lookup(lf, symidx, 1, &addr);
+ if (error == 0) {
*where = addr;
return (0);
}
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index ed8eb4d..62eab67 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -233,16 +233,31 @@ static struct ada_quirk_entry ada_quirk_table[] =
/*quirks*/ADA_Q_4K
},
{
+ /* WDC Caviar Red Advanced Format (4k) drives */
+ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????CX*", "*" },
+ /*quirks*/ADA_Q_4K
+ },
+ {
/* WDC Caviar Green Advanced Format (4k) drives */
{ T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RS*", "*" },
/*quirks*/ADA_Q_4K
},
{
- /* WDC Caviar Green Advanced Format (4k) drives */
+ /* WDC Caviar Green/Red Advanced Format (4k) drives */
{ T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RX*", "*" },
/*quirks*/ADA_Q_4K
},
{
+ /* WDC Caviar Red Advanced Format (4k) drives */
+ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????CX*", "*" },
+ /*quirks*/ADA_Q_4K
+ },
+ {
+ /* WDC Caviar Black Advanced Format (4k) drives */
+ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????EX*", "*" },
+ /*quirks*/ADA_Q_4K
+ },
+ {
/* WDC Caviar Green Advanced Format (4k) drives */
{ T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????RS*", "*" },
/*quirks*/ADA_Q_4K
@@ -1797,6 +1812,16 @@ adadone(struct cam_periph *periph, union ccb *done_ccb)
TAILQ_INIT(&queue);
TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue);
+ /*
+ * Normally, the xpt_release_ccb() above would make sure
+ * that when we have more work to do, that work would
+ * get kicked off. However, we specifically keep
+ * trim_running set to 0 before the call above to allow
+ * other I/O to progress when many BIO_DELETE requests
+ * are pushed down. We set trim_running to 0 and call
+ * daschedule again so that we don't stall if there are
+ * no other I/Os pending apart from BIO_DELETEs.
+ */
softc->trim_running = 0;
adaschedule(periph);
cam_periph_unlock(periph);
diff --git a/sys/cam/ata/ata_xpt.c b/sys/cam/ata/ata_xpt.c
index a442ec1..3429bb29a6 100644
--- a/sys/cam/ata/ata_xpt.c
+++ b/sys/cam/ata/ata_xpt.c
@@ -1090,7 +1090,8 @@ notsata:
periph_qual = SID_QUAL(inq_buf);
- if (periph_qual != SID_QUAL_LU_CONNECTED)
+ if (periph_qual != SID_QUAL_LU_CONNECTED &&
+ periph_qual != SID_QUAL_LU_OFFLINE)
break;
/*
diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c
index 53454f8..625be21 100644
--- a/sys/cam/cam_periph.c
+++ b/sys/cam/cam_periph.c
@@ -716,16 +716,19 @@ camperiphfree(struct cam_periph *periph)
* buffers to map stuff in and out, we're limited to the buffer size.
*/
int
-cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
+cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
+ u_int maxmap)
{
int numbufs, i, j;
int flags[CAM_PERIPH_MAXMAPS];
u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
u_int32_t lengths[CAM_PERIPH_MAXMAPS];
u_int32_t dirs[CAM_PERIPH_MAXMAPS];
- /* Some controllers may not be able to handle more data. */
- size_t maxmap = DFLTPHYS;
+ if (maxmap == 0)
+ maxmap = DFLTPHYS; /* traditional default */
+ else if (maxmap > MAXPHYS)
+ maxmap = MAXPHYS; /* for safety */
switch(ccb->ccb_h.func_code) {
case XPT_DEV_MATCH:
if (ccb->cdm.match_buf_len == 0) {
diff --git a/sys/cam/cam_periph.h b/sys/cam/cam_periph.h
index ebcf1a4..e28d5b1 100644
--- a/sys/cam/cam_periph.h
+++ b/sys/cam/cam_periph.h
@@ -160,7 +160,8 @@ int cam_periph_hold(struct cam_periph *periph, int priority);
void cam_periph_unhold(struct cam_periph *periph);
void cam_periph_invalidate(struct cam_periph *periph);
int cam_periph_mapmem(union ccb *ccb,
- struct cam_periph_map_info *mapinfo);
+ struct cam_periph_map_info *mapinfo,
+ u_int maxmap);
void cam_periph_unmapmem(union ccb *ccb,
struct cam_periph_map_info *mapinfo);
union ccb *cam_periph_getccb(struct cam_periph *periph,
diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c
index 645ded0..ba0863a 100644
--- a/sys/cam/cam_xpt.c
+++ b/sys/cam/cam_xpt.c
@@ -93,6 +93,8 @@ struct xpt_task {
};
struct xpt_softc {
+ uint32_t xpt_generation;
+
/* number of high powered commands that can go through right now */
struct mtx xpt_highpower_lock;
STAILQ_HEAD(highpowerlist, cam_ed) highpowerq;
@@ -154,6 +156,8 @@ MTX_SYSINIT(xpt_topo_init, &xsoftc.xpt_topo_lock, "XPT topology lock", MTX_DEF);
TUNABLE_INT("kern.cam.boot_delay", &xsoftc.boot_delay);
SYSCTL_INT(_kern_cam, OID_AUTO, boot_delay, CTLFLAG_RDTUN,
&xsoftc.boot_delay, 0, "Bus registration wait time");
+SYSCTL_UINT(_kern_cam, OID_AUTO, xpt_generation, CTLFLAG_RD,
+ &xsoftc.xpt_generation, 0, "CAM peripheral generation count");
struct cam_doneq {
struct mtx_padalign cam_doneq_mtx;
@@ -536,7 +540,7 @@ xptdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *
* Map the pattern and match buffers into kernel
* virtual address space.
*/
- error = cam_periph_mapmem(inccb, &mapinfo);
+ error = cam_periph_mapmem(inccb, &mapinfo, MAXPHYS);
if (error) {
inccb->ccb_h.path = old_path;
@@ -981,6 +985,7 @@ xpt_add_periph(struct cam_periph *periph)
device->generation++;
SLIST_INSERT_HEAD(&device->periphs, periph, periph_links);
mtx_unlock(&device->target->bus->eb_mtx);
+ atomic_add_32(&xsoftc.xpt_generation, 1);
}
return (status);
@@ -997,6 +1002,7 @@ xpt_remove_periph(struct cam_periph *periph)
device->generation++;
SLIST_REMOVE(&device->periphs, periph, cam_periph, periph_links);
mtx_unlock(&device->target->bus->eb_mtx);
+ atomic_add_32(&xsoftc.xpt_generation, 1);
}
}
@@ -4262,8 +4268,10 @@ xpt_async(u_int32_t async_code, struct cam_path *path, void *async_arg)
}
memcpy(ccb->casync.async_arg_ptr, async_arg, size);
ccb->casync.async_arg_size = size;
- } else if (size < 0)
+ } else if (size < 0) {
+ ccb->casync.async_arg_ptr = async_arg;
ccb->casync.async_arg_size = size;
+ }
if (path->device != NULL && path->device->lun_id != CAM_LUN_WILDCARD)
xpt_freeze_devq(path, 1);
else
@@ -4521,7 +4529,7 @@ xpt_get_ccb_nowait(struct cam_periph *periph)
{
union ccb *new_ccb;
- new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_NOWAIT);
+ new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_NOWAIT);
if (new_ccb == NULL)
return (NULL);
periph->periph_allocated++;
@@ -4535,7 +4543,7 @@ xpt_get_ccb(struct cam_periph *periph)
union ccb *new_ccb;
cam_periph_unlock(periph);
- new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_WAITOK);
+ new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_WAITOK);
cam_periph_lock(periph);
periph->periph_allocated++;
cam_ccbq_take_opening(&periph->path->device->ccbq);
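
The new read-only kern.cam.xpt_generation counter above is bumped by
xpt_add_periph() and xpt_remove_periph(), so userland can cheaply detect
peripheral arrivals and departures. A sketch (not part of this change):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    static u_int
    xpt_gen(void)
    {
            u_int gen;
            size_t len = sizeof(gen);

            if (sysctlbyname("kern.cam.xpt_generation", &gen, &len,
                NULL, 0) == -1)
                    return (0);
            return (gen);
    }

    int
    main(void)
    {
            u_int before = xpt_gen();

            /* ... trigger or wait for a bus rescan here ... */
            if (xpt_gen() != before)
                    printf("CAM peripheral list changed\n");
            return (0);
    }
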
diff --git a/sys/cam/ctl/README.ctl.txt b/sys/cam/ctl/README.ctl.txt
index a6de201..89030e2 100644
--- a/sys/cam/ctl/README.ctl.txt
+++ b/sys/cam/ctl/README.ctl.txt
@@ -19,9 +19,9 @@ Userland Commands
Introduction:
============
-CTL is a disk and processor device emulation subsystem originally written
-for Copan Systems under Linux starting in 2003. It has been shipping in
-Copan (now SGI) products since 2005.
+CTL is a disk, processor and cdrom device emulation subsystem originally
+written for Copan Systems under Linux starting in 2003. It has been
+shipping in Copan (now SGI) products since 2005.
It was ported to FreeBSD in 2008, and thanks to an agreement between SGI
(who acquired Copan's assets in 2010) and Spectra Logic in 2010, CTL is
@@ -31,7 +31,7 @@ that Spectra would work to get CTL into the FreeBSD tree.
Features:
========
- - Disk and processor device emulation.
+ - Disk, processor and cdrom device emulation.
- Tagged queueing
- SCSI task attribute support (ordered, head of queue, simple tags)
- SCSI implicit command ordering support. (e.g. if a read follows a mode
@@ -40,28 +40,24 @@ Features:
- Support for multiple ports
- Support for multiple simultaneous initiators
- Support for multiple simultaneous backing stores
+ - Support for VMware VAAI: COMPARE AND WRITE, XCOPY, WRITE SAME and
+ UNMAP commands
+ - Support for Microsoft ODX: POPULATE TOKEN/WRITE USING TOKEN, WRITE SAME
+ and UNMAP commands
- Persistent reservation support
- Mode sense/select support
- Error injection support
- - High Availability support (1)
+ - High Availability clustering support with ALUA
- All I/O handled in-kernel, no userland context switch overhead.
-(1) HA Support is just an API stub, and needs much more to be fully
- functional. See the to-do list below.
-
Configuring and Running CTL:
===========================
- - After applying the CTL patchset to your tree, build world and install it
- on your target system.
-
- - Add 'device ctl' to your kernel configuration file.
+ - Add 'device ctl' to your kernel configuration file or load the module.
- If you're running with a 8Gb or 4Gb Qlogic FC board, add
- 'options ISP_TARGET_MODE' to your kernel config file. Keep in mind that
- the isp(4) driver can run in target or initiator mode, but not both on
- the same machine. 'device ispfw' or loading the ispfw module is also
- recommended.
+ 'options ISP_TARGET_MODE' to your kernel config file. 'device ispfw' or
+ loading the ispfw module is also recommended.
- Rebuild and install a new kernel.
@@ -245,27 +241,6 @@ To Do List:
another data structure in the stack, more memory allocations, etc. This
will also require changes to the CAM CCB structure to support CTL.
- - Full-featured High Availability support. The HA API that is in ctl_ha.h
- is essentially a renamed version of Copan's HA API. There is no
- substance to it, but it remains in CTL to show what needs to be done to
- implement active/active HA from a CTL standpoint. The things that would
- need to be done include:
- - A kernel level software API for message passing as well as DMA
- between at least two nodes.
- - Hardware support and drivers for inter-node communication. This
- could be as simples as ethernet hardware and drivers.
- - A "supervisor", or startup framework to control and coordinate
- HA startup, failover (going from active/active to single mode),
- and failback (going from single mode to active/active).
- - HA support in other components of the stack. The goal behind HA
- is that one node can fail and another node can seamlessly take
- over handling I/O requests. This requires support from pretty
- much every component in the storage stack, from top to bottom.
- CTL is one piece of it, but you also need support in the RAID
- stack/filesystem/backing store. You also need full configuration
- mirroring, and all peer nodes need to be able to talk to the
- underlying storage hardware.
-
Code Roadmap:
============
@@ -318,7 +293,6 @@ These files define the basic CTL backend API. The comments in the header
explain the API.
ctl_backend_block.c
-ctl_backend_block.h:
-------------------
The block and file backend. This allows for using a disk or a file as the
@@ -366,21 +340,11 @@ This is a CTL frontend port that is also a CAM SIM. The idea is that this
frontend allows for using CTL without any target-capable hardware. So any
LUNs you create in CTL are visible via this port.
-
-ctl_frontend_internal.c
-ctl_frontend_internal.h:
------------------------
-
-This is a frontend port written for Copan to do some system-specific tasks
-that required sending commands into CTL from inside the kernel. This isn't
-entirely relevant to FreeBSD in general, but can perhaps be repurposed or
-removed later.
-
+ctl_ha.c:
ctl_ha.h:
--------
-This is a stubbed-out High Availability API. See the comments in the
-header and the description of what is needed as far as HA support above.
+This is a High Availability API and TCP-based interlink implementation.
ctl_io.h:
--------
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
index e07e2d0..ea04eef 100644
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 2003-2009 Silicon Graphics International Corp.
* Copyright (c) 2012 The FreeBSD Foundation
+ * Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Portions of this software were developed by Edward Tomasz Napierala
@@ -68,11 +69,11 @@ __FBSDID("$FreeBSD$");
#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_cd.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
@@ -85,25 +86,6 @@ __FBSDID("$FreeBSD$");
struct ctl_softc *control_softc = NULL;
/*
- * Size and alignment macros needed for Copan-specific HA hardware. These
- * can go away when the HA code is re-written, and uses busdma for any
- * hardware.
- */
-#define CTL_ALIGN_8B(target, source, type) \
- if (((uint32_t)source & 0x7) != 0) \
- target = (type)(source + (0x8 - ((uint32_t)source & 0x7)));\
- else \
- target = (type)source;
-
-#define CTL_SIZE_8B(target, size) \
- if ((size & 0x7) != 0) \
- target = size + (0x8 - (size & 0x7)); \
- else \
- target = size;
-
-#define CTL_ALIGN_8B_MARGIN 16
-
-/*
* Template mode pages.
*/
@@ -280,6 +262,26 @@ const static struct scsi_control_page control_page_changeable = {
/*extended_selftest_completion_time*/{0, 0}
};
+#define CTL_CEM_LEN (sizeof(struct scsi_control_ext_page) - 4)
+
+const static struct scsi_control_ext_page control_ext_page_default = {
+ /*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF,
+ /*subpage_code*/0x01,
+ /*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN},
+ /*flags*/0,
+ /*prio*/0,
+ /*max_sense*/0
+};
+
+const static struct scsi_control_ext_page control_ext_page_changeable = {
+ /*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF,
+ /*subpage_code*/0x01,
+ /*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN},
+ /*flags*/0,
+ /*prio*/0,
+ /*max_sense*/0
+};
+
const static struct scsi_info_exceptions_page ie_page_default = {
/*page_code*/SMS_INFO_EXCEPTIONS_PAGE,
/*page_length*/sizeof(struct scsi_info_exceptions_page) - 2,
@@ -352,11 +354,51 @@ const static struct ctl_logical_block_provisioning_page lbp_page_changeable = {{
}
};
-/*
- * XXX KDM move these into the softc.
- */
-static int rcv_sync_msg;
-static uint8_t ctl_pause_rtr;
+const static struct scsi_cddvd_capabilities_page cddvd_page_default = {
+ /*page_code*/SMS_CDDVD_CAPS_PAGE,
+ /*page_length*/sizeof(struct scsi_cddvd_capabilities_page) - 2,
+ /*caps1*/0x3f,
+ /*caps2*/0x00,
+ /*caps3*/0xf0,
+ /*caps4*/0x00,
+ /*caps5*/0x29,
+ /*caps6*/0x00,
+ /*obsolete*/{0, 0},
+ /*nvol_levels*/{0, 0},
+ /*buffer_size*/{8, 0},
+ /*obsolete2*/{0, 0},
+ /*reserved*/0,
+ /*digital*/0,
+ /*obsolete3*/0,
+ /*copy_management*/0,
+ /*reserved2*/0,
+ /*rotation_control*/0,
+ /*cur_write_speed*/0,
+ /*num_speed_descr*/0,
+};
+
+const static struct scsi_cddvd_capabilities_page cddvd_page_changeable = {
+ /*page_code*/SMS_CDDVD_CAPS_PAGE,
+ /*page_length*/sizeof(struct scsi_cddvd_capabilities_page) - 2,
+ /*caps1*/0,
+ /*caps2*/0,
+ /*caps3*/0,
+ /*caps4*/0,
+ /*caps5*/0,
+ /*caps6*/0,
+ /*obsolete*/{0, 0},
+ /*nvol_levels*/{0, 0},
+ /*buffer_size*/{0, 0},
+ /*obsolete2*/{0, 0},
+ /*reserved*/0,
+ /*digital*/0,
+ /*obsolete3*/0,
+ /*copy_management*/0,
+ /*reserved2*/0,
+ /*rotation_control*/0,
+ /*cur_write_speed*/0,
+ /*num_speed_descr*/0,
+};
SYSCTL_NODE(_kern_cam, OID_AUTO, ctl, CTLFLAG_RD, 0, "CAM Target Layer");
static int worker_threads = -1;
@@ -379,23 +421,13 @@ SYSCTL_INT(_kern_cam_ctl, OID_AUTO, debug, CTLFLAG_RWTUN,
static void ctl_isc_event_handler(ctl_ha_channel chanel, ctl_ha_event event,
int param);
static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest);
+static void ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest);
static int ctl_init(void);
void ctl_shutdown(void);
static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td);
static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td);
-static void ctl_ioctl_online(void *arg);
-static void ctl_ioctl_offline(void *arg);
-static int ctl_ioctl_lun_enable(void *arg, int lun_id);
-static int ctl_ioctl_lun_disable(void *arg, int lun_id);
-static int ctl_ioctl_do_datamove(struct ctl_scsiio *ctsio);
static int ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio);
-static int ctl_ioctl_submit_wait(union ctl_io *io);
-static void ctl_ioctl_datamove(union ctl_io *io);
-static void ctl_ioctl_done(union ctl_io *io);
-static void ctl_ioctl_hard_startstop_callback(void *arg,
- struct cfi_metatask *metatask);
-static void ctl_ioctl_bbrread_callback(void *arg,struct cfi_metatask *metatask);
-static int ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
+static void ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
struct ctl_ooa *ooa_hdr,
struct ctl_ooa_entry *kern_entries);
static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
@@ -405,10 +437,6 @@ static int ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *lun,
static int ctl_free_lun(struct ctl_lun *lun);
static void ctl_create_lun(struct ctl_be_lun *be_lun);
static struct ctl_port * ctl_io_port(struct ctl_io_hdr *io_hdr);
-/**
-static void ctl_failover_change_pages(struct ctl_softc *softc,
- struct ctl_scsiio *ctsio, int master);
-**/
static int ctl_do_mode_select(union ctl_io *io);
static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun,
@@ -445,10 +473,7 @@ static int ctl_check_blocked(struct ctl_lun *lun);
static int ctl_scsiio_lun_check(struct ctl_lun *lun,
const struct ctl_cmd_entry *entry,
struct ctl_scsiio *ctsio);
-//static int ctl_check_rtr(union ctl_io *pending_io, struct ctl_softc *softc);
-static void ctl_failover(void);
-static void ctl_clear_ua(struct ctl_softc *ctl_softc, uint32_t initidx,
- ctl_ua_type ua_type);
+static void ctl_failover_lun(union ctl_io *io);
static int ctl_scsiio_precheck(struct ctl_softc *ctl_softc,
struct ctl_scsiio *ctsio);
static int ctl_scsiio(struct ctl_scsiio *ctsio);
@@ -456,11 +481,14 @@ static int ctl_scsiio(struct ctl_scsiio *ctsio);
static int ctl_bus_reset(struct ctl_softc *ctl_softc, union ctl_io *io);
static int ctl_target_reset(struct ctl_softc *ctl_softc, union ctl_io *io,
ctl_ua_type ua_type);
-static int ctl_lun_reset(struct ctl_lun *lun, union ctl_io *io,
+static int ctl_do_lun_reset(struct ctl_lun *lun, union ctl_io *io,
ctl_ua_type ua_type);
+static int ctl_lun_reset(struct ctl_softc *ctl_softc, union ctl_io *io);
static int ctl_abort_task(union ctl_io *io);
static int ctl_abort_task_set(union ctl_io *io);
+static int ctl_query_task(union ctl_io *io, int task_set);
static int ctl_i_t_nexus_reset(union ctl_io *io);
+static int ctl_query_async_event(union ctl_io *io);
static void ctl_run_task(union ctl_io *io);
#ifdef CTL_IO_DELAY
static void ctl_datamove_timer_wakeup(void *arg);
@@ -478,7 +506,7 @@ static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
ctl_ha_dt_cb callback);
static void ctl_datamove_remote_read(union ctl_io *io);
static void ctl_datamove_remote(union ctl_io *io);
-static int ctl_process_done(union ctl_io *io);
+static void ctl_process_done(union ctl_io *io);
static void ctl_lun_thread(void *arg);
static void ctl_thresh_thread(void *arg);
static void ctl_work_thread(void *arg);
@@ -493,6 +521,11 @@ static const struct ctl_cmd_entry *
static int ctl_cmd_applicable(uint8_t lun_type,
const struct ctl_cmd_entry *entry);
+static uint64_t ctl_get_prkey(struct ctl_lun *lun, uint32_t residx);
+static void ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx);
+static void ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx);
+static void ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key);
+
/*
* Load the serialization table. This isn't very pretty, but is probably
* the easiest way to do it.
@@ -513,7 +546,6 @@ static struct cdevsw ctl_cdevsw = {
MALLOC_DEFINE(M_CTL, "ctlmem", "Memory used for CTL");
-MALLOC_DEFINE(M_CTLIO, "ctlio", "Memory used for CTL requests");
static int ctl_module_event_handler(module_t, int /*modeventtype_t*/, void *);
@@ -526,12 +558,157 @@ static moduledata_t ctl_moduledata = {
DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD);
MODULE_VERSION(ctl, 1);
-static struct ctl_frontend ioctl_frontend =
+static struct ctl_frontend ha_frontend =
{
- .name = "ioctl",
+ .name = "ha",
};
static void
+ctl_ha_datamove(union ctl_io *io)
+{
+ struct ctl_lun *lun;
+ struct ctl_sg_entry *sgl;
+ union ctl_ha_msg msg;
+ uint32_t sg_entries_sent;
+ int do_sg_copy, i, j;
+
+ lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ memset(&msg.dt, 0, sizeof(msg.dt));
+ msg.hdr.msg_type = CTL_MSG_DATAMOVE;
+ msg.hdr.original_sc = io->io_hdr.original_sc;
+ msg.hdr.serializing_sc = io;
+ msg.hdr.nexus = io->io_hdr.nexus;
+ msg.hdr.status = io->io_hdr.status;
+ msg.dt.flags = io->io_hdr.flags;
+
+ /*
+	 * We convert everything into an S/G list here. We can't
+ * pass by reference, only by value between controllers.
+ * So we can't pass a pointer to the S/G list, only as many
+ * S/G entries as we can fit in here. If it's possible for
+ * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries,
+ * then we need to break this up into multiple transfers.
+ */
+ if (io->scsiio.kern_sg_entries == 0) {
+ msg.dt.kern_sg_entries = 1;
+#if 0
+ if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
+ msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr;
+ } else {
+ /* XXX KDM use busdma here! */
+ msg.dt.sg_list[0].addr =
+ (void *)vtophys(io->scsiio.kern_data_ptr);
+ }
+#else
+ KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0,
+ ("HA does not support BUS_ADDR"));
+ msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr;
+#endif
+ msg.dt.sg_list[0].len = io->scsiio.kern_data_len;
+ do_sg_copy = 0;
+ } else {
+ msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries;
+ do_sg_copy = 1;
+ }
+
+ msg.dt.kern_data_len = io->scsiio.kern_data_len;
+ msg.dt.kern_total_len = io->scsiio.kern_total_len;
+ msg.dt.kern_data_resid = io->scsiio.kern_data_resid;
+ msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset;
+ msg.dt.sg_sequence = 0;
+
+ /*
+ * Loop until we've sent all of the S/G entries. On the
+ * other end, we'll recompose these S/G entries into one
+ * contiguous list before processing.
+ */
+ for (sg_entries_sent = 0; sg_entries_sent < msg.dt.kern_sg_entries;
+ msg.dt.sg_sequence++) {
+ msg.dt.cur_sg_entries = MIN((sizeof(msg.dt.sg_list) /
+ sizeof(msg.dt.sg_list[0])),
+ msg.dt.kern_sg_entries - sg_entries_sent);
+ if (do_sg_copy != 0) {
+ sgl = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
+ for (i = sg_entries_sent, j = 0;
+ i < msg.dt.cur_sg_entries; i++, j++) {
+#if 0
+ if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
+ msg.dt.sg_list[j].addr = sgl[i].addr;
+ } else {
+ /* XXX KDM use busdma here! */
+ msg.dt.sg_list[j].addr =
+ (void *)vtophys(sgl[i].addr);
+ }
+#else
+ KASSERT((io->io_hdr.flags &
+ CTL_FLAG_BUS_ADDR) == 0,
+ ("HA does not support BUS_ADDR"));
+ msg.dt.sg_list[j].addr = sgl[i].addr;
+#endif
+ msg.dt.sg_list[j].len = sgl[i].len;
+ }
+ }
+
+ sg_entries_sent += msg.dt.cur_sg_entries;
+ msg.dt.sg_last = (sg_entries_sent >= msg.dt.kern_sg_entries);
+ if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
+ sizeof(msg.dt) - sizeof(msg.dt.sg_list) +
+ sizeof(struct ctl_sg_entry) * msg.dt.cur_sg_entries,
+ M_WAITOK) > CTL_HA_STATUS_SUCCESS) {
+ io->io_hdr.port_status = 31341;
+ io->scsiio.be_move_done(io);
+ return;
+ }
+ msg.dt.sent_sg_entries = sg_entries_sent;
+ }
+
+ /*
+	 * Officially hand over the request from us to the peer.
+	 * If failover has just happened, then we must return an error.
+	 * If failover happens just after, then it is not our problem.
+ */
+ if (lun)
+ mtx_lock(&lun->lun_lock);
+ if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
+ if (lun)
+ mtx_unlock(&lun->lun_lock);
+ io->io_hdr.port_status = 31342;
+ io->scsiio.be_move_done(io);
+ return;
+ }
+ io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
+ io->io_hdr.flags |= CTL_FLAG_DMA_INPROG;
+ if (lun)
+ mtx_unlock(&lun->lun_lock);
+}
+
+static void
+ctl_ha_done(union ctl_io *io)
+{
+ union ctl_ha_msg msg;
+
+ if (io->io_hdr.io_type == CTL_IO_SCSI) {
+ memset(&msg, 0, sizeof(msg));
+ msg.hdr.msg_type = CTL_MSG_FINISH_IO;
+ msg.hdr.original_sc = io->io_hdr.original_sc;
+ msg.hdr.nexus = io->io_hdr.nexus;
+ msg.hdr.status = io->io_hdr.status;
+ msg.scsi.scsi_status = io->scsiio.scsi_status;
+ msg.scsi.tag_num = io->scsiio.tag_num;
+ msg.scsi.tag_type = io->scsiio.tag_type;
+ msg.scsi.sense_len = io->scsiio.sense_len;
+ msg.scsi.sense_residual = io->scsiio.sense_residual;
+ msg.scsi.residual = io->scsiio.residual;
+ memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data,
+ io->scsiio.sense_len);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
+ sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) +
+ msg.scsi.sense_len, M_WAITOK);
+ }
+ ctl_free_io(io);
+}
+
+static void
ctl_isc_handler_finish_xfer(struct ctl_softc *ctl_softc,
union ctl_ha_msg *msg_info)
{
@@ -552,9 +729,7 @@ ctl_isc_handler_finish_xfer(struct ctl_softc *ctl_softc,
ctsio->sense_residual = msg_info->scsi.sense_residual;
ctsio->residual = msg_info->scsi.residual;
memcpy(&ctsio->sense_data, &msg_info->scsi.sense_data,
- sizeof(ctsio->sense_data));
- memcpy(&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
- &msg_info->scsi.lbalen, sizeof(msg_info->scsi.lbalen));
+ msg_info->scsi.sense_len);
ctl_enqueue_isc((union ctl_io *)ctsio);
}
@@ -571,39 +746,575 @@ ctl_isc_handler_finish_ser_only(struct ctl_softc *ctl_softc,
}
ctsio = &msg_info->hdr.serializing_sc->scsiio;
-#if 0
- /*
- * Attempt to catch the situation where an I/O has
- * been freed, and we're using it again.
- */
- if (ctsio->io_hdr.io_type == 0xff) {
- union ctl_io *tmp_io;
- tmp_io = (union ctl_io *)ctsio;
- printf("%s: %p use after free!\n", __func__,
- ctsio);
- printf("%s: type %d msg %d cdb %x iptl: "
- "%d:%d:%d:%d tag 0x%04x "
- "flag %#x status %x\n",
- __func__,
- tmp_io->io_hdr.io_type,
- tmp_io->io_hdr.msg_type,
- tmp_io->scsiio.cdb[0],
- tmp_io->io_hdr.nexus.initid.id,
- tmp_io->io_hdr.nexus.targ_port,
- tmp_io->io_hdr.nexus.targ_target.id,
- tmp_io->io_hdr.nexus.targ_lun,
- (tmp_io->io_hdr.io_type ==
- CTL_IO_TASK) ?
- tmp_io->taskio.tag_num :
- tmp_io->scsiio.tag_num,
- tmp_io->io_hdr.flags,
- tmp_io->io_hdr.status);
- }
-#endif
ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO;
ctl_enqueue_isc((union ctl_io *)ctsio);
}
+void
+ctl_isc_announce_lun(struct ctl_lun *lun)
+{
+ struct ctl_softc *softc = lun->ctl_softc;
+ union ctl_ha_msg *msg;
+ struct ctl_ha_msg_lun_pr_key pr_key;
+ int i, k;
+
+ if (softc->ha_link != CTL_HA_LINK_ONLINE)
+ return;
+ mtx_lock(&lun->lun_lock);
+ i = sizeof(msg->lun);
+ if (lun->lun_devid)
+ i += lun->lun_devid->len;
+ i += sizeof(pr_key) * lun->pr_key_count;
+alloc:
+ mtx_unlock(&lun->lun_lock);
+ msg = malloc(i, M_CTL, M_WAITOK);
+ mtx_lock(&lun->lun_lock);
+ k = sizeof(msg->lun);
+ if (lun->lun_devid)
+ k += lun->lun_devid->len;
+ k += sizeof(pr_key) * lun->pr_key_count;
+ if (i < k) {
+ free(msg, M_CTL);
+ i = k;
+ goto alloc;
+ }
+ bzero(&msg->lun, sizeof(msg->lun));
+ msg->hdr.msg_type = CTL_MSG_LUN_SYNC;
+ msg->hdr.nexus.targ_lun = lun->lun;
+ msg->hdr.nexus.targ_mapped_lun = lun->lun;
+ msg->lun.flags = lun->flags;
+ msg->lun.pr_generation = lun->pr_generation;
+ msg->lun.pr_res_idx = lun->pr_res_idx;
+ msg->lun.pr_res_type = lun->pr_res_type;
+ msg->lun.pr_key_count = lun->pr_key_count;
+ i = 0;
+ if (lun->lun_devid) {
+ msg->lun.lun_devid_len = lun->lun_devid->len;
+ memcpy(&msg->lun.data[i], lun->lun_devid->data,
+ msg->lun.lun_devid_len);
+ i += msg->lun.lun_devid_len;
+ }
+ for (k = 0; k < CTL_MAX_INITIATORS; k++) {
+ if ((pr_key.pr_key = ctl_get_prkey(lun, k)) == 0)
+ continue;
+ pr_key.pr_iid = k;
+ memcpy(&msg->lun.data[i], &pr_key, sizeof(pr_key));
+ i += sizeof(pr_key);
+ }
+ mtx_unlock(&lun->lun_lock);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i,
+ M_WAITOK);
+ free(msg, M_CTL);
+
+ if (lun->flags & CTL_LUN_PRIMARY_SC) {
+ for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
+ ctl_isc_announce_mode(lun, -1,
+ lun->mode_pages.index[i].page_code & SMPH_PC_MASK,
+ lun->mode_pages.index[i].subpage);
+ }
+ }
+}
+
+void
+ctl_isc_announce_port(struct ctl_port *port)
+{
+ struct ctl_softc *softc = port->ctl_softc;
+ union ctl_ha_msg *msg;
+ int i;
+
+ if (port->targ_port < softc->port_min ||
+ port->targ_port >= softc->port_max ||
+ softc->ha_link != CTL_HA_LINK_ONLINE)
+ return;
+ i = sizeof(msg->port) + strlen(port->port_name) + 1;
+ if (port->lun_map)
+ i += sizeof(uint32_t) * CTL_MAX_LUNS;
+ if (port->port_devid)
+ i += port->port_devid->len;
+ if (port->target_devid)
+ i += port->target_devid->len;
+ if (port->init_devid)
+ i += port->init_devid->len;
+ msg = malloc(i, M_CTL, M_WAITOK);
+ bzero(&msg->port, sizeof(msg->port));
+ msg->hdr.msg_type = CTL_MSG_PORT_SYNC;
+ msg->hdr.nexus.targ_port = port->targ_port;
+ msg->port.port_type = port->port_type;
+ msg->port.physical_port = port->physical_port;
+ msg->port.virtual_port = port->virtual_port;
+ msg->port.status = port->status;
+ i = 0;
+ msg->port.name_len = sprintf(&msg->port.data[i],
+ "%d:%s", softc->ha_id, port->port_name) + 1;
+ i += msg->port.name_len;
+ if (port->lun_map) {
+ msg->port.lun_map_len = sizeof(uint32_t) * CTL_MAX_LUNS;
+ memcpy(&msg->port.data[i], port->lun_map,
+ msg->port.lun_map_len);
+ i += msg->port.lun_map_len;
+ }
+ if (port->port_devid) {
+ msg->port.port_devid_len = port->port_devid->len;
+ memcpy(&msg->port.data[i], port->port_devid->data,
+ msg->port.port_devid_len);
+ i += msg->port.port_devid_len;
+ }
+ if (port->target_devid) {
+ msg->port.target_devid_len = port->target_devid->len;
+ memcpy(&msg->port.data[i], port->target_devid->data,
+ msg->port.target_devid_len);
+ i += msg->port.target_devid_len;
+ }
+ if (port->init_devid) {
+ msg->port.init_devid_len = port->init_devid->len;
+ memcpy(&msg->port.data[i], port->init_devid->data,
+ msg->port.init_devid_len);
+ i += msg->port.init_devid_len;
+ }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i,
+ M_WAITOK);
+ free(msg, M_CTL);
+}
+
+void
+ctl_isc_announce_iid(struct ctl_port *port, int iid)
+{
+ struct ctl_softc *softc = port->ctl_softc;
+ union ctl_ha_msg *msg;
+ int i, l;
+
+ if (port->targ_port < softc->port_min ||
+ port->targ_port >= softc->port_max ||
+ softc->ha_link != CTL_HA_LINK_ONLINE)
+ return;
+ mtx_lock(&softc->ctl_lock);
+ i = sizeof(msg->iid);
+ l = 0;
+ if (port->wwpn_iid[iid].name)
+ l = strlen(port->wwpn_iid[iid].name) + 1;
+ i += l;
+ msg = malloc(i, M_CTL, M_NOWAIT);
+ if (msg == NULL) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
+ bzero(&msg->iid, sizeof(msg->iid));
+ msg->hdr.msg_type = CTL_MSG_IID_SYNC;
+ msg->hdr.nexus.targ_port = port->targ_port;
+ msg->hdr.nexus.initid = iid;
+ msg->iid.in_use = port->wwpn_iid[iid].in_use;
+ msg->iid.name_len = l;
+ msg->iid.wwpn = port->wwpn_iid[iid].wwpn;
+ if (port->wwpn_iid[iid].name)
+ strlcpy(msg->iid.data, port->wwpn_iid[iid].name, l);
+ mtx_unlock(&softc->ctl_lock);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->iid, i, M_NOWAIT);
+ free(msg, M_CTL);
+}
+
+void
+ctl_isc_announce_mode(struct ctl_lun *lun, uint32_t initidx,
+ uint8_t page, uint8_t subpage)
+{
+ struct ctl_softc *softc = lun->ctl_softc;
+ union ctl_ha_msg msg;
+ int i;
+
+ if (softc->ha_link != CTL_HA_LINK_ONLINE)
+ return;
+ for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
+ if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) ==
+ page && lun->mode_pages.index[i].subpage == subpage)
+ break;
+ }
+ if (i == CTL_NUM_MODE_PAGES)
+ return;
+ bzero(&msg.mode, sizeof(msg.mode));
+ msg.hdr.msg_type = CTL_MSG_MODE_SYNC;
+ msg.hdr.nexus.targ_port = initidx / CTL_MAX_INIT_PER_PORT;
+ msg.hdr.nexus.initid = initidx % CTL_MAX_INIT_PER_PORT;
+ msg.hdr.nexus.targ_lun = lun->lun;
+ msg.hdr.nexus.targ_mapped_lun = lun->lun;
+ msg.mode.page_code = page;
+ msg.mode.subpage = subpage;
+ msg.mode.page_len = lun->mode_pages.index[i].page_len;
+ memcpy(msg.mode.data, lun->mode_pages.index[i].page_data,
+ msg.mode.page_len);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg.mode, sizeof(msg.mode),
+ M_WAITOK);
+}
+
+static void
+ctl_isc_ha_link_up(struct ctl_softc *softc)
+{
+ struct ctl_port *port;
+ struct ctl_lun *lun;
+ union ctl_ha_msg msg;
+ int i;
+
+	/* Announce this node's parameters to the peer for validation. */
+ msg.login.msg_type = CTL_MSG_LOGIN;
+ msg.login.version = CTL_HA_VERSION;
+ msg.login.ha_mode = softc->ha_mode;
+ msg.login.ha_id = softc->ha_id;
+ msg.login.max_luns = CTL_MAX_LUNS;
+ msg.login.max_ports = CTL_MAX_PORTS;
+ msg.login.max_init_per_port = CTL_MAX_INIT_PER_PORT;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg.login, sizeof(msg.login),
+ M_WAITOK);
+
+ STAILQ_FOREACH(port, &softc->port_list, links) {
+ ctl_isc_announce_port(port);
+ for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
+ if (port->wwpn_iid[i].in_use)
+ ctl_isc_announce_iid(port, i);
+ }
+ }
+ STAILQ_FOREACH(lun, &softc->lun_list, links)
+ ctl_isc_announce_lun(lun);
+}
+
+static void
+ctl_isc_ha_link_down(struct ctl_softc *softc)
+{
+ struct ctl_port *port;
+ struct ctl_lun *lun;
+ union ctl_io *io;
+ int i;
+
+ mtx_lock(&softc->ctl_lock);
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ mtx_lock(&lun->lun_lock);
+ if (lun->flags & CTL_LUN_PEER_SC_PRIMARY) {
+ lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ }
+ mtx_unlock(&lun->lun_lock);
+
+ mtx_unlock(&softc->ctl_lock);
+ io = ctl_alloc_io(softc->othersc_pool);
+ mtx_lock(&softc->ctl_lock);
+ ctl_zero_io(io);
+ io->io_hdr.msg_type = CTL_MSG_FAILOVER;
+ io->io_hdr.nexus.targ_mapped_lun = lun->lun;
+ ctl_enqueue_isc(io);
+ }
+
+ STAILQ_FOREACH(port, &softc->port_list, links) {
+ if (port->targ_port >= softc->port_min &&
+ port->targ_port < softc->port_max)
+ continue;
+ port->status &= ~CTL_PORT_STATUS_ONLINE;
+ for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
+ port->wwpn_iid[i].in_use = 0;
+ free(port->wwpn_iid[i].name, M_CTL);
+ port->wwpn_iid[i].name = NULL;
+ }
+ }
+ mtx_unlock(&softc->ctl_lock);
+}
+
+static void
+ctl_isc_ua(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+ struct ctl_lun *lun;
+ uint32_t iid = ctl_get_initindex(&msg->hdr.nexus);
+
+ mtx_lock(&softc->ctl_lock);
+ if (msg->hdr.nexus.targ_lun < CTL_MAX_LUNS &&
+ (lun = softc->ctl_luns[msg->hdr.nexus.targ_mapped_lun]) != NULL) {
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (msg->ua.ua_type == CTL_UA_THIN_PROV_THRES &&
+ msg->ua.ua_set)
+ memcpy(lun->ua_tpt_info, msg->ua.ua_info, 8);
+ if (msg->ua.ua_all) {
+ if (msg->ua.ua_set)
+ ctl_est_ua_all(lun, iid, msg->ua.ua_type);
+ else
+ ctl_clr_ua_all(lun, iid, msg->ua.ua_type);
+ } else {
+ if (msg->ua.ua_set)
+ ctl_est_ua(lun, iid, msg->ua.ua_type);
+ else
+ ctl_clr_ua(lun, iid, msg->ua.ua_type);
+ }
+ mtx_unlock(&lun->lun_lock);
+ } else
+ mtx_unlock(&softc->ctl_lock);
+}
+
+static void
+ctl_isc_lun_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+ struct ctl_lun *lun;
+ struct ctl_ha_msg_lun_pr_key pr_key;
+ int i, k;
+ ctl_lun_flags oflags;
+ uint32_t targ_lun;
+
+ targ_lun = msg->hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0;
+ if (msg->lun.lun_devid_len != i || (i > 0 &&
+ memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) {
+ mtx_unlock(&lun->lun_lock);
+ printf("%s: Received conflicting HA LUN %d\n",
+ __func__, msg->hdr.nexus.targ_lun);
+ return;
+ } else {
+ /* Record whether peer is primary. */
+ oflags = lun->flags;
+ if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) &&
+ (msg->lun.flags & CTL_LUN_DISABLED) == 0)
+ lun->flags |= CTL_LUN_PEER_SC_PRIMARY;
+ else
+ lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY;
+ if (oflags != lun->flags)
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+
+ /* If peer is primary and we are not -- use data */
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
+ (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) {
+ lun->pr_generation = msg->lun.pr_generation;
+ lun->pr_res_idx = msg->lun.pr_res_idx;
+ lun->pr_res_type = msg->lun.pr_res_type;
+ lun->pr_key_count = msg->lun.pr_key_count;
+ for (k = 0; k < CTL_MAX_INITIATORS; k++)
+ ctl_clr_prkey(lun, k);
+ for (k = 0; k < msg->lun.pr_key_count; k++) {
+ memcpy(&pr_key, &msg->lun.data[i],
+ sizeof(pr_key));
+ ctl_alloc_prkey(lun, pr_key.pr_iid);
+ ctl_set_prkey(lun, pr_key.pr_iid,
+ pr_key.pr_key);
+ i += sizeof(pr_key);
+ }
+ }
+
+ mtx_unlock(&lun->lun_lock);
+ CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n",
+ __func__, msg->hdr.nexus.targ_lun,
+ (msg->lun.flags & CTL_LUN_PRIMARY_SC) ?
+ "primary" : "secondary"));
+
+ /* If we are primary but peer doesn't know -- notify */
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) &&
+ (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0)
+ ctl_isc_announce_lun(lun);
+ }
+}
+
+static void
+ctl_isc_port_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+ struct ctl_port *port;
+ struct ctl_lun *lun;
+ int i, new;
+
+ port = softc->ctl_ports[msg->hdr.nexus.targ_port];
+ if (port == NULL) {
+ CTL_DEBUG_PRINT(("%s: New port %d\n", __func__,
+ msg->hdr.nexus.targ_port));
+ new = 1;
+ port = malloc(sizeof(*port), M_CTL, M_WAITOK | M_ZERO);
+ port->frontend = &ha_frontend;
+ port->targ_port = msg->hdr.nexus.targ_port;
+ port->fe_datamove = ctl_ha_datamove;
+ port->fe_done = ctl_ha_done;
+ } else if (port->frontend == &ha_frontend) {
+ CTL_DEBUG_PRINT(("%s: Updated port %d\n", __func__,
+ msg->hdr.nexus.targ_port));
+ new = 0;
+ } else {
+ printf("%s: Received conflicting HA port %d\n",
+ __func__, msg->hdr.nexus.targ_port);
+ return;
+ }
+ port->port_type = msg->port.port_type;
+ port->physical_port = msg->port.physical_port;
+ port->virtual_port = msg->port.virtual_port;
+ port->status = msg->port.status;
+ i = 0;
+ free(port->port_name, M_CTL);
+ port->port_name = strndup(&msg->port.data[i], msg->port.name_len,
+ M_CTL);
+ i += msg->port.name_len;
+ if (msg->port.lun_map_len != 0) {
+ if (port->lun_map == NULL)
+ port->lun_map = malloc(sizeof(uint32_t) * CTL_MAX_LUNS,
+ M_CTL, M_WAITOK);
+ memcpy(port->lun_map, &msg->port.data[i],
+ sizeof(uint32_t) * CTL_MAX_LUNS);
+ i += msg->port.lun_map_len;
+ } else {
+ free(port->lun_map, M_CTL);
+ port->lun_map = NULL;
+ }
+ if (msg->port.port_devid_len != 0) {
+ if (port->port_devid == NULL ||
+ port->port_devid->len != msg->port.port_devid_len) {
+ free(port->port_devid, M_CTL);
+ port->port_devid = malloc(sizeof(struct ctl_devid) +
+ msg->port.port_devid_len, M_CTL, M_WAITOK);
+ }
+ memcpy(port->port_devid->data, &msg->port.data[i],
+ msg->port.port_devid_len);
+ port->port_devid->len = msg->port.port_devid_len;
+ i += msg->port.port_devid_len;
+ } else {
+ free(port->port_devid, M_CTL);
+ port->port_devid = NULL;
+ }
+ if (msg->port.target_devid_len != 0) {
+ if (port->target_devid == NULL ||
+ port->target_devid->len != msg->port.target_devid_len) {
+ free(port->target_devid, M_CTL);
+ port->target_devid = malloc(sizeof(struct ctl_devid) +
+ msg->port.target_devid_len, M_CTL, M_WAITOK);
+ }
+ memcpy(port->target_devid->data, &msg->port.data[i],
+ msg->port.target_devid_len);
+ port->target_devid->len = msg->port.target_devid_len;
+ i += msg->port.target_devid_len;
+ } else {
+ free(port->target_devid, M_CTL);
+ port->target_devid = NULL;
+ }
+ if (msg->port.init_devid_len != 0) {
+ if (port->init_devid == NULL ||
+ port->init_devid->len != msg->port.init_devid_len) {
+ free(port->init_devid, M_CTL);
+ port->init_devid = malloc(sizeof(struct ctl_devid) +
+ msg->port.init_devid_len, M_CTL, M_WAITOK);
+ }
+ memcpy(port->init_devid->data, &msg->port.data[i],
+ msg->port.init_devid_len);
+ port->init_devid->len = msg->port.init_devid_len;
+ i += msg->port.init_devid_len;
+ } else {
+ free(port->init_devid, M_CTL);
+ port->init_devid = NULL;
+ }
+ if (new) {
+ if (ctl_port_register(port) != 0) {
+ printf("%s: ctl_port_register() failed with error\n",
+ __func__);
+ }
+ }
+ mtx_lock(&softc->ctl_lock);
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
+ continue;
+ mtx_lock(&lun->lun_lock);
+ ctl_est_ua_all(lun, -1, CTL_UA_INQ_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ }
+ mtx_unlock(&softc->ctl_lock);
+}
+
+static void
+ctl_isc_iid_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+ struct ctl_port *port;
+ int iid;
+
+ port = softc->ctl_ports[msg->hdr.nexus.targ_port];
+ if (port == NULL) {
+ printf("%s: Received IID for unknown port %d\n",
+ __func__, msg->hdr.nexus.targ_port);
+ return;
+ }
+ iid = msg->hdr.nexus.initid;
+ port->wwpn_iid[iid].in_use = msg->iid.in_use;
+ port->wwpn_iid[iid].wwpn = msg->iid.wwpn;
+ free(port->wwpn_iid[iid].name, M_CTL);
+ if (msg->iid.name_len) {
+ port->wwpn_iid[iid].name = strndup(&msg->iid.data[0],
+ msg->iid.name_len, M_CTL);
+ } else
+ port->wwpn_iid[iid].name = NULL;
+}
+
+static void
+ctl_isc_login(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+
+ if (msg->login.version != CTL_HA_VERSION) {
+ printf("CTL HA peers have different versions %d != %d\n",
+ msg->login.version, CTL_HA_VERSION);
+ ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
+ return;
+ }
+ if (msg->login.ha_mode != softc->ha_mode) {
+ printf("CTL HA peers have different ha_mode %d != %d\n",
+ msg->login.ha_mode, softc->ha_mode);
+ ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
+ return;
+ }
+ if (msg->login.ha_id == softc->ha_id) {
+ printf("CTL HA peers have same ha_id %d\n", msg->login.ha_id);
+ ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
+ return;
+ }
+ if (msg->login.max_luns != CTL_MAX_LUNS ||
+ msg->login.max_ports != CTL_MAX_PORTS ||
+ msg->login.max_init_per_port != CTL_MAX_INIT_PER_PORT) {
+ printf("CTL HA peers have different limits\n");
+ ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
+ return;
+ }
+}
+
+static void
+ctl_isc_mode_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len)
+{
+ struct ctl_lun *lun;
+ int i;
+ uint32_t initidx, targ_lun;
+
+ targ_lun = msg->hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
+ if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) ==
+ msg->mode.page_code &&
+ lun->mode_pages.index[i].subpage == msg->mode.subpage)
+ break;
+ }
+ if (i == CTL_NUM_MODE_PAGES) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ memcpy(lun->mode_pages.index[i].page_data, msg->mode.data,
+ lun->mode_pages.index[i].page_len);
+ initidx = ctl_get_initindex(&msg->hdr.nexus);
+ if (initidx != -1)
+ ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+}
+
/*
* ISC (Inter Shelf Communication) event handler. Events from the HA
* subsystem come in here.
@@ -611,60 +1322,38 @@ ctl_isc_handler_finish_ser_only(struct ctl_softc *ctl_softc,
static void
ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
union ctl_io *io;
struct ctl_prio *presio;
ctl_ha_status isc_status;
- softc = control_softc;
- io = NULL;
-
-
-#if 0
- printf("CTL: Isc Msg event %d\n", event);
-#endif
+ CTL_DEBUG_PRINT(("CTL: Isc Msg event %d\n", event));
if (event == CTL_HA_EVT_MSG_RECV) {
- union ctl_ha_msg msg_info;
+ union ctl_ha_msg *msg, msgbuf;
- isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), /*wait*/ 0);
-#if 0
- printf("CTL: msg_type %d\n", msg_info.msg_type);
-#endif
- if (isc_status != 0) {
- printf("Error receiving message, status = %d\n",
- isc_status);
+ if (param > sizeof(msgbuf))
+ msg = malloc(param, M_CTL, M_WAITOK);
+ else
+ msg = &msgbuf;
+ isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_CTL, msg, param,
+ M_WAITOK);
+ if (isc_status != CTL_HA_STATUS_SUCCESS) {
+ printf("%s: Error receiving message: %d\n",
+ __func__, isc_status);
+ if (msg != &msgbuf)
+ free(msg, M_CTL);
return;
}
- switch (msg_info.hdr.msg_type) {
+ CTL_DEBUG_PRINT(("CTL: msg_type %d\n", msg->msg_type));
+ switch (msg->hdr.msg_type) {
case CTL_MSG_SERIALIZE:
-#if 0
- printf("Serialize\n");
-#endif
- io = ctl_alloc_io_nowait(softc->othersc_pool);
- if (io == NULL) {
- printf("ctl_isc_event_handler: can't allocate "
- "ctl_io!\n");
- /* Bad Juju */
- /* Need to set busy and send msg back */
- msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
- msg_info.hdr.status = CTL_SCSI_ERROR;
- msg_info.scsi.scsi_status = SCSI_STATUS_BUSY;
- msg_info.scsi.sense_len = 0;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0) > CTL_HA_STATUS_SUCCESS){
- }
- goto bailout;
- }
+ io = ctl_alloc_io(softc->othersc_pool);
ctl_zero_io(io);
- // populate ctsio from msg_info
+ // populate ctsio from msg
io->io_hdr.io_type = CTL_IO_SCSI;
io->io_hdr.msg_type = CTL_MSG_SERIALIZE;
- io->io_hdr.original_sc = msg_info.hdr.original_sc;
-#if 0
- printf("pOrig %x\n", (int)msg_info.original_sc);
-#endif
+ io->io_hdr.original_sc = msg->hdr.original_sc;
io->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC |
CTL_FLAG_IO_ACTIVE;
/*
@@ -674,19 +1363,23 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
*
* XXX KDM add another flag that is more specific.
*/
- if (softc->ha_mode == CTL_HA_MODE_SER_ONLY)
+ if (softc->ha_mode != CTL_HA_MODE_XFER)
io->io_hdr.flags |= CTL_FLAG_INT_COPY;
- io->io_hdr.nexus = msg_info.hdr.nexus;
+ io->io_hdr.nexus = msg->hdr.nexus;
#if 0
- printf("targ %d, port %d, iid %d, lun %d\n",
- io->io_hdr.nexus.targ_target.id,
+ printf("port %u, iid %u, lun %u\n",
io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.initid.id,
+ io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_lun);
#endif
- io->scsiio.tag_num = msg_info.scsi.tag_num;
- io->scsiio.tag_type = msg_info.scsi.tag_type;
- memcpy(io->scsiio.cdb, msg_info.scsi.cdb,
+ io->scsiio.tag_num = msg->scsi.tag_num;
+ io->scsiio.tag_type = msg->scsi.tag_type;
+#ifdef CTL_TIME_IO
+ io->io_hdr.start_time = time_uptime;
+ getbinuptime(&io->io_hdr.start_bt);
+#endif /* CTL_TIME_IO */
+ io->scsiio.cdb_len = msg->scsi.cdb_len;
+ memcpy(io->scsiio.cdb, msg->scsi.cdb,
CTL_MAX_CDBLEN);
if (softc->ha_mode == CTL_HA_MODE_XFER) {
const struct ctl_cmd_entry *entry;
@@ -704,7 +1397,7 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
struct ctl_sg_entry *sgl;
int i, j;
- io = msg_info.hdr.original_sc;
+ io = msg->hdr.original_sc;
if (io == NULL) {
printf("%s: original_sc == NULL!\n", __func__);
/* XXX KDM do something here */
@@ -716,97 +1409,68 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
* Keep track of this, we need to send it back over
* when the datamove is complete.
*/
- io->io_hdr.serializing_sc = msg_info.hdr.serializing_sc;
+ io->io_hdr.serializing_sc = msg->hdr.serializing_sc;
+ if (msg->hdr.status == CTL_SUCCESS)
+ io->io_hdr.status = msg->hdr.status;
- if (msg_info.dt.sg_sequence == 0) {
- /*
- * XXX KDM we use the preallocated S/G list
- * here, but we'll need to change this to
- * dynamic allocation if we need larger S/G
- * lists.
- */
- if (msg_info.dt.kern_sg_entries >
- sizeof(io->io_hdr.remote_sglist) /
- sizeof(io->io_hdr.remote_sglist[0])) {
- printf("%s: number of S/G entries "
- "needed %u > allocated num %zd\n",
- __func__,
- msg_info.dt.kern_sg_entries,
- sizeof(io->io_hdr.remote_sglist)/
- sizeof(io->io_hdr.remote_sglist[0]));
-
- /*
- * XXX KDM send a message back to
- * the other side to shut down the
- * DMA. The error will come back
- * through via the normal channel.
- */
- break;
- }
- sgl = io->io_hdr.remote_sglist;
- memset(sgl, 0,
- sizeof(io->io_hdr.remote_sglist));
+ if (msg->dt.sg_sequence == 0) {
+#ifdef CTL_TIME_IO
+ getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
+ i = msg->dt.kern_sg_entries +
+ msg->dt.kern_data_len /
+ CTL_HA_DATAMOVE_SEGMENT + 1;
+ sgl = malloc(sizeof(*sgl) * i, M_CTL,
+ M_WAITOK | M_ZERO);
+ io->io_hdr.remote_sglist = sgl;
+ io->io_hdr.local_sglist =
+ &sgl[msg->dt.kern_sg_entries];
io->scsiio.kern_data_ptr = (uint8_t *)sgl;
io->scsiio.kern_sg_entries =
- msg_info.dt.kern_sg_entries;
+ msg->dt.kern_sg_entries;
io->scsiio.rem_sg_entries =
- msg_info.dt.kern_sg_entries;
+ msg->dt.kern_sg_entries;
io->scsiio.kern_data_len =
- msg_info.dt.kern_data_len;
+ msg->dt.kern_data_len;
io->scsiio.kern_total_len =
- msg_info.dt.kern_total_len;
+ msg->dt.kern_total_len;
io->scsiio.kern_data_resid =
- msg_info.dt.kern_data_resid;
+ msg->dt.kern_data_resid;
io->scsiio.kern_rel_offset =
- msg_info.dt.kern_rel_offset;
- /*
- * Clear out per-DMA flags.
- */
- io->io_hdr.flags &= ~CTL_FLAG_RDMA_MASK;
- /*
- * Add per-DMA flags that are set for this
- * particular DMA request.
- */
- io->io_hdr.flags |= msg_info.dt.flags &
- CTL_FLAG_RDMA_MASK;
+ msg->dt.kern_rel_offset;
+ io->io_hdr.flags &= ~CTL_FLAG_BUS_ADDR;
+ io->io_hdr.flags |= msg->dt.flags &
+ CTL_FLAG_BUS_ADDR;
} else
sgl = (struct ctl_sg_entry *)
io->scsiio.kern_data_ptr;
- for (i = msg_info.dt.sent_sg_entries, j = 0;
- i < (msg_info.dt.sent_sg_entries +
- msg_info.dt.cur_sg_entries); i++, j++) {
- sgl[i].addr = msg_info.dt.sg_list[j].addr;
- sgl[i].len = msg_info.dt.sg_list[j].len;
+ for (i = msg->dt.sent_sg_entries, j = 0;
+ i < (msg->dt.sent_sg_entries +
+ msg->dt.cur_sg_entries); i++, j++) {
+ sgl[i].addr = msg->dt.sg_list[j].addr;
+ sgl[i].len = msg->dt.sg_list[j].len;
#if 0
- printf("%s: L: %p,%d -> %p,%d j=%d, i=%d\n",
- __func__,
- msg_info.dt.sg_list[j].addr,
- msg_info.dt.sg_list[j].len,
- sgl[i].addr, sgl[i].len, j, i);
+ printf("%s: DATAMOVE: %p,%lu j=%d, i=%d\n",
+ __func__, sgl[i].addr, sgl[i].len, j, i);
#endif
}
-#if 0
- memcpy(&sgl[msg_info.dt.sent_sg_entries],
- msg_info.dt.sg_list,
- sizeof(*sgl) * msg_info.dt.cur_sg_entries);
-#endif
/*
* If this is the last piece of the I/O, we've got
* the full S/G list. Queue processing in the thread.
* Otherwise wait for the next piece.
*/
- if (msg_info.dt.sg_last != 0)
+ if (msg->dt.sg_last != 0)
ctl_enqueue_isc(io);
break;
}
/* Performed on the Serializing (primary) SC, XFER mode only */
case CTL_MSG_DATAMOVE_DONE: {
- if (msg_info.hdr.serializing_sc == NULL) {
+ if (msg->hdr.serializing_sc == NULL) {
printf("%s: serializing_sc == NULL!\n",
__func__);
/* XXX KDM now what? */
@@ -817,33 +1481,38 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
* there was a failure, so we can return status
* back to the initiator.
*/
- io = msg_info.hdr.serializing_sc;
+ io = msg->hdr.serializing_sc;
io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE;
- io->io_hdr.status = msg_info.hdr.status;
- io->scsiio.scsi_status = msg_info.scsi.scsi_status;
- io->scsiio.sense_len = msg_info.scsi.sense_len;
- io->scsiio.sense_residual =msg_info.scsi.sense_residual;
- io->io_hdr.port_status = msg_info.scsi.fetd_status;
- io->scsiio.residual = msg_info.scsi.residual;
- memcpy(&io->scsiio.sense_data,&msg_info.scsi.sense_data,
- sizeof(io->scsiio.sense_data));
+ io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
+ io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
+ io->io_hdr.port_status = msg->scsi.fetd_status;
+ io->scsiio.residual = msg->scsi.residual;
+ if (msg->hdr.status != CTL_STATUS_NONE) {
+ io->io_hdr.status = msg->hdr.status;
+ io->scsiio.scsi_status = msg->scsi.scsi_status;
+ io->scsiio.sense_len = msg->scsi.sense_len;
+ io->scsiio.sense_residual = msg->scsi.sense_residual;
+ memcpy(&io->scsiio.sense_data,
+ &msg->scsi.sense_data,
+ msg->scsi.sense_len);
+ if (msg->hdr.status == CTL_SUCCESS)
+ io->io_hdr.flags |= CTL_FLAG_STATUS_SENT;
+ }
ctl_enqueue_isc(io);
break;
}
/* Performed on Originating SC, SER_ONLY mode */
case CTL_MSG_R2R:
- io = msg_info.hdr.original_sc;
+ io = msg->hdr.original_sc;
if (io == NULL) {
- printf("%s: Major Bummer\n", __func__);
- return;
- } else {
-#if 0
- printf("pOrig %x\n",(int) ctsio);
-#endif
+ printf("%s: original_sc == NULL!\n",
+ __func__);
+ break;
}
+ io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
io->io_hdr.msg_type = CTL_MSG_R2R;
- io->io_hdr.serializing_sc = msg_info.hdr.serializing_sc;
+ io->io_hdr.serializing_sc = msg->hdr.serializing_sc;
ctl_enqueue_isc(io);
break;
@@ -855,22 +1524,20 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
*/
case CTL_MSG_FINISH_IO:
if (softc->ha_mode == CTL_HA_MODE_XFER)
- ctl_isc_handler_finish_xfer(softc,
- &msg_info);
+ ctl_isc_handler_finish_xfer(softc, msg);
else
- ctl_isc_handler_finish_ser_only(softc,
- &msg_info);
+ ctl_isc_handler_finish_ser_only(softc, msg);
break;
/* Performed on Originating SC */
case CTL_MSG_BAD_JUJU:
- io = msg_info.hdr.original_sc;
+ io = msg->hdr.original_sc;
if (io == NULL) {
printf("%s: Bad JUJU!, original_sc is NULL!\n",
__func__);
break;
}
- ctl_copy_sense_data(&msg_info, io);
+ ctl_copy_sense_data(msg, io);
/*
* IO should have already been cleaned up on other
* SC so clear this flag so we won't send a message
@@ -879,7 +1546,7 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE;
- /* io = msg_info.hdr.serializing_sc; */
+ /* io = msg->hdr.serializing_sc; */
io->io_hdr.msg_type = CTL_MSG_BAD_JUJU;
ctl_enqueue_isc(io);
break;
@@ -887,90 +1554,109 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
/* Handle resets sent from the other side */
case CTL_MSG_MANAGE_TASKS: {
struct ctl_taskio *taskio;
- taskio = (struct ctl_taskio *)ctl_alloc_io_nowait(
+ taskio = (struct ctl_taskio *)ctl_alloc_io(
softc->othersc_pool);
- if (taskio == NULL) {
- printf("ctl_isc_event_handler: can't allocate "
- "ctl_io!\n");
- /* Bad Juju */
- /* should I just call the proper reset func
- here??? */
- goto bailout;
- }
ctl_zero_io((union ctl_io *)taskio);
taskio->io_hdr.io_type = CTL_IO_TASK;
taskio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC;
- taskio->io_hdr.nexus = msg_info.hdr.nexus;
- taskio->task_action = msg_info.task.task_action;
- taskio->tag_num = msg_info.task.tag_num;
- taskio->tag_type = msg_info.task.tag_type;
+ taskio->io_hdr.nexus = msg->hdr.nexus;
+ taskio->task_action = msg->task.task_action;
+ taskio->tag_num = msg->task.tag_num;
+ taskio->tag_type = msg->task.tag_type;
#ifdef CTL_TIME_IO
taskio->io_hdr.start_time = time_uptime;
- getbintime(&taskio->io_hdr.start_bt);
-#if 0
- cs_prof_gettime(&taskio->io_hdr.start_ticks);
-#endif
+ getbinuptime(&taskio->io_hdr.start_bt);
#endif /* CTL_TIME_IO */
ctl_run_task((union ctl_io *)taskio);
break;
}
/* Persistent Reserve action which needs attention */
case CTL_MSG_PERS_ACTION:
- presio = (struct ctl_prio *)ctl_alloc_io_nowait(
+ presio = (struct ctl_prio *)ctl_alloc_io(
softc->othersc_pool);
- if (presio == NULL) {
- printf("ctl_isc_event_handler: can't allocate "
- "ctl_io!\n");
- /* Bad Juju */
- /* Need to set busy and send msg back */
- goto bailout;
- }
ctl_zero_io((union ctl_io *)presio);
presio->io_hdr.msg_type = CTL_MSG_PERS_ACTION;
- presio->pr_msg = msg_info.pr;
+ presio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC;
+ presio->io_hdr.nexus = msg->hdr.nexus;
+ presio->pr_msg = msg->pr;
ctl_enqueue_isc((union ctl_io *)presio);
break;
- case CTL_MSG_SYNC_FE:
- rcv_sync_msg = 1;
+ case CTL_MSG_UA:
+ ctl_isc_ua(softc, msg, param);
+ break;
+ case CTL_MSG_PORT_SYNC:
+ ctl_isc_port_sync(softc, msg, param);
+ break;
+ case CTL_MSG_LUN_SYNC:
+ ctl_isc_lun_sync(softc, msg, param);
+ break;
+ case CTL_MSG_IID_SYNC:
+ ctl_isc_iid_sync(softc, msg, param);
+ break;
+ case CTL_MSG_LOGIN:
+ ctl_isc_login(softc, msg, param);
+ break;
+ case CTL_MSG_MODE_SYNC:
+ ctl_isc_mode_sync(softc, msg, param);
break;
default:
- printf("How did I get here?\n");
+ printf("Received HA message of unknown type %d\n",
+ msg->hdr.msg_type);
+ ctl_ha_msg_abort(CTL_HA_CHAN_CTL);
+ break;
}
- } else if (event == CTL_HA_EVT_MSG_SENT) {
- if (param != CTL_HA_STATUS_SUCCESS) {
- printf("Bad status from ctl_ha_msg_send status %d\n",
- param);
+ if (msg != &msgbuf)
+ free(msg, M_CTL);
+ } else if (event == CTL_HA_EVT_LINK_CHANGE) {
+ printf("CTL: HA link status changed from %d to %d\n",
+ softc->ha_link, param);
+ if (param == softc->ha_link)
+ return;
+ if (softc->ha_link == CTL_HA_LINK_ONLINE) {
+ softc->ha_link = param;
+ ctl_isc_ha_link_down(softc);
+ } else {
+ softc->ha_link = param;
+ if (softc->ha_link == CTL_HA_LINK_ONLINE)
+ ctl_isc_ha_link_up(softc);
}
return;
- } else if (event == CTL_HA_EVT_DISCONNECT) {
- printf("CTL: Got a disconnect from Isc\n");
- return;
} else {
printf("ctl_isc_event_handler: Unknown event %d\n", event);
return;
}
-
-bailout:
- return;
}
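
The CTL_MSG_DATAMOVE branch above accumulates the remote S/G list across several HA messages: each message carries cur_sg_entries entries starting at offset sent_sg_entries, and only the message with sg_last set queues the I/O for processing. A minimal standalone sketch of that accumulation protocol (all names and sizes here are assumed for illustration, not taken from CTL):

    #include <stddef.h>

    struct sg_ent { void *addr; size_t len; };	/* assumed entry shape */

    struct sg_chunk {
    	int sent;		/* entries delivered before this chunk */
    	int cur;		/* entries carried in this chunk */
    	int last;		/* nonzero on the final chunk */
    	struct sg_ent ent[8];
    };

    /* Copy one chunk into the preallocated list; return 1 when complete. */
    static int
    sg_accumulate(const struct sg_chunk *m, struct sg_ent *sgl)
    {
    	int i, j;

    	for (i = m->sent, j = 0; j < m->cur; i++, j++)
    		sgl[i] = m->ent[j];
    	return (m->last != 0);
    }
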
static void
ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest)
{
- struct scsi_sense_data *sense;
- sense = &dest->scsiio.sense_data;
- bcopy(&src->scsi.sense_data, sense, sizeof(*sense));
+ memcpy(&dest->scsiio.sense_data, &src->scsi.sense_data,
+ src->scsi.sense_len);
dest->scsiio.scsi_status = src->scsi.scsi_status;
dest->scsiio.sense_len = src->scsi.sense_len;
dest->io_hdr.status = src->hdr.status;
}
static void
+ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest)
+{
+
+ memcpy(&dest->scsi.sense_data, &src->scsiio.sense_data,
+ src->scsiio.sense_len);
+ dest->scsi.scsi_status = src->scsiio.scsi_status;
+ dest->scsi.sense_len = src->scsiio.sense_len;
+ dest->hdr.status = src->io_hdr.status;
+}
+
+void
ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
{
+ struct ctl_softc *softc = lun->ctl_softc;
ctl_ua_type *pu;
+ if (initidx < softc->init_min || initidx >= softc->init_max)
+ return;
mtx_assert(&lun->lun_lock, MA_OWNED);
pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT];
if (pu == NULL)
@@ -978,28 +1664,40 @@ ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
pu[initidx % CTL_MAX_INIT_PER_PORT] |= ua;
}
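
The pending-UA store behind ctl_est_ua() is a two-level structure: one lazily allocated per-initiator bitmap per port slot, indexed by initidx / CTL_MAX_INIT_PER_PORT and initidx % CTL_MAX_INIT_PER_PORT. A standalone sketch of that indexing, with the constant value assumed purely for illustration (the real one lives in the CTL headers):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_INIT_PER_PORT 2048	/* assumed value */

    typedef uint8_t ua_type;
    static ua_type *pending_ua[16];	/* one bitmap per port slot, lazily set */

    static void
    est_ua(uint32_t initidx, ua_type ua)
    {
    	ua_type *pu = pending_ua[initidx / MAX_INIT_PER_PORT];

    	if (pu == NULL)		/* no initiators seen on this port yet */
    		return;
    	pu[initidx % MAX_INIT_PER_PORT] |= ua;
    }

    int
    main(void)
    {
    	est_ua(4100, 0x01);	/* no-op here: slot 2 is unallocated */
    	/* initidx 4100 -> port slot 2, initiator slot 4 */
    	printf("%u %u\n", 4100 / MAX_INIT_PER_PORT, 4100 % MAX_INIT_PER_PORT);
    	return (0);
    }
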
-static void
-ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
+void
+ctl_est_ua_port(struct ctl_lun *lun, int port, uint32_t except, ctl_ua_type ua)
{
- int i, j;
+ int i;
mtx_assert(&lun->lun_lock, MA_OWNED);
- for (i = 0; i < CTL_MAX_PORTS; i++) {
- if (lun->pending_ua[i] == NULL)
+ if (lun->pending_ua[port] == NULL)
+ return;
+ for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) {
+ if (port * CTL_MAX_INIT_PER_PORT + i == except)
continue;
- for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) {
- if (i * CTL_MAX_INIT_PER_PORT + j == except)
- continue;
- lun->pending_ua[i][j] |= ua;
- }
+ lun->pending_ua[port][i] |= ua;
}
}
-static void
+void
+ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
+{
+ struct ctl_softc *softc = lun->ctl_softc;
+ int i;
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+ for (i = softc->port_min; i < softc->port_max; i++)
+ ctl_est_ua_port(lun, i, except, ua);
+}
+
+void
ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
{
+ struct ctl_softc *softc = lun->ctl_softc;
ctl_ua_type *pu;
+ if (initidx < softc->init_min || initidx >= softc->init_max)
+ return;
mtx_assert(&lun->lun_lock, MA_OWNED);
pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT];
if (pu == NULL)
@@ -1007,13 +1705,14 @@ ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua)
pu[initidx % CTL_MAX_INIT_PER_PORT] &= ~ua;
}
-static void
+void
ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
{
+ struct ctl_softc *softc = lun->ctl_softc;
int i, j;
mtx_assert(&lun->lun_lock, MA_OWNED);
- for (i = 0; i < CTL_MAX_PORTS; i++) {
+ for (i = softc->port_min; i < softc->port_max; i++) {
if (lun->pending_ua[i] == NULL)
continue;
for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) {
@@ -1024,18 +1723,29 @@ ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua)
}
}
+void
+ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx,
+ ctl_ua_type ua_type)
+{
+ struct ctl_lun *lun;
+
+ mtx_assert(&ctl_softc->ctl_lock, MA_OWNED);
+ STAILQ_FOREACH(lun, &ctl_softc->lun_list, links) {
+ mtx_lock(&lun->lun_lock);
+ ctl_clr_ua(lun, initidx, ua_type);
+ mtx_unlock(&lun->lun_lock);
+ }
+}
+
static int
-ctl_ha_state_sysctl(SYSCTL_HANDLER_ARGS)
+ctl_ha_role_sysctl(SYSCTL_HANDLER_ARGS)
{
struct ctl_softc *softc = (struct ctl_softc *)arg1;
struct ctl_lun *lun;
+ struct ctl_lun_req ireq;
int error, value;
- if (softc->flags & CTL_FLAG_ACTIVE_SHELF)
- value = 0;
- else
- value = 1;
-
+ value = (softc->flags & CTL_FLAG_ACTIVE_SHELF) ? 0 : 1;
error = sysctl_handle_int(oidp, &value, 0, req);
if ((error != 0) || (req->newptr == NULL))
return (error);
@@ -1046,9 +1756,17 @@ ctl_ha_state_sysctl(SYSCTL_HANDLER_ARGS)
else
softc->flags &= ~CTL_FLAG_ACTIVE_SHELF;
STAILQ_FOREACH(lun, &softc->lun_list, links) {
- mtx_lock(&lun->lun_lock);
- ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
- mtx_unlock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ bzero(&ireq, sizeof(ireq));
+ ireq.reqtype = CTL_LUNREQ_MODIFY;
+ ireq.reqdata.modify.lun_id = lun->lun;
+ lun->backend->ioctl(NULL, CTL_LUN_REQ, (caddr_t)&ireq, 0,
+ curthread);
+ if (ireq.status != CTL_LUN_OK) {
+ printf("%s: CTL_LUNREQ_MODIFY returned %d '%s'\n",
+ __func__, ireq.status, ireq.error_str);
+ }
+ mtx_lock(&softc->ctl_lock);
}
mtx_unlock(&softc->ctl_lock);
return (0);
@@ -1059,31 +1777,15 @@ ctl_init(void)
{
struct ctl_softc *softc;
void *other_pool;
- struct ctl_port *port;
- int i, error, retval;
- //int isc_retval;
-
- retval = 0;
- ctl_pause_rtr = 0;
- rcv_sync_msg = 0;
+ int i, error;
- control_softc = malloc(sizeof(*control_softc), M_DEVBUF,
+ softc = control_softc = malloc(sizeof(*control_softc), M_DEVBUF,
M_WAITOK | M_ZERO);
- softc = control_softc;
softc->dev = make_dev(&ctl_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600,
"cam/ctl");
-
softc->dev->si_drv1 = softc;
- /*
- * By default, return a "bad LUN" peripheral qualifier for unknown
- * LUNs. The user can override this default using the tunable or
- * sysctl. See the comment in ctl_inquiry_std() for more details.
- */
- softc->inquiry_pq_no_lun = 1;
- TUNABLE_INT_FETCH("kern.cam.ctl.inquiry_pq_no_lun",
- &softc->inquiry_pq_no_lun);
sysctl_ctx_init(&softc->sysctl_ctx);
softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
SYSCTL_STATIC_CHILDREN(_kern_cam), OID_AUTO, "ctl",
@@ -1097,22 +1799,14 @@ ctl_init(void)
return (ENOMEM);
}
- SYSCTL_ADD_INT(&softc->sysctl_ctx,
- SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
- "inquiry_pq_no_lun", CTLFLAG_RW,
- &softc->inquiry_pq_no_lun, 0,
- "Report no lun possible for invalid LUNs");
-
mtx_init(&softc->ctl_lock, "CTL mutex", NULL, MTX_DEF);
softc->io_zone = uma_zcreate("CTL IO", sizeof(union ctl_io),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- softc->open_count = 0;
+ softc->flags = 0;
- /*
- * Default to actually sending a SYNCHRONIZE CACHE command down to
- * the drive.
- */
- softc->flags = CTL_FLAG_REAL_SYNC;
+ SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "ha_mode", CTLFLAG_RDTUN, (int *)&softc->ha_mode, 0,
+ "HA mode (0 - act/stby, 1 - serialize only, 2 - xfer)");
/*
* In Copan's HA scheme, the "master" and "slave" roles are
@@ -1122,13 +1816,22 @@ ctl_init(void)
SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
OID_AUTO, "ha_id", CTLFLAG_RDTUN, &softc->ha_id, 0,
"HA head ID (0 - no HA)");
- if (softc->ha_id == 0) {
+ if (softc->ha_id == 0 || softc->ha_id > NUM_TARGET_PORT_GROUPS) {
softc->flags |= CTL_FLAG_ACTIVE_SHELF;
softc->is_single = 1;
- softc->port_offset = 0;
- } else
- softc->port_offset = (softc->ha_id - 1) * CTL_MAX_PORTS;
- softc->persis_offset = softc->port_offset * CTL_MAX_INIT_PER_PORT;
+ softc->port_cnt = CTL_MAX_PORTS;
+ softc->port_min = 0;
+ } else {
+ softc->port_cnt = CTL_MAX_PORTS / NUM_TARGET_PORT_GROUPS;
+ softc->port_min = (softc->ha_id - 1) * softc->port_cnt;
+ }
+ softc->port_max = softc->port_min + softc->port_cnt;
+ softc->init_min = softc->port_min * CTL_MAX_INIT_PER_PORT;
+ softc->init_max = softc->port_max * CTL_MAX_INIT_PER_PORT;
+
+ SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO, "ha_link", CTLFLAG_RD, (int *)&softc->ha_link, 0,
+ "HA link state (0 - offline, 1 - unknown, 2 - online)");
STAILQ_INIT(&softc->lun_list);
STAILQ_INIT(&softc->pending_lun_queue);
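
With HA enabled, each head now owns a contiguous slice of the global port and initiator index spaces, derived from ha_id. A worked sketch of the same arithmetic, assuming the stock constants CTL_MAX_PORTS = 256, NUM_TARGET_PORT_GROUPS = 2 and CTL_MAX_INIT_PER_PORT = 2048 (all three are assumptions here; check ctl.h):

    #include <stdio.h>

    #define MAX_PORTS		256	/* assumed CTL_MAX_PORTS */
    #define NUM_TPG		2	/* assumed NUM_TARGET_PORT_GROUPS */
    #define MAX_INIT_PER_PORT	2048	/* assumed CTL_MAX_INIT_PER_PORT */

    int
    main(void)
    {
    	int ha_id = 2;			/* second head */
    	int port_cnt = MAX_PORTS / NUM_TPG;
    	int port_min = (ha_id - 1) * port_cnt;
    	int port_max = port_min + port_cnt;

    	/* prints: ports [128, 256), inits [262144, 524288) */
    	printf("ports [%d, %d), inits [%d, %d)\n", port_min, port_max,
    	    port_min * MAX_INIT_PER_PORT, port_max * MAX_INIT_PER_PORT);
    	return (0);
    }
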
@@ -1184,72 +1887,48 @@ ctl_init(void)
return (error);
}
- /*
- * Initialize the ioctl front end.
- */
- ctl_frontend_register(&ioctl_frontend);
- port = &softc->ioctl_info.port;
- port->frontend = &ioctl_frontend;
- sprintf(softc->ioctl_info.port_name, "ioctl");
- port->port_type = CTL_PORT_IOCTL;
- port->num_requested_ctl_io = 100;
- port->port_name = softc->ioctl_info.port_name;
- port->port_online = ctl_ioctl_online;
- port->port_offline = ctl_ioctl_offline;
- port->onoff_arg = &softc->ioctl_info;
- port->lun_enable = ctl_ioctl_lun_enable;
- port->lun_disable = ctl_ioctl_lun_disable;
- port->targ_lun_arg = &softc->ioctl_info;
- port->fe_datamove = ctl_ioctl_datamove;
- port->fe_done = ctl_ioctl_done;
- port->max_targets = 15;
- port->max_target_id = 15;
-
- if (ctl_port_register(&softc->ioctl_info.port) != 0) {
- printf("ctl: ioctl front end registration failed, will "
- "continue anyway\n");
- }
-
SYSCTL_ADD_PROC(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
- OID_AUTO, "ha_state", CTLTYPE_INT | CTLFLAG_RWTUN,
- softc, 0, ctl_ha_state_sysctl, "I", "HA state for this head");
+ OID_AUTO, "ha_role", CTLTYPE_INT | CTLFLAG_RWTUN,
+ softc, 0, ctl_ha_role_sysctl, "I", "HA role for this head");
-#ifdef CTL_IO_DELAY
- if (sizeof(struct callout) > CTL_TIMER_BYTES) {
- printf("sizeof(struct callout) %zd > CTL_TIMER_BYTES %zd\n",
- sizeof(struct callout), CTL_TIMER_BYTES);
- return (EINVAL);
+ if (softc->is_single == 0) {
+ ctl_frontend_register(&ha_frontend);
+ if (ctl_ha_msg_init(softc) != CTL_HA_STATUS_SUCCESS) {
+ printf("ctl_init: ctl_ha_msg_init failed.\n");
+ softc->is_single = 1;
+ } else
+ if (ctl_ha_msg_register(CTL_HA_CHAN_CTL, ctl_isc_event_handler)
+ != CTL_HA_STATUS_SUCCESS) {
+ printf("ctl_init: ctl_ha_msg_register failed.\n");
+ softc->is_single = 1;
+ }
}
-#endif /* CTL_IO_DELAY */
-
return (0);
}
void
ctl_shutdown(void)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
struct ctl_lun *lun, *next_lun;
- softc = (struct ctl_softc *)control_softc;
-
- if (ctl_port_deregister(&softc->ioctl_info.port) != 0)
- printf("ctl: ioctl front end deregistration failed\n");
+ if (softc->is_single == 0) {
+ ctl_ha_msg_shutdown(softc);
+ if (ctl_ha_msg_deregister(CTL_HA_CHAN_CTL)
+ != CTL_HA_STATUS_SUCCESS)
+ printf("%s: ctl_ha_msg_deregister failed.\n", __func__);
+ if (ctl_ha_msg_destroy(softc) != CTL_HA_STATUS_SUCCESS)
+ printf("%s: ctl_ha_msg_destroy failed.\n", __func__);
+ ctl_frontend_deregister(&ha_frontend);
+ }
mtx_lock(&softc->ctl_lock);
- /*
- * Free up each LUN.
- */
- for (lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; lun = next_lun){
- next_lun = STAILQ_NEXT(lun, links);
+ STAILQ_FOREACH_SAFE(lun, &softc->lun_list, links, next_lun)
ctl_free_lun(lun);
- }
mtx_unlock(&softc->ctl_lock);
- ctl_frontend_deregister(&ioctl_frontend);
-
#if 0
ctl_shutdown_thread(softc->work_thread);
mtx_destroy(&softc->queue_lock);
@@ -1297,150 +1976,6 @@ ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td)
return (0);
}
-int
-ctl_port_enable(ctl_port_type port_type)
-{
- struct ctl_softc *softc = control_softc;
- struct ctl_port *port;
-
- if (softc->is_single == 0) {
- union ctl_ha_msg msg_info;
- int isc_retval;
-
-#if 0
- printf("%s: HA mode, synchronizing frontend enable\n",
- __func__);
-#endif
- msg_info.hdr.msg_type = CTL_MSG_SYNC_FE;
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 1 )) > CTL_HA_STATUS_SUCCESS) {
- printf("Sync msg send error retval %d\n", isc_retval);
- }
- if (!rcv_sync_msg) {
- isc_retval=ctl_ha_msg_recv(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 1);
- }
-#if 0
- printf("CTL:Frontend Enable\n");
- } else {
- printf("%s: single mode, skipping frontend synchronization\n",
- __func__);
-#endif
- }
-
- STAILQ_FOREACH(port, &softc->port_list, links) {
- if (port_type & port->port_type)
- {
-#if 0
- printf("port %d\n", port->targ_port);
-#endif
- ctl_port_online(port);
- }
- }
-
- return (0);
-}
-
-int
-ctl_port_disable(ctl_port_type port_type)
-{
- struct ctl_softc *softc;
- struct ctl_port *port;
-
- softc = control_softc;
-
- STAILQ_FOREACH(port, &softc->port_list, links) {
- if (port_type & port->port_type)
- ctl_port_offline(port);
- }
-
- return (0);
-}
-
-/*
- * Returns 0 for success, 1 for failure.
- * Currently the only failure mode is if there aren't enough entries
- * allocated. So, in case of a failure, look at num_entries_dropped,
- * reallocate and try again.
- */
-int
-ctl_port_list(struct ctl_port_entry *entries, int num_entries_alloced,
- int *num_entries_filled, int *num_entries_dropped,
- ctl_port_type port_type, int no_virtual)
-{
- struct ctl_softc *softc;
- struct ctl_port *port;
- int entries_dropped, entries_filled;
- int retval;
- int i;
-
- softc = control_softc;
-
- retval = 0;
- entries_filled = 0;
- entries_dropped = 0;
-
- i = 0;
- mtx_lock(&softc->ctl_lock);
- STAILQ_FOREACH(port, &softc->port_list, links) {
- struct ctl_port_entry *entry;
-
- if ((port->port_type & port_type) == 0)
- continue;
-
- if ((no_virtual != 0)
- && (port->virtual_port != 0))
- continue;
-
- if (entries_filled >= num_entries_alloced) {
- entries_dropped++;
- continue;
- }
- entry = &entries[i];
-
- entry->port_type = port->port_type;
- strlcpy(entry->port_name, port->port_name,
- sizeof(entry->port_name));
- entry->physical_port = port->physical_port;
- entry->virtual_port = port->virtual_port;
- entry->wwnn = port->wwnn;
- entry->wwpn = port->wwpn;
-
- i++;
- entries_filled++;
- }
-
- mtx_unlock(&softc->ctl_lock);
-
- if (entries_dropped > 0)
- retval = 1;
-
- *num_entries_dropped = entries_dropped;
- *num_entries_filled = entries_filled;
-
- return (retval);
-}
-
-static void
-ctl_ioctl_online(void *arg)
-{
- struct ctl_ioctl_info *ioctl_info;
-
- ioctl_info = (struct ctl_ioctl_info *)arg;
-
- ioctl_info->flags |= CTL_IOCTL_FLAG_ENABLED;
-}
-
-static void
-ctl_ioctl_offline(void *arg)
-{
- struct ctl_ioctl_info *ioctl_info;
-
- ioctl_info = (struct ctl_ioctl_info *)arg;
-
- ioctl_info->flags &= ~CTL_IOCTL_FLAG_ENABLED;
-}
-
/*
* Remove an initiator by port number and initiator ID.
* Returns 0 for success, -1 for failure.
@@ -1448,7 +1983,7 @@ ctl_ioctl_offline(void *arg)
int
ctl_remove_initiator(struct ctl_port *port, int iid)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
mtx_assert(&softc->ctl_lock, MA_NOTOWNED);
@@ -1462,6 +1997,7 @@ ctl_remove_initiator(struct ctl_port *port, int iid)
port->wwpn_iid[iid].in_use--;
port->wwpn_iid[iid].last_use = time_uptime;
mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_iid(port, iid);
return (0);
}
@@ -1473,7 +2009,7 @@ ctl_remove_initiator(struct ctl_port *port, int iid)
int
ctl_add_initiator(struct ctl_port *port, int iid, uint64_t wwpn, char *name)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
time_t best_time;
int i, best;
@@ -1577,6 +2113,7 @@ take:
port->wwpn_iid[iid].wwpn = wwpn;
port->wwpn_iid[iid].in_use++;
mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_iid(port, iid);
return (iid);
}
@@ -1636,181 +2173,6 @@ ctl_create_iid(struct ctl_port *port, int iid, uint8_t *buf)
}
}
-static int
-ctl_ioctl_lun_enable(void *arg, int lun_id)
-{
- return (0);
-}
-
-static int
-ctl_ioctl_lun_disable(void *arg, int lun_id)
-{
- return (0);
-}
-
-/*
- * Data movement routine for the CTL ioctl frontend port.
- */
-static int
-ctl_ioctl_do_datamove(struct ctl_scsiio *ctsio)
-{
- struct ctl_sg_entry *ext_sglist, *kern_sglist;
- struct ctl_sg_entry ext_entry, kern_entry;
- int ext_sglen, ext_sg_entries, kern_sg_entries;
- int ext_sg_start, ext_offset;
- int len_to_copy, len_copied;
- int kern_watermark, ext_watermark;
- int ext_sglist_malloced;
- int i, j;
-
- ext_sglist_malloced = 0;
- ext_sg_start = 0;
- ext_offset = 0;
-
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove\n"));
-
- /*
- * If this flag is set, fake the data transfer.
- */
- if (ctsio->io_hdr.flags & CTL_FLAG_NO_DATAMOVE) {
- ctsio->ext_data_filled = ctsio->ext_data_len;
- goto bailout;
- }
-
- /*
- * To simplify things here, if we have a single buffer, stick it in
- * a S/G entry and just make it a single entry S/G list.
- */
- if (ctsio->io_hdr.flags & CTL_FLAG_EDPTR_SGLIST) {
- int len_seen;
-
- ext_sglen = ctsio->ext_sg_entries * sizeof(*ext_sglist);
-
- ext_sglist = (struct ctl_sg_entry *)malloc(ext_sglen, M_CTL,
- M_WAITOK);
- ext_sglist_malloced = 1;
- if (copyin(ctsio->ext_data_ptr, ext_sglist,
- ext_sglen) != 0) {
- ctl_set_internal_failure(ctsio,
- /*sks_valid*/ 0,
- /*retry_count*/ 0);
- goto bailout;
- }
- ext_sg_entries = ctsio->ext_sg_entries;
- len_seen = 0;
- for (i = 0; i < ext_sg_entries; i++) {
- if ((len_seen + ext_sglist[i].len) >=
- ctsio->ext_data_filled) {
- ext_sg_start = i;
- ext_offset = ctsio->ext_data_filled - len_seen;
- break;
- }
- len_seen += ext_sglist[i].len;
- }
- } else {
- ext_sglist = &ext_entry;
- ext_sglist->addr = ctsio->ext_data_ptr;
- ext_sglist->len = ctsio->ext_data_len;
- ext_sg_entries = 1;
- ext_sg_start = 0;
- ext_offset = ctsio->ext_data_filled;
- }
-
- if (ctsio->kern_sg_entries > 0) {
- kern_sglist = (struct ctl_sg_entry *)ctsio->kern_data_ptr;
- kern_sg_entries = ctsio->kern_sg_entries;
- } else {
- kern_sglist = &kern_entry;
- kern_sglist->addr = ctsio->kern_data_ptr;
- kern_sglist->len = ctsio->kern_data_len;
- kern_sg_entries = 1;
- }
-
-
- kern_watermark = 0;
- ext_watermark = ext_offset;
- len_copied = 0;
- for (i = ext_sg_start, j = 0;
- i < ext_sg_entries && j < kern_sg_entries;) {
- uint8_t *ext_ptr, *kern_ptr;
-
- len_to_copy = MIN(ext_sglist[i].len - ext_watermark,
- kern_sglist[j].len - kern_watermark);
-
- ext_ptr = (uint8_t *)ext_sglist[i].addr;
- ext_ptr = ext_ptr + ext_watermark;
- if (ctsio->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
- /*
- * XXX KDM fix this!
- */
- panic("need to implement bus address support");
-#if 0
- kern_ptr = bus_to_virt(kern_sglist[j].addr);
-#endif
- } else
- kern_ptr = (uint8_t *)kern_sglist[j].addr;
- kern_ptr = kern_ptr + kern_watermark;
-
- kern_watermark += len_to_copy;
- ext_watermark += len_to_copy;
-
- if ((ctsio->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
- CTL_FLAG_DATA_IN) {
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: copying %d "
- "bytes to user\n", len_to_copy));
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: from %p "
- "to %p\n", kern_ptr, ext_ptr));
- if (copyout(kern_ptr, ext_ptr, len_to_copy) != 0) {
- ctl_set_internal_failure(ctsio,
- /*sks_valid*/ 0,
- /*retry_count*/ 0);
- goto bailout;
- }
- } else {
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: copying %d "
- "bytes from user\n", len_to_copy));
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: from %p "
- "to %p\n", ext_ptr, kern_ptr));
- if (copyin(ext_ptr, kern_ptr, len_to_copy)!= 0){
- ctl_set_internal_failure(ctsio,
- /*sks_valid*/ 0,
- /*retry_count*/0);
- goto bailout;
- }
- }
-
- len_copied += len_to_copy;
-
- if (ext_sglist[i].len == ext_watermark) {
- i++;
- ext_watermark = 0;
- }
-
- if (kern_sglist[j].len == kern_watermark) {
- j++;
- kern_watermark = 0;
- }
- }
-
- ctsio->ext_data_filled += len_copied;
-
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: ext_sg_entries: %d, "
- "kern_sg_entries: %d\n", ext_sg_entries,
- kern_sg_entries));
- CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: ext_data_len = %d, "
- "kern_data_len = %d\n", ctsio->ext_data_len,
- ctsio->kern_data_len));
-
-
- /* XXX KDM set residual?? */
-bailout:
-
- if (ext_sglist_malloced != 0)
- free(ext_sglist, M_CTL);
-
- return (CTL_RETVAL_COMPLETE);
-}
-
/*
* Serialize a command that went down the "wrong" side, and so was sent to
* this controller for execution. The logic is a little different than the
@@ -1822,49 +2184,72 @@ bailout:
static int
ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
union ctl_ha_msg msg_info;
+ struct ctl_port *port;
struct ctl_lun *lun;
+ const struct ctl_cmd_entry *entry;
int retval = 0;
uint32_t targ_lun;
- softc = control_softc;
-
targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun;
- lun = softc->ctl_luns[targ_lun];
- if (lun==NULL)
- {
+ mtx_lock(&softc->ctl_lock);
+
+ /* Make sure that we know about this port. */
+ port = ctl_io_port(&ctsio->io_hdr);
+ if (port == NULL || (port->status & CTL_PORT_STATUS_ONLINE) == 0) {
+ ctl_set_internal_failure(ctsio, /*sks_valid*/ 0,
+ /*retry_count*/ 1);
+ goto badjuju;
+ }
+
+ /* Make sure that we know about this LUN. */
+ if ((targ_lun < CTL_MAX_LUNS) &&
+ ((lun = softc->ctl_luns[targ_lun]) != NULL)) {
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
/*
- * Why isn't LUN defined? The other side wouldn't
- * send a cmd if the LUN is undefined.
+ * If the LUN is invalid, pretend that it doesn't exist.
+ * It will go away as soon as all pending I/O has been
+ * completed.
*/
- printf("%s: Bad JUJU!, LUN is NULL!\n", __func__);
-
- /* "Logical unit not supported" */
- ctl_set_sense_data(&msg_info.scsi.sense_data,
- lun,
- /*sense_format*/SSD_TYPE_NONE,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
- /*asc*/ 0x25,
- /*ascq*/ 0x00,
- SSD_ELEM_NONE);
-
- msg_info.scsi.sense_len = SSD_FULL_SIZE;
- msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND;
- msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
- msg_info.hdr.original_sc = ctsio->io_hdr.original_sc;
- msg_info.hdr.serializing_sc = NULL;
- msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) {
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ lun = NULL;
}
- return(1);
+ } else {
+ mtx_unlock(&softc->ctl_lock);
+ lun = NULL;
+ }
+ if (lun == NULL) {
+ /*
+ * The other node would not have sent this request to us unless
+ * it received an announcement that we are the primary node for
+ * this LUN. If this LUN does not exist now, it is probably the
+ * result of a race, so respond to the initiator in the most
+ * opaque way.
+ */
+ ctl_set_busy(ctsio);
+ goto badjuju;
+ }
+ entry = ctl_get_cmd_entry(ctsio, NULL);
+ if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) {
+ mtx_unlock(&lun->lun_lock);
+ goto badjuju;
}
- mtx_lock(&lun->lun_lock);
- TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
+ ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = lun;
+ ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = lun->be_lun;
+
+ /*
+ * Every I/O goes into the OOA queue for a
+ * particular LUN, and stays there until completion.
+ */
+#ifdef CTL_TIME_IO
+ if (TAILQ_EMPTY(&lun->ooa_queue))
+ lun->idle_time += getsbinuptime() - lun->last_busy;
+#endif
+ TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
switch (ctl_check_ooa(lun, (union ctl_io *)ctsio,
(union ctl_io *)TAILQ_PREV(&ctsio->io_hdr, ctl_ooaq,
@@ -1873,264 +2258,64 @@ ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio)
ctsio->io_hdr.flags |= CTL_FLAG_BLOCKED;
TAILQ_INSERT_TAIL(&lun->blocked_queue, &ctsio->io_hdr,
blocked_links);
+ mtx_unlock(&lun->lun_lock);
break;
case CTL_ACTION_PASS:
case CTL_ACTION_SKIP:
if (softc->ha_mode == CTL_HA_MODE_XFER) {
ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
ctl_enqueue_rtr((union ctl_io *)ctsio);
+ mtx_unlock(&lun->lun_lock);
} else {
+ ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
+ mtx_unlock(&lun->lun_lock);
/* send msg back to other side */
msg_info.hdr.original_sc = ctsio->io_hdr.original_sc;
msg_info.hdr.serializing_sc = (union ctl_io *)ctsio;
msg_info.hdr.msg_type = CTL_MSG_R2R;
-#if 0
- printf("2. pOrig %x\n", (int)msg_info.hdr.original_sc);
-#endif
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) {
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.hdr), M_WAITOK);
}
break;
case CTL_ACTION_OVERLAP:
- /* OVERLAPPED COMMANDS ATTEMPTED */
- ctl_set_sense_data(&msg_info.scsi.sense_data,
- lun,
- /*sense_format*/SSD_TYPE_NONE,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
- /*asc*/ 0x4E,
- /*ascq*/ 0x00,
- SSD_ELEM_NONE);
-
- msg_info.scsi.sense_len = SSD_FULL_SIZE;
- msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND;
- msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
- msg_info.hdr.original_sc = ctsio->io_hdr.original_sc;
- msg_info.hdr.serializing_sc = NULL;
- msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
-#if 0
- printf("BAD JUJU:Major Bummer Overlap\n");
-#endif
TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
- retval = 1;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) {
- }
- break;
+ mtx_unlock(&lun->lun_lock);
+ ctl_set_overlapped_cmd(ctsio);
+ goto badjuju;
case CTL_ACTION_OVERLAP_TAG:
- /* TAGGED OVERLAPPED COMMANDS (NN = QUEUE TAG) */
- ctl_set_sense_data(&msg_info.scsi.sense_data,
- lun,
- /*sense_format*/SSD_TYPE_NONE,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
- /*asc*/ 0x4D,
- /*ascq*/ ctsio->tag_num & 0xff,
- SSD_ELEM_NONE);
-
- msg_info.scsi.sense_len = SSD_FULL_SIZE;
- msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND;
- msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
- msg_info.hdr.original_sc = ctsio->io_hdr.original_sc;
- msg_info.hdr.serializing_sc = NULL;
- msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
-#if 0
- printf("BAD JUJU:Major Bummer Overlap Tag\n");
-#endif
TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
- retval = 1;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) {
- }
- break;
+ mtx_unlock(&lun->lun_lock);
+ ctl_set_overlapped_tag(ctsio, ctsio->tag_num);
+ goto badjuju;
case CTL_ACTION_ERROR:
default:
- /* "Internal target failure" */
- ctl_set_sense_data(&msg_info.scsi.sense_data,
- lun,
- /*sense_format*/SSD_TYPE_NONE,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_HARDWARE_ERROR,
- /*asc*/ 0x44,
- /*ascq*/ 0x00,
- SSD_ELEM_NONE);
-
- msg_info.scsi.sense_len = SSD_FULL_SIZE;
- msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND;
- msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE;
+ TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
+ mtx_unlock(&lun->lun_lock);
+
+ ctl_set_internal_failure(ctsio, /*sks_valid*/ 0,
+ /*retry_count*/ 0);
+badjuju:
+ ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info);
msg_info.hdr.original_sc = ctsio->io_hdr.original_sc;
msg_info.hdr.serializing_sc = NULL;
msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU;
-#if 0
- printf("BAD JUJU:Major Bummer HW Error\n");
-#endif
- TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.scsi), M_WAITOK);
retval = 1;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
- sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) {
- }
break;
}
- mtx_unlock(&lun->lun_lock);
return (retval);
}
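
The rewrite above replaces three hand-rolled sense builders with ctl_set_*() calls that converge on a single badjuju: tail, which mirrors whatever sense data is already on the I/O back to the peer. A reduced sketch of that convergence pattern (all names hypothetical, not CTL's):

    enum action { ACT_PASS, ACT_OVERLAP, ACT_ERROR };

    static void set_overlapped(void *io) { (void)io; }
    static void set_internal_failure(void *io) { (void)io; }
    static void reflect_sense_to_peer(void *io) { (void)io; }

    static int
    serialize_sketch(void *io, enum action a)
    {
    	int retval = 0;

    	switch (a) {
    	case ACT_PASS:
    		break;			/* queued or released normally */
    	case ACT_OVERLAP:
    		set_overlapped(io);	/* leaves sense data on the I/O */
    		goto badjuju;
    	case ACT_ERROR:
    	default:
    		set_internal_failure(io);
    badjuju:
    		reflect_sense_to_peer(io);	/* one exit for all errors */
    		retval = 1;
    		break;
    	}
    	return (retval);
    }
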
-static int
-ctl_ioctl_submit_wait(union ctl_io *io)
-{
- struct ctl_fe_ioctl_params params;
- ctl_fe_ioctl_state last_state;
- int done, retval;
-
- retval = 0;
-
- bzero(&params, sizeof(params));
-
- mtx_init(&params.ioctl_mtx, "ctliocmtx", NULL, MTX_DEF);
- cv_init(&params.sem, "ctlioccv");
- params.state = CTL_IOCTL_INPROG;
- last_state = params.state;
-
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = &params;
-
- CTL_DEBUG_PRINT(("ctl_ioctl_submit_wait\n"));
-
- /* This shouldn't happen */
- if ((retval = ctl_queue(io)) != CTL_RETVAL_COMPLETE)
- return (retval);
-
- done = 0;
-
- do {
- mtx_lock(&params.ioctl_mtx);
- /*
- * Check the state here, and don't sleep if the state has
- * already changed (i.e. wakeup has already occured, but we
- * weren't waiting yet).
- */
- if (params.state == last_state) {
- /* XXX KDM cv_wait_sig instead? */
- cv_wait(&params.sem, &params.ioctl_mtx);
- }
- last_state = params.state;
-
- switch (params.state) {
- case CTL_IOCTL_INPROG:
- /* Why did we wake up? */
- /* XXX KDM error here? */
- mtx_unlock(&params.ioctl_mtx);
- break;
- case CTL_IOCTL_DATAMOVE:
- CTL_DEBUG_PRINT(("got CTL_IOCTL_DATAMOVE\n"));
-
- /*
- * change last_state back to INPROG to avoid
- * deadlock on subsequent data moves.
- */
- params.state = last_state = CTL_IOCTL_INPROG;
-
- mtx_unlock(&params.ioctl_mtx);
- ctl_ioctl_do_datamove(&io->scsiio);
- /*
- * Note that in some cases, most notably writes,
- * this will queue the I/O and call us back later.
- * In other cases, generally reads, this routine
- * will immediately call back and wake us up,
- * probably using our own context.
- */
- io->scsiio.be_move_done(io);
- break;
- case CTL_IOCTL_DONE:
- mtx_unlock(&params.ioctl_mtx);
- CTL_DEBUG_PRINT(("got CTL_IOCTL_DONE\n"));
- done = 1;
- break;
- default:
- mtx_unlock(&params.ioctl_mtx);
- /* XXX KDM error here? */
- break;
- }
- } while (done == 0);
-
- mtx_destroy(&params.ioctl_mtx);
- cv_destroy(&params.sem);
-
- return (CTL_RETVAL_COMPLETE);
-}
-
-static void
-ctl_ioctl_datamove(union ctl_io *io)
-{
- struct ctl_fe_ioctl_params *params;
-
- params = (struct ctl_fe_ioctl_params *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
-
- mtx_lock(&params->ioctl_mtx);
- params->state = CTL_IOCTL_DATAMOVE;
- cv_broadcast(&params->sem);
- mtx_unlock(&params->ioctl_mtx);
-}
-
-static void
-ctl_ioctl_done(union ctl_io *io)
-{
- struct ctl_fe_ioctl_params *params;
-
- params = (struct ctl_fe_ioctl_params *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
-
- mtx_lock(&params->ioctl_mtx);
- params->state = CTL_IOCTL_DONE;
- cv_broadcast(&params->sem);
- mtx_unlock(&params->ioctl_mtx);
-}
-
-static void
-ctl_ioctl_hard_startstop_callback(void *arg, struct cfi_metatask *metatask)
-{
- struct ctl_fe_ioctl_startstop_info *sd_info;
-
- sd_info = (struct ctl_fe_ioctl_startstop_info *)arg;
-
- sd_info->hs_info.status = metatask->status;
- sd_info->hs_info.total_luns = metatask->taskinfo.startstop.total_luns;
- sd_info->hs_info.luns_complete =
- metatask->taskinfo.startstop.luns_complete;
- sd_info->hs_info.luns_failed = metatask->taskinfo.startstop.luns_failed;
-
- cv_broadcast(&sd_info->sem);
-}
-
-static void
-ctl_ioctl_bbrread_callback(void *arg, struct cfi_metatask *metatask)
-{
- struct ctl_fe_ioctl_bbrread_info *fe_bbr_info;
-
- fe_bbr_info = (struct ctl_fe_ioctl_bbrread_info *)arg;
-
- mtx_lock(fe_bbr_info->lock);
- fe_bbr_info->bbr_info->status = metatask->status;
- fe_bbr_info->bbr_info->bbr_status = metatask->taskinfo.bbrread.status;
- fe_bbr_info->wakeup_done = 1;
- mtx_unlock(fe_bbr_info->lock);
-
- cv_broadcast(&fe_bbr_info->sem);
-}
-
-/*
- * Returns 0 for success, errno for failure.
- */
-static int
+static void
ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries)
{
union ctl_io *io;
- int retval;
-
- retval = 0;
mtx_lock(&lun->lun_lock);
for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); (io != NULL);
@@ -2170,8 +2355,6 @@ ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num,
entry->cmd_flags |= CTL_OOACMD_FLAG_DMA_QUEUED;
}
mtx_unlock(&lun->lun_lock);
-
- return (retval);
}
static void *
@@ -2354,65 +2537,16 @@ static int
ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
struct thread *td)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = dev->si_drv1;
+ struct ctl_lun *lun;
int retval;
- softc = control_softc;
-
retval = 0;
switch (cmd) {
- case CTL_IO: {
- union ctl_io *io;
- void *pool_tmp;
-
- /*
- * If we haven't been "enabled", don't allow any SCSI I/O
- * to this FETD.
- */
- if ((softc->ioctl_info.flags & CTL_IOCTL_FLAG_ENABLED) == 0) {
- retval = EPERM;
- break;
- }
-
- io = ctl_alloc_io(softc->ioctl_info.port.ctl_pool_ref);
-
- /*
- * Need to save the pool reference so it doesn't get
- * spammed by the user's ctl_io.
- */
- pool_tmp = io->io_hdr.pool;
- memcpy(io, (void *)addr, sizeof(*io));
- io->io_hdr.pool = pool_tmp;
-
- /*
- * No status yet, so make sure the status is set properly.
- */
- io->io_hdr.status = CTL_STATUS_NONE;
-
- /*
- * The user sets the initiator ID, target and LUN IDs.
- */
- io->io_hdr.nexus.targ_port = softc->ioctl_info.port.targ_port;
- io->io_hdr.flags |= CTL_FLAG_USER_REQ;
- if ((io->io_hdr.io_type == CTL_IO_SCSI)
- && (io->scsiio.tag_type != CTL_TAG_UNTAGGED))
- io->scsiio.tag_num = softc->ioctl_info.cur_tag_num++;
-
- retval = ctl_ioctl_submit_wait(io);
-
- if (retval != 0) {
- ctl_free_io(io);
- break;
- }
-
- memcpy((void *)addr, io, sizeof(*io));
-
- /* return this to our pool */
- ctl_free_io(io);
-
+ case CTL_IO:
+ retval = ctl_ioctl_io(dev, cmd, addr, flag, td);
break;
- }
case CTL_ENABLE_PORT:
case CTL_DISABLE_PORT:
case CTL_SET_PORT_WWNS: {
@@ -2425,9 +2559,12 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
STAILQ_FOREACH(port, &softc->port_list, links) {
int action, done;
+ if (port->targ_port < softc->port_min ||
+ port->targ_port >= softc->port_max)
+ continue;
+
action = 0;
done = 0;
-
if ((entry->port_type == CTL_PORT_NONE)
&& (entry->targ_port == port->targ_port)) {
/*
@@ -2457,30 +2594,29 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
break;
}
}
- if (action != 0) {
- /*
- * XXX KDM we have to drop the lock here,
- * because the online/offline operations
- * can potentially block. We need to
- * reference count the frontends so they
- * can't go away,
- */
- mtx_unlock(&softc->ctl_lock);
-
- if (cmd == CTL_ENABLE_PORT) {
- ctl_port_online(port);
- } else if (cmd == CTL_DISABLE_PORT) {
- ctl_port_offline(port);
- }
+ if (action == 0)
+ continue;
+ /*
+ * XXX KDM we have to drop the lock here, because
+ * the online/offline operations can potentially
+ * block. We need to reference count the frontends
+ * so they can't go away.
+ */
+ if (cmd == CTL_ENABLE_PORT) {
+ mtx_unlock(&softc->ctl_lock);
+ ctl_port_online(port);
mtx_lock(&softc->ctl_lock);
-
- if (cmd == CTL_SET_PORT_WWNS)
- ctl_port_set_wwns(port,
- (entry->flags & CTL_PORT_WWNN_VALID) ?
- 1 : 0, entry->wwnn,
- (entry->flags & CTL_PORT_WWPN_VALID) ?
- 1 : 0, entry->wwpn);
+ } else if (cmd == CTL_DISABLE_PORT) {
+ mtx_unlock(&softc->ctl_lock);
+ ctl_port_offline(port);
+ mtx_lock(&softc->ctl_lock);
+ } else if (cmd == CTL_SET_PORT_WWNS) {
+ ctl_port_set_wwns(port,
+ (entry->flags & CTL_PORT_WWNN_VALID) ?
+ 1 : 0, entry->wwnn,
+ (entry->flags & CTL_PORT_WWPN_VALID) ?
+ 1 : 0, entry->wwpn);
}
if (done != 0)
break;
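
As the XXX comment notes, ctl_port_online() and ctl_port_offline() may sleep, so the loop releases ctl_lock around each call and retakes it afterwards, continuing over a list that may have changed underneath. A generic userspace sketch of that drop-and-reacquire idiom (pthread stand-ins for the kernel mutex API, purely illustrative):

    #include <pthread.h>

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void port_toggle(void *port) { (void)port; /* may block */ }

    /* Caller holds list_lock; it is released across the blocking call,
     * so the caller must tolerate the list changing meanwhile. */
    static void
    toggle_locked(void *port)
    {
    	pthread_mutex_unlock(&list_lock);
    	port_toggle(port);
    	pthread_mutex_lock(&list_lock);
    }
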
@@ -2488,115 +2624,7 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
mtx_unlock(&softc->ctl_lock);
break;
}
- case CTL_GET_PORT_LIST: {
- struct ctl_port *port;
- struct ctl_port_list *list;
- int i;
-
- list = (struct ctl_port_list *)addr;
-
- if (list->alloc_len != (list->alloc_num *
- sizeof(struct ctl_port_entry))) {
- printf("%s: CTL_GET_PORT_LIST: alloc_len %u != "
- "alloc_num %u * sizeof(struct ctl_port_entry) "
- "%zu\n", __func__, list->alloc_len,
- list->alloc_num, sizeof(struct ctl_port_entry));
- retval = EINVAL;
- break;
- }
- list->fill_len = 0;
- list->fill_num = 0;
- list->dropped_num = 0;
- i = 0;
- mtx_lock(&softc->ctl_lock);
- STAILQ_FOREACH(port, &softc->port_list, links) {
- struct ctl_port_entry entry, *list_entry;
-
- if (list->fill_num >= list->alloc_num) {
- list->dropped_num++;
- continue;
- }
-
- entry.port_type = port->port_type;
- strlcpy(entry.port_name, port->port_name,
- sizeof(entry.port_name));
- entry.targ_port = port->targ_port;
- entry.physical_port = port->physical_port;
- entry.virtual_port = port->virtual_port;
- entry.wwnn = port->wwnn;
- entry.wwpn = port->wwpn;
- if (port->status & CTL_PORT_STATUS_ONLINE)
- entry.online = 1;
- else
- entry.online = 0;
-
- list_entry = &list->entries[i];
-
- retval = copyout(&entry, list_entry, sizeof(entry));
- if (retval != 0) {
- printf("%s: CTL_GET_PORT_LIST: copyout "
- "returned %d\n", __func__, retval);
- break;
- }
- i++;
- list->fill_num++;
- list->fill_len += sizeof(entry);
- }
- mtx_unlock(&softc->ctl_lock);
-
- /*
- * If this is non-zero, we had a copyout fault, so there's
- * probably no point in attempting to set the status inside
- * the structure.
- */
- if (retval != 0)
- break;
-
- if (list->dropped_num > 0)
- list->status = CTL_PORT_LIST_NEED_MORE_SPACE;
- else
- list->status = CTL_PORT_LIST_OK;
- break;
- }
- case CTL_DUMP_OOA: {
- struct ctl_lun *lun;
- union ctl_io *io;
- char printbuf[128];
- struct sbuf sb;
-
- mtx_lock(&softc->ctl_lock);
- printf("Dumping OOA queues:\n");
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- mtx_lock(&lun->lun_lock);
- for (io = (union ctl_io *)TAILQ_FIRST(
- &lun->ooa_queue); io != NULL;
- io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr,
- ooa_links)) {
- sbuf_new(&sb, printbuf, sizeof(printbuf),
- SBUF_FIXEDLEN);
- sbuf_printf(&sb, "LUN %jd tag 0x%04x%s%s%s%s: ",
- (intmax_t)lun->lun,
- io->scsiio.tag_num,
- (io->io_hdr.flags &
- CTL_FLAG_BLOCKED) ? "" : " BLOCKED",
- (io->io_hdr.flags &
- CTL_FLAG_DMA_INPROG) ? " DMA" : "",
- (io->io_hdr.flags &
- CTL_FLAG_ABORT) ? " ABORT" : "",
- (io->io_hdr.flags &
- CTL_FLAG_IS_WAS_ON_RTR) ? " RTR" : "");
- ctl_scsi_command_string(&io->scsiio, NULL, &sb);
- sbuf_finish(&sb);
- printf("%s\n", sbuf_data(&sb));
- }
- mtx_unlock(&lun->lun_lock);
- }
- printf("OOA queues dump done\n");
- mtx_unlock(&softc->ctl_lock);
- break;
- }
case CTL_GET_OOA: {
- struct ctl_lun *lun;
struct ctl_ooa *ooa_hdr;
struct ctl_ooa_entry *entries;
uint32_t cur_fill_num;
@@ -2646,21 +2674,13 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
if (ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) {
STAILQ_FOREACH(lun, &softc->lun_list, links) {
- retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num,
- ooa_hdr, entries);
- if (retval != 0)
- break;
- }
- if (retval != 0) {
- mtx_unlock(&softc->ctl_lock);
- free(entries, M_CTL);
- break;
+ ctl_ioctl_fill_ooa(lun, &cur_fill_num,
+ ooa_hdr, entries);
}
} else {
lun = softc->ctl_luns[ooa_hdr->lun_num];
-
- retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num,ooa_hdr,
- entries);
+ ctl_ioctl_fill_ooa(lun, &cur_fill_num, ooa_hdr,
+ entries);
}
mtx_unlock(&softc->ctl_lock);
@@ -2673,7 +2693,7 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
__func__, ooa_hdr->fill_len);
}
- getbintime(&ooa_hdr->cur_bt);
+ getbinuptime(&ooa_hdr->cur_bt);
if (cur_fill_num > ooa_hdr->alloc_num) {
ooa_hdr->dropped_num = cur_fill_num - ooa_hdr->alloc_num;
@@ -2686,141 +2706,8 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
free(entries, M_CTL);
break;
}
- case CTL_CHECK_OOA: {
- union ctl_io *io;
- struct ctl_lun *lun;
- struct ctl_ooa_info *ooa_info;
-
-
- ooa_info = (struct ctl_ooa_info *)addr;
-
- if (ooa_info->lun_id >= CTL_MAX_LUNS) {
- ooa_info->status = CTL_OOA_INVALID_LUN;
- break;
- }
- mtx_lock(&softc->ctl_lock);
- lun = softc->ctl_luns[ooa_info->lun_id];
- if (lun == NULL) {
- mtx_unlock(&softc->ctl_lock);
- ooa_info->status = CTL_OOA_INVALID_LUN;
- break;
- }
- mtx_lock(&lun->lun_lock);
- mtx_unlock(&softc->ctl_lock);
- ooa_info->num_entries = 0;
- for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue);
- io != NULL; io = (union ctl_io *)TAILQ_NEXT(
- &io->io_hdr, ooa_links)) {
- ooa_info->num_entries++;
- }
- mtx_unlock(&lun->lun_lock);
-
- ooa_info->status = CTL_OOA_SUCCESS;
-
- break;
- }
- case CTL_HARD_START:
- case CTL_HARD_STOP: {
- struct ctl_fe_ioctl_startstop_info ss_info;
- struct cfi_metatask *metatask;
- struct mtx hs_mtx;
-
- mtx_init(&hs_mtx, "HS Mutex", NULL, MTX_DEF);
-
- cv_init(&ss_info.sem, "hard start/stop cv" );
-
- metatask = cfi_alloc_metatask(/*can_wait*/ 1);
- if (metatask == NULL) {
- retval = ENOMEM;
- mtx_destroy(&hs_mtx);
- break;
- }
-
- if (cmd == CTL_HARD_START)
- metatask->tasktype = CFI_TASK_STARTUP;
- else
- metatask->tasktype = CFI_TASK_SHUTDOWN;
-
- metatask->callback = ctl_ioctl_hard_startstop_callback;
- metatask->callback_arg = &ss_info;
-
- cfi_action(metatask);
-
- /* Wait for the callback */
- mtx_lock(&hs_mtx);
- cv_wait_sig(&ss_info.sem, &hs_mtx);
- mtx_unlock(&hs_mtx);
-
- /*
- * All information has been copied from the metatask by the
- * time cv_broadcast() is called, so we free the metatask here.
- */
- cfi_free_metatask(metatask);
-
- memcpy((void *)addr, &ss_info.hs_info, sizeof(ss_info.hs_info));
-
- mtx_destroy(&hs_mtx);
- break;
- }
- case CTL_BBRREAD: {
- struct ctl_bbrread_info *bbr_info;
- struct ctl_fe_ioctl_bbrread_info fe_bbr_info;
- struct mtx bbr_mtx;
- struct cfi_metatask *metatask;
-
- bbr_info = (struct ctl_bbrread_info *)addr;
-
- bzero(&fe_bbr_info, sizeof(fe_bbr_info));
-
- bzero(&bbr_mtx, sizeof(bbr_mtx));
- mtx_init(&bbr_mtx, "BBR Mutex", NULL, MTX_DEF);
-
- fe_bbr_info.bbr_info = bbr_info;
- fe_bbr_info.lock = &bbr_mtx;
-
- cv_init(&fe_bbr_info.sem, "BBR read cv");
- metatask = cfi_alloc_metatask(/*can_wait*/ 1);
-
- if (metatask == NULL) {
- mtx_destroy(&bbr_mtx);
- cv_destroy(&fe_bbr_info.sem);
- retval = ENOMEM;
- break;
- }
- metatask->tasktype = CFI_TASK_BBRREAD;
- metatask->callback = ctl_ioctl_bbrread_callback;
- metatask->callback_arg = &fe_bbr_info;
- metatask->taskinfo.bbrread.lun_num = bbr_info->lun_num;
- metatask->taskinfo.bbrread.lba = bbr_info->lba;
- metatask->taskinfo.bbrread.len = bbr_info->len;
-
- cfi_action(metatask);
-
- mtx_lock(&bbr_mtx);
- while (fe_bbr_info.wakeup_done == 0)
- cv_wait_sig(&fe_bbr_info.sem, &bbr_mtx);
- mtx_unlock(&bbr_mtx);
-
- bbr_info->status = metatask->status;
- bbr_info->bbr_status = metatask->taskinfo.bbrread.status;
- bbr_info->scsi_status = metatask->taskinfo.bbrread.scsi_status;
- memcpy(&bbr_info->sense_data,
- &metatask->taskinfo.bbrread.sense_data,
- MIN(sizeof(bbr_info->sense_data),
- sizeof(metatask->taskinfo.bbrread.sense_data)));
-
- cfi_free_metatask(metatask);
-
- mtx_destroy(&bbr_mtx);
- cv_destroy(&fe_bbr_info.sem);
-
- break;
- }
case CTL_DELAY_IO: {
struct ctl_io_delay_info *delay_info;
-#ifdef CTL_IO_DELAY
- struct ctl_lun *lun;
-#endif /* CTL_IO_DELAY */
delay_info = (struct ctl_io_delay_info *)addr;
@@ -2874,73 +2761,8 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
#endif /* CTL_IO_DELAY */
break;
}
- case CTL_REALSYNC_SET: {
- int *syncstate;
-
- syncstate = (int *)addr;
-
- mtx_lock(&softc->ctl_lock);
- switch (*syncstate) {
- case 0:
- softc->flags &= ~CTL_FLAG_REAL_SYNC;
- break;
- case 1:
- softc->flags |= CTL_FLAG_REAL_SYNC;
- break;
- default:
- retval = EINVAL;
- break;
- }
- mtx_unlock(&softc->ctl_lock);
- break;
- }
- case CTL_REALSYNC_GET: {
- int *syncstate;
-
- syncstate = (int*)addr;
-
- mtx_lock(&softc->ctl_lock);
- if (softc->flags & CTL_FLAG_REAL_SYNC)
- *syncstate = 1;
- else
- *syncstate = 0;
- mtx_unlock(&softc->ctl_lock);
-
- break;
- }
- case CTL_SETSYNC:
- case CTL_GETSYNC: {
- struct ctl_sync_info *sync_info;
- struct ctl_lun *lun;
-
- sync_info = (struct ctl_sync_info *)addr;
-
- mtx_lock(&softc->ctl_lock);
- lun = softc->ctl_luns[sync_info->lun_id];
- if (lun == NULL) {
- mtx_unlock(&softc->ctl_lock);
- sync_info->status = CTL_GS_SYNC_NO_LUN;
- }
- /*
- * Get or set the sync interval. We're not bounds checking
- * in the set case, hopefully the user won't do something
- * silly.
- */
- mtx_lock(&lun->lun_lock);
- mtx_unlock(&softc->ctl_lock);
- if (cmd == CTL_GETSYNC)
- sync_info->sync_interval = lun->sync_interval;
- else
- lun->sync_interval = sync_info->sync_interval;
- mtx_unlock(&lun->lun_lock);
-
- sync_info->status = CTL_GS_SYNC_OK;
-
- break;
- }
case CTL_GETSTATS: {
struct ctl_stats *stats;
- struct ctl_lun *lun;
int i;
stats = (struct ctl_stats *)addr;
@@ -2955,9 +2777,9 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
* XXX KDM no locking here. If the LUN list changes,
* things can blow up.
*/
- for (i = 0, lun = STAILQ_FIRST(&softc->lun_list); lun != NULL;
- i++, lun = STAILQ_NEXT(lun, links)) {
- retval = copyout(&lun->stats, &stats->lun_stats[i],
+ i = 0;
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ retval = copyout(&lun->stats, &stats->lun_stats[i++],
sizeof(lun->stats));
if (retval != 0)
break;
@@ -2976,7 +2798,6 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
}
case CTL_ERROR_INJECT: {
struct ctl_error_desc *err_desc, *new_err_desc;
- struct ctl_lun *lun;
err_desc = (struct ctl_error_desc *)addr;
@@ -3023,7 +2844,6 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
}
case CTL_ERROR_INJECT_DELETE: {
struct ctl_error_desc *delete_desc, *desc, *desc2;
- struct ctl_lun *lun;
int delete_done;
delete_desc = (struct ctl_error_desc *)addr;
@@ -3067,15 +2887,13 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
mtx_lock(&softc->ctl_lock);
printf("CTL Persistent Reservation information start:\n");
for (i = 0; i < CTL_MAX_LUNS; i++) {
- struct ctl_lun *lun;
-
lun = softc->ctl_luns[i];
if ((lun == NULL)
|| ((lun->flags & CTL_LUN_DISABLED) != 0))
continue;
- for (j = 0; j < (CTL_MAX_PORTS * 2); j++) {
+ for (j = 0; j < CTL_MAX_PORTS; j++) {
if (lun->pr_keys[j] == NULL)
continue;
for (k = 0; k < CTL_MAX_INIT_PER_PORT; k++){
@@ -3162,7 +2980,6 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
}
case CTL_LUN_LIST: {
struct sbuf *sb;
- struct ctl_lun *lun;
struct ctl_lun_list *list;
struct ctl_option *opt;
@@ -3367,7 +3184,10 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
}
}
- retval = fe->ioctl(dev, cmd, addr, flag, td);
+ if (fe->ioctl)
+ retval = fe->ioctl(dev, cmd, addr, flag, td);
+ else
+ retval = ENODEV;
if (req->num_args > 0) {
ctl_copyout_args(req->num_args, req->kern_args);
@@ -3526,11 +3346,23 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
struct ctl_port *port;
mtx_lock(&softc->ctl_lock);
- if (lm->port >= CTL_MAX_PORTS ||
+ if (lm->port < softc->port_min ||
+ lm->port >= softc->port_max ||
(port = softc->ctl_ports[lm->port]) == NULL) {
mtx_unlock(&softc->ctl_lock);
return (ENXIO);
}
+ if (port->status & CTL_PORT_STATUS_ONLINE) {
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ if (ctl_lun_map_to_port(port, lun->lun) >=
+ CTL_MAX_LUNS)
+ continue;
+ mtx_lock(&lun->lun_lock);
+ ctl_est_ua_port(lun, lm->port, -1,
+ CTL_UA_LUN_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ }
+ }
mtx_unlock(&softc->ctl_lock); // XXX: port_enable sleeps
if (lm->plun < CTL_MAX_LUNS) {
if (lm->lun == UINT32_MAX)
@@ -3547,6 +3379,8 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
retval = ctl_lun_map_init(port);
} else
return (ENXIO);
+ if (port->status & CTL_PORT_STATUS_ONLINE)
+ ctl_isc_announce_port(port);
break;
}
default: {
@@ -3589,34 +3423,13 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
uint32_t
ctl_get_initindex(struct ctl_nexus *nexus)
{
- if (nexus->targ_port < CTL_MAX_PORTS)
- return (nexus->initid.id +
- (nexus->targ_port * CTL_MAX_INIT_PER_PORT));
- else
- return (nexus->initid.id +
- ((nexus->targ_port - CTL_MAX_PORTS) *
- CTL_MAX_INIT_PER_PORT));
-}
-
-uint32_t
-ctl_get_resindex(struct ctl_nexus *nexus)
-{
- return (nexus->initid.id + (nexus->targ_port * CTL_MAX_INIT_PER_PORT));
-}
-
-uint32_t
-ctl_port_idx(int port_num)
-{
- if (port_num < CTL_MAX_PORTS)
- return(port_num);
- else
- return(port_num - CTL_MAX_PORTS);
+ return (nexus->initid + (nexus->targ_port * CTL_MAX_INIT_PER_PORT));
}
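
ctl_get_initindex() thus reduces to one flat formula over the whole global port space, with no CTL_MAX_PORTS wraparound. As a worked example, assuming CTL_MAX_INIT_PER_PORT is 2048 (an assumption, see ctl.h), initiator 7 on target port 130 maps to 130 * 2048 + 7 = 266247.
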
int
ctl_lun_map_init(struct ctl_port *port)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
struct ctl_lun *lun;
uint32_t i;
@@ -3628,8 +3441,11 @@ ctl_lun_map_init(struct ctl_port *port)
for (i = 0; i < CTL_MAX_LUNS; i++)
port->lun_map[i] = UINT32_MAX;
if (port->status & CTL_PORT_STATUS_ONLINE) {
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- port->lun_disable(port->targ_lun_arg, lun->lun);
+ if (port->lun_disable != NULL) {
+ STAILQ_FOREACH(lun, &softc->lun_list, links)
+ port->lun_disable(port->targ_lun_arg, lun->lun);
+ }
+ ctl_isc_announce_port(port);
}
return (0);
}
@@ -3637,7 +3453,7 @@ ctl_lun_map_init(struct ctl_port *port)
int
ctl_lun_map_deinit(struct ctl_port *port)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
struct ctl_lun *lun;
if (port->lun_map == NULL)
@@ -3645,8 +3461,11 @@ ctl_lun_map_deinit(struct ctl_port *port)
free(port->lun_map, M_CTL);
port->lun_map = NULL;
if (port->status & CTL_PORT_STATUS_ONLINE) {
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- port->lun_enable(port->targ_lun_arg, lun->lun);
+ if (port->lun_enable != NULL) {
+ STAILQ_FOREACH(lun, &softc->lun_list, links)
+ port->lun_enable(port->targ_lun_arg, lun->lun);
+ }
+ ctl_isc_announce_port(port);
}
return (0);
}
@@ -3664,8 +3483,11 @@ ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun)
}
old = port->lun_map[plun];
port->lun_map[plun] = glun;
- if ((port->status & CTL_PORT_STATUS_ONLINE) && old >= CTL_MAX_LUNS)
- port->lun_enable(port->targ_lun_arg, plun);
+ if ((port->status & CTL_PORT_STATUS_ONLINE) && old >= CTL_MAX_LUNS) {
+ if (port->lun_enable != NULL)
+ port->lun_enable(port->targ_lun_arg, plun);
+ ctl_isc_announce_port(port);
+ }
return (0);
}
@@ -3678,8 +3500,11 @@ ctl_lun_map_unset(struct ctl_port *port, uint32_t plun)
return (0);
old = port->lun_map[plun];
port->lun_map[plun] = UINT32_MAX;
- if ((port->status & CTL_PORT_STATUS_ONLINE) && old < CTL_MAX_LUNS)
- port->lun_disable(port->targ_lun_arg, plun);
+ if ((port->status & CTL_PORT_STATUS_ONLINE) && old < CTL_MAX_LUNS) {
+ if (port->lun_disable != NULL)
+ port->lun_disable(port->targ_lun_arg, plun);
+ ctl_isc_announce_port(port);
+ }
return (0);
}
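/*
 * Illustrative sketch, not part of this commit: the lun_map handled
 * by ctl_lun_map_set()/ctl_lun_map_unset() above is an array of
 * CTL_MAX_LUNS uint32_t slots, with UINT32_MAX meaning "not mapped".
 * Minimal standalone model of that translation table; the sizes and
 * names below are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_LUNS 8
#define UNMAPPED UINT32_MAX

int
main(void)
{
	uint32_t lun_map[MAX_LUNS];
	uint32_t i;

	for (i = 0; i < MAX_LUNS; i++)	/* init: everything unmapped */
		lun_map[i] = UNMAPPED;
	lun_map[2] = 5;			/* set: port LUN 2 -> global LUN 5 */
	lun_map[2] = UNMAPPED;		/* unset it again */
	lun_map[0] = 3;			/* set: port LUN 0 -> global LUN 3 */
	for (i = 0; i < MAX_LUNS; i++) {
		if (lun_map[i] != UNMAPPED)	/* lookup */
			printf("plun %u -> glun %u\n", i, lun_map[i]);
	}
	return (0);
}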
@@ -3713,34 +3538,19 @@ ctl_lun_map_to_port(struct ctl_port *port, uint32_t lun_id)
static struct ctl_port *
ctl_io_port(struct ctl_io_hdr *io_hdr)
{
- int port_num;
- port_num = io_hdr->nexus.targ_port;
- return (control_softc->ctl_ports[ctl_port_idx(port_num)]);
+ return (control_softc->ctl_ports[io_hdr->nexus.targ_port]);
}
-/*
- * Note: This only works for bitmask sizes that are at least 32 bits, and
- * that are a power of 2.
- */
int
-ctl_ffz(uint32_t *mask, uint32_t size)
+ctl_ffz(uint32_t *mask, uint32_t first, uint32_t last)
{
- uint32_t num_chunks, num_pieces;
- int i, j;
-
- num_chunks = (size >> 5);
- if (num_chunks == 0)
- num_chunks++;
- num_pieces = MIN((sizeof(uint32_t) * 8), size);
+ int i;
- for (i = 0; i < num_chunks; i++) {
- for (j = 0; j < num_pieces; j++) {
- if ((mask[i] & (1 << j)) == 0)
- return ((i << 5) + j);
- }
+ for (i = first; i < last; i++) {
+ if ((mask[i / 32] & (1 << (i % 32))) == 0)
+ return (i);
}
-
return (-1);
}
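/*
 * Illustrative sketch, not part of this commit: the rewritten
 * ctl_ffz() above scans a uint32_t bitmask array for the first zero
 * bit in [first, last) and returns -1 if every bit is set.  A
 * standalone equivalent (1u is used here to keep the shift well
 * defined at bit 31; the logic is otherwise the same):
 */
#include <stdint.h>
#include <stdio.h>

static int
ffz_range(const uint32_t *mask, uint32_t first, uint32_t last)
{
	uint32_t i;

	for (i = first; i < last; i++) {
		if ((mask[i / 32] & (1u << (i % 32))) == 0)
			return ((int)i);
	}
	return (-1);
}

int
main(void)
{
	/* Bits 0-31 and 32-34 are set, so the first free bit is 35. */
	uint32_t mask[2] = { 0xffffffff, 0x00000007 };

	printf("%d\n", ffz_range(mask, 0, 64));	/* prints 35 */
	printf("%d\n", ffz_range(mask, 0, 32));	/* prints -1 */
	return (0);
}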
@@ -3949,46 +3759,6 @@ ctl_zero_io(union ctl_io *io)
io->io_hdr.pool = pool_ref;
}
-/*
- * This routine is currently used for internal copies of ctl_ios that need
- * to persist for some reason after we've already returned status to the
- * FETD. (Thus the flag set.)
- *
- * XXX XXX
- * Note that this makes a blind copy of all fields in the ctl_io, except
- * for the pool reference. This includes any memory that has been
- * allocated! That memory will no longer be valid after done has been
- * called, so this would be VERY DANGEROUS for a command that actually does
- * any reads or writes. Right now (11/7/2005), this is only used for immediate
- * start and stop commands, which don't transfer any data, so this is not a
- * problem. If it is used for anything else, the caller would also need to
- * allocate data buffer space and this routine would need to be modified to
- * copy the data buffer(s) as well.
- */
-void
-ctl_copy_io(union ctl_io *src, union ctl_io *dest)
-{
- void *pool_ref;
-
- if ((src == NULL)
- || (dest == NULL))
- return;
-
- /*
- * May need to preserve linked list pointers at some point too.
- */
- pool_ref = dest->io_hdr.pool;
-
- memcpy(dest, src, MIN(sizeof(*src), sizeof(*dest)));
-
- dest->io_hdr.pool = pool_ref;
- /*
- * We need to know that this is an internal copy, and doesn't need
- * to get passed back to the FETD that allocated it.
- */
- dest->io_hdr.flags |= CTL_FLAG_INT_COPY;
-}
-
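/*
 * Illustrative sketch, not part of this commit: the ctl_copy_io()
 * removed above followed a simple save/copy/restore pattern -- a blind
 * memcpy() of the whole structure that keeps only the destination's
 * pool reference.  Generic version with made-up types:
 */
#include <stdio.h>
#include <string.h>

struct io {
	void *pool;	/* owned by the destination; must survive the copy */
	int cdb[4];	/* stand-in for all the other fields */
};

static void
copy_io(const struct io *src, struct io *dst)
{
	void *pool_ref = dst->pool;		/* save */

	memcpy(dst, src, sizeof(*dst));		/* blind copy of every field */
	dst->pool = pool_ref;			/* restore the one kept field */
}

int
main(void)
{
	int pool = 42;
	struct io a = { NULL, { 1, 2, 3, 4 } };
	struct io b = { &pool, { 0, 0, 0, 0 } };

	copy_io(&a, &b);
	printf("cdb[0]=%d, pool preserved=%d\n", b.cdb[0], *(int *)b.pool);
	return (0);
}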
int
ctl_expand_number(const char *buf, uint64_t *num)
{
@@ -4053,14 +3823,14 @@ ctl_init_page_index(struct ctl_lun *lun)
for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
page_index = &lun->mode_pages.index[i];
- /*
- * If this is a disk-only mode page, there's no point in
- * setting it up. For some pages, we have to have some
- * basic information about the disk in order to calculate the
- * mode page data.
- */
- if ((lun->be_lun->lun_type != T_DIRECT)
- && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY))
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
switch (page_index->page_code & SMPH_PC_MASK) {
@@ -4241,35 +4011,65 @@ ctl_init_page_index(struct ctl_lun *lun)
break;
}
case SMS_CONTROL_MODE_PAGE: {
- struct scsi_control_page *control_page;
-
- if (page_index->subpage != SMS_SUBPAGE_PAGE_0)
- panic("invalid subpage value %d",
- page_index->subpage);
-
- memcpy(&lun->mode_pages.control_page[CTL_PAGE_DEFAULT],
- &control_page_default,
- sizeof(control_page_default));
- memcpy(&lun->mode_pages.control_page[
- CTL_PAGE_CHANGEABLE], &control_page_changeable,
- sizeof(control_page_changeable));
- memcpy(&lun->mode_pages.control_page[CTL_PAGE_SAVED],
- &control_page_default,
- sizeof(control_page_default));
- control_page = &lun->mode_pages.control_page[
- CTL_PAGE_SAVED];
- value = ctl_get_opt(&lun->be_lun->options, "reordering");
- if (value != NULL && strcmp(value, "unrestricted") == 0) {
- control_page->queue_flags &= ~SCP_QUEUE_ALG_MASK;
- control_page->queue_flags |= SCP_QUEUE_ALG_UNRESTRICTED;
+ switch (page_index->subpage) {
+ case SMS_SUBPAGE_PAGE_0: {
+ struct scsi_control_page *control_page;
+
+ memcpy(&lun->mode_pages.control_page[
+ CTL_PAGE_DEFAULT],
+ &control_page_default,
+ sizeof(control_page_default));
+ memcpy(&lun->mode_pages.control_page[
+ CTL_PAGE_CHANGEABLE],
+ &control_page_changeable,
+ sizeof(control_page_changeable));
+ memcpy(&lun->mode_pages.control_page[
+ CTL_PAGE_SAVED],
+ &control_page_default,
+ sizeof(control_page_default));
+ control_page = &lun->mode_pages.control_page[
+ CTL_PAGE_SAVED];
+ value = ctl_get_opt(&lun->be_lun->options,
+ "reordering");
+ if (value != NULL &&
+ strcmp(value, "unrestricted") == 0) {
+ control_page->queue_flags &=
+ ~SCP_QUEUE_ALG_MASK;
+ control_page->queue_flags |=
+ SCP_QUEUE_ALG_UNRESTRICTED;
+ }
+ memcpy(&lun->mode_pages.control_page[
+ CTL_PAGE_CURRENT],
+ &lun->mode_pages.control_page[
+ CTL_PAGE_SAVED],
+ sizeof(control_page_default));
+ page_index->page_data =
+ (uint8_t *)lun->mode_pages.control_page;
+ break;
+ }
+ case 0x01:
+ memcpy(&lun->mode_pages.control_ext_page[
+ CTL_PAGE_DEFAULT],
+ &control_ext_page_default,
+ sizeof(control_ext_page_default));
+ memcpy(&lun->mode_pages.control_ext_page[
+ CTL_PAGE_CHANGEABLE],
+ &control_ext_page_changeable,
+ sizeof(control_ext_page_changeable));
+ memcpy(&lun->mode_pages.control_ext_page[
+ CTL_PAGE_SAVED],
+ &control_ext_page_default,
+ sizeof(control_ext_page_default));
+ memcpy(&lun->mode_pages.control_ext_page[
+ CTL_PAGE_CURRENT],
+ &lun->mode_pages.control_ext_page[
+ CTL_PAGE_SAVED],
+ sizeof(control_ext_page_default));
+ page_index->page_data =
+ (uint8_t *)lun->mode_pages.control_ext_page;
+ break;
}
- memcpy(&lun->mode_pages.control_page[CTL_PAGE_CURRENT],
- &lun->mode_pages.control_page[CTL_PAGE_SAVED],
- sizeof(control_page_default));
- page_index->page_data =
- (uint8_t *)lun->mode_pages.control_page;
break;
-
}
case SMS_INFO_EXCEPTIONS_PAGE: {
switch (page_index->subpage) {
@@ -4362,12 +4162,26 @@ ctl_init_page_index(struct ctl_lun *lun)
}}
break;
}
+ case SMS_CDDVD_CAPS_PAGE:{
+ memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_DEFAULT],
+ &cddvd_page_default,
+ sizeof(cddvd_page_default));
+ memcpy(&lun->mode_pages.cddvd_page[
+ CTL_PAGE_CHANGEABLE], &cddvd_page_changeable,
+ sizeof(cddvd_page_changeable));
+ memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_SAVED],
+ &cddvd_page_default,
+ sizeof(cddvd_page_default));
+ memcpy(&lun->mode_pages.cddvd_page[CTL_PAGE_CURRENT],
+ &lun->mode_pages.cddvd_page[CTL_PAGE_SAVED],
+ sizeof(cddvd_page_default));
+ page_index->page_data =
+ (uint8_t *)lun->mode_pages.cddvd_page;
+ break;
+ }
case SMS_VENDOR_SPECIFIC_PAGE:{
switch (page_index->subpage) {
case DBGCNF_SUBPAGE_CODE: {
- struct copan_debugconf_subpage *current_page,
- *saved_page;
-
memcpy(&lun->mode_pages.debugconf_subpage[
CTL_PAGE_CURRENT],
&debugconf_page_default,
@@ -4385,16 +4199,7 @@ ctl_init_page_index(struct ctl_lun *lun)
&debugconf_page_default,
sizeof(debugconf_page_default));
page_index->page_data =
- (uint8_t *)lun->mode_pages.debugconf_subpage;
-
- current_page = (struct copan_debugconf_subpage *)
- (page_index->page_data +
- (page_index->page_len *
- CTL_PAGE_CURRENT));
- saved_page = (struct copan_debugconf_subpage *)
- (page_index->page_data +
- (page_index->page_len *
- CTL_PAGE_SAVED));
+ (uint8_t *)lun->mode_pages.debugconf_subpage;
break;
}
default:
@@ -4427,18 +4232,18 @@ ctl_init_log_page_index(struct ctl_lun *lun)
for (i = 0, j = 0, k = 0; i < CTL_NUM_LOG_PAGES; i++) {
page_index = &lun->log_pages.index[i];
- /*
- * If this is a disk-only mode page, there's no point in
- * setting it up. For some pages, we have to have some
- * basic information about the disk in order to calculate the
- * mode page data.
- */
- if ((lun->be_lun->lun_type != T_DIRECT)
- && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY))
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
if (page_index->page_code == SLS_LOGICAL_BLOCK_PROVISIONING &&
- lun->backend->lun_attr == NULL)
+ lun->backend->lun_attr == NULL)
continue;
if (page_index->page_code != prev) {
@@ -4522,8 +4327,8 @@ ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun,
*/
switch (be_lun->lun_type) {
case T_DIRECT:
- break;
case T_PROCESSOR:
+ case T_CDROM:
break;
case T_SEQUENTIAL:
case T_CHANGER:
@@ -4639,7 +4444,7 @@ ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun,
}
lun_number = be_lun->req_lun_id;
} else {
- lun_number = ctl_ffz(ctl_softc->ctl_lun_mask, CTL_MAX_LUNS);
+ lun_number = ctl_ffz(ctl_softc->ctl_lun_mask, 0, CTL_MAX_LUNS);
if (lun_number == -1) {
mtx_unlock(&ctl_softc->ctl_lock);
printf("ctl: can't allocate LUN, out of LUNs\n");
@@ -4664,32 +4469,22 @@ ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun,
be_lun->ctl_lun = lun;
be_lun->lun_id = lun_number;
atomic_add_int(&be_lun->be->num_luns, 1);
- if (be_lun->flags & CTL_LUN_FLAG_OFFLINE)
- lun->flags |= CTL_LUN_OFFLINE;
-
- if (be_lun->flags & CTL_LUN_FLAG_POWERED_OFF)
+ if (be_lun->flags & CTL_LUN_FLAG_EJECTED)
+ lun->flags |= CTL_LUN_EJECTED;
+ if (be_lun->flags & CTL_LUN_FLAG_NO_MEDIA)
+ lun->flags |= CTL_LUN_NO_MEDIA;
+ if (be_lun->flags & CTL_LUN_FLAG_STOPPED)
lun->flags |= CTL_LUN_STOPPED;
- if (be_lun->flags & CTL_LUN_FLAG_INOPERABLE)
- lun->flags |= CTL_LUN_INOPERABLE;
-
if (be_lun->flags & CTL_LUN_FLAG_PRIMARY)
lun->flags |= CTL_LUN_PRIMARY_SC;
- value = ctl_get_opt(&be_lun->options, "readonly");
- if (value != NULL && strcmp(value, "on") == 0)
- lun->flags |= CTL_LUN_READONLY;
-
- lun->serseq = CTL_LUN_SERSEQ_OFF;
- if (be_lun->flags & CTL_LUN_FLAG_SERSEQ_READ)
- lun->serseq = CTL_LUN_SERSEQ_READ;
- value = ctl_get_opt(&be_lun->options, "serseq");
- if (value != NULL && strcmp(value, "on") == 0)
- lun->serseq = CTL_LUN_SERSEQ_ON;
- else if (value != NULL && strcmp(value, "read") == 0)
- lun->serseq = CTL_LUN_SERSEQ_READ;
- else if (value != NULL && strcmp(value, "off") == 0)
- lun->serseq = CTL_LUN_SERSEQ_OFF;
+ value = ctl_get_opt(&be_lun->options, "removable");
+ if (value != NULL) {
+ if (strcmp(value, "on") == 0)
+ lun->flags |= CTL_LUN_REMOVABLE;
+ } else if (be_lun->lun_type == T_CDROM)
+ lun->flags |= CTL_LUN_REMOVABLE;
lun->ctl_softc = ctl_softc;
#ifdef CTL_TIME_IO
@@ -4725,9 +4520,8 @@ ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun,
/* Setup statistics gathering */
lun->stats.device_type = be_lun->lun_type;
lun->stats.lun_number = lun_number;
- if (lun->stats.device_type == T_DIRECT)
- lun->stats.blocksize = be_lun->blocksize;
- else
+ lun->stats.blocksize = be_lun->blocksize;
+ if (be_lun->blocksize == 0)
lun->stats.flags = CTL_LUN_STATS_NO_BLOCKSIZE;
for (i = 0;i < CTL_MAX_PORTS;i++)
lun->stats.ports[i].targ_port = i;
@@ -4777,7 +4571,7 @@ ctl_free_lun(struct ctl_lun *lun)
free(lun->lun_devid, M_CTL);
for (i = 0; i < CTL_MAX_PORTS; i++)
free(lun->pending_ua[i], M_CTL);
- for (i = 0; i < 2 * CTL_MAX_PORTS; i++)
+ for (i = 0; i < CTL_MAX_PORTS; i++)
free(lun->pr_keys[i], M_CTL);
free(lun->write_buffer, M_CTL);
if (lun->flags & CTL_LUN_MALLOCED)
@@ -4795,14 +4589,11 @@ ctl_free_lun(struct ctl_lun *lun)
static void
ctl_create_lun(struct ctl_be_lun *be_lun)
{
- struct ctl_softc *softc;
-
- softc = control_softc;
/*
* ctl_alloc_lun() should handle all potential failure cases.
*/
- ctl_alloc_lun(softc, NULL, be_lun);
+ ctl_alloc_lun(control_softc, NULL, be_lun);
}
int
@@ -4843,10 +4634,9 @@ ctl_enable_lun(struct ctl_be_lun *be_lun)
lun->flags &= ~CTL_LUN_DISABLED;
mtx_unlock(&lun->lun_lock);
- for (port = STAILQ_FIRST(&softc->port_list); port != NULL; port = nport) {
- nport = STAILQ_NEXT(port, links);
+ STAILQ_FOREACH_SAFE(port, &softc->port_list, links, nport) {
if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 ||
- port->lun_map != NULL)
+ port->lun_map != NULL || port->lun_enable == NULL)
continue;
/*
@@ -4866,6 +4656,7 @@ ctl_enable_lun(struct ctl_be_lun *be_lun)
}
mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_lun(lun);
return (0);
}
@@ -4893,9 +4684,9 @@ ctl_disable_lun(struct ctl_be_lun *be_lun)
STAILQ_FOREACH(port, &softc->port_list, links) {
if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 ||
- port->lun_map != NULL)
+ port->lun_map != NULL || port->lun_disable == NULL)
continue;
- mtx_unlock(&softc->ctl_lock);
+
/*
* Drop the lock before we call the frontend's disable
* routine, to avoid lock order reversals.
@@ -4903,6 +4694,7 @@ ctl_disable_lun(struct ctl_be_lun *be_lun)
* XXX KDM what happens if the frontend list changes while
* we're traversing it? It's unlikely, but should be handled.
*/
+ mtx_unlock(&softc->ctl_lock);
retval = port->lun_disable(port->targ_lun_arg, lun->lun);
mtx_lock(&softc->ctl_lock);
if (retval != 0) {
@@ -4914,6 +4706,7 @@ ctl_disable_lun(struct ctl_be_lun *be_lun)
}
mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_lun(lun);
return (0);
}
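/*
 * Illustrative sketch, not part of this commit: ctl_disable_lun()
 * above drops ctl_lock around each frontend lun_disable() callback
 * (which may sleep or take its own locks) and retakes it afterwards.
 * Generic pthreads rendering of that pattern; all names invented.
 */
#include <pthread.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int
frontend_disable(int port)
{
	(void)port;	/* stand-in for port->lun_disable() */
	return (0);
}

static void
disable_on_all_ports(int nports)
{
	int i;

	pthread_mutex_lock(&list_lock);
	for (i = 0; i < nports; i++) {
		/* Drop the lock: the callback may sleep or lock elsewhere. */
		pthread_mutex_unlock(&list_lock);
		(void)frontend_disable(i);
		pthread_mutex_lock(&list_lock);
		/* As the XXX above notes, the list may change while unlocked. */
	}
	pthread_mutex_unlock(&list_lock);
}

int
main(void)
{
	disable_on_all_ports(4);
	return (0);
}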
@@ -4941,28 +4734,82 @@ ctl_stop_lun(struct ctl_be_lun *be_lun)
}
int
-ctl_lun_offline(struct ctl_be_lun *be_lun)
+ctl_lun_no_media(struct ctl_be_lun *be_lun)
+{
+ struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
+
+ mtx_lock(&lun->lun_lock);
+ lun->flags |= CTL_LUN_NO_MEDIA;
+ mtx_unlock(&lun->lun_lock);
+ return (0);
+}
+
+int
+ctl_lun_has_media(struct ctl_be_lun *be_lun)
{
struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
+ union ctl_ha_msg msg;
mtx_lock(&lun->lun_lock);
- lun->flags |= CTL_LUN_OFFLINE;
+ lun->flags &= ~(CTL_LUN_NO_MEDIA | CTL_LUN_EJECTED);
+ if (lun->flags & CTL_LUN_REMOVABLE)
+ ctl_est_ua_all(lun, -1, CTL_UA_MEDIUM_CHANGE);
mtx_unlock(&lun->lun_lock);
+ if ((lun->flags & CTL_LUN_REMOVABLE) &&
+ lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
+ bzero(&msg.ua, sizeof(msg.ua));
+ msg.hdr.msg_type = CTL_MSG_UA;
+ msg.hdr.nexus.initid = -1;
+ msg.hdr.nexus.targ_port = -1;
+ msg.hdr.nexus.targ_lun = lun->lun;
+ msg.hdr.nexus.targ_mapped_lun = lun->lun;
+ msg.ua.ua_all = 1;
+ msg.ua.ua_set = 1;
+ msg.ua.ua_type = CTL_UA_MEDIUM_CHANGE;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua),
+ M_WAITOK);
+ }
return (0);
}
int
-ctl_lun_online(struct ctl_be_lun *be_lun)
+ctl_lun_ejected(struct ctl_be_lun *be_lun)
{
struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
mtx_lock(&lun->lun_lock);
- lun->flags &= ~CTL_LUN_OFFLINE;
+ lun->flags |= CTL_LUN_EJECTED;
mtx_unlock(&lun->lun_lock);
return (0);
}
int
+ctl_lun_primary(struct ctl_be_lun *be_lun)
+{
+ struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
+
+ mtx_lock(&lun->lun_lock);
+ lun->flags |= CTL_LUN_PRIMARY_SC;
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ ctl_isc_announce_lun(lun);
+ return (0);
+}
+
+int
+ctl_lun_secondary(struct ctl_be_lun *be_lun)
+{
+ struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
+
+ mtx_lock(&lun->lun_lock);
+ lun->flags &= ~CTL_LUN_PRIMARY_SC;
+ ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ ctl_isc_announce_lun(lun);
+ return (0);
+}
+
+int
ctl_invalidate_lun(struct ctl_be_lun *be_lun)
{
struct ctl_softc *softc;
@@ -5001,36 +4848,29 @@ ctl_invalidate_lun(struct ctl_be_lun *be_lun)
return (0);
}
-int
-ctl_lun_inoperable(struct ctl_be_lun *be_lun)
-{
- struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
-
- mtx_lock(&lun->lun_lock);
- lun->flags |= CTL_LUN_INOPERABLE;
- mtx_unlock(&lun->lun_lock);
- return (0);
-}
-
-int
-ctl_lun_operable(struct ctl_be_lun *be_lun)
-{
- struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
-
- mtx_lock(&lun->lun_lock);
- lun->flags &= ~CTL_LUN_INOPERABLE;
- mtx_unlock(&lun->lun_lock);
- return (0);
-}
-
void
ctl_lun_capacity_changed(struct ctl_be_lun *be_lun)
{
struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun;
+ union ctl_ha_msg msg;
mtx_lock(&lun->lun_lock);
- ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGED);
+ ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGE);
mtx_unlock(&lun->lun_lock);
+ if (lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
+ /* Send msg to other side. */
+ bzero(&msg.ua, sizeof(msg.ua));
+ msg.hdr.msg_type = CTL_MSG_UA;
+ msg.hdr.nexus.initid = -1;
+ msg.hdr.nexus.targ_port = -1;
+ msg.hdr.nexus.targ_lun = lun->lun;
+ msg.hdr.nexus.targ_mapped_lun = lun->lun;
+ msg.ua.ua_all = 1;
+ msg.ua.ua_set = 1;
+ msg.ua.ua_type = CTL_UA_CAPACITY_CHANGE;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua),
+ M_WAITOK);
+ }
}
/*
@@ -5063,6 +4903,8 @@ ctl_config_move_done(union ctl_io *io)
io->io_hdr.port_status);
}
+ if (ctl_debug & CTL_DEBUG_CDB_DATA)
+ ctl_data_print(io);
if (((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) ||
((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
(io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) ||
@@ -5089,8 +4931,6 @@ ctl_config_move_done(union ctl_io *io)
*
* - Call some other function once the data is in?
*/
- if (ctl_debug & CTL_DEBUG_CDB_DATA)
- ctl_data_print(io);
/*
* XXX KDM call ctl_scsiio() again for now, and check flag
@@ -5204,36 +5044,14 @@ ctl_config_read_done(union ctl_io *io)
int
ctl_scsi_release(struct ctl_scsiio *ctsio)
{
- int length, longid, thirdparty_id, resv_id;
struct ctl_lun *lun;
uint32_t residx;
- length = 0;
- resv_id = 0;
-
CTL_DEBUG_PRINT(("ctl_scsi_release\n"));
- residx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- switch (ctsio->cdb[0]) {
- case RELEASE_10: {
- struct scsi_release_10 *cdb;
-
- cdb = (struct scsi_release_10 *)ctsio->cdb;
-
- if (cdb->byte2 & SR10_LONGID)
- longid = 1;
- else
- thirdparty_id = cdb->thirdparty_id;
-
- resv_id = cdb->resv_id;
- length = scsi_2btoul(cdb->length);
- break;
- }
- }
-
-
/*
* XXX KDM right now, we only support LUN reservation. We don't
* support 3rd party reservations, or extent reservations, which
@@ -5241,25 +5059,6 @@ ctl_scsi_release(struct ctl_scsiio *ctsio)
* far, we've got a LUN reservation. Anything else got kicked out
* above. So, according to SPC, ignore the length.
*/
- length = 0;
-
- if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0)
- && (length > 0)) {
- ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK);
- ctsio->kern_data_len = length;
- ctsio->kern_total_len = length;
- ctsio->kern_data_resid = 0;
- ctsio->kern_rel_offset = 0;
- ctsio->kern_sg_entries = 0;
- ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
- ctsio->be_move_done = ctl_config_move_done;
- ctl_datamove((union ctl_io *)ctsio);
-
- return (CTL_RETVAL_COMPLETE);
- }
-
- if (length > 0)
- thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr);
mtx_lock(&lun->lun_lock);
@@ -5275,11 +5074,6 @@ ctl_scsi_release(struct ctl_scsiio *ctsio)
mtx_unlock(&lun->lun_lock);
- if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) {
- free(ctsio->kern_data_ptr, M_CTL);
- ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED;
- }
-
ctl_set_success(ctsio);
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
@@ -5288,41 +5082,14 @@ ctl_scsi_release(struct ctl_scsiio *ctsio)
int
ctl_scsi_reserve(struct ctl_scsiio *ctsio)
{
- int extent, thirdparty, longid;
- int resv_id, length;
- uint64_t thirdparty_id;
struct ctl_lun *lun;
uint32_t residx;
- extent = 0;
- thirdparty = 0;
- longid = 0;
- resv_id = 0;
- length = 0;
- thirdparty_id = 0;
-
CTL_DEBUG_PRINT(("ctl_reserve\n"));
- residx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- switch (ctsio->cdb[0]) {
- case RESERVE_10: {
- struct scsi_reserve_10 *cdb;
-
- cdb = (struct scsi_reserve_10 *)ctsio->cdb;
-
- if (cdb->byte2 & SR10_LONGID)
- longid = 1;
- else
- thirdparty_id = cdb->thirdparty_id;
-
- resv_id = cdb->resv_id;
- length = scsi_2btoul(cdb->length);
- break;
- }
- }
-
/*
* XXX KDM right now, we only support LUN reservation. We don't
* support 3rd party reservations, or extent reservations, which
@@ -5330,25 +5097,6 @@ ctl_scsi_reserve(struct ctl_scsiio *ctsio)
* far, we've got a LUN reservation. Anything else got kicked out
* above. So, according to SPC, ignore the length.
*/
- length = 0;
-
- if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0)
- && (length > 0)) {
- ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK);
- ctsio->kern_data_len = length;
- ctsio->kern_total_len = length;
- ctsio->kern_data_resid = 0;
- ctsio->kern_rel_offset = 0;
- ctsio->kern_sg_entries = 0;
- ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
- ctsio->be_move_done = ctl_config_move_done;
- ctl_datamove((union ctl_io *)ctsio);
-
- return (CTL_RETVAL_COMPLETE);
- }
-
- if (length > 0)
- thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr);
mtx_lock(&lun->lun_lock);
if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx != residx)) {
@@ -5356,19 +5104,18 @@ ctl_scsi_reserve(struct ctl_scsiio *ctsio)
goto bailout;
}
+ /* SPC-3 exceptions to SPC-2 RESERVE and RELEASE behavior. */
+ if (lun->flags & CTL_LUN_PR_RESERVED) {
+ ctl_set_success(ctsio);
+ goto bailout;
+ }
+
lun->flags |= CTL_LUN_RESERVED;
lun->res_idx = residx;
-
ctl_set_success(ctsio);
bailout:
mtx_unlock(&lun->lun_lock);
-
- if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) {
- free(ctsio->kern_data_ptr, M_CTL);
- ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED;
- }
-
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
}
@@ -5383,113 +5130,83 @@ ctl_start_stop(struct ctl_scsiio *ctsio)
CTL_DEBUG_PRINT(("ctl_start_stop\n"));
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- retval = 0;
-
cdb = (struct scsi_start_stop_unit *)ctsio->cdb;
- /*
- * XXX KDM
- * We don't support the immediate bit on a stop unit. In order to
- * do that, we would need to code up a way to know that a stop is
- * pending, and hold off any new commands until it completes, one
- * way or another. Then we could accept or reject those commands
- * depending on its status. We would almost need to do the reverse
- * of what we do below for an immediate start -- return the copy of
- * the ctl_io to the FETD with status to send to the host (and to
- * free the copy!) and then free the original I/O once the stop
- * actually completes. That way, the OOA queue mechanism can work
- * to block commands that shouldn't proceed. Another alternative
- * would be to put the copy in the queue in place of the original,
- * and return the original back to the caller. That could be
- * slightly safer.
- */
- if ((cdb->byte2 & SSS_IMMED)
- && ((cdb->how & SSS_START) == 0)) {
- ctl_set_invalid_field(ctsio,
- /*sks_valid*/ 1,
- /*command*/ 1,
- /*field*/ 1,
- /*bit_valid*/ 1,
- /*bit*/ 0);
- ctl_done((union ctl_io *)ctsio);
- return (CTL_RETVAL_COMPLETE);
- }
+ if ((cdb->how & SSS_PC_MASK) == 0) {
+ if ((lun->flags & CTL_LUN_PR_RESERVED) &&
+ (cdb->how & SSS_START) == 0) {
+ uint32_t residx;
- if ((lun->flags & CTL_LUN_PR_RESERVED)
- && ((cdb->how & SSS_START)==0)) {
- uint32_t residx;
+ residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
+ if (ctl_get_prkey(lun, residx) == 0 ||
+ (lun->pr_res_idx != residx && lun->pr_res_type < 4)) {
- residx = ctl_get_resindex(&ctsio->io_hdr.nexus);
- if (ctl_get_prkey(lun, residx) == 0
- || (lun->pr_res_idx!=residx && lun->res_type < 4)) {
+ ctl_set_reservation_conflict(ctsio);
+ ctl_done((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+ }
- ctl_set_reservation_conflict(ctsio);
+ if ((cdb->how & SSS_LOEJ) &&
+ (lun->flags & CTL_LUN_REMOVABLE) == 0) {
+ ctl_set_invalid_field(ctsio,
+ /*sks_valid*/ 1,
+ /*command*/ 1,
+ /*field*/ 4,
+ /*bit_valid*/ 1,
+ /*bit*/ 1);
+ ctl_done((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ if ((cdb->how & SSS_START) == 0 && (cdb->how & SSS_LOEJ) &&
+ lun->prevent_count > 0) {
+ /* "Medium removal prevented" */
+ ctl_set_sense(ctsio, /*current_error*/ 1,
+ /*sense_key*/(lun->flags & CTL_LUN_NO_MEDIA) ?
+ SSD_KEY_NOT_READY : SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x53, /*ascq*/ 0x02, SSD_ELEM_NONE);
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
}
}
- /*
- * If there is no backend on this device, we can't start or stop
- * it. In theory we shouldn't get any start/stop commands in the
- * first place at this level if the LUN doesn't have a backend.
- * That should get stopped by the command decode code.
- */
- if (lun->backend == NULL) {
+ retval = lun->backend->config_write((union ctl_io *)ctsio);
+ return (retval);
+}
+
+int
+ctl_prevent_allow(struct ctl_scsiio *ctsio)
+{
+ struct ctl_lun *lun;
+ struct scsi_prevent *cdb;
+ int retval;
+ uint32_t initidx;
+
+ CTL_DEBUG_PRINT(("ctl_prevent_allow\n"));
+
+ lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ cdb = (struct scsi_prevent *)ctsio->cdb;
+
+ if ((lun->flags & CTL_LUN_REMOVABLE) == 0) {
ctl_set_invalid_opcode(ctsio);
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
}
- /*
- * XXX KDM Copan-specific offline behavior.
- * Figure out a reasonable way to port this?
- */
-#ifdef NEEDTOPORT
+ initidx = ctl_get_initindex(&ctsio->io_hdr.nexus);
mtx_lock(&lun->lun_lock);
-
- if (((cdb->byte2 & SSS_ONOFFLINE) == 0)
- && (lun->flags & CTL_LUN_OFFLINE)) {
- /*
- * If the LUN is offline, and the on/offline bit isn't set,
- * reject the start or stop. Otherwise, let it through.
- */
- mtx_unlock(&lun->lun_lock);
- ctl_set_lun_not_ready(ctsio);
- ctl_done((union ctl_io *)ctsio);
- } else {
- mtx_unlock(&lun->lun_lock);
-#endif /* NEEDTOPORT */
- /*
- * This could be a start or a stop when we're online,
- * or a stop/offline or start/online. A start or stop when
- * we're offline is covered in the case above.
- */
- /*
- * In the non-immediate case, we send the request to
- * the backend and return status to the user when
- * it is done.
- *
- * In the immediate case, we allocate a new ctl_io
- * to hold a copy of the request, and send that to
- * the backend. We then set good status on the
- * user's request and return it immediately.
- */
- if (cdb->byte2 & SSS_IMMED) {
- union ctl_io *new_io;
-
- new_io = ctl_alloc_io(ctsio->io_hdr.pool);
- ctl_copy_io((union ctl_io *)ctsio, new_io);
- retval = lun->backend->config_write(new_io);
- ctl_set_success(ctsio);
- ctl_done((union ctl_io *)ctsio);
- } else {
- retval = lun->backend->config_write(
- (union ctl_io *)ctsio);
- }
-#ifdef NEEDTOPORT
+ if ((cdb->how & PR_PREVENT) &&
+ ctl_is_set(lun->prevent, initidx) == 0) {
+ ctl_set_mask(lun->prevent, initidx);
+ lun->prevent_count++;
+ } else if ((cdb->how & PR_PREVENT) == 0 &&
+ ctl_is_set(lun->prevent, initidx)) {
+ ctl_clear_mask(lun->prevent, initidx);
+ lun->prevent_count--;
}
-#endif
+ mtx_unlock(&lun->lun_lock);
+ retval = lun->backend->config_write((union ctl_io *)ctsio);
return (retval);
}
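/*
 * Illustrative sketch, not part of this commit: the new
 * ctl_prevent_allow() above tracks PREVENT ALLOW MEDIUM REMOVAL per
 * initiator with a bitmask plus a counter, so ejection stays blocked
 * while any initiator still holds a prevent.  Standalone model of
 * that bookkeeping (sizes and names invented):
 */
#include <assert.h>
#include <stdint.h>

#define MAX_INIT 64

struct prevent_state {
	uint32_t mask[MAX_INIT / 32];	/* one bit per initiator */
	int count;			/* number of bits currently set */
};

static void
prevent_allow(struct prevent_state *ps, int idx, int prevent)
{
	uint32_t *w = &ps->mask[idx / 32];
	uint32_t bit = 1u << (idx % 32);

	if (prevent && (*w & bit) == 0) {		/* set if not yet set */
		*w |= bit;
		ps->count++;
	} else if (!prevent && (*w & bit) != 0) {	/* clear if set */
		*w &= ~bit;
		ps->count--;
	}
}

int
main(void)
{
	struct prevent_state ps = { { 0 }, 0 };

	prevent_allow(&ps, 5, 1);
	prevent_allow(&ps, 5, 1);	/* idempotent: counted once */
	assert(ps.count == 1);		/* eject would be refused here */
	prevent_allow(&ps, 5, 0);
	assert(ps.count == 0);		/* eject allowed again */
	return (0);
}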
@@ -5554,38 +5271,13 @@ ctl_sync_cache(struct ctl_scsiio *ctsio)
goto bailout;
}
- /*
- * If this LUN has no backend, we can't flush the cache anyway.
- */
- if (lun->backend == NULL) {
- ctl_set_invalid_opcode(ctsio);
- ctl_done((union ctl_io *)ctsio);
- goto bailout;
- }
-
lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
lbalen->lba = starting_lba;
lbalen->len = block_count;
lbalen->flags = byte2;
-
- /*
- * Check to see whether we're configured to send the SYNCHRONIZE
- * CACHE command directly to the back end.
- */
- mtx_lock(&lun->lun_lock);
- if ((softc->flags & CTL_FLAG_REAL_SYNC)
- && (++(lun->sync_count) >= lun->sync_interval)) {
- lun->sync_count = 0;
- mtx_unlock(&lun->lun_lock);
- retval = lun->backend->config_write((union ctl_io *)ctsio);
- } else {
- mtx_unlock(&lun->lun_lock);
- ctl_set_success(ctsio);
- ctl_done((union ctl_io *)ctsio);
- }
+ retval = lun->backend->config_write((union ctl_io *)ctsio);
bailout:
-
return (retval);
}
@@ -5663,17 +5355,6 @@ ctl_format(struct ctl_scsiio *ctsio)
}
}
- /*
- * The format command will clear out the "Medium format corrupted"
- * status if set by the configuration code. That status is really
- * just a way to notify the host that we have lost the media, and
- * get them to issue a command that will basically make them think
- * they're blowing away the media.
- */
- mtx_lock(&lun->lun_lock);
- lun->flags &= ~CTL_LUN_INOPERABLE;
- mtx_unlock(&lun->lun_lock);
-
ctl_set_success(ctsio);
bailout:
@@ -5689,20 +5370,43 @@ bailout:
int
ctl_read_buffer(struct ctl_scsiio *ctsio)
{
- struct scsi_read_buffer *cdb;
struct ctl_lun *lun;
- int buffer_offset, len;
+ uint64_t buffer_offset;
+ uint32_t len;
+ uint8_t byte2;
static uint8_t descr[4];
static uint8_t echo_descr[4] = { 0 };
CTL_DEBUG_PRINT(("ctl_read_buffer\n"));
-
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- cdb = (struct scsi_read_buffer *)ctsio->cdb;
+ switch (ctsio->cdb[0]) {
+ case READ_BUFFER: {
+ struct scsi_read_buffer *cdb;
+
+ cdb = (struct scsi_read_buffer *)ctsio->cdb;
+ buffer_offset = scsi_3btoul(cdb->offset);
+ len = scsi_3btoul(cdb->length);
+ byte2 = cdb->byte2;
+ break;
+ }
+ case READ_BUFFER_16: {
+ struct scsi_read_buffer_16 *cdb;
- if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA &&
- (cdb->byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR &&
- (cdb->byte2 & RWB_MODE) != RWB_MODE_DESCR) {
+ cdb = (struct scsi_read_buffer_16 *)ctsio->cdb;
+ buffer_offset = scsi_8btou64(cdb->offset);
+ len = scsi_4btoul(cdb->length);
+ byte2 = cdb->byte2;
+ break;
+ }
+ default: /* This shouldn't happen. */
+ ctl_set_invalid_opcode(ctsio);
+ ctl_done((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ if ((byte2 & RWB_MODE) != RWB_MODE_DATA &&
+ (byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR &&
+ (byte2 & RWB_MODE) != RWB_MODE_DESCR) {
ctl_set_invalid_field(ctsio,
/*sks_valid*/ 1,
/*command*/ 1,
@@ -5713,10 +5417,8 @@ ctl_read_buffer(struct ctl_scsiio *ctsio)
return (CTL_RETVAL_COMPLETE);
}
- len = scsi_3btoul(cdb->length);
- buffer_offset = scsi_3btoul(cdb->offset);
-
- if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) {
+ if (buffer_offset > CTL_WRITE_BUFFER_SIZE ||
+ buffer_offset + len > CTL_WRITE_BUFFER_SIZE) {
ctl_set_invalid_field(ctsio,
/*sks_valid*/ 1,
/*command*/ 1,
@@ -5727,12 +5429,12 @@ ctl_read_buffer(struct ctl_scsiio *ctsio)
return (CTL_RETVAL_COMPLETE);
}
- if ((cdb->byte2 & RWB_MODE) == RWB_MODE_DESCR) {
+ if ((byte2 & RWB_MODE) == RWB_MODE_DESCR) {
descr[0] = 0;
scsi_ulto3b(CTL_WRITE_BUFFER_SIZE, &descr[1]);
ctsio->kern_data_ptr = descr;
len = min(len, sizeof(descr));
- } else if ((cdb->byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) {
+ } else if ((byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) {
ctsio->kern_data_ptr = echo_descr;
len = min(len, sizeof(echo_descr));
} else {
@@ -5827,8 +5529,6 @@ ctl_write_same(struct ctl_scsiio *ctsio)
int len, retval;
uint8_t byte2;
- retval = CTL_RETVAL_COMPLETE;
-
CTL_DEBUG_PRINT(("ctl_write_same\n"));
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
@@ -5866,9 +5566,8 @@ ctl_write_same(struct ctl_scsiio *ctsio)
break; /* NOTREACHED */
}
- /* NDOB and ANCHOR flags can be used only together with UNMAP */
- if ((byte2 & SWS_UNMAP) == 0 &&
- (byte2 & (SWS_NDOB | SWS_ANCHOR)) != 0) {
+ /* ANCHOR flag can be used only together with UNMAP */
+ if ((byte2 & SWS_UNMAP) == 0 && (byte2 & SWS_ANCHOR) != 0) {
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
/*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0);
ctl_done((union ctl_io *)ctsio);
@@ -5946,8 +5645,6 @@ ctl_unmap(struct ctl_scsiio *ctsio)
int len, retval;
uint8_t byte2;
- retval = CTL_RETVAL_COMPLETE;
-
CTL_DEBUG_PRINT(("ctl_unmap\n"));
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
@@ -6112,7 +5809,11 @@ ctl_control_page_handler(struct ctl_scsiio *ctsio,
if (set_ua != 0)
ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
mtx_unlock(&lun->lun_lock);
-
+ if (set_ua) {
+ ctl_isc_announce_mode(lun,
+ ctl_get_initindex(&ctsio->io_hdr.nexus),
+ page_index->page_code, page_index->subpage);
+ }
return (0);
}
@@ -6149,7 +5850,11 @@ ctl_caching_sp_handler(struct ctl_scsiio *ctsio,
if (set_ua != 0)
ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE);
mtx_unlock(&lun->lun_lock);
-
+ if (set_ua) {
+ ctl_isc_announce_mode(lun,
+ ctl_get_initindex(&ctsio->io_hdr.nexus),
+ page_index->page_code, page_index->subpage);
+ }
return (0);
}
@@ -6198,9 +5903,6 @@ ctl_debugconf_sp_sense_handler(struct ctl_scsiio *ctsio,
page->ctl_time_io_secs[1] = ctl_time_io_secs >> 0;
break;
default:
-#ifdef NEEDTOPORT
- EPRINT(0, "Invalid PC %d!!", pc);
-#endif /* NEEDTOPORT */
break;
}
return (0);
@@ -6213,8 +5915,7 @@ ctl_do_mode_select(union ctl_io *io)
struct scsi_mode_page_header *page_header;
struct ctl_page_index *page_index;
struct ctl_scsiio *ctsio;
- int control_dev, page_len;
- int page_len_offset, page_len_size;
+ int page_len, page_len_offset, page_len_size;
union ctl_modepage_info *modepage_info;
struct ctl_lun *lun;
int *len_left, *len_used;
@@ -6223,15 +5924,8 @@ ctl_do_mode_select(union ctl_io *io)
ctsio = &io->scsiio;
page_index = NULL;
page_len = 0;
- retval = CTL_RETVAL_COMPLETE;
-
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- if (lun->be_lun->lun_type != T_DIRECT)
- control_dev = 1;
- else
- control_dev = 0;
-
modepage_info = (union ctl_modepage_info *)
ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes;
len_left = &modepage_info->header.len_left;
@@ -6268,13 +5962,18 @@ do_next_page:
* XXX KDM should we do something with the block descriptor?
*/
for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
-
- if ((control_dev != 0)
- && (lun->mode_pages.index[i].page_flags &
- CTL_PAGE_FLAG_DISK_ONLY))
+ page_index = &lun->mode_pages.index[i];
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
- if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) !=
+ if ((page_index->page_code & SMPH_PC_MASK) !=
(page_header->page_code & SMPH_PC_MASK))
continue;
@@ -6282,9 +5981,8 @@ do_next_page:
* If neither page has a subpage code, then we've got a
* match.
*/
- if (((lun->mode_pages.index[i].page_code & SMPH_SPF) == 0)
+ if (((page_index->page_code & SMPH_SPF) == 0)
&& ((page_header->page_code & SMPH_SPF) == 0)) {
- page_index = &lun->mode_pages.index[i];
page_len = page_header->page_length;
break;
}
@@ -6293,15 +5991,12 @@ do_next_page:
* If both pages have subpages, then the subpage numbers
* have to match.
*/
- if ((lun->mode_pages.index[i].page_code & SMPH_SPF)
+ if ((page_index->page_code & SMPH_SPF)
&& (page_header->page_code & SMPH_SPF)) {
struct scsi_mode_page_header_sp *sph;
sph = (struct scsi_mode_page_header_sp *)page_header;
-
- if (lun->mode_pages.index[i].subpage ==
- sph->subpage) {
- page_index = &lun->mode_pages.index[i];
+ if (page_index->subpage == sph->subpage) {
page_len = scsi_2btoul(sph->page_length);
break;
}
@@ -6312,7 +6007,7 @@ do_next_page:
* If we couldn't find the page, or if we don't have a mode select
* handler for it, send back an error to the user.
*/
- if ((page_index == NULL)
+ if ((i >= CTL_NUM_MODE_PAGES)
|| (page_index->select_handler == NULL)) {
ctl_set_invalid_field(ctsio,
/*sks_valid*/ 1,
@@ -6442,28 +6137,7 @@ ctl_mode_select(struct ctl_scsiio *ctsio)
{
int param_len, pf, sp;
int header_size, bd_len;
- int len_left, len_used;
- struct ctl_page_index *page_index;
- struct ctl_lun *lun;
- int control_dev, page_len;
union ctl_modepage_info *modepage_info;
- int retval;
-
- pf = 0;
- sp = 0;
- page_len = 0;
- len_used = 0;
- len_left = 0;
- retval = 0;
- bd_len = 0;
- page_index = NULL;
-
- lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
-
- if (lun->be_lun->lun_type != T_DIRECT)
- control_dev = 1;
- else
- control_dev = 0;
switch (ctsio->cdb[0]) {
case MODE_SELECT_6: {
@@ -6473,7 +6147,6 @@ ctl_mode_select(struct ctl_scsiio *ctsio)
pf = (cdb->byte2 & SMS_PF) ? 1 : 0;
sp = (cdb->byte2 & SMS_SP) ? 1 : 0;
-
param_len = cdb->length;
header_size = sizeof(struct scsi_mode_header_6);
break;
@@ -6485,7 +6158,6 @@ ctl_mode_select(struct ctl_scsiio *ctsio)
pf = (cdb->byte2 & SMS_PF) ? 1 : 0;
sp = (cdb->byte2 & SMS_SP) ? 1 : 0;
-
param_len = scsi_2btoul(cdb->length);
header_size = sizeof(struct scsi_mode_header_10);
break;
@@ -6494,7 +6166,6 @@ ctl_mode_select(struct ctl_scsiio *ctsio)
ctl_set_invalid_opcode(ctsio);
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
- break; /* NOTREACHED */
}
/*
@@ -6576,14 +6247,9 @@ ctl_mode_select(struct ctl_scsiio *ctsio)
modepage_info = (union ctl_modepage_info *)
ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes;
-
memset(modepage_info, 0, sizeof(*modepage_info));
-
- len_left = param_len - header_size - bd_len;
- len_used = header_size + bd_len;
-
- modepage_info->header.len_left = len_left;
- modepage_info->header.len_used = len_used;
+ modepage_info->header.len_left = param_len - header_size - bd_len;
+ modepage_info->header.len_used = header_size + bd_len;
return (ctl_do_mode_select((union ctl_io *)ctsio));
}
@@ -6596,22 +6262,14 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
int alloc_len, page_len, header_len, total_len;
struct scsi_mode_block_descr *block_desc;
struct ctl_page_index *page_index;
- int control_dev;
dbd = 0;
llba = 0;
block_desc = NULL;
- page_index = NULL;
CTL_DEBUG_PRINT(("ctl_mode_sense\n"));
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
-
- if (lun->be_lun->lun_type != T_DIRECT)
- control_dev = 1;
- else
- control_dev = 0;
-
switch (ctsio->cdb[0]) {
case MODE_SENSE_6: {
struct scsi_mode_sense_6 *cdb;
@@ -6684,26 +6342,33 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
}
for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
- if ((control_dev != 0)
- && (lun->mode_pages.index[i].page_flags &
- CTL_PAGE_FLAG_DISK_ONLY))
+ page_index = &lun->mode_pages.index[i];
+
+ /* Make sure the page is supported for this dev type */
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
/*
* We don't use this subpage if the user didn't
* request all subpages.
*/
- if ((lun->mode_pages.index[i].subpage != 0)
+ if ((page_index->subpage != 0)
&& (subpage == SMS_SUBPAGE_PAGE_0))
continue;
#if 0
printf("found page %#x len %d\n",
- lun->mode_pages.index[i].page_code &
- SMPH_PC_MASK,
- lun->mode_pages.index[i].page_len);
+ page_index->page_code & SMPH_PC_MASK,
+ page_index->page_len);
#endif
- page_len += lun->mode_pages.index[i].page_len;
+ page_len += page_index->page_len;
}
break;
}
@@ -6713,30 +6378,35 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
page_len = 0;
for (i = 0; i < CTL_NUM_MODE_PAGES; i++) {
+ page_index = &lun->mode_pages.index[i];
+
+ /* Make sure the page is supported for this dev type */
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
+ continue;
+
/* Look for the right page code */
- if ((lun->mode_pages.index[i].page_code &
- SMPH_PC_MASK) != page_code)
+ if ((page_index->page_code & SMPH_PC_MASK) != page_code)
continue;
/* Look for the right subpage or the subpage wildcard */
- if ((lun->mode_pages.index[i].subpage != subpage)
+ if ((page_index->subpage != subpage)
&& (subpage != SMS_SUBPAGE_ALL))
continue;
- /* Make sure the page is supported for this dev type */
- if ((control_dev != 0)
- && (lun->mode_pages.index[i].page_flags &
- CTL_PAGE_FLAG_DISK_ONLY))
- continue;
-
#if 0
printf("found page %#x len %d\n",
- lun->mode_pages.index[i].page_code &
- SMPH_PC_MASK,
- lun->mode_pages.index[i].page_len);
+ page_index->page_code & SMPH_PC_MASK,
+ page_index->page_len);
#endif
- page_len += lun->mode_pages.index[i].page_len;
+ page_len += page_index->page_len;
}
if (page_len == 0) {
@@ -6780,9 +6450,9 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
header = (struct scsi_mode_hdr_6 *)ctsio->kern_data_ptr;
header->datalen = MIN(total_len - 1, 254);
- if (control_dev == 0) {
+ if (lun->be_lun->lun_type == T_DIRECT) {
header->dev_specific = 0x10; /* DPOFUA */
- if ((lun->flags & CTL_LUN_READONLY) ||
+ if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) ||
(lun->mode_pages.control_page[CTL_PAGE_CURRENT]
.eca_and_aen & SCP_SWP) != 0)
header->dev_specific |= 0x80; /* WP */
@@ -6803,9 +6473,9 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
datalen = MIN(total_len - 2, 65533);
scsi_ulto2b(datalen, header->datalen);
- if (control_dev == 0) {
+ if (lun->be_lun->lun_type == T_DIRECT) {
header->dev_specific = 0x10; /* DPOFUA */
- if ((lun->flags & CTL_LUN_READONLY) ||
+ if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) ||
(lun->mode_pages.control_page[CTL_PAGE_CURRENT]
.eca_and_aen & SCP_SWP) != 0)
header->dev_specific |= 0x80; /* WP */
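/*
 * Illustrative sketch, not part of this commit: both MODE SENSE
 * header paths above compute the same device-specific byte -- DPOFUA
 * (0x10) for direct-access LUNs, plus WP (0x80) when the LUN is
 * read-only or the control page's SWP bit is set.  Constants are
 * inlined here for the example.
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t
dev_specific_byte(int direct, int readonly, int swp)
{
	uint8_t b = 0;

	if (direct) {
		b |= 0x10;		/* DPOFUA */
		if (readonly || swp)
			b |= 0x80;	/* WP */
	}
	return (b);
}

int
main(void)
{
	printf("0x%02x\n", dev_specific_byte(1, 1, 0));	/* prints 0x90 */
	printf("0x%02x\n", dev_specific_byte(0, 1, 0));	/* prints 0x00 */
	return (0);
}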
@@ -6828,7 +6498,7 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
* descriptor. Otherwise, just set it to 0.
*/
if (dbd == 0) {
- if (control_dev == 0)
+ if (lun->be_lun->lun_type == T_DIRECT)
scsi_ulto3b(lun->be_lun->blocksize,
block_desc->block_len);
else
@@ -6844,10 +6514,14 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
struct ctl_page_index *page_index;
page_index = &lun->mode_pages.index[i];
-
- if ((control_dev != 0)
- && (page_index->page_flags &
- CTL_PAGE_FLAG_DISK_ONLY))
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
/*
@@ -6895,9 +6569,14 @@ ctl_mode_sense(struct ctl_scsiio *ctsio)
continue;
/* Make sure the page is supported for this dev type */
- if ((control_dev != 0)
- && (page_index->page_flags &
- CTL_PAGE_FLAG_DISK_ONLY))
+ if (lun->be_lun->lun_type == T_DIRECT &&
+ (page_index->page_flags & CTL_PAGE_FLAG_DIRECT) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_PROCESSOR &&
+ (page_index->page_flags & CTL_PAGE_FLAG_PROC) == 0)
+ continue;
+ if (lun->be_lun->lun_type == T_CDROM &&
+ (page_index->page_flags & CTL_PAGE_FLAG_CDROM) == 0)
continue;
/*
@@ -7385,7 +7064,7 @@ ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio)
{
struct scsi_maintenance_in *cdb;
int retval;
- int alloc_len, ext, total_len = 0, g, p, pc, pg, gs, os;
+ int alloc_len, ext, total_len = 0, g, pc, pg, gs, os;
int num_target_port_groups, num_target_ports;
struct ctl_lun *lun;
struct ctl_softc *softc;
@@ -7441,8 +7120,7 @@ ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio)
total_len = sizeof(struct scsi_target_group_data);
total_len += sizeof(struct scsi_target_port_group_descriptor) *
num_target_port_groups +
- sizeof(struct scsi_target_port_descriptor) *
- num_target_ports * num_target_port_groups;
+ sizeof(struct scsi_target_port_descriptor) * num_target_ports;
alloc_len = scsi_4btoul(cdb->length);
@@ -7477,35 +7155,36 @@ ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio)
}
mtx_lock(&softc->ctl_lock);
- pg = softc->port_offset / CTL_MAX_PORTS;
- if (softc->flags & CTL_FLAG_ACTIVE_SHELF) {
- if (softc->ha_mode == CTL_HA_MODE_ACT_STBY) {
- gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
- os = TPG_ASYMMETRIC_ACCESS_STANDBY;
- } else if (lun->flags & CTL_LUN_PRIMARY_SC) {
- gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
- os = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED;
- } else {
- gs = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED;
- os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
- }
- } else {
+ pg = softc->port_min / softc->port_cnt;
+ if (softc->ha_link == CTL_HA_LINK_OFFLINE)
+ gs = TPG_ASYMMETRIC_ACCESS_UNAVAILABLE;
+ else if (softc->ha_link == CTL_HA_LINK_UNKNOWN)
+ gs = TPG_ASYMMETRIC_ACCESS_TRANSITIONING;
+ else if (softc->ha_mode == CTL_HA_MODE_ACT_STBY)
gs = TPG_ASYMMETRIC_ACCESS_STANDBY;
+ else
+ gs = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED;
+ if (lun->flags & CTL_LUN_PRIMARY_SC) {
+ os = gs;
+ gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
+ } else
os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED;
- }
for (g = 0; g < num_target_port_groups; g++) {
tpg_desc->pref_state = (g == pg) ? gs : os;
- tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP;
+ tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP |
+ TPG_U_SUP | TPG_T_SUP;
scsi_ulto2b(g + 1, tpg_desc->target_port_group);
tpg_desc->status = TPG_IMPLICIT;
pc = 0;
STAILQ_FOREACH(port, &softc->port_list, links) {
+ if (port->targ_port < g * softc->port_cnt ||
+ port->targ_port >= (g + 1) * softc->port_cnt)
+ continue;
if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
continue;
if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
continue;
- p = port->targ_port % CTL_MAX_PORTS + g * CTL_MAX_PORTS;
- scsi_ulto2b(p, tpg_desc->descriptors[pc].
+ scsi_ulto2b(port->targ_port, tpg_desc->descriptors[pc].
relative_target_port_identifier);
pc++;
}
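/*
 * Illustrative sketch, not part of this commit: the hunk above picks
 * the ALUA state reported for this node's target port group (gs) and
 * for the peer's group (os) from the HA link state and whether this
 * node is primary for the LUN.  Standalone model with invented enum
 * names:
 */
#include <stdio.h>

enum ha_link { LINK_OFFLINE, LINK_UNKNOWN, LINK_ONLINE };
enum tpg_state { OPTIMIZED, NONOPTIMIZED, STANDBY, UNAVAILABLE, TRANSITIONING };

static void
alua_states(enum ha_link link, int act_stby, int primary,
    enum tpg_state *gs, enum tpg_state *os)
{
	if (link == LINK_OFFLINE)
		*gs = UNAVAILABLE;
	else if (link == LINK_UNKNOWN)
		*gs = TRANSITIONING;
	else if (act_stby)
		*gs = STANDBY;
	else
		*gs = NONOPTIMIZED;
	if (primary) {		/* the primary node reports itself optimized */
		*os = *gs;
		*gs = OPTIMIZED;
	} else
		*os = OPTIMIZED;
}

int
main(void)
{
	enum tpg_state gs, os;

	alua_states(LINK_ONLINE, 0, 1, &gs, &os);
	printf("%d %d\n", gs, os);	/* 0 1: this group optimized, peer nonoptimized */
	return (0);
}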
@@ -7722,8 +7401,9 @@ ctl_report_supported_tmf(struct ctl_scsiio *ctsio)
ctsio->kern_rel_offset = 0;
data = (struct scsi_report_supported_tmf_data *)ctsio->kern_data_ptr;
- data->byte1 |= RST_ATS | RST_ATSS | RST_CTSS | RST_LURS | RST_TRS;
- data->byte2 |= RST_ITNRS;
+ data->byte1 |= RST_ATS | RST_ATSS | RST_CTSS | RST_LURS | RST_QTS |
+ RST_TRS;
+ data->byte2 |= RST_QAES | RST_QTSS | RST_ITNRS;
ctl_set_success(ctsio);
ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
@@ -7869,12 +7549,12 @@ retry:
goto retry;
}
- scsi_ulto4b(lun->PRGeneration, res_keys->header.generation);
+ scsi_ulto4b(lun->pr_generation, res_keys->header.generation);
scsi_ulto4b(sizeof(struct scsi_per_res_key) *
lun->pr_key_count, res_keys->header.length);
- for (i = 0, key_count = 0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0, key_count = 0; i < CTL_MAX_INITIATORS; i++) {
if ((key = ctl_get_prkey(lun, i)) == 0)
continue;
@@ -7886,18 +7566,6 @@ retry:
* sync), we've got a problem.
*/
if (key_count >= lun->pr_key_count) {
-#ifdef NEEDTOPORT
- csevent_log(CSC_CTL | CSC_SHELF_SW |
- CTL_PR_ERROR,
- csevent_LogType_Fault,
- csevent_AlertLevel_Yellow,
- csevent_FRU_ShelfController,
- csevent_FRU_Firmware,
- csevent_FRU_Unknown,
- "registered keys %d >= key "
- "count %d", key_count,
- lun->pr_key_count);
-#endif
key_count++;
continue;
}
@@ -7912,7 +7580,7 @@ retry:
res = (struct scsi_per_res_in_rsrv *)ctsio->kern_data_ptr;
- scsi_ulto4b(lun->PRGeneration, res->header.generation);
+ scsi_ulto4b(lun->pr_generation, res->header.generation);
if (lun->flags & CTL_LUN_PR_RESERVED)
{
@@ -7955,7 +7623,7 @@ retry:
scsi_u64to8b(ctl_get_prkey(lun, lun->pr_res_idx),
res->data.reservation);
}
- res->data.scopetype = lun->res_type;
+ res->data.scopetype = lun->pr_res_type;
break;
}
case SPRI_RC: //report capabilities
@@ -7965,7 +7633,8 @@ retry:
res_cap = (struct scsi_per_res_cap *)ctsio->kern_data_ptr;
scsi_ulto2b(sizeof(*res_cap), res_cap->length);
- res_cap->flags2 |= SPRI_TMV | SPRI_ALLOW_5;
+ res_cap->flags1 = SPRI_CRH;
+ res_cap->flags2 = SPRI_TMV | SPRI_ALLOW_5;
type_mask = SPRI_TM_WR_EX_AR |
SPRI_TM_EX_AC_RO |
SPRI_TM_WR_EX_RO |
@@ -8000,10 +7669,10 @@ retry:
goto retry;
}
- scsi_ulto4b(lun->PRGeneration, res_status->header.generation);
+ scsi_ulto4b(lun->pr_generation, res_status->header.generation);
res_desc = &res_status->desc[0];
- for (i = 0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0; i < CTL_MAX_INITIATORS; i++) {
if ((key = ctl_get_prkey(lun, i)) == 0)
continue;
@@ -8012,13 +7681,12 @@ retry:
(lun->pr_res_idx == i ||
lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS)) {
res_desc->flags = SPRI_FULL_R_HOLDER;
- res_desc->scopetype = lun->res_type;
+ res_desc->scopetype = lun->pr_res_type;
}
scsi_ulto2b(i / CTL_MAX_INIT_PER_PORT,
res_desc->rel_trgt_port_id);
len = 0;
- port = softc->ctl_ports[
- ctl_port_idx(i / CTL_MAX_INIT_PER_PORT)];
+ port = softc->ctl_ports[i / CTL_MAX_INIT_PER_PORT];
if (port != NULL)
len = ctl_create_iid(port,
i % CTL_MAX_INIT_PER_PORT,
@@ -8048,15 +7716,6 @@ retry:
return (CTL_RETVAL_COMPLETE);
}
-static void
-ctl_est_res_ua(struct ctl_lun *lun, uint32_t residx, ctl_ua_type ua)
-{
- int off = lun->ctl_softc->persis_offset;
-
- if (residx >= off && residx < off + CTL_MAX_INITIATORS)
- ctl_est_ua(lun, residx - off, ua);
-}
-
/*
* Returns 0 if ctl_persistent_reserve_out() should continue, non-zero if
* it should return.
@@ -8068,10 +7727,7 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
struct scsi_per_res_out_parms* param)
{
union ctl_ha_msg persis_io;
- int retval, i;
- int isc_retval;
-
- retval = 0;
+ int i;
mtx_lock(&lun->lun_lock);
if (sa_res_key == 0) {
@@ -8106,18 +7762,20 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
* Unregister everybody else and build UA for
* them
*/
- for(i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for(i = 0; i < CTL_MAX_INITIATORS; i++) {
if (i == residx || ctl_get_prkey(lun, i) == 0)
continue;
ctl_clr_prkey(lun, i);
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
lun->pr_key_count = 1;
- lun->res_type = type;
- if (lun->res_type != SPR_TYPE_WR_EX_AR
- && lun->res_type != SPR_TYPE_EX_AC_AR)
+ lun->pr_res_type = type;
+ if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
+ lun->pr_res_type != SPR_TYPE_EX_AC_AR)
lun->pr_res_idx = residx;
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
/* send msg to other side */
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
@@ -8128,13 +7786,8 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
memcpy(persis_io.pr.pr_info.sa_res_key,
param->serv_act_res_key,
sizeof(param->serv_act_res_key));
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned "
- "from ctl_ha_msg_send %d\n",
- isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
} else {
/* not all registrants */
mtx_unlock(&lun->lun_lock);
@@ -8175,14 +7828,14 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
return (1);
}
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0; i < CTL_MAX_INITIATORS; i++) {
if (ctl_get_prkey(lun, i) != sa_res_key)
continue;
found = 1;
ctl_clr_prkey(lun, i);
lun->pr_key_count--;
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
if (!found) {
mtx_unlock(&lun->lun_lock);
@@ -8191,6 +7844,9 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE);
}
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
+
/* send msg to other side */
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
@@ -8200,12 +7856,8 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
memcpy(persis_io.pr.pr_info.sa_res_key,
param->serv_act_res_key,
sizeof(param->serv_act_res_key));
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
} else {
/* Reserved but not all registrants */
/* sa_res_key is res holder */
@@ -8250,26 +7902,28 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
* except don't unregister the res holder.
*/
- for(i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for(i = 0; i < CTL_MAX_INITIATORS; i++) {
if (i == residx || ctl_get_prkey(lun, i) == 0)
continue;
if (sa_res_key == ctl_get_prkey(lun, i)) {
ctl_clr_prkey(lun, i);
lun->pr_key_count--;
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
- } else if (type != lun->res_type
- && (lun->res_type == SPR_TYPE_WR_EX_RO
- || lun->res_type ==SPR_TYPE_EX_AC_RO)){
- ctl_est_res_ua(lun, i, CTL_UA_RES_RELEASE);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
+ } else if (type != lun->pr_res_type &&
+ (lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
+ lun->pr_res_type == SPR_TYPE_EX_AC_RO)) {
+ ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
}
- lun->res_type = type;
- if (lun->res_type != SPR_TYPE_WR_EX_AR
- && lun->res_type != SPR_TYPE_EX_AC_AR)
+ lun->pr_res_type = type;
+ if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
+ lun->pr_res_type != SPR_TYPE_EX_AC_AR)
lun->pr_res_idx = residx;
else
lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
@@ -8279,13 +7933,8 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
memcpy(persis_io.pr.pr_info.sa_res_key,
param->serv_act_res_key,
sizeof(param->serv_act_res_key));
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned "
- "from ctl_ha_msg_send %d\n",
- isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
} else {
/*
* sa_res_key is not the res holder just
@@ -8293,14 +7942,14 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
*/
int found=0;
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0; i < CTL_MAX_INITIATORS; i++) {
if (sa_res_key != ctl_get_prkey(lun, i))
continue;
found = 1;
ctl_clr_prkey(lun, i);
lun->pr_key_count--;
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
if (!found) {
@@ -8310,6 +7959,9 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
ctl_done((union ctl_io *)ctsio);
return (1);
}
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
+
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
persis_io.pr.pr_info.action = CTL_PR_PREEMPT;
@@ -8318,20 +7970,11 @@ ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key,
memcpy(persis_io.pr.pr_info.sa_res_key,
param->serv_act_res_key,
sizeof(param->serv_act_res_key));
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned "
- "from ctl_ha_msg_send %d\n",
- isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
}
}
-
- lun->PRGeneration++;
- mtx_unlock(&lun->lun_lock);
-
- return (retval);
+ return (0);
}
static void
@@ -8350,32 +7993,32 @@ ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg)
* Unregister everybody else and build UA for
* them
*/
- for(i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for(i = 0; i < CTL_MAX_INITIATORS; i++) {
if (i == msg->pr.pr_info.residx ||
ctl_get_prkey(lun, i) == 0)
continue;
ctl_clr_prkey(lun, i);
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
lun->pr_key_count = 1;
- lun->res_type = msg->pr.pr_info.res_type;
- if (lun->res_type != SPR_TYPE_WR_EX_AR
- && lun->res_type != SPR_TYPE_EX_AC_AR)
+ lun->pr_res_type = msg->pr.pr_info.res_type;
+ if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
+ lun->pr_res_type != SPR_TYPE_EX_AC_AR)
lun->pr_res_idx = msg->pr.pr_info.residx;
} else {
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0; i < CTL_MAX_INITIATORS; i++) {
if (sa_res_key == ctl_get_prkey(lun, i))
continue;
ctl_clr_prkey(lun, i);
lun->pr_key_count--;
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
}
} else {
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i = 0; i < CTL_MAX_INITIATORS; i++) {
if (i == msg->pr.pr_info.residx ||
ctl_get_prkey(lun, i) == 0)
continue;
@@ -8383,21 +8026,21 @@ ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg)
if (sa_res_key == ctl_get_prkey(lun, i)) {
ctl_clr_prkey(lun, i);
lun->pr_key_count--;
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
- } else if (msg->pr.pr_info.res_type != lun->res_type
- && (lun->res_type == SPR_TYPE_WR_EX_RO
- || lun->res_type == SPR_TYPE_EX_AC_RO)) {
- ctl_est_res_ua(lun, i, CTL_UA_RES_RELEASE);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
+ } else if (msg->pr.pr_info.res_type != lun->pr_res_type
+ && (lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
+ lun->pr_res_type == SPR_TYPE_EX_AC_RO)) {
+ ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
}
- lun->res_type = msg->pr.pr_info.res_type;
- if (lun->res_type != SPR_TYPE_WR_EX_AR
- && lun->res_type != SPR_TYPE_EX_AC_AR)
+ lun->pr_res_type = msg->pr.pr_info.res_type;
+ if (lun->pr_res_type != SPR_TYPE_WR_EX_AR &&
+ lun->pr_res_type != SPR_TYPE_EX_AC_AR)
lun->pr_res_idx = msg->pr.pr_info.residx;
else
lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
}
- lun->PRGeneration++;
+ lun->pr_generation++;
}
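
Both preempt paths finish by posting per-initiator unit attentions (ctl_est_ua, replacing ctl_est_res_ua) and bumping the renamed pr_generation counter. A sketch of the latch-and-report UA pattern, assuming an illustrative bitmask layout rather than CTL's real pending_ua arrays:

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_INITIATORS	8
	#define UA_REG_PREEMPT	0x01	/* registration preempted */
	#define UA_RES_RELEASE	0x02	/* reservation released */

	static uint8_t ua_pending[MAX_INITIATORS];

	/* Latch a unit attention for one initiator (cf. ctl_est_ua). */
	static void
	est_ua(int initidx, uint8_t ua)
	{
		ua_pending[initidx] |= ua;
	}

	/* Report and clear, as a later CHECK CONDITION delivery would. */
	static uint8_t
	build_ua(int initidx)
	{
		uint8_t ua = ua_pending[initidx];

		ua_pending[initidx] = 0;
		return (ua);
	}

	int
	main(void)
	{

		est_ua(3, UA_REG_PREEMPT);
		est_ua(3, UA_RES_RELEASE);
		printf("initiator 3 UA mask: %#x\n", build_ua(3));
		printf("after report: %#x\n", build_ua(3));
		return (0);
	}
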
@@ -8406,7 +8049,6 @@ int
ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
{
int retval;
- int isc_retval;
u_int32_t param_len;
struct scsi_per_res_out *cdb;
struct ctl_lun *lun;
@@ -8476,7 +8118,7 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
param = (struct scsi_per_res_out_parms *)ctsio->kern_data_ptr;
- residx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
res_key = scsi_8btou64(param->res_key.key);
sa_res_key = scsi_8btou64(param->serv_act_res_key);
@@ -8580,9 +8222,9 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
lun->flags &= ~CTL_LUN_PR_RESERVED;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
- if ((lun->res_type == SPR_TYPE_WR_EX_RO
- || lun->res_type == SPR_TYPE_EX_AC_RO)
- && lun->pr_key_count) {
+ if ((lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
+ lun->pr_res_type == SPR_TYPE_EX_AC_RO) &&
+ lun->pr_key_count) {
/*
* If the reservation is a registrants
* only type we need to generate a UA
@@ -8591,32 +8233,30 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
* RELEASED
*/
- for (i = 0; i < CTL_MAX_INITIATORS;i++){
- if (ctl_get_prkey(lun, i +
- softc->persis_offset) == 0)
+ for (i = softc->init_min; i < softc->init_max; i++){
+ if (ctl_get_prkey(lun, i) == 0)
continue;
ctl_est_ua(lun, i,
CTL_UA_RES_RELEASE);
}
}
- lun->res_type = 0;
+ lun->pr_res_type = 0;
} else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) {
if (lun->pr_key_count==0) {
lun->flags &= ~CTL_LUN_PR_RESERVED;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
}
}
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
+
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
persis_io.pr.pr_info.action = CTL_PR_UNREG_KEY;
persis_io.pr.pr_info.residx = residx;
- if ((isc_retval = ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0 )) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
} else /* sa_res_key != 0 */ {
/*
@@ -8627,6 +8267,8 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
if (ctl_get_prkey(lun, residx) == 0)
lun->pr_key_count++;
ctl_set_prkey(lun, residx, sa_res_key);
+ lun->pr_generation++;
+ mtx_unlock(&lun->lun_lock);
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
@@ -8635,15 +8277,9 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
memcpy(persis_io.pr.pr_info.sa_res_key,
param->serv_act_res_key,
sizeof(param->serv_act_res_key));
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
}
- lun->PRGeneration++;
- mtx_unlock(&lun->lun_lock);
break;
}
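
The new ctl_ha_msg_send() calls pass sizeof(persis_io.pr) with M_WAITOK instead of sizeof(persis_io) with a checked-and-logged return: the HA message is a union, and only the member actually in use needs to cross the link. A standalone illustration with hypothetical message types:

	#include <stdio.h>

	struct hdr { int msg_type; };
	struct pr_msg { struct hdr hdr; char sa_res_key[8]; };
	struct dt_msg { struct hdr hdr; char payload[4096]; };

	union ha_msg {
		struct hdr hdr;
		struct pr_msg pr;
		struct dt_msg dt;
	};

	int
	main(void)
	{

		/* Sending the whole union would ship dt's 4K payload every time. */
		printf("whole union: %zu bytes, pr member alone: %zu bytes\n",
		    sizeof(union ha_msg), sizeof(struct pr_msg));
		return (0);
	}
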
@@ -8660,7 +8296,7 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
*/
if ((lun->pr_res_idx != residx
&& lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS)
- || lun->res_type != type) {
+ || lun->pr_res_type != type) {
mtx_unlock(&lun->lun_lock);
free(ctsio->kern_data_ptr, M_CTL);
ctl_set_reservation_conflict(ctsio);
@@ -8680,7 +8316,7 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS;
lun->flags |= CTL_LUN_PR_RESERVED;
- lun->res_type = type;
+ lun->pr_res_type = type;
mtx_unlock(&lun->lun_lock);
@@ -8690,12 +8326,8 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
persis_io.pr.pr_info.action = CTL_PR_RESERVE;
persis_io.pr.pr_info.residx = lun->pr_res_idx;
persis_io.pr.pr_info.res_type = type;
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &persis_io, sizeof(persis_io), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
}
break;
@@ -8719,7 +8351,7 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
goto done;
}
- if (lun->res_type != type) {
+ if (lun->pr_res_type != type) {
mtx_unlock(&lun->lun_lock);
free(ctsio->kern_data_ptr, M_CTL);
ctl_set_illegal_pr_release(ctsio);
@@ -8730,7 +8362,7 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
/* okay to release */
lun->flags &= ~CTL_LUN_PR_RESERVED;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
/*
* if this isn't an exclusive access
@@ -8739,24 +8371,20 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
*/
if (type != SPR_TYPE_EX_AC
&& type != SPR_TYPE_WR_EX) {
- for (i = 0; i < CTL_MAX_INITIATORS; i++) {
- if (i == residx ||
- ctl_get_prkey(lun,
- i + softc->persis_offset) == 0)
+ for (i = softc->init_min; i < softc->init_max; i++) {
+ if (i == residx || ctl_get_prkey(lun, i) == 0)
continue;
ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
}
mtx_unlock(&lun->lun_lock);
+
/* Send msg to other side */
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
persis_io.pr.pr_info.action = CTL_PR_RELEASE;
- if ((isc_retval=ctl_ha_msg_send( CTL_HA_CHAN_CTL, &persis_io,
- sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
break;
case SPRO_CLEAR:
@@ -8764,26 +8392,24 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio)
mtx_lock(&lun->lun_lock);
lun->flags &= ~CTL_LUN_PR_RESERVED;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
lun->pr_key_count = 0;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
ctl_clr_prkey(lun, residx);
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++)
+ for (i = 0; i < CTL_MAX_INITIATORS; i++)
if (ctl_get_prkey(lun, i) != 0) {
ctl_clr_prkey(lun, i);
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
- lun->PRGeneration++;
+ lun->pr_generation++;
mtx_unlock(&lun->lun_lock);
+
persis_io.hdr.nexus = ctsio->io_hdr.nexus;
persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION;
persis_io.pr.pr_info.action = CTL_PR_CLEAR;
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
- sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Persis Out error returned from "
- "ctl_ha_msg_send %d\n", isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io,
+ sizeof(persis_io.pr), M_WAITOK);
break;
case SPRO_PREEMPT:
@@ -8817,16 +8443,25 @@ done:
static void
ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
{
+ struct ctl_softc *softc = control_softc;
struct ctl_lun *lun;
- struct ctl_softc *softc;
int i;
- uint32_t targ_lun;
-
- softc = control_softc;
+ uint32_t residx, targ_lun;
targ_lun = msg->hdr.nexus.targ_mapped_lun;
- lun = softc->ctl_luns[targ_lun];
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ ((lun = softc->ctl_luns[targ_lun]) == NULL)) {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ residx = ctl_get_initindex(&msg->hdr.nexus);
switch(msg->pr.pr_info.action) {
case CTL_PR_REG_KEY:
ctl_alloc_prkey(lun, msg->pr.pr_info.residx);
@@ -8834,7 +8469,7 @@ ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
lun->pr_key_count++;
ctl_set_prkey(lun, msg->pr.pr_info.residx,
scsi_8btou64(msg->pr.pr_info.sa_res_key));
- lun->PRGeneration++;
+ lun->pr_generation++;
break;
case CTL_PR_UNREG_KEY:
@@ -8847,9 +8482,9 @@ ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
lun->flags &= ~CTL_LUN_PR_RESERVED;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
- if ((lun->res_type == SPR_TYPE_WR_EX_RO
- || lun->res_type == SPR_TYPE_EX_AC_RO)
- && lun->pr_key_count) {
+ if ((lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
+ lun->pr_res_type == SPR_TYPE_EX_AC_RO) &&
+ lun->pr_key_count) {
/*
* If the reservation is a registrants
* only type we need to generate a UA
@@ -8858,28 +8493,27 @@ ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
* RELEASED
*/
- for (i = 0; i < CTL_MAX_INITIATORS; i++) {
- if (ctl_get_prkey(lun, i +
- softc->persis_offset) == 0)
+ for (i = softc->init_min; i < softc->init_max; i++) {
+ if (ctl_get_prkey(lun, i) == 0)
continue;
ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
}
- lun->res_type = 0;
+ lun->pr_res_type = 0;
} else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) {
if (lun->pr_key_count==0) {
lun->flags &= ~CTL_LUN_PR_RESERVED;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
}
}
- lun->PRGeneration++;
+ lun->pr_generation++;
break;
case CTL_PR_RESERVE:
lun->flags |= CTL_LUN_PR_RESERVED;
- lun->res_type = msg->pr.pr_info.res_type;
+ lun->pr_res_type = msg->pr.pr_info.res_type;
lun->pr_res_idx = msg->pr.pr_info.residx;
break;
@@ -8889,16 +8523,17 @@ ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
* if this isn't an exclusive access res generate UA for all
* other registrants.
*/
- if (lun->res_type != SPR_TYPE_EX_AC
- && lun->res_type != SPR_TYPE_WR_EX) {
- for (i = 0; i < CTL_MAX_INITIATORS; i++)
- if (ctl_get_prkey(lun, i + softc->persis_offset) != 0)
- ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
+ if (lun->pr_res_type != SPR_TYPE_EX_AC &&
+ lun->pr_res_type != SPR_TYPE_WR_EX) {
+ for (i = softc->init_min; i < softc->init_max; i++)
+ if (i == residx || ctl_get_prkey(lun, i) == 0)
+ continue;
+ ctl_est_ua(lun, i, CTL_UA_RES_RELEASE);
}
lun->flags &= ~CTL_LUN_PR_RESERVED;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
break;
case CTL_PR_PREEMPT:
@@ -8906,17 +8541,17 @@ ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg)
break;
case CTL_PR_CLEAR:
lun->flags &= ~CTL_LUN_PR_RESERVED;
- lun->res_type = 0;
+ lun->pr_res_type = 0;
lun->pr_key_count = 0;
lun->pr_res_idx = CTL_PR_NO_RESERVATION;
- for (i=0; i < 2*CTL_MAX_INITIATORS; i++) {
+ for (i=0; i < CTL_MAX_INITIATORS; i++) {
if (ctl_get_prkey(lun, i) == 0)
continue;
ctl_clr_prkey(lun, i);
- ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT);
+ ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT);
}
- lun->PRGeneration++;
+ lun->pr_generation++;
break;
}
@@ -8938,8 +8573,6 @@ ctl_read_write(struct ctl_scsiio *ctsio)
CTL_DEBUG_PRINT(("ctl_read_write: command: %#x\n", ctsio->cdb[0]));
flags = 0;
- retval = CTL_RETVAL_COMPLETE;
-
isread = ctsio->cdb[0] == READ_6 || ctsio->cdb[0] == READ_10
|| ctsio->cdb[0] == READ_12 || ctsio->cdb[0] == READ_16;
switch (ctsio->cdb[0]) {
@@ -9022,7 +8655,7 @@ ctl_read_write(struct ctl_scsiio *ctsio)
break;
}
case WRITE_ATOMIC_16: {
- struct scsi_rw_16 *cdb;
+ struct scsi_write_atomic_16 *cdb;
if (lun->be_lun->atomicblock == 0) {
ctl_set_invalid_opcode(ctsio);
@@ -9030,13 +8663,13 @@ ctl_read_write(struct ctl_scsiio *ctsio)
return (CTL_RETVAL_COMPLETE);
}
- cdb = (struct scsi_rw_16 *)ctsio->cdb;
+ cdb = (struct scsi_write_atomic_16 *)ctsio->cdb;
if (cdb->byte2 & SRW12_FUA)
flags |= CTL_LLF_FUA;
if (cdb->byte2 & SRW12_DPO)
flags |= CTL_LLF_DPO;
lba = scsi_8btou64(cdb->addr);
- num_blocks = scsi_4btoul(cdb->length);
+ num_blocks = scsi_2btoul(cdb->length);
if (num_blocks > lun->be_lun->atomicblock) {
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
/*command*/ 1, /*field*/ 12, /*bit_valid*/ 0,
@@ -9116,7 +8749,6 @@ ctl_read_write(struct ctl_scsiio *ctsio)
CTL_DEBUG_PRINT(("ctl_read_write: calling data_submit()\n"));
retval = lun->backend->data_submit((union ctl_io *)ctsio);
-
return (retval);
}
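
The WRITE ATOMIC(16) fix above matters because that CDB carries a 2-byte transfer length where READ/WRITE(16) carry 4 bytes, hence the switch from scsi_rw_16/scsi_4btoul() to scsi_write_atomic_16/scsi_2btoul(). A self-contained decode sketch; the byte offsets follow SBC, and the helpers below only mirror, and are not, the CAM ones:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t
	get_8btou64(const uint8_t *b)
	{
		uint64_t v = 0;
		int i;

		for (i = 0; i < 8; i++)
			v = (v << 8) | b[i];
		return (v);
	}

	static uint32_t
	get_2btoul(const uint8_t *b)
	{
		return ((uint32_t)b[0] << 8 | b[1]);
	}

	int
	main(void)
	{
		/* opcode 0x9c, LBA at bytes 2..9, length at bytes 12..13 */
		uint8_t cdb[16] = { 0x9c, 0, 0,0,0,0,0,0,0,0x80, 0,0, 0x00,0x10, 0,0 };

		printf("lba=%llu blocks=%u\n",
		    (unsigned long long)get_8btou64(&cdb[2]), get_2btoul(&cdb[12]));
		return (0);
	}
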
@@ -9156,8 +8788,6 @@ ctl_cnw(struct ctl_scsiio *ctsio)
CTL_DEBUG_PRINT(("ctl_cnw: command: %#x\n", ctsio->cdb[0]));
flags = 0;
- retval = CTL_RETVAL_COMPLETE;
-
switch (ctsio->cdb[0]) {
case COMPARE_AND_WRITE: {
struct scsi_compare_and_write *cdb;
@@ -9248,8 +8878,6 @@ ctl_verify(struct ctl_scsiio *ctsio)
bytchk = 0;
flags = CTL_LLF_FUA;
- retval = CTL_RETVAL_COMPLETE;
-
switch (ctsio->cdb[0]) {
case VERIFY_10: {
struct scsi_verify_10 *cdb;
@@ -9340,21 +8968,20 @@ ctl_verify(struct ctl_scsiio *ctsio)
int
ctl_report_luns(struct ctl_scsiio *ctsio)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc;
struct scsi_report_luns *cdb;
struct scsi_report_luns_data *lun_data;
struct ctl_lun *lun, *request_lun;
struct ctl_port *port;
int num_luns, retval;
uint32_t alloc_len, lun_datalen;
- int num_filled, well_known;
+ int num_filled;
uint32_t initidx, targ_lun_id, lun_id;
retval = CTL_RETVAL_COMPLETE;
- well_known = 0;
-
cdb = (struct scsi_report_luns *)ctsio->cdb;
port = ctl_io_port(&ctsio->io_hdr);
+ softc = port->ctl_softc;
CTL_DEBUG_PRINT(("ctl_report_luns\n"));
@@ -9369,9 +8996,11 @@ ctl_report_luns(struct ctl_scsiio *ctsio)
switch (cdb->select_report) {
case RPL_REPORT_DEFAULT:
case RPL_REPORT_ALL:
+ case RPL_REPORT_NONSUBSID:
break;
case RPL_REPORT_WELLKNOWN:
- well_known = 1;
+ case RPL_REPORT_ADMIN:
+ case RPL_REPORT_CONGLOM:
num_luns = 0;
break;
default:
@@ -9622,7 +9251,7 @@ ctl_request_sense(struct ctl_scsiio *ctsio)
if (ua_type == CTL_UA_LUN_CHANGE) {
mtx_unlock(&lun->lun_lock);
mtx_lock(&ctl_softc->ctl_lock);
- ctl_clear_ua(ctl_softc, initidx, ua_type);
+ ctl_clr_ua_allluns(ctl_softc, initidx, ua_type);
mtx_unlock(&ctl_softc->ctl_lock);
mtx_lock(&lun->lun_lock);
}
@@ -9686,14 +9315,6 @@ ctl_tur(struct ctl_scsiio *ctsio)
return (CTL_RETVAL_COMPLETE);
}
-#ifdef notyet
-static int
-ctl_cmddt_inquiry(struct ctl_scsiio *ctsio)
-{
-
-}
-#endif
-
/*
* SCSI VPD page 0x00, the Supported VPD Pages page.
*/
@@ -9969,7 +9590,7 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
softc = control_softc;
- port = softc->ctl_ports[ctl_port_idx(ctsio->io_hdr.nexus.targ_port)];
+ port = ctl_io_port(&ctsio->io_hdr);
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
data_len = sizeof(struct scsi_vpd_device_id) +
@@ -9979,9 +9600,9 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
sizeof(struct scsi_vpd_id_trgt_port_grp_id);
if (lun && lun->lun_devid)
data_len += lun->lun_devid->len;
- if (port->port_devid)
+ if (port && port->port_devid)
data_len += port->port_devid->len;
- if (port->target_devid)
+ if (port && port->target_devid)
data_len += port->target_devid->len;
ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
@@ -10013,9 +9634,9 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
devid_ptr->page_code = SVPD_DEVICE_ID;
scsi_ulto2b(data_len - 4, devid_ptr->length);
- if (port->port_type == CTL_PORT_FC)
+ if (port && port->port_type == CTL_PORT_FC)
proto = SCSI_PROTO_FC << 4;
- else if (port->port_type == CTL_PORT_ISCSI)
+ else if (port && port->port_type == CTL_PORT_ISCSI)
proto = SCSI_PROTO_ISCSI << 4;
else
proto = SCSI_PROTO_SPI << 4;
@@ -10034,7 +9655,7 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
/*
* This is for the WWPN which is a port association.
*/
- if (port->port_devid) {
+ if (port && port->port_devid) {
memcpy(desc, port->port_devid->data, port->port_devid->len);
desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc +
port->port_devid->len);
@@ -10058,7 +9679,7 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT |
SVPD_ID_TYPE_TPORTGRP;
desc->length = 4;
- scsi_ulto2b(ctsio->io_hdr.nexus.targ_port / CTL_MAX_PORTS + 1,
+ scsi_ulto2b(ctsio->io_hdr.nexus.targ_port / softc->port_cnt + 1,
&desc->identifier[2]);
desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] +
sizeof(struct scsi_vpd_id_trgt_port_grp_id));
@@ -10066,7 +9687,7 @@ ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len)
/*
* This is for the Target identifier
*/
- if (port->target_devid) {
+ if (port && port->target_devid) {
memcpy(desc, port->target_devid->data, port->target_devid->len);
}
@@ -10086,15 +9707,10 @@ ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len)
struct scsi_vpd_port_designation_cont *pdc;
struct ctl_lun *lun;
struct ctl_port *port;
- int data_len, num_target_ports, iid_len, id_len, g, pg, p;
- int num_target_port_groups;
+ int data_len, num_target_ports, iid_len, id_len;
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
- if (softc->is_single)
- num_target_port_groups = 1;
- else
- num_target_port_groups = NUM_TARGET_PORT_GROUPS;
num_target_ports = 0;
iid_len = 0;
id_len = 0;
@@ -10113,7 +9729,7 @@ ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len)
}
mtx_unlock(&softc->ctl_lock);
- data_len = sizeof(struct scsi_vpd_scsi_ports) + num_target_port_groups *
+ data_len = sizeof(struct scsi_vpd_scsi_ports) +
num_target_ports * (sizeof(struct scsi_vpd_port_designation) +
sizeof(struct scsi_vpd_port_designation_cont)) + iid_len + id_len;
ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
@@ -10150,35 +9766,31 @@ ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len)
pd = &sp->design[0];
mtx_lock(&softc->ctl_lock);
- pg = softc->port_offset / CTL_MAX_PORTS;
- for (g = 0; g < num_target_port_groups; g++) {
- STAILQ_FOREACH(port, &softc->port_list, links) {
- if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
- continue;
- if (lun != NULL &&
- ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
- continue;
- p = port->targ_port % CTL_MAX_PORTS + g * CTL_MAX_PORTS;
- scsi_ulto2b(p, pd->relative_port_id);
- if (port->init_devid && g == pg) {
- iid_len = port->init_devid->len;
- memcpy(pd->initiator_transportid,
- port->init_devid->data, port->init_devid->len);
- } else
- iid_len = 0;
- scsi_ulto2b(iid_len, pd->initiator_transportid_length);
- pdc = (struct scsi_vpd_port_designation_cont *)
- (&pd->initiator_transportid[iid_len]);
- if (port->port_devid && g == pg) {
- id_len = port->port_devid->len;
- memcpy(pdc->target_port_descriptors,
- port->port_devid->data, port->port_devid->len);
- } else
- id_len = 0;
- scsi_ulto2b(id_len, pdc->target_port_descriptors_length);
- pd = (struct scsi_vpd_port_designation *)
- ((uint8_t *)pdc->target_port_descriptors + id_len);
- }
+ STAILQ_FOREACH(port, &softc->port_list, links) {
+ if ((port->status & CTL_PORT_STATUS_ONLINE) == 0)
+ continue;
+ if (lun != NULL &&
+ ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
+ continue;
+ scsi_ulto2b(port->targ_port, pd->relative_port_id);
+ if (port->init_devid) {
+ iid_len = port->init_devid->len;
+ memcpy(pd->initiator_transportid,
+ port->init_devid->data, port->init_devid->len);
+ } else
+ iid_len = 0;
+ scsi_ulto2b(iid_len, pd->initiator_transportid_length);
+ pdc = (struct scsi_vpd_port_designation_cont *)
+ (&pd->initiator_transportid[iid_len]);
+ if (port->port_devid) {
+ id_len = port->port_devid->len;
+ memcpy(pdc->target_port_descriptors,
+ port->port_devid->data, port->port_devid->len);
+ } else
+ id_len = 0;
+ scsi_ulto2b(id_len, pdc->target_port_descriptors_length);
+ pd = (struct scsi_vpd_port_designation *)
+ ((uint8_t *)pdc->target_port_descriptors + id_len);
}
mtx_unlock(&softc->ctl_lock);
@@ -10194,7 +9806,6 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len)
{
struct scsi_vpd_block_limits *bl_ptr;
struct ctl_lun *lun;
- int bs;
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
@@ -10231,7 +9842,6 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len)
bl_ptr->max_cmp_write_len = 0xff;
scsi_ulto4b(0xffffffff, bl_ptr->max_txfer_len);
if (lun != NULL) {
- bs = lun->be_lun->blocksize;
scsi_ulto4b(lun->be_lun->opttxferlen, bl_ptr->opt_txfer_len);
if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) {
scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_lba_cnt);
@@ -10247,6 +9857,8 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len)
bl_ptr->max_atomic_transfer_length);
scsi_ulto4b(0, bl_ptr->atomic_alignment);
scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity);
+ scsi_ulto4b(0, bl_ptr->max_atomic_transfer_length_with_atomic_boundary);
+ scsi_ulto4b(0, bl_ptr->max_atomic_boundary_size);
}
scsi_u64to8b(UINT64_MAX, bl_ptr->max_write_same_length);
@@ -10444,21 +10056,15 @@ ctl_inquiry_std(struct ctl_scsiio *ctsio)
{
struct scsi_inquiry_data *inq_ptr;
struct scsi_inquiry *cdb;
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
+ struct ctl_port *port;
struct ctl_lun *lun;
char *val;
uint32_t alloc_len, data_len;
ctl_port_type port_type;
- softc = control_softc;
-
- /*
- * Figure out whether we're talking to a Fibre Channel port or not.
- * We treat the ioctl front end, and any SCSI adapters, as packetized
- * SCSI front ends.
- */
- port_type = softc->ctl_ports[
- ctl_port_idx(ctsio->io_hdr.nexus.targ_port)]->port_type;
+ port = ctl_io_port(&ctsio->io_hdr);
+ port_type = port->port_type;
if (port_type == CTL_PORT_IOCTL || port_type == CTL_PORT_INTERNAL)
port_type = CTL_PORT_SCSI;
@@ -10488,55 +10094,18 @@ ctl_inquiry_std(struct ctl_scsiio *ctsio)
ctsio->kern_total_len = alloc_len;
}
- /*
- * If we have a LUN configured, report it as connected. Otherwise,
- * report that it is offline or no device is supported, depending
- * on the value of inquiry_pq_no_lun.
- *
- * According to the spec (SPC-4 r34), the peripheral qualifier
- * SID_QUAL_LU_OFFLINE (001b) is used in the following scenario:
- *
- * "A peripheral device having the specified peripheral device type
- * is not connected to this logical unit. However, the device
- * server is capable of supporting the specified peripheral device
- * type on this logical unit."
- *
- * According to the same spec, the peripheral qualifier
- * SID_QUAL_BAD_LU (011b) is used in this scenario:
- *
- * "The device server is not capable of supporting a peripheral
- * device on this logical unit. For this peripheral qualifier the
- * peripheral device type shall be set to 1Fh. All other peripheral
- * device type values are reserved for this peripheral qualifier."
- *
- * Given the text, it would seem that we probably want to report that
- * the LUN is offline here. There is no LUN connected, but we can
- * support a LUN at the given LUN number.
- *
- * In the real world, though, it sounds like things are a little
- * different:
- *
- * - Linux, when presented with a LUN with the offline peripheral
- * qualifier, will create an sg driver instance for it. So when
- * you attach it to CTL, you wind up with a ton of sg driver
- * instances. (One for every LUN that Linux bothered to probe.)
- * Linux does this despite the fact that it issues a REPORT LUNs
- * to LUN 0 to get the inventory of supported LUNs.
- *
- * - There is other anecdotal evidence (from Emulex folks) about
- * arrays that use the offline peripheral qualifier for LUNs that
- * are on the "passive" path in an active/passive array.
- *
- * So the solution is provide a hopefully reasonable default
- * (return bad/no LUN) and allow the user to change the behavior
- * with a tunable/sysctl variable.
- */
- if (lun != NULL)
- inq_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
- lun->be_lun->lun_type;
- else if (softc->inquiry_pq_no_lun == 0)
- inq_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT;
- else
+ if (lun != NULL) {
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) ||
+ softc->ha_link >= CTL_HA_LINK_UNKNOWN) {
+ inq_ptr->device = (SID_QUAL_LU_CONNECTED << 5) |
+ lun->be_lun->lun_type;
+ } else {
+ inq_ptr->device = (SID_QUAL_LU_OFFLINE << 5) |
+ lun->be_lun->lun_type;
+ }
+ if (lun->flags & CTL_LUN_REMOVABLE)
+ inq_ptr->dev_qual2 |= SID_RMB;
+ } else
inq_ptr->device = (SID_QUAL_BAD_LU << 5) | T_NODEVICE;
/* RMB in byte 2 is 0 */
@@ -10567,23 +10136,13 @@ ctl_inquiry_std(struct ctl_scsiio *ctsio)
inq_ptr->additional_length));
inq_ptr->spc3_flags = SPC3_SID_3PC | SPC3_SID_TPGS_IMPLICIT;
- /* 16 bit addressing */
if (port_type == CTL_PORT_SCSI)
inq_ptr->spc2_flags = SPC2_SID_ADDR16;
- /* XXX set the SID_MultiP bit here if we're actually going to
- respond on multiple ports */
inq_ptr->spc2_flags |= SPC2_SID_MultiP;
-
- /* 16 bit data bus, synchronous transfers */
+ inq_ptr->flags = SID_CmdQue;
if (port_type == CTL_PORT_SCSI)
- inq_ptr->flags = SID_WBus16 | SID_Sync;
- /*
- * XXX KDM do we want to support tagged queueing on the control
- * device at all?
- */
- if ((lun == NULL)
- || (lun->be_lun->lun_type != T_PROCESSOR))
- inq_ptr->flags |= SID_CmdQue;
+ inq_ptr->flags |= SID_WBus16 | SID_Sync;
+
/*
* Per SPC-3, unused bytes in ASCII strings are filled with spaces.
* We have 8 bytes for the vendor name, and 16 bytes for the device
@@ -10610,6 +10169,10 @@ ctl_inquiry_std(struct ctl_scsiio *ctsio)
strncpy(inq_ptr->product, CTL_PROCESSOR_PRODUCT,
sizeof(inq_ptr->product));
break;
+ case T_CDROM:
+ strncpy(inq_ptr->product, CTL_CDROM_PRODUCT,
+ sizeof(inq_ptr->product));
+ break;
default:
strncpy(inq_ptr->product, CTL_UNKNOWN_PRODUCT,
sizeof(inq_ptr->product));
@@ -10672,6 +10235,11 @@ ctl_inquiry_std(struct ctl_scsiio *ctsio)
scsi_ulto2b(0x0600, inq_ptr->version4);
break;
case T_PROCESSOR:
+ break;
+ case T_CDROM:
+ /* MMC-6 (no version claimed) */
+ scsi_ulto2b(0x04E0, inq_ptr->version4);
+ break;
default:
break;
}
@@ -10711,6 +10279,398 @@ ctl_inquiry(struct ctl_scsiio *ctsio)
return (retval);
}
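
All of the MMC service routines added below share one allocation-length rule: build the full response, then transfer min(data_len, alloc_len) and record the shortfall as residual. That clamp, extracted into a standalone sketch with illustrative field names:

	#include <stdint.h>
	#include <stdio.h>

	struct io {
		uint32_t residual;
		uint32_t kern_data_len;
		uint32_t kern_total_len;
	};

	static void
	clamp_to_alloc_len(struct io *io, uint32_t data_len, uint32_t alloc_len)
	{
		if (data_len < alloc_len) {
			io->residual = alloc_len - data_len;
			io->kern_data_len = data_len;
			io->kern_total_len = data_len;
		} else {
			io->residual = 0;
			io->kern_data_len = alloc_len;
			io->kern_total_len = alloc_len;
		}
	}

	int
	main(void)
	{
		struct io io;

		clamp_to_alloc_len(&io, 52, 512);	/* short response */
		printf("send %u, residual %u\n", io.kern_data_len, io.residual);
		clamp_to_alloc_len(&io, 52, 16);	/* truncated response */
		printf("send %u, residual %u\n", io.kern_data_len, io.residual);
		return (0);
	}
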
+int
+ctl_get_config(struct ctl_scsiio *ctsio)
+{
+ struct scsi_get_config_header *hdr;
+ struct scsi_get_config_feature *feature;
+ struct scsi_get_config *cdb;
+ struct ctl_lun *lun;
+ uint32_t alloc_len, data_len;
+ int rt, starting;
+
+ lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ cdb = (struct scsi_get_config *)ctsio->cdb;
+ rt = (cdb->rt & SGC_RT_MASK);
+ starting = scsi_2btoul(cdb->starting_feature);
+ alloc_len = scsi_2btoul(cdb->length);
+
+ data_len = sizeof(struct scsi_get_config_header) +
+ sizeof(struct scsi_get_config_feature) + 8 +
+ sizeof(struct scsi_get_config_feature) + 8 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 8 +
+ sizeof(struct scsi_get_config_feature) +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4 +
+ sizeof(struct scsi_get_config_feature) + 4;
+ ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
+ ctsio->kern_sg_entries = 0;
+ ctsio->kern_data_resid = 0;
+ ctsio->kern_rel_offset = 0;
+
+ hdr = (struct scsi_get_config_header *)ctsio->kern_data_ptr;
+ if (lun->flags & CTL_LUN_NO_MEDIA)
+ scsi_ulto2b(0x0000, hdr->current_profile);
+ else
+ scsi_ulto2b(0x0010, hdr->current_profile);
+ feature = (struct scsi_get_config_feature *)(hdr + 1);
+
+ if (starting > 0x003b)
+ goto done;
+ if (starting > 0x003a)
+ goto f3b;
+ if (starting > 0x002b)
+ goto f3a;
+ if (starting > 0x002a)
+ goto f2b;
+ if (starting > 0x001f)
+ goto f2a;
+ if (starting > 0x001e)
+ goto f1f;
+ if (starting > 0x001d)
+ goto f1e;
+ if (starting > 0x0010)
+ goto f1d;
+ if (starting > 0x0003)
+ goto f10;
+ if (starting > 0x0002)
+ goto f3;
+ if (starting > 0x0001)
+ goto f2;
+ if (starting > 0x0000)
+ goto f1;
+
+ /* Profile List */
+ scsi_ulto2b(0x0000, feature->feature_code);
+ feature->flags = SGC_F_PERSISTENT | SGC_F_CURRENT;
+ feature->add_length = 8;
+ scsi_ulto2b(0x0008, &feature->feature_data[0]); /* CD-ROM */
+ feature->feature_data[2] = 0x00;
+ scsi_ulto2b(0x0010, &feature->feature_data[4]); /* DVD-ROM */
+ feature->feature_data[6] = 0x01;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f1: /* Core */
+ scsi_ulto2b(0x0001, feature->feature_code);
+ feature->flags = 0x08 | SGC_F_PERSISTENT | SGC_F_CURRENT;
+ feature->add_length = 8;
+ scsi_ulto4b(0x00000000, &feature->feature_data[0]);
+ feature->feature_data[4] = 0x03;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f2: /* Morphing */
+ scsi_ulto2b(0x0002, feature->feature_code);
+ feature->flags = 0x04 | SGC_F_PERSISTENT | SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x02;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f3: /* Removable Medium */
+ scsi_ulto2b(0x0003, feature->feature_code);
+ feature->flags = 0x04 | SGC_F_PERSISTENT | SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x39;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+ if (rt == SGC_RT_CURRENT && (lun->flags & CTL_LUN_NO_MEDIA))
+ goto done;
+
+f10: /* Random Read */
+ scsi_ulto2b(0x0010, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 8;
+ scsi_ulto4b(lun->be_lun->blocksize, &feature->feature_data[0]);
+ scsi_ulto2b(1, &feature->feature_data[4]);
+ feature->feature_data[6] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f1d: /* Multi-Read */
+ scsi_ulto2b(0x001D, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 0;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f1e: /* CD Read */
+ scsi_ulto2b(0x001E, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f1f: /* DVD Read */
+ scsi_ulto2b(0x001F, feature->feature_code);
+ feature->flags = 0x08;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x01;
+ feature->feature_data[2] = 0x03;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f2a: /* DVD+RW */
+ scsi_ulto2b(0x002A, feature->feature_code);
+ feature->flags = 0x04;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x00;
+ feature->feature_data[1] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f2b: /* DVD+R */
+ scsi_ulto2b(0x002B, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f3a: /* DVD+RW Dual Layer */
+ scsi_ulto2b(0x003A, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x00;
+ feature->feature_data[1] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+f3b: /* DVD+R Dual Layer */
+ scsi_ulto2b(0x003B, feature->feature_code);
+ feature->flags = 0x00;
+ if ((lun->flags & CTL_LUN_NO_MEDIA) == 0)
+ feature->flags |= SGC_F_CURRENT;
+ feature->add_length = 4;
+ feature->feature_data[0] = 0x00;
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+
+done:
+ data_len = (uint8_t *)feature - (uint8_t *)hdr;
+ if (rt == SGC_RT_SPECIFIC && data_len > 4) {
+ feature = (struct scsi_get_config_feature *)(hdr + 1);
+ if (scsi_2btoul(feature->feature_code) == starting)
+ feature = (struct scsi_get_config_feature *)
+ &feature->feature_data[feature->add_length];
+ data_len = (uint8_t *)feature - (uint8_t *)hdr;
+ }
+ scsi_ulto4b(data_len - 4, hdr->data_length);
+ if (data_len < alloc_len) {
+ ctsio->residual = alloc_len - data_len;
+ ctsio->kern_data_len = data_len;
+ ctsio->kern_total_len = data_len;
+ } else {
+ ctsio->residual = 0;
+ ctsio->kern_data_len = alloc_len;
+ ctsio->kern_total_len = alloc_len;
+ }
+
+ ctl_set_success(ctsio);
+ ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctsio->be_move_done = ctl_config_move_done;
+ ctl_datamove((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+}
+
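
The goto ladder in ctl_get_config() is an unrolled scan for "first feature with code >= starting_feature". The same selection expressed as a table walk, using the feature codes from the hunk and a stand-in emit():

	#include <stdio.h>

	static const unsigned feature_codes[] = {
		0x0000, 0x0001, 0x0002, 0x0003, 0x0010, 0x001d,
		0x001e, 0x001f, 0x002a, 0x002b, 0x003a, 0x003b,
	};

	/* Stand-in for emitting one feature descriptor. */
	static void
	emit(unsigned code)
	{
		printf("feature %#06x\n", code);
	}

	int
	main(void)
	{
		unsigned starting = 0x001f;	/* cdb->starting_feature */
		size_t i;

		for (i = 0; i < sizeof(feature_codes) / sizeof(feature_codes[0]); i++)
			if (feature_codes[i] >= starting)
				emit(feature_codes[i]);
		return (0);
	}
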
+int
+ctl_get_event_status(struct ctl_scsiio *ctsio)
+{
+ struct scsi_get_event_status_header *hdr;
+ struct scsi_get_event_status *cdb;
+ struct ctl_lun *lun;
+ uint32_t alloc_len, data_len;
+ int notif_class;
+
+ lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ cdb = (struct scsi_get_event_status *)ctsio->cdb;
+ if ((cdb->byte2 & SGESN_POLLED) == 0) {
+ ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1,
+ /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0);
+ ctl_done((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+ notif_class = cdb->notif_class;
+ alloc_len = scsi_2btoul(cdb->length);
+
+ data_len = sizeof(struct scsi_get_event_status_header);
+ ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
+ ctsio->kern_sg_entries = 0;
+ ctsio->kern_data_resid = 0;
+ ctsio->kern_rel_offset = 0;
+
+ if (data_len < alloc_len) {
+ ctsio->residual = alloc_len - data_len;
+ ctsio->kern_data_len = data_len;
+ ctsio->kern_total_len = data_len;
+ } else {
+ ctsio->residual = 0;
+ ctsio->kern_data_len = alloc_len;
+ ctsio->kern_total_len = alloc_len;
+ }
+
+ hdr = (struct scsi_get_event_status_header *)ctsio->kern_data_ptr;
+ scsi_ulto2b(0, hdr->descr_length);
+ hdr->nea_class = SGESN_NEA;
+ hdr->supported_class = 0;
+
+ ctl_set_success(ctsio);
+ ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctsio->be_move_done = ctl_config_move_done;
+ ctl_datamove((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+}
+
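
ctl_get_event_status(), like the other added routines, packs multi-byte fields big-endian via scsi_ulto2b() and friends. Equivalent standalone packers (renamed, since a C identifier cannot start with a digit):

	#include <stdint.h>
	#include <stdio.h>

	static void
	ulto2b(uint32_t val, uint8_t *bytes)
	{
		bytes[0] = (val >> 8) & 0xff;
		bytes[1] = val & 0xff;
	}

	static uint32_t
	b2toul(const uint8_t *bytes)
	{
		return ((uint32_t)bytes[0] << 8 | bytes[1]);
	}

	int
	main(void)
	{
		uint8_t buf[2];

		ulto2b(0x1234, buf);
		printf("%02x %02x -> %#x\n", buf[0], buf[1], b2toul(buf));
		return (0);
	}
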
+int
+ctl_mechanism_status(struct ctl_scsiio *ctsio)
+{
+ struct scsi_mechanism_status_header *hdr;
+ struct scsi_mechanism_status *cdb;
+ struct ctl_lun *lun;
+ uint32_t alloc_len, data_len;
+
+ lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ cdb = (struct scsi_mechanism_status *)ctsio->cdb;
+ alloc_len = scsi_2btoul(cdb->length);
+
+ data_len = sizeof(struct scsi_mechanism_status_header);
+ ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
+ ctsio->kern_sg_entries = 0;
+ ctsio->kern_data_resid = 0;
+ ctsio->kern_rel_offset = 0;
+
+ if (data_len < alloc_len) {
+ ctsio->residual = alloc_len - data_len;
+ ctsio->kern_data_len = data_len;
+ ctsio->kern_total_len = data_len;
+ } else {
+ ctsio->residual = 0;
+ ctsio->kern_data_len = alloc_len;
+ ctsio->kern_total_len = alloc_len;
+ }
+
+ hdr = (struct scsi_mechanism_status_header *)ctsio->kern_data_ptr;
+ hdr->state1 = 0x00;
+ hdr->state2 = 0xe0;
+ scsi_ulto3b(0, hdr->lba);
+ hdr->slots_num = 0;
+ scsi_ulto2b(0, hdr->slots_length);
+
+ ctl_set_success(ctsio);
+ ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctsio->be_move_done = ctl_config_move_done;
+ ctl_datamove((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+}
+
+static void
+ctl_ultomsf(uint32_t lba, uint8_t *buf)
+{
+
+ lba += 150;
+ buf[0] = 0;
+ buf[1] = bin2bcd((lba / 75) / 60);
+ buf[2] = bin2bcd((lba / 75) % 60);
+ buf[3] = bin2bcd(lba % 75);
+}
+
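
A standalone version of ctl_ultomsf() above: CD addressing offsets the LBA by the 150-frame (two-second) lead-in, then splits it into minute/second/frame at 75 frames per second, BCD-coding each field as the diff does with bin2bcd():

	#include <stdint.h>
	#include <stdio.h>

	static uint8_t
	to_bcd(uint8_t v)
	{
		return (((v / 10) << 4) | (v % 10));
	}

	static void
	lba_to_msf(uint32_t lba, uint8_t buf[4])
	{
		lba += 150;
		buf[0] = 0;
		buf[1] = to_bcd((lba / 75) / 60);
		buf[2] = to_bcd((lba / 75) % 60);
		buf[3] = to_bcd(lba % 75);
	}

	int
	main(void)
	{
		uint8_t msf[4];

		lba_to_msf(0, msf);	/* track 1 start: 00:02:00 */
		printf("%02x:%02x:%02x\n", msf[1], msf[2], msf[3]);
		return (0);
	}
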
+int
+ctl_read_toc(struct ctl_scsiio *ctsio)
+{
+ struct scsi_read_toc_hdr *hdr;
+ struct scsi_read_toc_type01_descr *descr;
+ struct scsi_read_toc *cdb;
+ struct ctl_lun *lun;
+ uint32_t alloc_len, data_len;
+ int format, msf;
+
+ lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ cdb = (struct scsi_read_toc *)ctsio->cdb;
+ msf = (cdb->byte2 & CD_MSF) != 0;
+ format = cdb->format;
+ alloc_len = scsi_2btoul(cdb->data_len);
+
+ data_len = sizeof(struct scsi_read_toc_hdr);
+ if (format == 0)
+ data_len += 2 * sizeof(struct scsi_read_toc_type01_descr);
+ else
+ data_len += sizeof(struct scsi_read_toc_type01_descr);
+ ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO);
+ ctsio->kern_sg_entries = 0;
+ ctsio->kern_data_resid = 0;
+ ctsio->kern_rel_offset = 0;
+
+ if (data_len < alloc_len) {
+ ctsio->residual = alloc_len - data_len;
+ ctsio->kern_data_len = data_len;
+ ctsio->kern_total_len = data_len;
+ } else {
+ ctsio->residual = 0;
+ ctsio->kern_data_len = alloc_len;
+ ctsio->kern_total_len = alloc_len;
+ }
+
+ hdr = (struct scsi_read_toc_hdr *)ctsio->kern_data_ptr;
+ if (format == 0) {
+ scsi_ulto2b(0x12, hdr->data_length);
+ hdr->first = 1;
+ hdr->last = 1;
+ descr = (struct scsi_read_toc_type01_descr *)(hdr + 1);
+ descr->addr_ctl = 0x14;
+ descr->track_number = 1;
+ if (msf)
+ ctl_ultomsf(0, descr->track_start);
+ else
+ scsi_ulto4b(0, descr->track_start);
+ descr++;
+ descr->addr_ctl = 0x14;
+ descr->track_number = 0xaa;
+ if (msf)
+ ctl_ultomsf(lun->be_lun->maxlba+1, descr->track_start);
+ else
+ scsi_ulto4b(lun->be_lun->maxlba+1, descr->track_start);
+ } else {
+ scsi_ulto2b(0x0a, hdr->data_length);
+ hdr->first = 1;
+ hdr->last = 1;
+ descr = (struct scsi_read_toc_type01_descr *)(hdr + 1);
+ descr->addr_ctl = 0x14;
+ descr->track_number = 1;
+ if (msf)
+ ctl_ultomsf(0, descr->track_start);
+ else
+ scsi_ulto4b(0, descr->track_start);
+ }
+
+ ctl_set_success(ctsio);
+ ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctsio->be_move_done = ctl_config_move_done;
+ ctl_datamove((union ctl_io *)ctsio);
+ return (CTL_RETVAL_COMPLETE);
+}
+
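
Where ctl_read_toc()'s 0x12 and 0x0a come from: the TOC data_length field counts every byte after itself, i.e. the two remaining header bytes plus one descriptor per track entry (assuming the standard 8-byte MMC descriptor). A quick check:

	#include <stdio.h>

	#define TOC_HDR_REMAINDER	2	/* first/last track bytes */
	#define TOC_DESCR_LEN		8	/* assumed descriptor size */

	int
	main(void)
	{

		/* format 0: track 1 plus the 0xaa lead-out descriptor */
		printf("format 0: %#x\n", TOC_HDR_REMAINDER + 2 * TOC_DESCR_LEN);
		/* format 1: a single session descriptor */
		printf("format 1: %#x\n", TOC_HDR_REMAINDER + 1 * TOC_DESCR_LEN);
		return (0);
	}
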
/*
* For known CDB types, parse the LBA and length.
*/
@@ -10781,8 +10741,7 @@ ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len)
break;
}
case READ_16:
- case WRITE_16:
- case WRITE_ATOMIC_16: {
+ case WRITE_16: {
struct scsi_rw_16 *cdb;
cdb = (struct scsi_rw_16 *)io->scsiio.cdb;
@@ -10791,6 +10750,15 @@ ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len)
*len = scsi_4btoul(cdb->length);
break;
}
+ case WRITE_ATOMIC_16: {
+ struct scsi_write_atomic_16 *cdb;
+
+ cdb = (struct scsi_write_atomic_16 *)io->scsiio.cdb;
+
+ *lba = scsi_8btou64(cdb->addr);
+ *len = scsi_2btoul(cdb->length);
+ break;
+ }
case WRITE_VERIFY_16: {
struct scsi_write_verify_16 *cdb;
@@ -10930,6 +10898,8 @@ ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq)
if (ctl_get_lba_len(io1, &lba1, &len1) != 0)
return (CTL_ACTION_ERROR);
+ if (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE)
+ seq = FALSE;
return (ctl_extent_check_lba(lba1, len1, lba2, len2, seq));
}
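
Stripped of the CTL plumbing, ctl_extent_check() reduces to a half-open interval overlap test on LBA ranges; the new CTL_FLAG_SERSEQ_DONE check just skips the sequential handling once ordering no longer matters. A minimal sketch of the interval test (ctl_extent_check_lba's sequential mode, which also passes adjacent ranges, is omitted):

	#include <stdint.h>
	#include <stdio.h>

	/* True when [lba1, lba1+len1) and [lba2, lba2+len2) intersect. */
	static int
	ranges_overlap(uint64_t lba1, uint64_t len1, uint64_t lba2, uint64_t len2)
	{
		return (lba1 < lba2 + len2 && lba2 < lba1 + len1);
	}

	int
	main(void)
	{

		printf("%d\n", ranges_overlap(0, 8, 4, 8));	/* 1: overlap */
		printf("%d\n", ranges_overlap(0, 8, 8, 8));	/* 0: adjacent */
		return (0);
	}
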
@@ -10939,6 +10909,8 @@ ctl_extent_check_seq(union ctl_io *io1, union ctl_io *io2)
uint64_t lba1, lba2;
uint64_t len1, len2;
+ if (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE)
+ return (CTL_ACTION_PASS);
if (ctl_get_lba_len(io1, &lba1, &len1) != 0)
return (CTL_ACTION_ERROR);
if (ctl_get_lba_len(io2, &lba2, &len2) != 0)
@@ -10954,7 +10926,7 @@ ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io,
union ctl_io *ooa_io)
{
const struct ctl_cmd_entry *pending_entry, *ooa_entry;
- ctl_serialize_action *serialize_row;
+ const ctl_serialize_action *serialize_row;
/*
* The initiator attempted multiple untagged commands at the same
@@ -10964,8 +10936,8 @@ ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io,
&& (ooa_io->scsiio.tag_type == CTL_TAG_UNTAGGED)
&& ((pending_io->io_hdr.nexus.targ_port ==
ooa_io->io_hdr.nexus.targ_port)
- && (pending_io->io_hdr.nexus.initid.id ==
- ooa_io->io_hdr.nexus.initid.id))
+ && (pending_io->io_hdr.nexus.initid ==
+ ooa_io->io_hdr.nexus.initid))
&& ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT |
CTL_FLAG_STATUS_SENT)) == 0))
return (CTL_ACTION_OVERLAP);
@@ -10986,8 +10958,8 @@ ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io,
&& (pending_io->scsiio.tag_num == ooa_io->scsiio.tag_num)
&& ((pending_io->io_hdr.nexus.targ_port ==
ooa_io->io_hdr.nexus.targ_port)
- && (pending_io->io_hdr.nexus.initid.id ==
- ooa_io->io_hdr.nexus.initid.id))
+ && (pending_io->io_hdr.nexus.initid ==
+ ooa_io->io_hdr.nexus.initid))
&& ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT |
CTL_FLAG_STATUS_SENT)) == 0))
return (CTL_ACTION_OVERLAP_TAG);
@@ -11038,15 +11010,16 @@ ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io,
return (CTL_ACTION_BLOCK);
case CTL_SER_EXTENT:
return (ctl_extent_check(ooa_io, pending_io,
- (lun->serseq == CTL_LUN_SERSEQ_ON)));
+ (lun->be_lun && lun->be_lun->serseq == CTL_LUN_SERSEQ_ON)));
case CTL_SER_EXTENTOPT:
if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT].queue_flags
& SCP_QUEUE_ALG_MASK) != SCP_QUEUE_ALG_UNRESTRICTED)
return (ctl_extent_check(ooa_io, pending_io,
- (lun->serseq == CTL_LUN_SERSEQ_ON)));
+ (lun->be_lun &&
+ lun->be_lun->serseq == CTL_LUN_SERSEQ_ON)));
return (CTL_ACTION_PASS);
case CTL_SER_EXTENTSEQ:
- if (lun->serseq != CTL_LUN_SERSEQ_OFF)
+ if (lun->be_lun && lun->be_lun->serseq != CTL_LUN_SERSEQ_OFF)
return (ctl_extent_check_seq(ooa_io, pending_io));
return (CTL_ACTION_PASS);
case CTL_SER_PASS:
@@ -11125,6 +11098,7 @@ ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
static int
ctl_check_blocked(struct ctl_lun *lun)
{
+ struct ctl_softc *softc = lun->ctl_softc;
union ctl_io *cur_blocked, *next_blocked;
mtx_assert(&lun->lun_lock, MA_OWNED);
@@ -11170,7 +11144,6 @@ ctl_check_blocked(struct ctl_lun *lun)
case CTL_ACTION_PASS:
case CTL_ACTION_SKIP: {
const struct ctl_cmd_entry *entry;
- int isc_retval;
/*
* The skip case shouldn't happen, this transaction
@@ -11186,24 +11159,21 @@ ctl_check_blocked(struct ctl_lun *lun)
blocked_links);
cur_blocked->io_hdr.flags &= ~CTL_FLAG_BLOCKED;
- if (cur_blocked->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC){
+ if ((softc->ha_mode != CTL_HA_MODE_XFER) &&
+ (cur_blocked->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)){
/*
* Need to send IO back to original side to
* run
*/
union ctl_ha_msg msg_info;
+ cur_blocked->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
msg_info.hdr.original_sc =
cur_blocked->io_hdr.original_sc;
msg_info.hdr.serializing_sc = cur_blocked;
msg_info.hdr.msg_type = CTL_MSG_R2R;
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- &msg_info, sizeof(msg_info), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:Check Blocked error from "
- "ctl_ha_msg_send %d\n",
- isc_retval);
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.hdr), M_NOWAIT);
break;
}
entry = ctl_get_cmd_entry(&cur_blocked->scsiio, NULL);
@@ -11262,21 +11232,38 @@ ctl_scsiio_lun_check(struct ctl_lun *lun,
mtx_assert(&lun->lun_lock, MA_OWNED);
/*
- * If this shelf is a secondary shelf controller, we have to reject
- * any media access commands.
+ * If this shelf is a secondary shelf controller, we may have to
+ * reject some commands disallowed by HA mode and link state.
*/
- if ((softc->flags & CTL_FLAG_ACTIVE_SHELF) == 0 &&
- (entry->flags & CTL_CMD_FLAG_OK_ON_SECONDARY) == 0) {
- ctl_set_lun_standby(ctsio);
- retval = 1;
- goto bailout;
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0) {
+ if (softc->ha_link == CTL_HA_LINK_OFFLINE &&
+ (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) {
+ ctl_set_lun_unavail(ctsio);
+ retval = 1;
+ goto bailout;
+ }
+ if ((lun->flags & CTL_LUN_PEER_SC_PRIMARY) == 0 &&
+ (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) {
+ ctl_set_lun_transit(ctsio);
+ retval = 1;
+ goto bailout;
+ }
+ if (softc->ha_mode == CTL_HA_MODE_ACT_STBY &&
+ (entry->flags & CTL_CMD_FLAG_OK_ON_STANDBY) == 0) {
+ ctl_set_lun_standby(ctsio);
+ retval = 1;
+ goto bailout;
+ }
+
+ /* The rest of checks are only done on executing side */
+ if (softc->ha_mode == CTL_HA_MODE_XFER)
+ goto bailout;
}
if (entry->pattern & CTL_LUN_PAT_WRITE) {
- if (lun->flags & CTL_LUN_READONLY) {
- ctl_set_sense(ctsio, /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_DATA_PROTECT,
- /*asc*/ 0x27, /*ascq*/ 0x01, SSD_ELEM_NONE);
+ if (lun->be_lun &&
+ lun->be_lun->flags & CTL_LUN_FLAG_READONLY) {
+ ctl_set_hw_write_protected(ctsio);
retval = 1;
goto bailout;
}
@@ -11295,7 +11282,7 @@ ctl_scsiio_lun_check(struct ctl_lun *lun,
* even on reserved LUNs, and if this initiator isn't the one who
* reserved us, reject the command with a reservation conflict.
*/
- residx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ residx = ctl_get_initindex(&ctsio->io_hdr.nexus);
if ((lun->flags & CTL_LUN_RESERVED)
&& ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_RESV) == 0)) {
if (lun->res_idx != residx) {
@@ -11309,9 +11296,9 @@ ctl_scsiio_lun_check(struct ctl_lun *lun,
(entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_RESV)) {
/* No reservation or command is allowed. */;
} else if ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_WRESV) &&
- (lun->res_type == SPR_TYPE_WR_EX ||
- lun->res_type == SPR_TYPE_WR_EX_RO ||
- lun->res_type == SPR_TYPE_WR_EX_AR)) {
+ (lun->pr_res_type == SPR_TYPE_WR_EX ||
+ lun->pr_res_type == SPR_TYPE_WR_EX_RO ||
+ lun->pr_res_type == SPR_TYPE_WR_EX_AR)) {
/* The command is allowed for Write Exclusive resv. */;
} else {
/*
@@ -11319,45 +11306,32 @@ ctl_scsiio_lun_check(struct ctl_lun *lun,
* reservation and this isn't the res holder then set a
* conflict.
*/
- if (ctl_get_prkey(lun, residx) == 0
- || (residx != lun->pr_res_idx && lun->res_type < 4)) {
+ if (ctl_get_prkey(lun, residx) == 0 ||
+ (residx != lun->pr_res_idx && lun->pr_res_type < 4)) {
ctl_set_reservation_conflict(ctsio);
retval = 1;
goto bailout;
}
-
}
- if ((lun->flags & CTL_LUN_OFFLINE)
- && ((entry->flags & CTL_CMD_FLAG_OK_ON_OFFLINE) == 0)) {
- ctl_set_lun_not_ready(ctsio);
- retval = 1;
- goto bailout;
- }
-
- /*
- * If the LUN is stopped, see if this particular command is allowed
- * for a stopped lun. Otherwise, reject it with 0x04,0x02.
- */
- if ((lun->flags & CTL_LUN_STOPPED)
- && ((entry->flags & CTL_CMD_FLAG_OK_ON_STOPPED) == 0)) {
- /* "Logical unit not ready, initializing cmd. required" */
- ctl_set_lun_stopped(ctsio);
- retval = 1;
- goto bailout;
- }
-
- if ((lun->flags & CTL_LUN_INOPERABLE)
- && ((entry->flags & CTL_CMD_FLAG_OK_ON_INOPERABLE) == 0)) {
- /* "Medium format corrupted" */
- ctl_set_medium_format_corrupted(ctsio);
+ if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) {
+ if (lun->flags & CTL_LUN_EJECTED)
+ ctl_set_lun_ejected(ctsio);
+ else if (lun->flags & CTL_LUN_NO_MEDIA) {
+ if (lun->flags & CTL_LUN_REMOVABLE)
+ ctl_set_lun_no_media(ctsio);
+ else
+ ctl_set_lun_int_reqd(ctsio);
+ } else if (lun->flags & CTL_LUN_STOPPED)
+ ctl_set_lun_stopped(ctsio);
+ else
+ goto bailout;
retval = 1;
goto bailout;
}
bailout:
return (retval);
-
}
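
The rewritten media check is a precedence chain: EJECTED wins over NO_MEDIA, which wins over STOPPED, and NO_MEDIA maps to different sense depending on whether the LUN is removable. Sketched with illustrative flag names, returning the name of the ctl_set_* routine the real code would call:

	#include <stdio.h>

	#define F_EJECTED	0x1
	#define F_NO_MEDIA	0x2
	#define F_STOPPED	0x4
	#define F_REMOVABLE	0x8

	static const char *
	media_check(unsigned flags)
	{
		if (flags & F_EJECTED)
			return ("ctl_set_lun_ejected");
		if (flags & F_NO_MEDIA)
			return ((flags & F_REMOVABLE) ?
			    "ctl_set_lun_no_media" : "ctl_set_lun_int_reqd");
		if (flags & F_STOPPED)
			return ("ctl_set_lun_stopped");
		return ("pass");
	}

	int
	main(void)
	{

		printf("%s\n", media_check(F_NO_MEDIA | F_REMOVABLE));
		printf("%s\n", media_check(F_STOPPED));
		return (0);
	}
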
static void
@@ -11368,262 +11342,89 @@ ctl_failover_io(union ctl_io *io, int have_lock)
}
static void
-ctl_failover(void)
+ctl_failover_lun(union ctl_io *rio)
{
+ struct ctl_softc *softc = control_softc;
struct ctl_lun *lun;
- struct ctl_softc *softc;
- union ctl_io *next_io, *pending_io;
- union ctl_io *io;
- int lun_idx;
+ struct ctl_io_hdr *io, *next_io;
+ uint32_t targ_lun;
- softc = control_softc;
+ targ_lun = rio->io_hdr.nexus.targ_mapped_lun;
+ CTL_DEBUG_PRINT(("FAILOVER for lun %ju\n", targ_lun));
+ /* Find and lock the LUN. */
mtx_lock(&softc->ctl_lock);
- /*
- * Remove any cmds from the other SC from the rtr queue. These
- * will obviously only be for LUNs for which we're the primary.
- * We can't send status or get/send data for these commands.
- * Since they haven't been executed yet, we can just remove them.
- * We'll either abort them or delete them below, depending on
- * which HA mode we're in.
- */
-#ifdef notyet
- mtx_lock(&softc->queue_lock);
- for (io = (union ctl_io *)STAILQ_FIRST(&softc->rtr_queue);
- io != NULL; io = next_io) {
- next_io = (union ctl_io *)STAILQ_NEXT(&io->io_hdr, links);
- if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)
- STAILQ_REMOVE(&softc->rtr_queue, &io->io_hdr,
- ctl_io_hdr, links);
- }
- mtx_unlock(&softc->queue_lock);
-#endif
-
- for (lun_idx=0; lun_idx < softc->num_luns; lun_idx++) {
- lun = softc->ctl_luns[lun_idx];
- if (lun==NULL)
- continue;
-
- /*
- * Processor LUNs are primary on both sides.
- * XXX will this always be true?
- */
- if (lun->be_lun->lun_type == T_PROCESSOR)
- continue;
-
- if ((lun->flags & CTL_LUN_PRIMARY_SC)
- && (softc->ha_mode == CTL_HA_MODE_SER_ONLY)) {
- printf("FAILOVER: primary lun %d\n", lun_idx);
- /*
- * Remove all commands from the other SC. First from the
- * blocked queue then from the ooa queue. Once we have
- * removed them. Call ctl_check_blocked to see if there
- * is anything that can run.
- */
- for (io = (union ctl_io *)TAILQ_FIRST(
- &lun->blocked_queue); io != NULL; io = next_io) {
-
- next_io = (union ctl_io *)TAILQ_NEXT(
- &io->io_hdr, blocked_links);
-
- if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) {
- TAILQ_REMOVE(&lun->blocked_queue,
- &io->io_hdr,blocked_links);
- io->io_hdr.flags &= ~CTL_FLAG_BLOCKED;
- TAILQ_REMOVE(&lun->ooa_queue,
- &io->io_hdr, ooa_links);
-
- ctl_free_io(io);
- }
- }
-
- for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue);
- io != NULL; io = next_io) {
-
- next_io = (union ctl_io *)TAILQ_NEXT(
- &io->io_hdr, ooa_links);
-
- if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) {
-
- TAILQ_REMOVE(&lun->ooa_queue,
- &io->io_hdr,
- ooa_links);
+ if ((targ_lun < CTL_MAX_LUNS) &&
+ ((lun = softc->ctl_luns[targ_lun]) != NULL)) {
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+ } else {
+ mtx_unlock(&softc->ctl_lock);
+ return;
+ }
- ctl_free_io(io);
+ if (softc->ha_mode == CTL_HA_MODE_XFER) {
+ TAILQ_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) {
+ /* We are master */
+ if (io->flags & CTL_FLAG_FROM_OTHER_SC) {
+ if (io->flags & CTL_FLAG_IO_ACTIVE) {
+ io->flags |= CTL_FLAG_ABORT;
+ io->flags |= CTL_FLAG_FAILOVER;
+ } else { /* This can be only due to DATAMOVE */
+ io->msg_type = CTL_MSG_DATAMOVE_DONE;
+ io->flags &= ~CTL_FLAG_DMA_INPROG;
+ io->flags |= CTL_FLAG_IO_ACTIVE;
+ io->port_status = 31340;
+ ctl_enqueue_isc((union ctl_io *)io);
}
}
- ctl_check_blocked(lun);
- } else if ((lun->flags & CTL_LUN_PRIMARY_SC)
- && (softc->ha_mode == CTL_HA_MODE_XFER)) {
-
- printf("FAILOVER: primary lun %d\n", lun_idx);
- /*
- * Abort all commands from the other SC. We can't
- * send status back for them now. These should get
- * cleaned up when they are completed or come out
- * for a datamove operation.
- */
- for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue);
- io != NULL; io = next_io) {
- next_io = (union ctl_io *)TAILQ_NEXT(
- &io->io_hdr, ooa_links);
-
- if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)
- io->io_hdr.flags |= CTL_FLAG_ABORT;
- }
- } else if (((lun->flags & CTL_LUN_PRIMARY_SC) == 0)
- && (softc->ha_mode == CTL_HA_MODE_XFER)) {
-
- printf("FAILOVER: secondary lun %d\n", lun_idx);
-
- lun->flags |= CTL_LUN_PRIMARY_SC;
-
- /*
- * We send all I/O that was sent to this controller
- * and redirected to the other side back with
- * busy status, and have the initiator retry it.
- * Figuring out how much data has been transferred,
- * etc. and picking up where we left off would be
- * very tricky.
- *
- * XXX KDM need to remove I/O from the blocked
- * queue as well!
- */
- for (pending_io = (union ctl_io *)TAILQ_FIRST(
- &lun->ooa_queue); pending_io != NULL;
- pending_io = next_io) {
-
- next_io = (union ctl_io *)TAILQ_NEXT(
- &pending_io->io_hdr, ooa_links);
-
- pending_io->io_hdr.flags &=
- ~CTL_FLAG_SENT_2OTHER_SC;
-
- if (pending_io->io_hdr.flags &
- CTL_FLAG_IO_ACTIVE) {
- pending_io->io_hdr.flags |=
- CTL_FLAG_FAILOVER;
+ /* We are slave */
+ if (io->flags & CTL_FLAG_SENT_2OTHER_SC) {
+ io->flags &= ~CTL_FLAG_SENT_2OTHER_SC;
+ if (io->flags & CTL_FLAG_IO_ACTIVE) {
+ io->flags |= CTL_FLAG_FAILOVER;
} else {
- ctl_set_busy(&pending_io->scsiio);
- ctl_done(pending_io);
+ ctl_set_busy(&((union ctl_io *)io)->
+ scsiio);
+ ctl_done((union ctl_io *)io);
}
}
-
- ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
- } else if (((lun->flags & CTL_LUN_PRIMARY_SC) == 0)
- && (softc->ha_mode == CTL_HA_MODE_SER_ONLY)) {
- printf("FAILOVER: secondary lun %d\n", lun_idx);
- /*
- * if the first io on the OOA is not on the RtR queue
- * add it.
- */
- lun->flags |= CTL_LUN_PRIMARY_SC;
-
- pending_io = (union ctl_io *)TAILQ_FIRST(
- &lun->ooa_queue);
- if (pending_io==NULL) {
- printf("Nothing on OOA queue\n");
- continue;
- }
-
- pending_io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
- if ((pending_io->io_hdr.flags &
- CTL_FLAG_IS_WAS_ON_RTR) == 0) {
- pending_io->io_hdr.flags |=
- CTL_FLAG_IS_WAS_ON_RTR;
- ctl_enqueue_rtr(pending_io);
+ }
+ } else { /* SERIALIZE modes */
+ TAILQ_FOREACH_SAFE(io, &lun->blocked_queue, blocked_links,
+ next_io) {
+ /* We are master */
+ if (io->flags & CTL_FLAG_FROM_OTHER_SC) {
+ TAILQ_REMOVE(&lun->blocked_queue, io,
+ blocked_links);
+ io->flags &= ~CTL_FLAG_BLOCKED;
+ TAILQ_REMOVE(&lun->ooa_queue, io, ooa_links);
+ ctl_free_io((union ctl_io *)io);
}
-#if 0
- else
- {
- printf("Tag 0x%04x is running\n",
- pending_io->scsiio.tag_num);
+ }
+ TAILQ_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) {
+ /* We are master */
+ if (io->flags & CTL_FLAG_FROM_OTHER_SC) {
+ TAILQ_REMOVE(&lun->ooa_queue, io, ooa_links);
+ ctl_free_io((union ctl_io *)io);
}
-#endif
-
- next_io = (union ctl_io *)TAILQ_NEXT(
- &pending_io->io_hdr, ooa_links);
- for (pending_io=next_io; pending_io != NULL;
- pending_io = next_io) {
- pending_io->io_hdr.flags &=
- ~CTL_FLAG_SENT_2OTHER_SC;
- next_io = (union ctl_io *)TAILQ_NEXT(
- &pending_io->io_hdr, ooa_links);
- if (pending_io->io_hdr.flags &
- CTL_FLAG_IS_WAS_ON_RTR) {
-#if 0
- printf("Tag 0x%04x is running\n",
- pending_io->scsiio.tag_num);
-#endif
- continue;
- }
-
- switch (ctl_check_ooa(lun, pending_io,
- (union ctl_io *)TAILQ_PREV(
- &pending_io->io_hdr, ctl_ooaq,
- ooa_links))) {
-
- case CTL_ACTION_BLOCK:
- TAILQ_INSERT_TAIL(&lun->blocked_queue,
- &pending_io->io_hdr,
- blocked_links);
- pending_io->io_hdr.flags |=
- CTL_FLAG_BLOCKED;
- break;
- case CTL_ACTION_PASS:
- case CTL_ACTION_SKIP:
- pending_io->io_hdr.flags |=
- CTL_FLAG_IS_WAS_ON_RTR;
- ctl_enqueue_rtr(pending_io);
- break;
- case CTL_ACTION_OVERLAP:
- ctl_set_overlapped_cmd(
- (struct ctl_scsiio *)pending_io);
- ctl_done(pending_io);
- break;
- case CTL_ACTION_OVERLAP_TAG:
- ctl_set_overlapped_tag(
- (struct ctl_scsiio *)pending_io,
- pending_io->scsiio.tag_num & 0xff);
- ctl_done(pending_io);
- break;
- case CTL_ACTION_ERROR:
- default:
- ctl_set_internal_failure(
- (struct ctl_scsiio *)pending_io,
- 0, // sks_valid
- 0); //retry count
- ctl_done(pending_io);
- break;
+ /* We are slave */
+ if (io->flags & CTL_FLAG_SENT_2OTHER_SC) {
+ io->flags &= ~CTL_FLAG_SENT_2OTHER_SC;
+ if (!(io->flags & CTL_FLAG_IO_ACTIVE)) {
+ ctl_set_busy(&((union ctl_io *)io)->
+ scsiio);
+ ctl_done((union ctl_io *)io);
}
}
-
- ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE);
- } else {
- panic("Unhandled HA mode failover, LUN flags = %#x, "
- "ha_mode = #%x", lun->flags, softc->ha_mode);
}
+ ctl_check_blocked(lun);
}
- ctl_pause_rtr = 0;
- mtx_unlock(&softc->ctl_lock);
-}
-
-static void
-ctl_clear_ua(struct ctl_softc *ctl_softc, uint32_t initidx,
- ctl_ua_type ua_type)
-{
- struct ctl_lun *lun;
- ctl_ua_type *pu;
-
- mtx_assert(&ctl_softc->ctl_lock, MA_OWNED);
-
- STAILQ_FOREACH(lun, &ctl_softc->lun_list, links) {
- mtx_lock(&lun->lun_lock);
- pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT];
- if (pu != NULL)
- pu[initidx % CTL_MAX_INIT_PER_PORT] &= ~ua_type;
- mtx_unlock(&lun->lun_lock);
- }
+ mtx_unlock(&lun->lun_lock);
}
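
ctl_failover_lun() walks the blocked and OOA queues with TAILQ_FOREACH_SAFE because it may free or requeue the current entry; the _SAFE variant caches the next pointer before the loop body runs. A self-contained FreeBSD-style demonstration (glibc's sys/queue.h lacks the _SAFE macros):

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/queue.h>

	struct io {
		int from_other_sc;
		TAILQ_ENTRY(io) links;
	};

	TAILQ_HEAD(ioq, io);

	int
	main(void)
	{
		struct ioq q = TAILQ_HEAD_INITIALIZER(q);
		struct io *io, *next_io;
		int i;

		for (i = 0; i < 4; i++) {
			io = calloc(1, sizeof(*io));
			io->from_other_sc = i & 1;
			TAILQ_INSERT_TAIL(&q, io, links);
		}
		/* Drop peer-originated I/O; freeing mid-walk is safe here. */
		TAILQ_FOREACH_SAFE(io, &q, links, next_io) {
			if (io->from_other_sc) {
				TAILQ_REMOVE(&q, io, links);
				free(io);
			}
		}
		TAILQ_FOREACH(io, &q, links)
			printf("kept io (from_other_sc=%d)\n", io->from_other_sc);
		return (0);
	}
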
static int
@@ -11656,9 +11457,6 @@ ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio)
ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = lun;
ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr =
lun->be_lun;
- if (lun->be_lun->lun_type == T_PROCESSOR) {
- ctsio->io_hdr.flags |= CTL_FLAG_CONTROL_DEV;
- }
/*
* Every I/O goes into the OOA queue for a
@@ -11697,7 +11495,7 @@ ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio)
* it on the rtr queue.
*/
if (lun == NULL) {
- if (entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) {
+ if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) {
ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
ctl_enqueue_rtr((union ctl_io *)ctsio);
return (retval);
@@ -11758,15 +11556,9 @@ ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio)
*/
if ((entry->flags & CTL_CMD_FLAG_NO_SENSE) == 0) {
ctl_ua_type ua_type;
- scsi_sense_data_type sense_format;
-
- if (lun->flags & CTL_LUN_SENSE_DESC)
- sense_format = SSD_TYPE_DESC;
- else
- sense_format = SSD_TYPE_FIXED;
ua_type = ctl_build_ua(lun, initidx, &ctsio->sense_data,
- sense_format);
+ SSD_TYPE_NONE);
if (ua_type != CTL_UA_NONE) {
mtx_unlock(&lun->lun_lock);
ctsio->scsi_status = SCSI_STATUS_CHECK_COND;
@@ -11793,45 +11585,32 @@ ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio)
* find it easily. Something similar will need be done on the other
* side so when we are done we can find the copy.
*/
- if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0) {
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
+ (lun->flags & CTL_LUN_PEER_SC_PRIMARY) != 0 &&
+ (entry->flags & CTL_CMD_FLAG_RUN_HERE) == 0) {
union ctl_ha_msg msg_info;
int isc_retval;
ctsio->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC;
+ ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
+ mtx_unlock(&lun->lun_lock);
msg_info.hdr.msg_type = CTL_MSG_SERIALIZE;
msg_info.hdr.original_sc = (union ctl_io *)ctsio;
-#if 0
- printf("1. ctsio %p\n", ctsio);
-#endif
msg_info.hdr.serializing_sc = NULL;
msg_info.hdr.nexus = ctsio->io_hdr.nexus;
msg_info.scsi.tag_num = ctsio->tag_num;
msg_info.scsi.tag_type = ctsio->tag_type;
+ msg_info.scsi.cdb_len = ctsio->cdb_len;
memcpy(msg_info.scsi.cdb, ctsio->cdb, CTL_MAX_CDBLEN);
- ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
-
- if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- (void *)&msg_info, sizeof(msg_info), 0)) >
- CTL_HA_STATUS_SUCCESS) {
- printf("CTL:precheck, ctl_ha_msg_send returned %d\n",
- isc_retval);
- printf("CTL:opcode is %x\n", ctsio->cdb[0]);
- } else {
-#if 0
- printf("CTL:Precheck sent msg, opcode is %x\n",opcode);
-#endif
+ if ((isc_retval = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.scsi) - sizeof(msg_info.scsi.sense_data),
+ M_WAITOK)) > CTL_HA_STATUS_SUCCESS) {
+ ctl_set_busy(ctsio);
+ ctl_done((union ctl_io *)ctsio);
+ return (retval);
}
-
- /*
- * XXX KDM this I/O is off the incoming queue, but hasn't
- * been inserted on any other queue. We may need to come
- * up with a holding queue while we wait for serialization
- * so that we have an idea of what we're waiting for from
- * the other side.
- */
- mtx_unlock(&lun->lun_lock);
return (retval);
}
@@ -11934,14 +11713,16 @@ ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry)
{
switch (lun_type) {
+ case T_DIRECT:
+ if ((entry->flags & CTL_CMD_FLAG_OK_ON_DIRECT) == 0)
+ return (0);
+ break;
case T_PROCESSOR:
- if (((entry->flags & CTL_CMD_FLAG_OK_ON_PROC) == 0) &&
- ((entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) == 0))
+ if ((entry->flags & CTL_CMD_FLAG_OK_ON_PROC) == 0)
return (0);
break;
- case T_DIRECT:
- if (((entry->flags & CTL_CMD_FLAG_OK_ON_SLUN) == 0) &&
- ((entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) == 0))
+ case T_CDROM:
+ if ((entry->flags & CTL_CMD_FLAG_OK_ON_CDROM) == 0)
return (0);
break;
default:
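
The reworked ctl_cmd_applicable() drops the old CTL_CMD_FLAG_OK_ON_SLUN /
CTL_CMD_FLAG_OK_ON_ALL_LUNS pair in favor of one dedicated flag per LUN class,
and adds T_CDROM as a first-class type. A minimal userspace sketch of the same
gating, with invented flag values (the real CTL_CMD_FLAG_* bits live in the
CTL headers):

#include <stdio.h>

/* Hypothetical stand-ins for the CTL_CMD_FLAG_OK_ON_* bits. */
#define OK_ON_DIRECT	0x01
#define OK_ON_PROC	0x02
#define OK_ON_CDROM	0x04

enum lun_type { T_DIRECT, T_PROCESSOR, T_CDROM };

/* Mirror of the ctl_cmd_applicable() shape: one flag bit per LUN class. */
static int
cmd_applicable(enum lun_type t, unsigned entry_flags)
{
	switch (t) {
	case T_DIRECT:
		return ((entry_flags & OK_ON_DIRECT) != 0);
	case T_PROCESSOR:
		return ((entry_flags & OK_ON_PROC) != 0);
	case T_CDROM:
		return ((entry_flags & OK_ON_CDROM) != 0);
	default:
		return (0);
	}
}

int
main(void)
{
	/* e.g. a READ-like opcode valid on disk and CD-ROM LUNs only. */
	unsigned read10 = OK_ON_DIRECT | OK_ON_CDROM;

	printf("READ(10) on T_DIRECT:    %d\n", cmd_applicable(T_DIRECT, read10));
	printf("READ(10) on T_PROCESSOR: %d\n", cmd_applicable(T_PROCESSOR, read10));
	return (0);
}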
@@ -11995,13 +11776,13 @@ static int
ctl_target_reset(struct ctl_softc *softc, union ctl_io *io,
ctl_ua_type ua_type)
{
+ struct ctl_port *port;
struct ctl_lun *lun;
int retval;
if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
union ctl_ha_msg msg_info;
- io->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC;
msg_info.hdr.nexus = io->io_hdr.nexus;
if (ua_type==CTL_UA_TARG_RESET)
msg_info.task.task_action = CTL_TASK_TARGET_RESET;
@@ -12010,17 +11791,21 @@ ctl_target_reset(struct ctl_softc *softc, union ctl_io *io,
msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
msg_info.hdr.original_sc = NULL;
msg_info.hdr.serializing_sc = NULL;
- if (CTL_HA_STATUS_SUCCESS != ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- (void *)&msg_info, sizeof(msg_info), 0)) {
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_WAITOK);
}
retval = 0;
mtx_lock(&softc->ctl_lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- retval += ctl_lun_reset(lun, io, ua_type);
+ port = ctl_io_port(&io->io_hdr);
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ if (port != NULL &&
+ ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
+ continue;
+ retval += ctl_do_lun_reset(lun, io, ua_type);
+ }
mtx_unlock(&softc->ctl_lock);
-
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
return (retval);
}
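
ctl_target_reset() now also filters by port visibility: ctl_lun_map_to_port()
returning a value >= CTL_MAX_LUNS means the LUN is not mapped at the
requesting port, so it is skipped rather than reset. A hedged model of that
filter over a plain array (the map representation here is an assumption, not
the driver's):

#include <stdio.h>

#define MAX_LUNS	8
#define UNMAPPED	MAX_LUNS	/* sentinel: >= MAX_LUNS means "not visible" */

/* Hypothetical per-port LUN map: global LUN id -> port-local id or UNMAPPED. */
static unsigned
map_to_port(const unsigned *map, unsigned lun)
{
	return (map[lun]);
}

int
main(void)
{
	unsigned map[MAX_LUNS] = { 0, UNMAPPED, 1, UNMAPPED, 2, UNMAPPED,
	    UNMAPPED, 3 };
	unsigned lun, resets = 0;

	for (lun = 0; lun < MAX_LUNS; lun++) {
		if (map_to_port(map, lun) >= MAX_LUNS)
			continue;	/* skip LUNs the port cannot see */
		resets++;		/* stand-in for ctl_do_lun_reset() */
	}
	printf("reset %u of %u LUNs\n", resets, (unsigned)MAX_LUNS);
	return (0);
}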
@@ -12046,15 +11831,13 @@ ctl_target_reset(struct ctl_softc *softc, union ctl_io *io,
* XXX KDM for now, we're setting unit attention for all initiators.
*/
static int
-ctl_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type)
+ctl_do_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type)
{
union ctl_io *xio;
#if 0
uint32_t initidx;
#endif
-#ifdef CTL_WITH_CA
int i;
-#endif
mtx_lock(&lun->lun_lock);
/*
@@ -12089,11 +11872,47 @@ ctl_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type)
for (i = 0; i < CTL_MAX_INITIATORS; i++)
ctl_clear_mask(lun->have_ca, i);
#endif
+ lun->prevent_count = 0;
+ for (i = 0; i < CTL_MAX_INITIATORS; i++)
+ ctl_clear_mask(lun->prevent, i);
mtx_unlock(&lun->lun_lock);
return (0);
}
+static int
+ctl_lun_reset(struct ctl_softc *softc, union ctl_io *io)
+{
+ struct ctl_lun *lun;
+ uint32_t targ_lun;
+ int retval;
+
+ targ_lun = io->io_hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ (lun = softc->ctl_luns[targ_lun]) == NULL) {
+ mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
+ return (1);
+ }
+ retval = ctl_do_lun_reset(lun, io, CTL_UA_LUN_RESET);
+ mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
+
+ if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0) {
+ union ctl_ha_msg msg_info;
+
+ msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
+ msg_info.hdr.nexus = io->io_hdr.nexus;
+ msg_info.task.task_action = CTL_TASK_LUN_RESET;
+ msg_info.hdr.original_sc = NULL;
+ msg_info.hdr.serializing_sc = NULL;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_WAITOK);
+ }
+ return (retval);
+}
+
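The new ctl_lun_reset() wrapper also illustrates the HA echo guard used
throughout this patch: a controller forwards a task to its peer only when the
request did not itself arrive from the peer (CTL_FLAG_FROM_OTHER_SC),
otherwise the two sides would bounce the same message back and forth
indefinitely. A compact sketch of the guard, with made-up names:

#include <stdio.h>

#define FLAG_FROM_PEER	0x1	/* stand-in for CTL_FLAG_FROM_OTHER_SC */

static void
send_to_peer(const char *what)
{
	printf("-> peer: %s\n", what);	/* stand-in for ctl_ha_msg_send() */
}

static void
lun_reset(unsigned flags)
{
	/* ... perform the local reset first ... */
	if ((flags & FLAG_FROM_PEER) == 0)
		send_to_peer("LUN RESET");	/* originated here: mirror it */
	/* originated on the peer: handled locally, never echoed back */
}

int
main(void)
{
	lun_reset(0);			/* local request: forwarded once */
	lun_reset(FLAG_FROM_PEER);	/* peer request: handled silently */
	return (0);
}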
static void
ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id,
int other_sc)
@@ -12115,9 +11934,9 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id,
if ((targ_port == UINT32_MAX ||
targ_port == xio->io_hdr.nexus.targ_port) &&
(init_id == UINT32_MAX ||
- init_id == xio->io_hdr.nexus.initid.id)) {
+ init_id == xio->io_hdr.nexus.initid)) {
if (targ_port != xio->io_hdr.nexus.targ_port ||
- init_id != xio->io_hdr.nexus.initid.id)
+ init_id != xio->io_hdr.nexus.initid)
xio->io_hdr.flags |= CTL_FLAG_ABORT_STATUS;
xio->io_hdr.flags |= CTL_FLAG_ABORT;
if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) {
@@ -12130,8 +11949,8 @@ ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id,
msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
msg_info.hdr.original_sc = NULL;
msg_info.hdr.serializing_sc = NULL;
- ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- (void *)&msg_info, sizeof(msg_info), 0);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_NOWAIT);
}
}
}
@@ -12149,10 +11968,10 @@ ctl_abort_task_set(union ctl_io *io)
*/
targ_lun = io->io_hdr.nexus.targ_mapped_lun;
mtx_lock(&softc->ctl_lock);
- if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL))
- lun = softc->ctl_luns[targ_lun];
- else {
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ (lun = softc->ctl_luns[targ_lun]) == NULL) {
mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
return (1);
}
@@ -12160,13 +11979,14 @@ ctl_abort_task_set(union ctl_io *io)
mtx_unlock(&softc->ctl_lock);
if (io->taskio.task_action == CTL_TASK_ABORT_TASK_SET) {
ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.initid.id,
+ io->io_hdr.nexus.initid,
(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
} else { /* CTL_TASK_CLEAR_TASK_SET */
ctl_abort_tasks_lun(lun, UINT32_MAX, UINT32_MAX,
(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
}
mtx_unlock(&lun->lun_lock);
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
return (0);
}
@@ -12175,25 +11995,40 @@ ctl_i_t_nexus_reset(union ctl_io *io)
{
struct ctl_softc *softc = control_softc;
struct ctl_lun *lun;
- uint32_t initidx, residx;
+ uint32_t initidx;
+
+ if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
+ union ctl_ha_msg msg_info;
+
+ msg_info.hdr.nexus = io->io_hdr.nexus;
+ msg_info.task.task_action = CTL_TASK_I_T_NEXUS_RESET;
+ msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS;
+ msg_info.hdr.original_sc = NULL;
+ msg_info.hdr.serializing_sc = NULL;
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_WAITOK);
+ }
initidx = ctl_get_initindex(&io->io_hdr.nexus);
- residx = ctl_get_resindex(&io->io_hdr.nexus);
mtx_lock(&softc->ctl_lock);
STAILQ_FOREACH(lun, &softc->lun_list, links) {
mtx_lock(&lun->lun_lock);
ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.initid.id,
- (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0);
+ io->io_hdr.nexus.initid, 1);
#ifdef CTL_WITH_CA
ctl_clear_mask(lun->have_ca, initidx);
#endif
- if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == residx))
+ if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == initidx))
lun->flags &= ~CTL_LUN_RESERVED;
+ if (ctl_is_set(lun->prevent, initidx)) {
+ ctl_clear_mask(lun->prevent, initidx);
+ lun->prevent_count--;
+ }
ctl_est_ua(lun, initidx, CTL_UA_I_T_NEXUS_LOSS);
mtx_unlock(&lun->lun_lock);
}
mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
return (0);
}
@@ -12218,11 +12053,10 @@ ctl_abort_task(union ctl_io *io)
*/
targ_lun = io->io_hdr.nexus.targ_mapped_lun;
mtx_lock(&softc->ctl_lock);
- if ((targ_lun < CTL_MAX_LUNS)
- && (softc->ctl_luns[targ_lun] != NULL))
- lun = softc->ctl_luns[targ_lun];
- else {
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ (lun = softc->ctl_luns[targ_lun]) == NULL) {
mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
return (1);
}
@@ -12262,7 +12096,7 @@ ctl_abort_task(union ctl_io *io)
#endif
if ((xio->io_hdr.nexus.targ_port != io->io_hdr.nexus.targ_port)
- || (xio->io_hdr.nexus.initid.id != io->io_hdr.nexus.initid.id)
+ || (xio->io_hdr.nexus.initid != io->io_hdr.nexus.initid)
|| (xio->io_hdr.flags & CTL_FLAG_ABORT))
continue;
@@ -12293,7 +12127,6 @@ ctl_abort_task(union ctl_io *io)
!(lun->flags & CTL_LUN_PRIMARY_SC)) {
union ctl_ha_msg msg_info;
- io->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC;
msg_info.hdr.nexus = io->io_hdr.nexus;
msg_info.task.task_action = CTL_TASK_ABORT_TASK;
msg_info.task.tag_num = io->taskio.tag_num;
@@ -12304,10 +12137,8 @@ ctl_abort_task(union ctl_io *io)
#if 0
printf("Sent Abort to other side\n");
#endif
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- (void *)&msg_info, sizeof(msg_info), 0) !=
- CTL_HA_STATUS_SUCCESS) {
- }
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info,
+ sizeof(msg_info.task), M_NOWAIT);
}
#if 0
printf("ctl_abort_task: found I/O to abort\n");
@@ -12324,14 +12155,84 @@ ctl_abort_task(union ctl_io *io)
*/
#if 0
printf("ctl_abort_task: ABORT sent for nonexistent I/O: "
- "%d:%d:%d:%d tag %d type %d\n",
- io->io_hdr.nexus.initid.id,
+ "%u:%u:%u tag %d type %d\n",
+ io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.targ_target.id,
io->io_hdr.nexus.targ_lun, io->taskio.tag_num,
io->taskio.tag_type);
#endif
}
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
+ return (0);
+}
+
+static int
+ctl_query_task(union ctl_io *io, int task_set)
+{
+ union ctl_io *xio;
+ struct ctl_lun *lun;
+ struct ctl_softc *softc;
+ int found = 0;
+ uint32_t targ_lun;
+
+ softc = control_softc;
+ targ_lun = io->io_hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ (lun = softc->ctl_luns[targ_lun]) == NULL) {
+ mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
+ return (1);
+ }
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL;
+ xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) {
+
+ if ((xio->io_hdr.nexus.targ_port != io->io_hdr.nexus.targ_port)
+ || (xio->io_hdr.nexus.initid != io->io_hdr.nexus.initid)
+ || (xio->io_hdr.flags & CTL_FLAG_ABORT))
+ continue;
+
+ if (task_set || xio->scsiio.tag_num == io->taskio.tag_num) {
+ found = 1;
+ break;
+ }
+ }
+ mtx_unlock(&lun->lun_lock);
+ if (found)
+ io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED;
+ else
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
+ return (0);
+}
+
+static int
+ctl_query_async_event(union ctl_io *io)
+{
+ struct ctl_lun *lun;
+ struct ctl_softc *softc;
+ ctl_ua_type ua;
+ uint32_t targ_lun, initidx;
+
+ softc = control_softc;
+ targ_lun = io->io_hdr.nexus.targ_mapped_lun;
+ mtx_lock(&softc->ctl_lock);
+ if ((targ_lun >= CTL_MAX_LUNS) ||
+ (lun = softc->ctl_luns[targ_lun]) == NULL) {
+ mtx_unlock(&softc->ctl_lock);
+ io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST;
+ return (1);
+ }
+ mtx_lock(&lun->lun_lock);
+ mtx_unlock(&softc->ctl_lock);
+ initidx = ctl_get_initindex(&io->io_hdr.nexus);
+ ua = ctl_build_qae(lun, initidx, io->taskio.task_resp);
+ mtx_unlock(&lun->lun_lock);
+ if (ua != CTL_UA_NONE)
+ io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED;
+ else
+ io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE;
return (0);
}
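
ctl_query_task() and ctl_query_async_event() add the SAM QUERY TASK, QUERY
TASK SET and QUERY ASYNCHRONOUS EVENT functions: FUNCTION SUCCEEDED is
reported when a matching task (any task of the I_T nexus, for the set
variant) is still on the OOA queue, FUNCTION COMPLETE otherwise. The scan is
easier to see over a plain array than over the kernel TAILQ; this toy model
assumes nothing beyond what the hunk above shows:

#include <stdio.h>

struct task { unsigned port, initid, tag; };

/* task_set != 0: any task on the nexus matches; else match the exact tag. */
static const char *
query(const struct task *q, int n, struct task key, int task_set)
{
	int i;

	for (i = 0; i < n; i++) {
		if (q[i].port != key.port || q[i].initid != key.initid)
			continue;	/* different I_T nexus */
		if (task_set || q[i].tag == key.tag)
			return ("FUNCTION SUCCEEDED");	/* task still queued */
	}
	return ("FUNCTION COMPLETE");	/* nothing left to report */
}

int
main(void)
{
	struct task ooa[] = { { 1, 7, 0x10 }, { 1, 7, 0x11 } };
	struct task key = { 1, 7, 0x12 };

	printf("QUERY TASK:     %s\n", query(ooa, 2, key, 0));
	printf("QUERY TASK SET: %s\n", query(ooa, 2, key, 1));
	return (0);
}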
@@ -12340,41 +12241,12 @@ ctl_run_task(union ctl_io *io)
{
struct ctl_softc *softc = control_softc;
int retval = 1;
- const char *task_desc;
CTL_DEBUG_PRINT(("ctl_run_task\n"));
-
KASSERT(io->io_hdr.io_type == CTL_IO_TASK,
- ("ctl_run_task: Unextected io_type %d\n",
- io->io_hdr.io_type));
-
- task_desc = ctl_scsi_task_string(&io->taskio);
- if (task_desc != NULL) {
-#ifdef NEEDTOPORT
- csevent_log(CSC_CTL | CSC_SHELF_SW |
- CTL_TASK_REPORT,
- csevent_LogType_Trace,
- csevent_Severity_Information,
- csevent_AlertLevel_Green,
- csevent_FRU_Firmware,
- csevent_FRU_Unknown,
- "CTL: received task: %s",task_desc);
-#endif
- } else {
-#ifdef NEEDTOPORT
- csevent_log(CSC_CTL | CSC_SHELF_SW |
- CTL_TASK_REPORT,
- csevent_LogType_Trace,
- csevent_Severity_Information,
- csevent_AlertLevel_Green,
- csevent_FRU_Firmware,
- csevent_FRU_Unknown,
- "CTL: received unknown task "
- "type: %d (%#x)",
- io->taskio.task_action,
- io->taskio.task_action);
-#endif
- }
+ ("ctl_run_task: Unextected io_type %d\n", io->io_hdr.io_type));
+ io->taskio.task_status = CTL_TASK_FUNCTION_NOT_SUPPORTED;
+ bzero(io->taskio.task_resp, sizeof(io->taskio.task_resp));
switch (io->taskio.task_action) {
case CTL_TASK_ABORT_TASK:
retval = ctl_abort_task(io);
@@ -12388,46 +12260,9 @@ ctl_run_task(union ctl_io *io)
case CTL_TASK_I_T_NEXUS_RESET:
retval = ctl_i_t_nexus_reset(io);
break;
- case CTL_TASK_LUN_RESET: {
- struct ctl_lun *lun;
- uint32_t targ_lun;
-
- targ_lun = io->io_hdr.nexus.targ_mapped_lun;
- mtx_lock(&softc->ctl_lock);
- if ((targ_lun < CTL_MAX_LUNS)
- && (softc->ctl_luns[targ_lun] != NULL))
- lun = softc->ctl_luns[targ_lun];
- else {
- mtx_unlock(&softc->ctl_lock);
- retval = 1;
- break;
- }
-
- if (!(io->io_hdr.flags &
- CTL_FLAG_FROM_OTHER_SC)) {
- union ctl_ha_msg msg_info;
-
- io->io_hdr.flags |=
- CTL_FLAG_SENT_2OTHER_SC;
- msg_info.hdr.msg_type =
- CTL_MSG_MANAGE_TASKS;
- msg_info.hdr.nexus = io->io_hdr.nexus;
- msg_info.task.task_action =
- CTL_TASK_LUN_RESET;
- msg_info.hdr.original_sc = NULL;
- msg_info.hdr.serializing_sc = NULL;
- if (CTL_HA_STATUS_SUCCESS !=
- ctl_ha_msg_send(CTL_HA_CHAN_CTL,
- (void *)&msg_info,
- sizeof(msg_info), 0)) {
- }
- }
-
- retval = ctl_lun_reset(lun, io,
- CTL_UA_LUN_RESET);
- mtx_unlock(&softc->ctl_lock);
+ case CTL_TASK_LUN_RESET:
+ retval = ctl_lun_reset(softc, io);
break;
- }
case CTL_TASK_TARGET_RESET:
retval = ctl_target_reset(softc, io, CTL_UA_TARG_RESET);
break;
@@ -12438,9 +12273,18 @@ ctl_run_task(union ctl_io *io)
break;
case CTL_TASK_PORT_LOGOUT:
break;
+ case CTL_TASK_QUERY_TASK:
+ retval = ctl_query_task(io, 0);
+ break;
+ case CTL_TASK_QUERY_TASK_SET:
+ retval = ctl_query_task(io, 1);
+ break;
+ case CTL_TASK_QUERY_ASYNC_EVENT:
+ retval = ctl_query_async_event(io);
+ break;
default:
- printf("ctl_run_task: got unknown task management event %d\n",
- io->taskio.task_action);
+ printf("%s: got unknown task management event %d\n",
+ __func__, io->taskio.task_action);
break;
}
if (retval == 0)
@@ -12459,11 +12303,9 @@ ctl_handle_isc(union ctl_io *io)
{
int free_io;
struct ctl_lun *lun;
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
uint32_t targ_lun;
- softc = control_softc;
-
targ_lun = io->io_hdr.nexus.targ_mapped_lun;
lun = softc->ctl_luns[targ_lun];
@@ -12523,6 +12365,10 @@ ctl_handle_isc(union ctl_io *io)
free_io = 0;
io->scsiio.be_move_done(io);
break;
+ case CTL_MSG_FAILOVER:
+ ctl_failover_lun(io);
+ free_io = 1;
+ break;
default:
free_io = 1;
printf("%s: Invalid message type %d\n",
@@ -12623,7 +12469,9 @@ ctl_inject_error(struct ctl_lun *lun, union ctl_io *io)
ctl_set_aborted(&io->scsiio);
break;
case CTL_LUN_INJ_MEDIUM_ERR:
- ctl_set_medium_error(&io->scsiio);
+ ctl_set_medium_error(&io->scsiio,
+ (io->io_hdr.flags & CTL_FLAG_DATA_MASK) !=
+ CTL_FLAG_DATA_OUT);
break;
case CTL_LUN_INJ_UA:
/* 29h/00h POWER ON, RESET, OR BUS DEVICE RESET
@@ -12680,12 +12528,14 @@ ctl_datamove_timer_wakeup(void *arg)
void
ctl_datamove(union ctl_io *io)
{
+ struct ctl_lun *lun;
void (*fe_datamove)(union ctl_io *io);
mtx_assert(&control_softc->ctl_lock, MA_NOTOWNED);
CTL_DEBUG_PRINT(("ctl_datamove\n"));
+ lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
#ifdef CTL_TIME_IO
if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) {
char str[256];
@@ -12724,23 +12574,14 @@ ctl_datamove(union ctl_io *io)
#ifdef CTL_IO_DELAY
if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) {
- struct ctl_lun *lun;
-
- lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
-
io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE;
} else {
- struct ctl_lun *lun;
-
- lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
if ((lun != NULL)
&& (lun->delay_info.datamove_delay > 0)) {
- struct callout *callout;
- callout = (struct callout *)&io->io_hdr.timer_bytes;
- callout_init(callout, /*mpsafe*/ 1);
+ callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1);
io->io_hdr.flags |= CTL_FLAG_DELAY_DONE;
- callout_reset(callout,
+ callout_reset(&io->io_hdr.delay_callout,
lun->delay_info.datamove_delay * hz,
ctl_datamove_timer_wakeup, io);
if (lun->delay_info.datamove_type ==
@@ -12756,10 +12597,9 @@ ctl_datamove(union ctl_io *io)
* the data move.
*/
if (io->io_hdr.flags & CTL_FLAG_ABORT) {
- printf("ctl_datamove: tag 0x%04x on (%ju:%d:%ju:%d) aborted\n",
- io->scsiio.tag_num,(uintmax_t)io->io_hdr.nexus.initid.id,
+ printf("ctl_datamove: tag 0x%04x on (%u:%u:%u) aborted\n",
+ io->scsiio.tag_num, io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_port,
- (uintmax_t)io->io_hdr.nexus.targ_target.id,
io->io_hdr.nexus.targ_lun);
io->io_hdr.port_status = 31337;
/*
@@ -12777,185 +12617,19 @@ ctl_datamove(union ctl_io *io)
return;
}
- /*
- * If we're in XFER mode and this I/O is from the other shelf
- * controller, we need to send the DMA to the other side to
- * actually transfer the data to/from the host. In serialize only
- * mode the transfer happens below CTL and ctl_datamove() is only
- * called on the machine that originally received the I/O.
- */
- if ((control_softc->ha_mode == CTL_HA_MODE_XFER)
- && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
- union ctl_ha_msg msg;
- uint32_t sg_entries_sent;
- int do_sg_copy;
- int i;
-
- memset(&msg, 0, sizeof(msg));
- msg.hdr.msg_type = CTL_MSG_DATAMOVE;
- msg.hdr.original_sc = io->io_hdr.original_sc;
- msg.hdr.serializing_sc = io;
- msg.hdr.nexus = io->io_hdr.nexus;
- msg.dt.flags = io->io_hdr.flags;
- /*
- * We convert everything into a S/G list here. We can't
- * pass by reference, only by value between controllers.
- * So we can't pass a pointer to the S/G list, only as many
- * S/G entries as we can fit in here. If it's possible for
- * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries,
- * then we need to break this up into multiple transfers.
- */
- if (io->scsiio.kern_sg_entries == 0) {
- msg.dt.kern_sg_entries = 1;
- /*
- * If this is in cached memory, flush the cache
- * before we send the DMA request to the other
- * controller. We want to do this in either the
- * read or the write case. The read case is
- * straightforward. In the write case, we want to
- * make sure nothing is in the local cache that
- * could overwrite the DMAed data.
- */
- if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) {
- /*
- * XXX KDM use bus_dmamap_sync() here.
- */
- }
-
- /*
- * Convert to a physical address if this is a
- * virtual address.
- */
- if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
- msg.dt.sg_list[0].addr =
- io->scsiio.kern_data_ptr;
- } else {
- /*
- * XXX KDM use busdma here!
- */
-#if 0
- msg.dt.sg_list[0].addr = (void *)
- vtophys(io->scsiio.kern_data_ptr);
-#endif
- }
-
- msg.dt.sg_list[0].len = io->scsiio.kern_data_len;
- do_sg_copy = 0;
- } else {
- struct ctl_sg_entry *sgl;
-
- do_sg_copy = 1;
- msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries;
- sgl = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
- if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) {
- /*
- * XXX KDM use bus_dmamap_sync() here.
- */
- }
- }
-
- msg.dt.kern_data_len = io->scsiio.kern_data_len;
- msg.dt.kern_total_len = io->scsiio.kern_total_len;
- msg.dt.kern_data_resid = io->scsiio.kern_data_resid;
- msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset;
- msg.dt.sg_sequence = 0;
-
- /*
- * Loop until we've sent all of the S/G entries. On the
- * other end, we'll recompose these S/G entries into one
- * contiguous list before passing it to the
- */
- for (sg_entries_sent = 0; sg_entries_sent <
- msg.dt.kern_sg_entries; msg.dt.sg_sequence++) {
- msg.dt.cur_sg_entries = MIN((sizeof(msg.dt.sg_list)/
- sizeof(msg.dt.sg_list[0])),
- msg.dt.kern_sg_entries - sg_entries_sent);
-
- if (do_sg_copy != 0) {
- struct ctl_sg_entry *sgl;
- int j;
-
- sgl = (struct ctl_sg_entry *)
- io->scsiio.kern_data_ptr;
- /*
- * If this is in cached memory, flush the cache
- * before we send the DMA request to the other
- * controller. We want to do this in either
-			 * the read or the write case. The read
- * case is straightforward. In the write
- * case, we want to make sure nothing is
- * in the local cache that could overwrite
- * the DMAed data.
- */
-
- for (i = sg_entries_sent, j = 0;
- i < msg.dt.cur_sg_entries; i++, j++) {
- if ((io->io_hdr.flags &
- CTL_FLAG_NO_DATASYNC) == 0) {
- /*
- * XXX KDM use bus_dmamap_sync()
- */
- }
- if ((io->io_hdr.flags &
- CTL_FLAG_BUS_ADDR) == 0) {
- /*
- * XXX KDM use busdma.
- */
-#if 0
- msg.dt.sg_list[j].addr =(void *)
- vtophys(sgl[i].addr);
-#endif
- } else {
- msg.dt.sg_list[j].addr =
- sgl[i].addr;
- }
- msg.dt.sg_list[j].len = sgl[i].len;
- }
- }
-
- sg_entries_sent += msg.dt.cur_sg_entries;
- if (sg_entries_sent >= msg.dt.kern_sg_entries)
- msg.dt.sg_last = 1;
- else
- msg.dt.sg_last = 0;
-
- /*
- * XXX KDM drop and reacquire the lock here?
- */
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
- sizeof(msg), 0) > CTL_HA_STATUS_SUCCESS) {
- /*
- * XXX do something here.
- */
- }
-
- msg.dt.sent_sg_entries = sg_entries_sent;
- }
- io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
- if (io->io_hdr.flags & CTL_FLAG_FAILOVER)
- ctl_failover_io(io, /*have_lock*/ 0);
-
- } else {
-
- /*
- * Lookup the fe_datamove() function for this particular
- * front end.
- */
- fe_datamove =
- control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove;
-
- fe_datamove(io);
- }
+ fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove;
+ fe_datamove(io);
}
static void
ctl_send_datamove_done(union ctl_io *io, int have_lock)
{
union ctl_ha_msg msg;
- int isc_status;
+#ifdef CTL_TIME_IO
+ struct bintime cur_bt;
+#endif
memset(&msg, 0, sizeof(msg));
-
msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE;
msg.hdr.original_sc = io;
msg.hdr.serializing_sc = io->io_hdr.serializing_sc;
@@ -12965,23 +12639,26 @@ ctl_send_datamove_done(union ctl_io *io, int have_lock)
msg.scsi.tag_type = io->scsiio.tag_type;
msg.scsi.scsi_status = io->scsiio.scsi_status;
memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data,
- sizeof(io->scsiio.sense_data));
+ io->scsiio.sense_len);
msg.scsi.sense_len = io->scsiio.sense_len;
msg.scsi.sense_residual = io->scsiio.sense_residual;
msg.scsi.fetd_status = io->io_hdr.port_status;
msg.scsi.residual = io->scsiio.residual;
io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
-
if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
ctl_failover_io(io, /*have_lock*/ have_lock);
return;
}
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
+ sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) +
+ msg.scsi.sense_len, M_WAITOK);
- isc_status = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0);
- if (isc_status > CTL_HA_STATUS_SUCCESS) {
- /* XXX do something if this fails */
- }
-
+#ifdef CTL_TIME_IO
+ getbinuptime(&cur_bt);
+ bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
+ bintime_add(&io->io_hdr.dma_bt, &cur_bt);
+#endif
+ io->io_hdr.num_dmas++;
}
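
Note the new send size in ctl_send_datamove_done(): rather than shipping the
whole message, it sends sizeof(msg.scsi) minus the full sense buffer plus
only the sense_len bytes actually valid, so short sense data no longer pads
the interlink. A sketch of that length computation (the struct layout is
illustrative; the trick only works because sense_data is the trailing
member):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct scsi_msg {
	uint32_t tag_num;
	uint8_t  scsi_status;
	uint8_t  sense_len;
	uint8_t  sense_data[252];	/* must be the trailing member */
};

/* Bytes actually worth sending: fixed header plus the valid sense prefix. */
static size_t
wire_len(const struct scsi_msg *m)
{
	return (sizeof(*m) - sizeof(m->sense_data) + m->sense_len);
}

int
main(void)
{
	struct scsi_msg m = { .tag_num = 1, .sense_len = 18 };

	printf("full %zu bytes, on the wire %zu bytes\n",
	    sizeof(m), wire_len(&m));
	return (0);
}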
/*
@@ -12992,6 +12669,7 @@ static void
ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq)
{
union ctl_io *io;
+ int i;
io = rq->context;
@@ -13005,14 +12683,12 @@ ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq)
ctl_dt_req_free(rq);
- /*
- * In this case, we had to malloc the memory locally. Free it.
- */
- if ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0) {
- int i;
- for (i = 0; i < io->scsiio.kern_sg_entries; i++)
- free(io->io_hdr.local_sglist[i].addr, M_CTL);
- }
+ for (i = 0; i < io->scsiio.kern_sg_entries; i++)
+ free(io->io_hdr.local_sglist[i].addr, M_CTL);
+ free(io->io_hdr.remote_sglist, M_CTL);
+ io->io_hdr.remote_sglist = NULL;
+ io->io_hdr.local_sglist = NULL;
+
/*
* The data is in local and remote memory, so now we need to send
* status (good or back) back to the other side.
@@ -13029,11 +12705,8 @@ ctl_datamove_remote_dm_write_cb(union ctl_io *io)
{
int retval;
- retval = 0;
-
retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_WRITE,
ctl_datamove_remote_write_cb);
-
return (retval);
}
@@ -13062,12 +12735,8 @@ ctl_datamove_remote_write(union ctl_io *io)
*/
io->scsiio.be_move_done = ctl_datamove_remote_dm_write_cb;
- fe_datamove = control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove;
-
+ fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove;
fe_datamove(io);
-
- return;
-
}
static int
@@ -13078,15 +12747,13 @@ ctl_datamove_remote_dm_read_cb(union ctl_io *io)
char path_str[64];
struct sbuf sb;
#endif
+ int i;
- /*
- * In this case, we had to malloc the memory locally. Free it.
- */
- if ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0) {
- int i;
- for (i = 0; i < io->scsiio.kern_sg_entries; i++)
- free(io->io_hdr.local_sglist[i].addr, M_CTL);
- }
+ for (i = 0; i < io->scsiio.kern_sg_entries; i++)
+ free(io->io_hdr.local_sglist[i].addr, M_CTL);
+ free(io->io_hdr.remote_sglist, M_CTL);
+ io->io_hdr.remote_sglist = NULL;
+ io->io_hdr.local_sglist = NULL;
#if 0
scsi_path_string(io, path_str, sizeof(path_str));
@@ -13123,7 +12790,7 @@ ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq)
io = rq->context;
if (rq->ret != CTL_HA_STATUS_SUCCESS) {
- printf("%s: ISC DMA read failed with error %d", __func__,
+ printf("%s: ISC DMA read failed with error %d\n", __func__,
rq->ret);
ctl_set_internal_failure(&io->scsiio,
/*sks_valid*/ 1,
@@ -13143,143 +12810,48 @@ ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq)
/* XXX KDM add checks like the ones in ctl_datamove? */
- fe_datamove = control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove;
-
+ fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove;
fe_datamove(io);
}
static int
ctl_datamove_remote_sgl_setup(union ctl_io *io)
{
- struct ctl_sg_entry *local_sglist, *remote_sglist;
- struct ctl_sg_entry *local_dma_sglist, *remote_dma_sglist;
- struct ctl_softc *softc;
+ struct ctl_sg_entry *local_sglist;
+ uint32_t len_to_go;
int retval;
int i;
retval = 0;
- softc = control_softc;
-
local_sglist = io->io_hdr.local_sglist;
- local_dma_sglist = io->io_hdr.local_dma_sglist;
- remote_sglist = io->io_hdr.remote_sglist;
- remote_dma_sglist = io->io_hdr.remote_dma_sglist;
+ len_to_go = io->scsiio.kern_data_len;
- if (io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) {
- for (i = 0; i < io->scsiio.kern_sg_entries; i++) {
- local_sglist[i].len = remote_sglist[i].len;
-
- /*
- * XXX Detect the situation where the RS-level I/O
- * redirector on the other side has already read the
- * data off of the AOR RS on this side, and
- * transferred it to remote (mirror) memory on the
- * other side. Since we already have the data in
- * memory here, we just need to use it.
- *
- * XXX KDM this can probably be removed once we
- * get the cache device code in and take the
- * current AOR implementation out.
- */
-#ifdef NEEDTOPORT
- if ((remote_sglist[i].addr >=
- (void *)vtophys(softc->mirr->addr))
- && (remote_sglist[i].addr <
- ((void *)vtophys(softc->mirr->addr) +
- CacheMirrorOffset))) {
- local_sglist[i].addr = remote_sglist[i].addr -
- CacheMirrorOffset;
- if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
- CTL_FLAG_DATA_IN)
- io->io_hdr.flags |= CTL_FLAG_REDIR_DONE;
- } else {
- local_sglist[i].addr = remote_sglist[i].addr +
- CacheMirrorOffset;
- }
-#endif
-#if 0
- printf("%s: local %p, remote %p, len %d\n",
- __func__, local_sglist[i].addr,
- remote_sglist[i].addr, local_sglist[i].len);
-#endif
- }
- } else {
- uint32_t len_to_go;
-
- /*
- * In this case, we don't have automatically allocated
- * memory for this I/O on this controller. This typically
- * happens with internal CTL I/O -- e.g. inquiry, mode
- * sense, etc. Anything coming from RAIDCore will have
- * a mirror area available.
- */
- len_to_go = io->scsiio.kern_data_len;
-
- /*
- * Clear the no datasync flag, we have to use malloced
- * buffers.
- */
- io->io_hdr.flags &= ~CTL_FLAG_NO_DATASYNC;
-
- /*
- * The difficult thing here is that the size of the various
- * S/G segments may be different than the size from the
- * remote controller. That'll make it harder when DMAing
- * the data back to the other side.
- */
- for (i = 0; (i < sizeof(io->io_hdr.remote_sglist) /
- sizeof(io->io_hdr.remote_sglist[0])) &&
- (len_to_go > 0); i++) {
- local_sglist[i].len = MIN(len_to_go, 131072);
- CTL_SIZE_8B(local_dma_sglist[i].len,
- local_sglist[i].len);
- local_sglist[i].addr =
- malloc(local_dma_sglist[i].len, M_CTL,M_WAITOK);
-
- local_dma_sglist[i].addr = local_sglist[i].addr;
-
- if (local_sglist[i].addr == NULL) {
- int j;
-
- printf("malloc failed for %zd bytes!",
- local_dma_sglist[i].len);
- for (j = 0; j < i; j++) {
- free(local_sglist[j].addr, M_CTL);
- }
- ctl_set_internal_failure(&io->scsiio,
- /*sks_valid*/ 1,
- /*retry_count*/ 4857);
- retval = 1;
- goto bailout_error;
-
- }
- /* XXX KDM do we need a sync here? */
+ /*
+ * The difficult thing here is that the size of the various
+ * S/G segments may be different than the size from the
+ * remote controller. That'll make it harder when DMAing
+ * the data back to the other side.
+ */
+ for (i = 0; len_to_go > 0; i++) {
+ local_sglist[i].len = MIN(len_to_go, CTL_HA_DATAMOVE_SEGMENT);
+ local_sglist[i].addr =
+ malloc(local_sglist[i].len, M_CTL, M_WAITOK);
- len_to_go -= local_sglist[i].len;
- }
- /*
- * Reset the number of S/G entries accordingly. The
- * original number of S/G entries is available in
- * rem_sg_entries.
- */
- io->scsiio.kern_sg_entries = i;
+ len_to_go -= local_sglist[i].len;
+ }
+ /*
+ * Reset the number of S/G entries accordingly. The original
+ * number of S/G entries is available in rem_sg_entries.
+ */
+ io->scsiio.kern_sg_entries = i;
#if 0
- printf("%s: kern_sg_entries = %d\n", __func__,
- io->scsiio.kern_sg_entries);
- for (i = 0; i < io->scsiio.kern_sg_entries; i++)
- printf("%s: sg[%d] = %p, %d (DMA: %d)\n", __func__, i,
- local_sglist[i].addr, local_sglist[i].len,
- local_dma_sglist[i].len);
+ printf("%s: kern_sg_entries = %d\n", __func__,
+ io->scsiio.kern_sg_entries);
+ for (i = 0; i < io->scsiio.kern_sg_entries; i++)
+ printf("%s: sg[%d] = %p, %lu\n", __func__, i,
+ local_sglist[i].addr, local_sglist[i].len);
#endif
- }
-
-
- return (retval);
-
-bailout_error:
-
- ctl_send_datamove_done(io, /*have_lock*/ 0);
return (retval);
}
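
The rewritten ctl_datamove_remote_sgl_setup() simply carves kern_data_len
into CTL_HA_DATAMOVE_SEGMENT-sized malloc'd buffers; the 8-byte alignment
dance (CTL_SIZE_8B and the *_dma_sglist shadow lists) is gone. A standalone
sketch of the carve loop, assuming a 128 KiB segment size like the old
hardcoded 131072:

#include <stdio.h>
#include <stdlib.h>

#define SEGMENT	(128 * 1024)	/* assumed CTL_HA_DATAMOVE_SEGMENT */

struct sg_entry { void *addr; size_t len; };

/* Split "total" bytes into SEGMENT-sized buffers; returns the entry count. */
static int
sgl_setup(struct sg_entry *sg, size_t total)
{
	size_t len_to_go = total;
	int i;

	for (i = 0; len_to_go > 0; i++) {
		sg[i].len = len_to_go < SEGMENT ? len_to_go : SEGMENT;
		sg[i].addr = malloc(sg[i].len);	/* M_WAITOK analogue: assume success */
		len_to_go -= sg[i].len;
	}
	return (i);
}

int
main(void)
{
	struct sg_entry sg[8];
	int n = sgl_setup(sg, 300 * 1024), i;

	for (i = 0; i < n; i++) {
		printf("sg[%d] = %zu bytes\n", i, sg[i].len);
		free(sg[i].addr);
	}
	return (0);
}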
@@ -13290,12 +12862,8 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
{
struct ctl_ha_dt_req *rq;
struct ctl_sg_entry *remote_sglist, *local_sglist;
- struct ctl_sg_entry *remote_dma_sglist, *local_dma_sglist;
uint32_t local_used, remote_used, total_used;
- int retval;
- int i, j;
-
- retval = 0;
+ int i, j, isc_ret;
rq = ctl_dt_req_alloc();
@@ -13305,10 +12873,12 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
* failure.
*/
if ((rq == NULL)
- && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE))
+ && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
+ (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS))
ctl_set_busy(&io->scsiio);
- if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE) {
+ if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
+ (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) {
if (rq != NULL)
ctl_dt_req_free(rq);
@@ -13321,26 +12891,15 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
ctl_send_datamove_done(io, /*have_lock*/ 0);
- retval = 1;
-
- goto bailout;
+ return (1);
}
local_sglist = io->io_hdr.local_sglist;
- local_dma_sglist = io->io_hdr.local_dma_sglist;
remote_sglist = io->io_hdr.remote_sglist;
- remote_dma_sglist = io->io_hdr.remote_dma_sglist;
local_used = 0;
remote_used = 0;
total_used = 0;
- if (io->io_hdr.flags & CTL_FLAG_REDIR_DONE) {
- rq->ret = CTL_HA_STATUS_SUCCESS;
- rq->context = io;
- callback(rq);
- goto bailout;
- }
-
/*
* Pull/push the data over the wire from/to the other controller.
* This takes into account the possibility that the local and
@@ -13351,12 +12910,11 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
* both the local and remote sglists is identical. Otherwise, we've
* essentially got a coding error of some sort.
*/
+ isc_ret = CTL_HA_STATUS_SUCCESS;
for (i = 0, j = 0; total_used < io->scsiio.kern_data_len; ) {
- int isc_ret;
- uint32_t cur_len, dma_length;
+ uint32_t cur_len;
uint8_t *tmp_ptr;
- rq->id = CTL_HA_DATA_CTL;
rq->command = command;
rq->context = io;
@@ -13368,52 +12926,23 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
*/
cur_len = MIN(local_sglist[i].len - local_used,
remote_sglist[j].len - remote_used);
-
- /*
- * In this case, we have a size issue and need to decrease
- * the size, except in the case where we actually have less
- * than 8 bytes left. In that case, we need to increase
- * the DMA length to get the last bit.
- */
- if ((cur_len & 0x7) != 0) {
- if (cur_len > 0x7) {
- cur_len = cur_len - (cur_len & 0x7);
- dma_length = cur_len;
- } else {
- CTL_SIZE_8B(dma_length, cur_len);
- }
-
- } else
- dma_length = cur_len;
-
- /*
- * If we had to allocate memory for this I/O, instead of using
- * the non-cached mirror memory, we'll need to flush the cache
- * before trying to DMA to the other controller.
- *
- * We could end up doing this multiple times for the same
- * segment if we have a larger local segment than remote
- * segment. That shouldn't be an issue.
- */
- if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) {
- /*
- * XXX KDM use bus_dmamap_sync() here.
- */
- }
-
- rq->size = dma_length;
+ rq->size = cur_len;
tmp_ptr = (uint8_t *)local_sglist[i].addr;
tmp_ptr += local_used;
+#if 0
/* Use physical addresses when talking to ISC hardware */
if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) {
/* XXX KDM use busdma */
-#if 0
rq->local = vtophys(tmp_ptr);
-#endif
} else
rq->local = tmp_ptr;
+#else
+ KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0,
+ ("HA does not support BUS_ADDR"));
+ rq->local = tmp_ptr;
+#endif
tmp_ptr = (uint8_t *)remote_sglist[j].addr;
tmp_ptr += remote_used;
@@ -13437,40 +12966,22 @@ ctl_datamove_remote_xfer(union ctl_io *io, unsigned command,
if (total_used >= io->scsiio.kern_data_len)
rq->callback = callback;
- if ((rq->size & 0x7) != 0) {
- printf("%s: warning: size %d is not on 8b boundary\n",
- __func__, rq->size);
- }
- if (((uintptr_t)rq->local & 0x7) != 0) {
- printf("%s: warning: local %p not on 8b boundary\n",
- __func__, rq->local);
- }
- if (((uintptr_t)rq->remote & 0x7) != 0) {
- printf("%s: warning: remote %p not on 8b boundary\n",
- __func__, rq->local);
- }
#if 0
- printf("%s: %s: local %#x remote %#x size %d\n", __func__,
+ printf("%s: %s: local %p remote %p size %d\n", __func__,
(command == CTL_HA_DT_CMD_WRITE) ? "WRITE" : "READ",
rq->local, rq->remote, rq->size);
#endif
isc_ret = ctl_dt_single(rq);
- if (isc_ret == CTL_HA_STATUS_WAIT)
- continue;
-
- if (isc_ret == CTL_HA_STATUS_DISCONNECT) {
- rq->ret = CTL_HA_STATUS_SUCCESS;
- } else {
- rq->ret = isc_ret;
- }
+ if (isc_ret > CTL_HA_STATUS_SUCCESS)
+ break;
+ }
+ if (isc_ret != CTL_HA_STATUS_WAIT) {
+ rq->ret = isc_ret;
callback(rq);
- goto bailout;
}
-bailout:
- return (retval);
-
+ return (0);
}
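
The transfer loop in ctl_datamove_remote_xfer() walks two scatter/gather
lists whose segment boundaries need not coincide: each pass moves MIN(local
remaining, remote remaining) bytes and then advances whichever side is
drained. The merge is clearest in isolation:

#include <stdio.h>

struct seg { unsigned len; };

static void
merge(const struct seg *l, const struct seg *r, unsigned total)
{
	unsigned i = 0, j = 0, lu = 0, ru = 0, done = 0;

	while (done < total) {
		/* largest chunk both current segments can carry */
		unsigned ll = l[i].len - lu, rl = r[j].len - ru;
		unsigned cur = ll < rl ? ll : rl;

		printf("move %u bytes (local[%u]+%u -> remote[%u]+%u)\n",
		    cur, i, lu, j, ru);
		done += cur; lu += cur; ru += cur;
		if (lu == l[i].len) { i++; lu = 0; }	/* local segment drained */
		if (ru == r[j].len) { j++; ru = 0; }	/* remote segment drained */
	}
}

int
main(void)
{
	struct seg local[]  = { { 100 }, { 100 } };
	struct seg remote[] = { { 60 }, { 80 }, { 60 } };

	merge(local, remote, 200);
	return (0);
}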
static void
@@ -13489,8 +13000,7 @@ ctl_datamove_remote_read(union ctl_io *io)
retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_READ,
ctl_datamove_remote_read_cb);
- if ((retval != 0)
- && ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0)) {
+ if (retval != 0) {
/*
* Make sure we free memory if there was an error.. The
* ctl_datamove_remote_xfer() function will send the
@@ -13499,9 +13009,10 @@ ctl_datamove_remote_read(union ctl_io *io)
*/
for (i = 0; i < io->scsiio.kern_sg_entries; i++)
free(io->io_hdr.local_sglist[i].addr, M_CTL);
+ free(io->io_hdr.remote_sglist, M_CTL);
+ io->io_hdr.remote_sglist = NULL;
+ io->io_hdr.local_sglist = NULL;
}
-
- return;
}
/*
@@ -13514,11 +13025,13 @@ ctl_datamove_remote_read(union ctl_io *io)
static void
ctl_datamove_remote(union ctl_io *io)
{
- struct ctl_softc *softc;
- softc = control_softc;
+ mtx_assert(&control_softc->ctl_lock, MA_NOTOWNED);
- mtx_assert(&softc->ctl_lock, MA_NOTOWNED);
+ if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
+ ctl_failover_io(io, /*have_lock*/ 0);
+ return;
+ }
/*
* Note that we look for an aborted I/O here, but don't do some of
@@ -13527,76 +13040,35 @@ ctl_datamove_remote(union ctl_io *io)
* have been done if need be on the other controller.
*/
if (io->io_hdr.flags & CTL_FLAG_ABORT) {
- printf("%s: tag 0x%04x on (%d:%d:%d:%d) aborted\n", __func__,
- io->scsiio.tag_num, io->io_hdr.nexus.initid.id,
+ printf("%s: tag 0x%04x on (%u:%u:%u) aborted\n", __func__,
+ io->scsiio.tag_num, io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.targ_target.id,
io->io_hdr.nexus.targ_lun);
io->io_hdr.port_status = 31338;
ctl_send_datamove_done(io, /*have_lock*/ 0);
return;
}
- if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT) {
+ if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT)
ctl_datamove_remote_write(io);
- } else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN){
+ else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
ctl_datamove_remote_read(io);
- } else {
- union ctl_ha_msg msg;
- struct scsi_sense_data *sense;
- uint8_t sks[3];
- int retry_count;
-
- memset(&msg, 0, sizeof(msg));
-
- msg.hdr.msg_type = CTL_MSG_BAD_JUJU;
- msg.hdr.status = CTL_SCSI_ERROR;
- msg.scsi.scsi_status = SCSI_STATUS_CHECK_COND;
-
- retry_count = 4243;
-
- sense = &msg.scsi.sense_data;
- sks[0] = SSD_SCS_VALID;
- sks[1] = (retry_count >> 8) & 0xff;
- sks[2] = retry_count & 0xff;
-
- /* "Internal target failure" */
- scsi_set_sense_data(sense,
- /*sense_format*/ SSD_TYPE_NONE,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_HARDWARE_ERROR,
- /*asc*/ 0x44,
- /*ascq*/ 0x00,
- /*type*/ SSD_ELEM_SKS,
- /*size*/ sizeof(sks),
- /*data*/ sks,
- SSD_ELEM_NONE);
-
- io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE;
- if (io->io_hdr.flags & CTL_FLAG_FAILOVER) {
- ctl_failover_io(io, /*have_lock*/ 1);
- return;
- }
-
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0) >
- CTL_HA_STATUS_SUCCESS) {
- /* XXX KDM what to do if this fails? */
- }
- return;
+ else {
+ io->io_hdr.port_status = 31339;
+ ctl_send_datamove_done(io, /*have_lock*/ 0);
}
-
}
-static int
+static void
ctl_process_done(union ctl_io *io)
{
struct ctl_lun *lun;
struct ctl_softc *softc = control_softc;
void (*fe_done)(union ctl_io *io);
- uint32_t targ_port = ctl_port_idx(io->io_hdr.nexus.targ_port);
+ union ctl_ha_msg msg;
+ uint32_t targ_port = io->io_hdr.nexus.targ_port;
CTL_DEBUG_PRINT(("ctl_process_done\n"));
-
fe_done = softc->ctl_ports[targ_port]->fe_done;
#ifdef CTL_TIME_IO
@@ -13641,11 +13113,8 @@ ctl_process_done(union ctl_io *io)
case CTL_IO_TASK:
if (ctl_debug & CTL_DEBUG_INFO)
ctl_io_error_print(io, NULL);
- if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)
- ctl_free_io(io);
- else
- fe_done(io);
- return (CTL_RETVAL_COMPLETE);
+ fe_done(io);
+ return;
default:
panic("ctl_process_done: invalid io type %d\n",
io->io_hdr.io_type);
@@ -13665,7 +13134,7 @@ ctl_process_done(union ctl_io *io)
* Check to see if we have any errors to inject here. We only
* inject errors for commands that don't already have errors set.
*/
- if ((STAILQ_FIRST(&lun->error_list) != NULL) &&
+ if (!STAILQ_EMPTY(&lun->error_list) &&
((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) &&
((io->io_hdr.flags & CTL_FLAG_STATUS_SENT) == 0))
ctl_inject_error(lun, io);
@@ -13698,12 +13167,12 @@ ctl_process_done(union ctl_io *io)
#ifdef CTL_TIME_IO
bintime_add(&lun->stats.ports[targ_port].dma_time[type],
&io->io_hdr.dma_bt);
- lun->stats.ports[targ_port].num_dmas[type] +=
- io->io_hdr.num_dmas;
- getbintime(&cur_bt);
+ getbinuptime(&cur_bt);
bintime_sub(&cur_bt, &io->io_hdr.start_bt);
bintime_add(&lun->stats.ports[targ_port].time[type], &cur_bt);
#endif
+ lun->stats.ports[targ_port].num_dmas[type] +=
+ io->io_hdr.num_dmas;
}
/*
@@ -13755,53 +13224,19 @@ bailout:
* Tell the FETD or the other shelf controller we're done with this
* command. Note that only SCSI commands get to this point. Task
* management commands are completed above.
- *
- * We only send status to the other controller if we're in XFER
- * mode. In SER_ONLY mode, the I/O is done on the controller that
- * received the I/O (from CTL's perspective), and so the status is
- * generated there.
- *
- * XXX KDM if we hold the lock here, we could cause a deadlock
- * if the frontend comes back in in this context to queue
- * something.
- */
- if ((softc->ha_mode == CTL_HA_MODE_XFER)
- && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) {
- union ctl_ha_msg msg;
-
+ */
+ if ((softc->ha_mode != CTL_HA_MODE_XFER) &&
+ (io->io_hdr.flags & CTL_FLAG_SENT_2OTHER_SC)) {
memset(&msg, 0, sizeof(msg));
msg.hdr.msg_type = CTL_MSG_FINISH_IO;
- msg.hdr.original_sc = io->io_hdr.original_sc;
+ msg.hdr.serializing_sc = io->io_hdr.serializing_sc;
msg.hdr.nexus = io->io_hdr.nexus;
- msg.hdr.status = io->io_hdr.status;
- msg.scsi.scsi_status = io->scsiio.scsi_status;
- msg.scsi.tag_num = io->scsiio.tag_num;
- msg.scsi.tag_type = io->scsiio.tag_type;
- msg.scsi.sense_len = io->scsiio.sense_len;
- msg.scsi.sense_residual = io->scsiio.sense_residual;
- msg.scsi.residual = io->scsiio.residual;
- memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data,
- sizeof(io->scsiio.sense_data));
- /*
- * We copy this whether or not this is an I/O-related
- * command. Otherwise, we'd have to go and check to see
- * whether it's a read/write command, and it really isn't
- * worth it.
- */
- memcpy(&msg.scsi.lbalen,
- &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes,
- sizeof(msg.scsi.lbalen));
-
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
- sizeof(msg), 0) > CTL_HA_STATUS_SUCCESS) {
- /* XXX do something here */
- }
-
- ctl_free_io(io);
- } else
- fe_done(io);
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
+ sizeof(msg.scsi) - sizeof(msg.scsi.sense_data),
+ M_WAITOK);
+ }
- return (CTL_RETVAL_COMPLETE);
+ fe_done(io);
}
#ifdef CTL_WITH_CA
@@ -13881,7 +13316,7 @@ ctl_queue(union ctl_io *io)
#ifdef CTL_TIME_IO
io->io_hdr.start_time = time_uptime;
- getbintime(&io->io_hdr.start_bt);
+ getbinuptime(&io->io_hdr.start_bt);
#endif /* CTL_TIME_IO */
/* Map FE-specific LUN ID into global one. */
@@ -13916,6 +13351,21 @@ ctl_done_timer_wakeup(void *arg)
#endif /* CTL_IO_DELAY */
void
+ctl_serseq_done(union ctl_io *io)
+{
+ struct ctl_lun *lun;
+
+ lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
+ if (lun->be_lun == NULL ||
+ lun->be_lun->serseq == CTL_LUN_SERSEQ_OFF)
+ return;
+ mtx_lock(&lun->lun_lock);
+ io->io_hdr.flags |= CTL_FLAG_SERSEQ_DONE;
+ ctl_check_blocked(lun);
+ mtx_unlock(&lun->lun_lock);
+}
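+
ctl_serseq_done() gives backends a completion hook for serialize-sequential
LUNs: it sets CTL_FLAG_SERSEQ_DONE and re-runs ctl_check_blocked(),
apparently so a blocked successor can be released once its predecessor's
data phase is done rather than waiting for full completion. A toy model of
the flag-then-recheck pattern:

#include <stdio.h>

#define F_BLOCKED	0x1
#define F_SERSEQ_DONE	0x2	/* stand-in for CTL_FLAG_SERSEQ_DONE */

struct io { unsigned flags; };

/* Re-check: a blocked I/O may run once its predecessor's data phase ends. */
static void
check_blocked(struct io *prev, struct io *next)
{
	if ((next->flags & F_BLOCKED) && (prev->flags & F_SERSEQ_DONE)) {
		next->flags &= ~F_BLOCKED;
		printf("successor released\n");
	}
}

int
main(void)
{
	struct io first = { 0 }, second = { F_BLOCKED };

	check_blocked(&first, &second);		/* still blocked */
	first.flags |= F_SERSEQ_DONE;		/* ctl_serseq_done() analogue */
	check_blocked(&first, &second);		/* now released */
	return (0);
}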
+
+void
ctl_done(union ctl_io *io)
{
@@ -13925,15 +13375,14 @@ ctl_done(union ctl_io *io)
#if 0
if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) {
printf("%s: type %d msg %d cdb %x iptl: "
- "%d:%d:%d:%d tag 0x%04x "
+ "%u:%u:%u tag 0x%04x "
"flag %#x status %x\n",
__func__,
io->io_hdr.io_type,
io->io_hdr.msg_type,
io->scsiio.cdb[0],
- io->io_hdr.nexus.initid.id,
+ io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_port,
- io->io_hdr.nexus.targ_target.id,
io->io_hdr.nexus.targ_lun,
(io->io_hdr.io_type ==
CTL_IO_TASK) ?
@@ -13952,25 +13401,6 @@ ctl_done(union ctl_io *io)
if (io->io_hdr.flags & CTL_FLAG_INT_COPY)
return;
- /*
- * We need to send a msg to the serializing shelf to finish the IO
- * as well. We don't send a finish message to the other shelf if
- * this is a task management command. Task management commands
- * aren't serialized in the OOA queue, but rather just executed on
- * both shelf controllers for commands that originated on that
- * controller.
- */
- if ((io->io_hdr.flags & CTL_FLAG_SENT_2OTHER_SC)
- && (io->io_hdr.io_type != CTL_IO_TASK)) {
- union ctl_ha_msg msg_io;
-
- msg_io.hdr.msg_type = CTL_MSG_FINISH_IO;
- msg_io.hdr.serializing_sc = io->io_hdr.serializing_sc;
- if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_io,
- sizeof(msg_io), 0 ) != CTL_HA_STATUS_SUCCESS) {
- }
- /* continue on to finish IO */
- }
#ifdef CTL_IO_DELAY
if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) {
struct ctl_lun *lun;
@@ -13985,12 +13415,10 @@ ctl_done(union ctl_io *io)
if ((lun != NULL)
&& (lun->delay_info.done_delay > 0)) {
- struct callout *callout;
- callout = (struct callout *)&io->io_hdr.timer_bytes;
- callout_init(callout, /*mpsafe*/ 1);
+ callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1);
io->io_hdr.flags |= CTL_FLAG_DELAY_DONE;
- callout_reset(callout,
+ callout_reset(&io->io_hdr.delay_callout,
lun->delay_info.done_delay * hz,
ctl_done_timer_wakeup, io);
if (lun->delay_info.done_type == CTL_DELAY_TYPE_ONESHOT)
@@ -14003,24 +13431,6 @@ ctl_done(union ctl_io *io)
ctl_enqueue_done(io);
}
-int
-ctl_isc(struct ctl_scsiio *ctsio)
-{
- struct ctl_lun *lun;
- int retval;
-
- lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
-
- CTL_DEBUG_PRINT(("ctl_isc: command: %02x\n", ctsio->cdb[0]));
-
- CTL_DEBUG_PRINT(("ctl_isc: calling data_submit()\n"));
-
- retval = lun->backend->data_submit((union ctl_io *)ctsio);
-
- return (retval);
-}
-
-
static void
ctl_work_thread(void *arg)
{
@@ -14032,8 +13442,6 @@ ctl_work_thread(void *arg)
CTL_DEBUG_PRINT(("ctl_work_thread starting\n"));
for (;;) {
- retval = 0;
-
/*
* We handle the queues in this order:
* - ISC
@@ -14057,7 +13465,7 @@ ctl_work_thread(void *arg)
STAILQ_REMOVE_HEAD(&thr->done_queue, links);
/* clear any blocked commands, call fe_done */
mtx_unlock(&thr->queue_lock);
- retval = ctl_process_done(io);
+ ctl_process_done(io);
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&thr->incoming_queue);
@@ -14070,16 +13478,14 @@ ctl_work_thread(void *arg)
ctl_scsiio_precheck(softc, &io->scsiio);
continue;
}
- if (!ctl_pause_rtr) {
- io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue);
- if (io != NULL) {
- STAILQ_REMOVE_HEAD(&thr->rtr_queue, links);
- mtx_unlock(&thr->queue_lock);
- retval = ctl_scsiio(&io->scsiio);
- if (retval != CTL_RETVAL_COMPLETE)
- CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
- continue;
- }
+ io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue);
+ if (io != NULL) {
+ STAILQ_REMOVE_HEAD(&thr->rtr_queue, links);
+ mtx_unlock(&thr->queue_lock);
+ retval = ctl_scsiio(&io->scsiio);
+ if (retval != CTL_RETVAL_COMPLETE)
+ CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
+ continue;
}
/* Sleep until we have something to do. */
@@ -14092,12 +13498,10 @@ ctl_lun_thread(void *arg)
{
struct ctl_softc *softc = (struct ctl_softc *)arg;
struct ctl_be_lun *be_lun;
- int retval;
CTL_DEBUG_PRINT(("ctl_lun_thread starting\n"));
for (;;) {
- retval = 0;
mtx_lock(&softc->ctl_lock);
be_lun = STAILQ_FIRST(&softc->pending_lun_queue);
if (be_lun != NULL) {
@@ -14118,23 +13522,25 @@ ctl_thresh_thread(void *arg)
{
struct ctl_softc *softc = (struct ctl_softc *)arg;
struct ctl_lun *lun;
- struct ctl_be_lun *be_lun;
struct scsi_da_rw_recovery_page *rwpage;
struct ctl_logical_block_provisioning_page *page;
const char *attr;
+ union ctl_ha_msg msg;
uint64_t thres, val;
- int i, e;
+ int i, e, set;
CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n"));
for (;;) {
mtx_lock(&softc->ctl_lock);
STAILQ_FOREACH(lun, &softc->lun_list, links) {
- be_lun = lun->be_lun;
if ((lun->flags & CTL_LUN_DISABLED) ||
- (lun->flags & CTL_LUN_OFFLINE) ||
+ (lun->flags & CTL_LUN_NO_MEDIA) ||
lun->backend->lun_attr == NULL)
continue;
+ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 &&
+ softc->ha_mode == CTL_HA_MODE_XFER)
+ continue;
rwpage = &lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT];
if ((rwpage->byte8 & SMS_RWER_LBPERE) == 0)
continue;
@@ -14169,22 +13575,47 @@ ctl_thresh_thread(void *arg)
continue;
if ((page->descr[i].flags & SLBPPD_ARMING_MASK)
== SLBPPD_ARMING_INC)
- e |= (val >= thres);
+ e = (val >= thres);
else
- e |= (val <= thres);
+ e = (val <= thres);
+ if (e)
+ break;
}
mtx_lock(&lun->lun_lock);
if (e) {
+ scsi_u64to8b((uint8_t *)&page->descr[i] -
+ (uint8_t *)page, lun->ua_tpt_info);
if (lun->lasttpt == 0 ||
time_uptime - lun->lasttpt >= CTL_LBP_UA_PERIOD) {
lun->lasttpt = time_uptime;
ctl_est_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES);
- }
+ set = 1;
+ } else
+ set = 0;
} else {
lun->lasttpt = 0;
ctl_clr_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES);
+ set = -1;
}
mtx_unlock(&lun->lun_lock);
+ if (set != 0 &&
+ lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) {
+ /* Send msg to other side. */
+ bzero(&msg.ua, sizeof(msg.ua));
+ msg.hdr.msg_type = CTL_MSG_UA;
+ msg.hdr.nexus.initid = -1;
+ msg.hdr.nexus.targ_port = -1;
+ msg.hdr.nexus.targ_lun = lun->lun;
+ msg.hdr.nexus.targ_mapped_lun = lun->lun;
+ msg.ua.ua_all = 1;
+ msg.ua.ua_set = (set > 0);
+ msg.ua.ua_type = CTL_UA_THIN_PROV_THRES;
+ memcpy(msg.ua.ua_info, lun->ua_tpt_info, 8);
+ mtx_unlock(&softc->ctl_lock); // XXX
+ ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg,
+ sizeof(msg.ua), M_WAITOK);
+ mtx_lock(&softc->ctl_lock);
+ }
}
mtx_unlock(&softc->ctl_lock);
pause("-", CTL_LBP_PERIOD * hz);
@@ -14199,7 +13630,7 @@ ctl_enqueue_incoming(union ctl_io *io)
u_int idx;
idx = (io->io_hdr.nexus.targ_port * 127 +
- io->io_hdr.nexus.initid.id) % worker_threads;
+ io->io_hdr.nexus.initid) % worker_threads;
thr = &softc->threads[idx];
mtx_lock(&thr->queue_lock);
STAILQ_INSERT_TAIL(&thr->incoming_queue, &io->io_hdr, links);
@@ -14246,106 +13677,6 @@ ctl_enqueue_isc(union ctl_io *io)
wakeup(thr);
}
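
ctl_enqueue_incoming() pins each I_T nexus to one worker with
(targ_port * 127 + initid) % worker_threads, so commands from a given
initiator/port pair are never reordered across threads. A quick look at the
distribution (the multiplier and modulo come from the code above; the sample
nexus values are made up):

#include <stdio.h>

/* Same hash as ctl_enqueue_incoming(): stable per (port, initid) pair. */
static unsigned
pick_thread(unsigned port, unsigned initid, unsigned nthreads)
{
	return ((port * 127 + initid) % nthreads);
}

int
main(void)
{
	unsigned port, init, nthreads = 4;

	for (port = 0; port < 2; port++)
		for (init = 0; init < 3; init++)
			printf("port %u init %u -> worker %u\n",
			    port, init, pick_thread(port, init, nthreads));
	return (0);
}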
-/* Initialization and failover */
-
-void
-ctl_init_isc_msg(void)
-{
- printf("CTL: Still calling this thing\n");
-}
-
-/*
- * Init component
- * Initializes component into configuration defined by bootMode
- * (see hasc-sv.c)
- * returns hasc_Status:
- * OK
- * ERROR - fatal error
- */
-static ctl_ha_comp_status
-ctl_isc_init(struct ctl_ha_component *c)
-{
- ctl_ha_comp_status ret = CTL_HA_COMP_STATUS_OK;
-
- c->status = ret;
- return ret;
-}
-
-/* Start component
- * Starts component in state requested. If component starts successfully,
- * it must set its own state to the requested state
- * When requested state is HASC_STATE_HA, the component may refine it
- * by adding _SLAVE or _MASTER flags.
- * Currently allowed state transitions are:
- * UNKNOWN->HA - initial startup
- *    UNKNOWN->SINGLE - initial startup when no partner detected
- * HA->SINGLE - failover
- * returns ctl_ha_comp_status:
- * OK - component successfully started in requested state
- * FAILED - could not start the requested state, failover may
- * be possible
- * ERROR - fatal error detected, no future startup possible
- */
-static ctl_ha_comp_status
-ctl_isc_start(struct ctl_ha_component *c, ctl_ha_state state)
-{
- ctl_ha_comp_status ret = CTL_HA_COMP_STATUS_OK;
-
- printf("%s: go\n", __func__);
-
- // UNKNOWN->HA or UNKNOWN->SINGLE (bootstrap)
- if (c->state == CTL_HA_STATE_UNKNOWN ) {
- control_softc->is_single = 0;
- if (ctl_ha_msg_create(CTL_HA_CHAN_CTL, ctl_isc_event_handler)
- != CTL_HA_STATUS_SUCCESS) {
- printf("ctl_isc_start: ctl_ha_msg_create failed.\n");
- ret = CTL_HA_COMP_STATUS_ERROR;
- }
- } else if (CTL_HA_STATE_IS_HA(c->state)
- && CTL_HA_STATE_IS_SINGLE(state)){
- // HA->SINGLE transition
- ctl_failover();
- control_softc->is_single = 1;
- } else {
- printf("ctl_isc_start:Invalid state transition %X->%X\n",
- c->state, state);
- ret = CTL_HA_COMP_STATUS_ERROR;
- }
- if (CTL_HA_STATE_IS_SINGLE(state))
- control_softc->is_single = 1;
-
- c->state = state;
- c->status = ret;
- return ret;
-}
-
-/*
- * Quiesce component
- * The component must clear any error conditions (set status to OK) and
- * prepare itself to another Start call
- * returns ctl_ha_comp_status:
- * OK
- * ERROR
- */
-static ctl_ha_comp_status
-ctl_isc_quiesce(struct ctl_ha_component *c)
-{
- int ret = CTL_HA_COMP_STATUS_OK;
-
- ctl_pause_rtr = 1;
- c->status = ret;
- return ret;
-}
-
-struct ctl_ha_component ctl_ha_component_ctlisc =
-{
- .name = "CTL ISC",
- .state = CTL_HA_STATE_UNKNOWN,
- .init = ctl_isc_init,
- .start = ctl_isc_start,
- .quiesce = ctl_isc_quiesce
-};
-
/*
* vim: ts=8
*/
diff --git a/sys/cam/ctl/ctl.h b/sys/cam/ctl/ctl.h
index 2693419..35fc4c3 100644
--- a/sys/cam/ctl/ctl.h
+++ b/sys/cam/ctl/ctl.h
@@ -120,14 +120,14 @@ typedef enum {
CTL_UA_LUN_CHANGE = 0x0020,
CTL_UA_MODE_CHANGE = 0x0040,
CTL_UA_LOG_CHANGE = 0x0080,
- CTL_UA_LVD = 0x0100,
- CTL_UA_SE = 0x0200,
+ CTL_UA_INQ_CHANGE = 0x0100,
CTL_UA_RES_PREEMPT = 0x0400,
CTL_UA_RES_RELEASE = 0x0800,
- CTL_UA_REG_PREEMPT = 0x1000,
- CTL_UA_ASYM_ACC_CHANGE = 0x2000,
- CTL_UA_CAPACITY_CHANGED = 0x4000,
- CTL_UA_THIN_PROV_THRES = 0x8000
+ CTL_UA_REG_PREEMPT = 0x1000,
+ CTL_UA_ASYM_ACC_CHANGE = 0x2000,
+ CTL_UA_CAPACITY_CHANGE = 0x4000,
+ CTL_UA_THIN_PROV_THRES = 0x8000,
+ CTL_UA_MEDIUM_CHANGE = 0x10000
} ctl_ua_type;
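
ctl_ua_type remains a bitmask, which is why the renames above
(CTL_UA_INQ_CHANGE taking over the retired LVD/SE slot, CTL_UA_MEDIUM_CHANGE
appended at 0x10000) must keep every constant a distinct power of two:
pending unit attentions per initiator are OR-ed into a single word and
cleared bit by bit. An illustration using values from this enum:

#include <stdio.h>

typedef unsigned ua_type;
#define UA_CAPACITY	0x4000	/* cf. CTL_UA_CAPACITY_CHANGE */
#define UA_MEDIUM	0x10000	/* cf. CTL_UA_MEDIUM_CHANGE */

int
main(void)
{
	ua_type pending = 0;

	pending |= UA_CAPACITY | UA_MEDIUM;	/* ctl_est_ua() analogue */
	pending &= ~UA_CAPACITY;		/* reported: ctl_clr_ua() analogue */
	printf("medium-change still pending: %d\n",
	    (pending & UA_MEDIUM) != 0);
	return (0);
}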
#ifdef _KERNEL
@@ -140,17 +140,9 @@ struct ctl_page_index;
SYSCTL_DECL(_kern_cam_ctl);
#endif
-/*
- * Call these routines to enable or disable front end ports.
- */
-int ctl_port_enable(ctl_port_type port_type);
-int ctl_port_disable(ctl_port_type port_type);
-/*
- * This routine grabs a list of frontend ports.
- */
-int ctl_port_list(struct ctl_port_entry *entries, int num_entries_alloced,
- int *num_entries_filled, int *num_entries_dropped,
- ctl_port_type port_type, int no_virtual);
+struct ctl_lun;
+struct ctl_port;
+struct ctl_softc;
/*
* Put a string into an sbuf, escaping characters that are illegal or not
@@ -158,7 +150,7 @@ int ctl_port_list(struct ctl_port_entry *entries, int num_entries_alloced,
*/
int ctl_sbuf_printf_esc(struct sbuf *sb, char *str, int size);
-int ctl_ffz(uint32_t *mask, uint32_t size);
+int ctl_ffz(uint32_t *mask, uint32_t first, uint32_t last);
int ctl_set_mask(uint32_t *mask, uint32_t bit);
int ctl_clear_mask(uint32_t *mask, uint32_t bit);
int ctl_is_set(uint32_t *mask, uint32_t bit);
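
ctl_ffz() trades its flat size argument for first/last bounds, letting
callers search a sub-range of a bitmask (for example, port-relative
initiator slots). The following is a userspace guess at the semantics — scan
[first, last) and return the first clear bit, or -1 — and should be checked
against the implementation in ctl.c:

#include <stdint.h>
#include <stdio.h>

/* Find the first zero bit in [first, last) of a uint32_t-array bitmask. */
static int
ffz_range(const uint32_t *mask, uint32_t first, uint32_t last)
{
	uint32_t i;

	for (i = first; i < last; i++)
		if ((mask[i / 32] & (1u << (i % 32))) == 0)
			return ((int)i);
	return (-1);
}

int
main(void)
{
	uint32_t mask[2] = { 0xffffffffu, 0x00000007u };	/* bits 0..34 set */

	printf("first free slot in [32,64): %d\n", ffz_range(mask, 32, 64));
	return (0);
}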
@@ -167,11 +159,6 @@ int ctl_caching_sp_handler(struct ctl_scsiio *ctsio,
int ctl_control_page_handler(struct ctl_scsiio *ctsio,
struct ctl_page_index *page_index,
uint8_t *page_ptr);
-/**
-int ctl_failover_sp_handler(struct ctl_scsiio *ctsio,
- struct ctl_page_index *page_index,
- uint8_t *page_ptr);
-**/
int ctl_debugconf_sp_sense_handler(struct ctl_scsiio *ctsio,
struct ctl_page_index *page_index,
int pc);
@@ -186,12 +173,29 @@ int ctl_sap_log_sense_handler(struct ctl_scsiio *ctsio,
int pc);
int ctl_config_move_done(union ctl_io *io);
void ctl_datamove(union ctl_io *io);
+void ctl_serseq_done(union ctl_io *io);
void ctl_done(union ctl_io *io);
void ctl_data_submit_done(union ctl_io *io);
void ctl_config_read_done(union ctl_io *io);
void ctl_config_write_done(union ctl_io *io);
void ctl_portDB_changed(int portnum);
-void ctl_init_isc_msg(void);
+int ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
+ struct thread *td);
+
+void ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua);
+void ctl_est_ua_port(struct ctl_lun *lun, int port, uint32_t except,
+ ctl_ua_type ua);
+void ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua);
+void ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua);
+void ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua);
+void ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx,
+ ctl_ua_type ua_type);
+
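These routines form the new unit-attention KPI. A hypothetical backend notification, assuming an except argument of -1 means no initiator is excluded:

	/* Hypothetical: report a capacity change to every initiator. */
	ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGE);
	/* Clear it for one initiator once it has seen the new capacity. */
	ctl_clr_ua(lun, initidx, CTL_UA_CAPACITY_CHANGE);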
+void ctl_isc_announce_lun(struct ctl_lun *lun);
+void ctl_isc_announce_port(struct ctl_port *port);
+void ctl_isc_announce_iid(struct ctl_port *port, int iid);
+void ctl_isc_announce_mode(struct ctl_lun *lun, uint32_t initidx,
+ uint8_t page, uint8_t subpage);
/*
* KPI to manipulate LUN/port options
@@ -206,6 +210,8 @@ typedef STAILQ_HEAD(ctl_options, ctl_option) ctl_options_t;
struct ctl_be_arg;
void ctl_init_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args);
+void ctl_update_opts(ctl_options_t *opts, int num_args,
+ struct ctl_be_arg *args);
void ctl_free_opts(ctl_options_t *opts);
char * ctl_get_opt(ctl_options_t *opts, const char *name);
int ctl_expand_number(const char *buf, uint64_t *num);
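ctl_expand_number() parses humanized sizes in the style of expand_number(3). A hedged usage sketch, assuming a zero return on success:

	uint64_t ps = 0;
	char *value = ctl_get_opt(&cbe_lun->options, "pblocksize");

	if (value != NULL && ctl_expand_number(value, &ps) == 0) {
		/* e.g. "4k" yields ps == 4096 */
	}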
diff --git a/sys/cam/ctl/ctl_backend.c b/sys/cam/ctl/ctl_backend.c
index cabecb7..91576c4 100644
--- a/sys/cam/ctl/ctl_backend.c
+++ b/sys/cam/ctl/ctl_backend.c
@@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_backend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_private.h>
@@ -66,11 +65,9 @@ extern struct ctl_softc *control_softc;
int
ctl_backend_register(struct ctl_backend_driver *be)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
struct ctl_backend_driver *be_tmp;
- softc = control_softc;
-
mtx_lock(&softc->ctl_lock);
/*
* Sanity check, make sure this isn't a duplicate registration.
@@ -121,9 +118,7 @@ ctl_backend_register(struct ctl_backend_driver *be)
int
ctl_backend_deregister(struct ctl_backend_driver *be)
{
- struct ctl_softc *softc;
-
- softc = control_softc;
+ struct ctl_softc *softc = control_softc;
mtx_lock(&softc->ctl_lock);
@@ -154,20 +149,16 @@ ctl_backend_deregister(struct ctl_backend_driver *be)
struct ctl_backend_driver *
ctl_backend_find(char *backend_name)
{
- struct ctl_softc *softc;
+ struct ctl_softc *softc = control_softc;
struct ctl_backend_driver *be_tmp;
- softc = control_softc;
-
mtx_lock(&softc->ctl_lock);
-
STAILQ_FOREACH(be_tmp, &softc->be_list, links) {
if (strcmp(be_tmp->name, backend_name) == 0) {
mtx_unlock(&softc->ctl_lock);
return (be_tmp);
}
}
-
mtx_unlock(&softc->ctl_lock);
return (NULL);
@@ -186,15 +177,48 @@ ctl_init_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args)
if ((args[i].flags & CTL_BEARG_ASCII) == 0)
continue;
opt = malloc(sizeof(*opt), M_CTL, M_WAITOK);
- opt->name = malloc(strlen(args[i].kname) + 1, M_CTL, M_WAITOK);
- strcpy(opt->name, args[i].kname);
- opt->value = malloc(strlen(args[i].kvalue) + 1, M_CTL, M_WAITOK);
- strcpy(opt->value, args[i].kvalue);
+ opt->name = strdup(args[i].kname, M_CTL);
+ opt->value = strdup(args[i].kvalue, M_CTL);
STAILQ_INSERT_TAIL(opts, opt, links);
}
}
void
+ctl_update_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args)
+{
+ struct ctl_option *opt;
+ int i;
+
+ for (i = 0; i < num_args; i++) {
+ if ((args[i].flags & CTL_BEARG_RD) == 0)
+ continue;
+ if ((args[i].flags & CTL_BEARG_ASCII) == 0)
+ continue;
+ STAILQ_FOREACH(opt, opts, links) {
+ if (strcmp(opt->name, args[i].kname) == 0)
+ break;
+ }
+ if (args[i].kvalue != NULL &&
+ ((char *)args[i].kvalue)[0] != 0) {
+ if (opt) {
+ free(opt->value, M_CTL);
+ opt->value = strdup(args[i].kvalue, M_CTL);
+ } else {
+ opt = malloc(sizeof(*opt), M_CTL, M_WAITOK);
+ opt->name = strdup(args[i].kname, M_CTL);
+ opt->value = strdup(args[i].kvalue, M_CTL);
+ STAILQ_INSERT_TAIL(opts, opt, links);
+ }
+ } else if (opt) {
+ STAILQ_REMOVE(opts, opt, ctl_option, links);
+ free(opt->name, M_CTL);
+ free(opt->value, M_CTL);
+ free(opt, M_CTL);
+ }
+ }
+}
+
+void
ctl_free_opts(ctl_options_t *opts)
{
struct ctl_option *opt;
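ctl_update_opts() gives backends in-place option updates with delete-on-empty semantics: a non-empty value replaces or inserts the named option, an empty or NULL value removes it. A hypothetical modify-path caller:

	/*
	 * Hypothetical: re-apply user arguments from a LUN modify
	 * request; an argument with an empty value drops the option.
	 */
	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);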
diff --git a/sys/cam/ctl/ctl_backend.h b/sys/cam/ctl/ctl_backend.h
index 93a530c..4177e2d 100644
--- a/sys/cam/ctl/ctl_backend.h
+++ b/sys/cam/ctl/ctl_backend.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2003 Silicon Graphics International Corp.
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,14 +50,11 @@
* particular LUN ID in the req_lun_id field. If we cannot allocate that
* LUN ID, the ctl_add_lun() call will fail.
*
- * The POWERED_OFF flag tells us that the LUN should default to the powered
+ * The STOPPED flag tells us that the LUN should default to the powered
* off state. It will return 0x04,0x02 until it is powered up. ("Logical
* unit not ready, initializing command required.")
*
- * The INOPERABLE flag tells us that this LUN is not operable for whatever
- * reason. This means that user data may have been (or has been?) lost.
- * We will return 0x31,0x00 ("Medium format corrupted") until the host
- * issues a FORMAT UNIT command to clear the error.
+ * The NO_MEDIA flag tells us that the LUN has no media inserted.
*
* The PRIMARY flag tells us that this LUN is registered as a Primary LUN
* which is accessible via the Master shelf controller in an HA. This flag
@@ -72,23 +70,31 @@
*
* The DEV_TYPE flag tells us that the device_type field is filled in.
*
+ * The EJECTED flag tells us that the removable LUN has its tray open.
+ *
* The UNMAP flag tells us that this LUN supports UNMAP.
*
 * The OFFLINE flag tells us that this LUN cannot access its backing store.
*/
typedef enum {
CTL_LUN_FLAG_ID_REQ = 0x01,
- CTL_LUN_FLAG_POWERED_OFF = 0x02,
- CTL_LUN_FLAG_INOPERABLE = 0x04,
+ CTL_LUN_FLAG_STOPPED = 0x02,
+ CTL_LUN_FLAG_NO_MEDIA = 0x04,
CTL_LUN_FLAG_PRIMARY = 0x08,
CTL_LUN_FLAG_SERIAL_NUM = 0x10,
CTL_LUN_FLAG_DEVID = 0x20,
CTL_LUN_FLAG_DEV_TYPE = 0x40,
CTL_LUN_FLAG_UNMAP = 0x80,
- CTL_LUN_FLAG_OFFLINE = 0x100,
- CTL_LUN_FLAG_SERSEQ_READ = 0x200
+ CTL_LUN_FLAG_EJECTED = 0x100,
+ CTL_LUN_FLAG_READONLY = 0x200
} ctl_backend_lun_flags;
+typedef enum {
+ CTL_LUN_SERSEQ_OFF,
+ CTL_LUN_SERSEQ_READ,
+ CTL_LUN_SERSEQ_ON
+} ctl_lun_serseq;
+
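A hypothetical backend configuring the renamed flags and the new serialization knob (the struct carrying these fields follows below):

	/* Sketch: a read-only LUN that starts with no medium inserted. */
	cbe_lun->flags = CTL_LUN_FLAG_READONLY | CTL_LUN_FLAG_NO_MEDIA;
	/*
	 * Serialize consecutive commands at the read level, the default
	 * ctl_be_block_open() picks for non-device backing stores.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_READ;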
#ifdef _KERNEL
#define CTL_BACKEND_DECLARE(name, driver) \
@@ -195,6 +201,7 @@ typedef void (*be_lun_config_t)(void *be_lun,
struct ctl_be_lun {
uint8_t lun_type; /* passed to CTL */
ctl_backend_lun_flags flags; /* passed to CTL */
+ ctl_lun_serseq serseq; /* passed to CTL */
void *be_lun; /* passed to CTL */
uint64_t maxlba; /* passed to CTL */
uint32_t blocksize; /* passed to CTL */
@@ -282,26 +289,20 @@ int ctl_start_lun(struct ctl_be_lun *be_lun);
int ctl_stop_lun(struct ctl_be_lun *be_lun);
/*
- * If a LUN is inoperable, call ctl_lun_inoperable(). Generally the LUN
- * will become operable once again when the user issues the SCSI FORMAT UNIT
- * command. (CTL will automatically clear the inoperable flag.) If we
- * need to re-enable the LUN, we can call ctl_lun_operable() to enable it
- * without a SCSI command.
+ * Methods to notify about media and tray status changes.
*/
-int ctl_lun_inoperable(struct ctl_be_lun *be_lun);
-int ctl_lun_operable(struct ctl_be_lun *be_lun);
+int ctl_lun_no_media(struct ctl_be_lun *be_lun);
+int ctl_lun_has_media(struct ctl_be_lun *be_lun);
+int ctl_lun_ejected(struct ctl_be_lun *be_lun);
/*
- * To take a LUN offline, call ctl_lun_offline(). Generally the LUN will
- * be online again once the user sends a SCSI START STOP UNIT command with
- * the start and on/offline bits set. The backend can bring the LUN back
- * online via the ctl_lun_online() function, if necessary.
+ * Called on LUN HA role change.
*/
-int ctl_lun_offline(struct ctl_be_lun *be_lun);
-int ctl_lun_online(struct ctl_be_lun *be_lun);
+int ctl_lun_primary(struct ctl_be_lun *be_lun);
+int ctl_lun_secondary(struct ctl_be_lun *be_lun);
/*
- * Let the backend notify the initiator about changed capacity.
+ * Let the backend notify the initiators about changes.
*/
void ctl_lun_capacity_changed(struct ctl_be_lun *be_lun);
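The inoperable/offline pairs are replaced by explicit media-state and HA-role notifications. A hypothetical tray cycle on a removable LUN:

	ctl_lun_ejected(cbe_lun);	/* tray opened, medium removed */
	ctl_lun_no_media(cbe_lun);	/* tray closed, nothing inserted */
	ctl_lun_has_media(cbe_lun);	/* medium inserted and ready */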
diff --git a/sys/cam/ctl/ctl_backend_block.c b/sys/cam/ctl/ctl_backend_block.c
index 18e9dad..7e0dc74 100644
--- a/sys/cam/ctl/ctl_backend_block.c
+++ b/sys/cam/ctl/ctl_backend_block.c
@@ -2,6 +2,7 @@
* Copyright (c) 2003 Silicon Graphics International Corp.
* Copyright (c) 2009-2011 Spectra Logic Corporation
* Copyright (c) 2012 The FreeBSD Foundation
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Portions of this software were developed by Edward Tomasz Napierala
@@ -86,9 +87,10 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
+#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>
/*
@@ -119,7 +121,6 @@ typedef enum {
CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01,
CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02,
CTL_BE_BLOCK_LUN_WAITING = 0x04,
- CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08
} ctl_be_block_lun_flags;
typedef enum {
@@ -128,18 +129,11 @@ typedef enum {
CTL_BE_BLOCK_FILE
} ctl_be_block_type;
-struct ctl_be_block_devdata {
- struct cdev *cdev;
- struct cdevsw *csw;
- int dev_ref;
-};
-
struct ctl_be_block_filedata {
struct ucred *cred;
};
union ctl_be_block_bedata {
- struct ctl_be_block_devdata dev;
struct ctl_be_block_filedata file;
};
@@ -157,7 +151,6 @@ typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
*/
struct ctl_be_block_lun {
struct ctl_lun_create_params params;
- struct ctl_block_disk *disk;
char lunname[32];
char *dev_path;
ctl_be_block_type dev_type;
@@ -171,19 +164,11 @@ struct ctl_be_block_lun {
uma_zone_t lun_zone;
uint64_t size_blocks;
uint64_t size_bytes;
- uint32_t blocksize;
- int blocksize_shift;
- uint16_t pblockexp;
- uint16_t pblockoff;
- uint16_t ublockexp;
- uint16_t ublockoff;
- uint32_t atomicblock;
- uint32_t opttxferlen;
struct ctl_be_block_softc *softc;
struct devstat *disk_stats;
ctl_be_block_lun_flags flags;
STAILQ_ENTRY(ctl_be_block_lun) links;
- struct ctl_be_lun ctl_be_lun;
+ struct ctl_be_lun cbe_lun;
struct taskqueue *io_taskqueue;
struct task io_task;
int num_threads;
@@ -200,8 +185,6 @@ struct ctl_be_block_lun {
*/
struct ctl_be_block_softc {
struct mtx lock;
- int num_disks;
- STAILQ_HEAD(, ctl_block_disk) disk_list;
int num_luns;
STAILQ_HEAD(, ctl_be_block_lun) lun_list;
};
@@ -232,6 +215,8 @@ struct ctl_be_block_io {
void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
+extern struct ctl_softc *control_softc;
+
static int cbb_num_threads = 14;
TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
@@ -275,17 +260,12 @@ static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
-static int ctl_be_block_open(struct ctl_be_block_softc *softc,
- struct ctl_be_block_lun *be_lun,
+static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
struct ctl_lun_req *req);
-static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
- struct ctl_lun_req *req);
-static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
- struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(void *be_lun);
@@ -370,6 +350,48 @@ ctl_complete_beio(struct ctl_be_block_io *beio)
}
}
+static size_t
+cmp(uint8_t *a, uint8_t *b, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (a[i] != b[i])
+ break;
+ }
+ return (i);
+}
+
+static void
+ctl_be_block_compare(union ctl_io *io)
+{
+ struct ctl_be_block_io *beio;
+ uint64_t off, res;
+ int i;
+ uint8_t info[8];
+
+ beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
+ off = 0;
+ for (i = 0; i < beio->num_segs; i++) {
+ res = cmp(beio->sg_segs[i].addr,
+ beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
+ beio->sg_segs[i].len);
+ off += res;
+ if (res < beio->sg_segs[i].len)
+ break;
+ }
+ if (i < beio->num_segs) {
+ scsi_u64to8b(off, info);
+ ctl_set_sense(&io->scsiio, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_MISCOMPARE,
+ /*asc*/ 0x1D, /*ascq*/ 0x00,
+ /*type*/ SSD_ELEM_INFO,
+ /*size*/ sizeof(info), /*data*/ &info,
+ /*type*/ SSD_ELEM_NONE);
+ } else
+ ctl_set_success(&io->scsiio);
+}
+
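cmp() returns the count of leading equal bytes (size on a full match), and ctl_be_block_compare() sums those counts across S/G segments so the MISCOMPARE sense carries the absolute byte offset of the first difference in its INFORMATION field. A worked example:

	uint8_t a[4] = { 1, 2, 3, 4 };
	uint8_t b[4] = { 1, 2, 9, 4 };
	size_t n = cmp(a, b, sizeof(a));	/* n == 2: first mismatch at byte 2 */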
static int
ctl_be_block_move_done(union ctl_io *io)
{
@@ -379,7 +401,6 @@ ctl_be_block_move_done(union ctl_io *io)
#ifdef CTL_TIME_IO
struct bintime cur_bt;
#endif
- int i;
beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
be_lun = beio->lun;
@@ -387,11 +408,11 @@ ctl_be_block_move_done(union ctl_io *io)
DPRINTF("entered\n");
#ifdef CTL_TIME_IO
- getbintime(&cur_bt);
+ getbinuptime(&cur_bt);
bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
bintime_add(&io->io_hdr.dma_bt, &cur_bt);
+#endif
io->io_hdr.num_dmas++;
-#endif
io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
/*
@@ -407,21 +428,7 @@ ctl_be_block_move_done(union ctl_io *io)
ctl_set_success(&io->scsiio);
} else if (lbalen->flags & CTL_LLF_COMPARE) {
/* We have two data blocks ready for comparison. */
- for (i = 0; i < beio->num_segs; i++) {
- if (memcmp(beio->sg_segs[i].addr,
- beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
- beio->sg_segs[i].len) != 0)
- break;
- }
- if (i < beio->num_segs)
- ctl_set_sense(&io->scsiio,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_MISCOMPARE,
- /*asc*/ 0x1D,
- /*ascq*/ 0x00,
- SSD_ELEM_NONE);
- else
- ctl_set_success(&io->scsiio);
+ ctl_be_block_compare(io);
}
} else if ((io->io_hdr.port_status != 0) &&
((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
@@ -459,14 +466,8 @@ ctl_be_block_move_done(union ctl_io *io)
* interrupt context, and therefore we cannot block.
*/
mtx_lock(&be_lun->queue_lock);
- /*
- * XXX KDM make sure that links is okay to use at this point.
- * Otherwise, we either need to add another field to ctl_io_hdr,
- * or deal with resource allocation here.
- */
STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
-
taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
return (0);
@@ -527,13 +528,17 @@ ctl_be_block_biodone(struct bio *bio)
ctl_set_invalid_opcode(&io->scsiio);
} else if (error == ENOSPC || error == EDQUOT) {
ctl_set_space_alloc_fail(&io->scsiio);
+ } else if (error == EROFS || error == EACCES) {
+ ctl_set_hw_write_protected(&io->scsiio);
} else if (beio->bio_cmd == BIO_FLUSH) {
 /* XXX KDM is there a better error here? */
ctl_set_internal_failure(&io->scsiio,
/*sks_valid*/ 1,
/*retry_count*/ 0xbad2);
- } else
- ctl_set_medium_error(&io->scsiio);
+ } else {
+ ctl_set_medium_error(&io->scsiio,
+ beio->bio_cmd == BIO_READ);
+ }
ctl_complete_beio(beio);
return;
}
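The same errno-to-sense policy recurs in all three completion paths touched by this change (biodone, file dispatch, zvol dispatch). Condensed:

	/* Condensed sketch of the shared errno -> SCSI sense policy. */
	if (error == ENOSPC || error == EDQUOT)
		ctl_set_space_alloc_fail(&io->scsiio);	/* backing store full */
	else if (error == EROFS || error == EACCES)
		ctl_set_hw_write_protected(&io->scsiio);
	else
		ctl_set_medium_error(&io->scsiio,
		    beio->bio_cmd == BIO_READ);	/* distinguish read/write */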
@@ -550,11 +555,13 @@ ctl_be_block_biodone(struct bio *bio)
ctl_complete_beio(beio);
} else {
if ((ARGS(io)->flags & CTL_LLF_READ) &&
- beio->beio_cont == NULL)
+ beio->beio_cont == NULL) {
ctl_set_success(&io->scsiio);
+ ctl_serseq_done(io);
+ }
#ifdef CTL_TIME_IO
- getbintime(&io->io_hdr.dma_start_bt);
-#endif
+ getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
ctl_datamove(io);
}
}
@@ -576,15 +583,12 @@ ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
- if (MNT_SHARED_WRITES(mountpoint)
- || ((mountpoint == NULL)
- && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
+ if (MNT_SHARED_WRITES(mountpoint) ||
+ ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
lock_flags = LK_SHARED;
else
lock_flags = LK_EXCLUSIVE;
-
vn_lock(be_lun->vn, lock_flags | LK_RETRY);
-
error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
curthread);
VOP_UNLOCK(be_lun->vn, 0);
@@ -622,8 +626,8 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
union ctl_io *io;
struct uio xuio;
struct iovec *xiovec;
- int flags;
- int error, i;
+ size_t s;
+ int error, flags, i;
DPRINTF("entered\n");
@@ -684,19 +688,33 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
VOP_UNLOCK(be_lun->vn, 0);
SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
+ if (error == 0 && xuio.uio_resid > 0) {
+ /*
+ * If we read less than requested (EOF), then
+ * we should zero the rest of the buffer.
+ */
+ s = beio->io_len - xuio.uio_resid;
+ for (i = 0; i < beio->num_segs; i++) {
+ if (s >= beio->sg_segs[i].len) {
+ s -= beio->sg_segs[i].len;
+ continue;
+ }
+ bzero((uint8_t *)beio->sg_segs[i].addr + s,
+ beio->sg_segs[i].len - s);
+ s = 0;
+ }
+ }
} else {
struct mount *mountpoint;
int lock_flags;
(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
- if (MNT_SHARED_WRITES(mountpoint)
- || ((mountpoint == NULL)
+ if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
&& MNT_SHARED_WRITES(be_lun->vn->v_mount)))
lock_flags = LK_SHARED;
else
lock_flags = LK_EXCLUSIVE;
-
vn_lock(be_lun->vn, lock_flags | LK_RETRY);
/*
@@ -732,15 +750,14 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
* return the I/O to the user.
*/
if (error != 0) {
- char path_str[32];
-
- ctl_scsi_path_string(io, path_str, sizeof(path_str));
- printf("%s%s command returned errno %d\n", path_str,
- (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
if (error == ENOSPC || error == EDQUOT) {
ctl_set_space_alloc_fail(&io->scsiio);
- } else
- ctl_set_medium_error(&io->scsiio);
+ } else if (error == EROFS || error == EACCES) {
+ ctl_set_hw_write_protected(&io->scsiio);
+ } else {
+ ctl_set_medium_error(&io->scsiio,
+ beio->bio_cmd == BIO_READ);
+ }
ctl_complete_beio(beio);
return;
}
@@ -755,11 +772,13 @@ ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
ctl_complete_beio(beio);
} else {
if ((ARGS(io)->flags & CTL_LLF_READ) &&
- beio->beio_cont == NULL)
+ beio->beio_cont == NULL) {
ctl_set_success(&io->scsiio);
+ ctl_serseq_done(io);
+ }
#ifdef CTL_TIME_IO
- getbintime(&io->io_hdr.dma_start_bt);
-#endif
+ getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
ctl_datamove(io);
}
}
@@ -776,7 +795,7 @@ ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
DPRINTF("entered\n");
- off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift;
+ off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
0, curthread->td_ucred, curthread);
@@ -794,11 +813,10 @@ ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
}
VOP_UNLOCK(be_lun->vn, 0);
- off >>= be_lun->blocksize_shift;
data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
scsi_u64to8b(lbalen->lba, data->descr[0].addr);
- scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba),
- data->descr[0].length);
+ scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
+ lbalen->lba), data->descr[0].length);
data->descr[0].status = status;
ctl_complete_beio(beio);
@@ -809,40 +827,42 @@ ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
struct vattr vattr;
struct statfs statfs;
+ uint64_t val;
int error;
+ val = UINT64_MAX;
if (be_lun->vn == NULL)
- return (UINT64_MAX);
+ return (val);
+ vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
if (strcmp(attrname, "blocksused") == 0) {
error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
- if (error != 0)
- return (UINT64_MAX);
- return (vattr.va_bytes >> be_lun->blocksize_shift);
+ if (error == 0)
+ val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
}
- if (strcmp(attrname, "blocksavail") == 0) {
+ if (strcmp(attrname, "blocksavail") == 0 &&
+ (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
- if (error != 0)
- return (UINT64_MAX);
- return ((statfs.f_bavail * statfs.f_bsize) >>
- be_lun->blocksize_shift);
+ if (error == 0)
+ val = statfs.f_bavail * statfs.f_bsize /
+ be_lun->cbe_lun.blocksize;
}
- return (UINT64_MAX);
+ VOP_UNLOCK(be_lun->vn, 0);
+ return (val);
}
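Attribute queries now hold the vnode lock and fold both error paths into a single UINT64_MAX sentinel. A hypothetical caller through the getattr method pointer declared earlier in this file:

	uint64_t avail;

	avail = be_lun->getattr(be_lun, "blocksavail");
	if (avail != UINT64_MAX) {
		/* free space, in units of cbe_lun->blocksize */
	}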
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
struct ctl_be_block_io *beio)
{
- struct ctl_be_block_devdata *dev_data;
union ctl_io *io;
+ struct cdevsw *csw;
+ struct cdev *dev;
struct uio xuio;
struct iovec *xiovec;
- int flags;
- int error, i;
+ int error, flags, i, ref;
DPRINTF("entered\n");
- dev_data = &be_lun->backend.dev;
io = beio->io;
flags = 0;
if (ARGS(io)->flags & CTL_LLF_DPO)
@@ -875,13 +895,20 @@ ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
mtx_unlock(&be_lun->io_lock);
- if (beio->bio_cmd == BIO_READ) {
- error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, flags);
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ if (csw) {
+ if (beio->bio_cmd == BIO_READ)
+ error = csw->d_read(dev, &xuio, flags);
+ else
+ error = csw->d_write(dev, &xuio, flags);
+ dev_relthread(dev, ref);
+ } else
+ error = ENXIO;
+
+ if (beio->bio_cmd == BIO_READ)
SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
- } else {
- error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, flags);
+ else
SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
- }
mtx_lock(&be_lun->io_lock);
devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
@@ -896,8 +923,12 @@ ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
if (error != 0) {
if (error == ENOSPC || error == EDQUOT) {
ctl_set_space_alloc_fail(&io->scsiio);
- } else
- ctl_set_medium_error(&io->scsiio);
+ } else if (error == EROFS || error == EACCES) {
+ ctl_set_hw_write_protected(&io->scsiio);
+ } else {
+ ctl_set_medium_error(&io->scsiio,
+ beio->bio_cmd == BIO_READ);
+ }
ctl_complete_beio(beio);
return;
}
@@ -912,11 +943,13 @@ ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
ctl_complete_beio(beio);
} else {
if ((ARGS(io)->flags & CTL_LLF_READ) &&
- beio->beio_cont == NULL)
+ beio->beio_cont == NULL) {
ctl_set_success(&io->scsiio);
+ ctl_serseq_done(io);
+ }
#ifdef CTL_TIME_IO
- getbintime(&io->io_hdr.dma_start_bt);
-#endif
+ getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
ctl_datamove(io);
}
}
@@ -925,23 +958,30 @@ static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
struct ctl_be_block_io *beio)
{
- struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev;
union ctl_io *io = beio->io;
+ struct cdevsw *csw;
+ struct cdev *dev;
struct ctl_lba_len_flags *lbalen = ARGS(io);
struct scsi_get_lba_status_data *data;
off_t roff, off;
- int error, status;
+ int error, ref, status;
DPRINTF("entered\n");
- off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift;
- error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKHOLE,
- (caddr_t)&off, FREAD, curthread);
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ if (csw == NULL) {
+ status = 0; /* unknown up to the end */
+ off = be_lun->size_bytes;
+ goto done;
+ }
+ off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
+ error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
+ curthread);
if (error == 0 && off > roff)
status = 0; /* mapped up to off */
else {
- error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKDATA,
- (caddr_t)&off, FREAD, curthread);
+ error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
+ curthread);
if (error == 0 && off > roff)
status = 1; /* deallocated up to off */
else {
@@ -949,12 +989,13 @@ ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
off = be_lun->size_bytes;
}
}
+ dev_relthread(dev, ref);
- off >>= be_lun->blocksize_shift;
+done:
data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
scsi_u64to8b(lbalen->lba, data->descr[0].addr);
- scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba),
- data->descr[0].length);
+ scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
+ lbalen->lba), data->descr[0].length);
data->descr[0].status = status;
ctl_complete_beio(beio);
@@ -965,11 +1006,9 @@ ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
struct ctl_be_block_io *beio)
{
struct bio *bio;
- union ctl_io *io;
- struct ctl_be_block_devdata *dev_data;
-
- dev_data = &be_lun->backend.dev;
- io = beio->io;
+ struct cdevsw *csw;
+ struct cdev *dev;
+ int ref;
DPRINTF("entered\n");
@@ -977,7 +1016,6 @@ ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
bio = g_alloc_bio();
bio->bio_cmd = BIO_FLUSH;
- bio->bio_dev = dev_data->cdev;
bio->bio_offset = 0;
bio->bio_data = 0;
bio->bio_done = ctl_be_block_biodone;
@@ -997,7 +1035,15 @@ ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
mtx_unlock(&be_lun->io_lock);
- (*dev_data->csw->d_strategy)(bio);
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ if (csw) {
+ bio->bio_dev = dev;
+ csw->d_strategy(bio);
+ dev_relthread(dev, ref);
+ } else {
+ bio->bio_error = ENXIO;
+ ctl_be_block_biodone(bio);
+ }
}
static void
@@ -1006,21 +1052,23 @@ ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
uint64_t off, uint64_t len, int last)
{
struct bio *bio;
- struct ctl_be_block_devdata *dev_data;
uint64_t maxlen;
+ struct cdevsw *csw;
+ struct cdev *dev;
+ int ref;
- dev_data = &be_lun->backend.dev;
- maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize);
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
while (len > 0) {
bio = g_alloc_bio();
bio->bio_cmd = BIO_DELETE;
- bio->bio_dev = dev_data->cdev;
+ bio->bio_dev = dev;
bio->bio_offset = off;
bio->bio_length = MIN(len, maxlen);
bio->bio_data = 0;
bio->bio_done = ctl_be_block_biodone;
bio->bio_caller1 = beio;
- bio->bio_pblkno = off / be_lun->blocksize;
+ bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;
off += bio->bio_length;
len -= bio->bio_length;
@@ -1031,8 +1079,15 @@ ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
beio->send_complete = 1;
mtx_unlock(&be_lun->io_lock);
- (*dev_data->csw->d_strategy)(bio);
+ if (csw) {
+ csw->d_strategy(bio);
+ } else {
+ bio->bio_error = ENXIO;
+ ctl_be_block_biodone(bio);
+ }
}
+ if (csw)
+ dev_relthread(dev, ref);
}
static void
@@ -1040,12 +1095,10 @@ ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
struct ctl_be_block_io *beio)
{
union ctl_io *io;
- struct ctl_be_block_devdata *dev_data;
struct ctl_ptr_len_flags *ptrlen;
struct scsi_unmap_desc *buf, *end;
uint64_t len;
- dev_data = &be_lun->backend.dev;
io = beio->io;
DPRINTF("entered\n");
@@ -1062,11 +1115,11 @@ ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
end = buf + ptrlen->len / sizeof(*buf);
for (; buf < end; buf++) {
len = (uint64_t)scsi_4btoul(buf->length) *
- be_lun->blocksize;
+ be_lun->cbe_lun.blocksize;
beio->io_len += len;
ctl_be_block_unmap_dev_range(be_lun, beio,
- scsi_8btou64(buf->lba) * be_lun->blocksize, len,
- (end - buf < 2) ? TRUE : FALSE);
+ scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
+ len, (end - buf < 2) ? TRUE : FALSE);
}
} else
ctl_be_block_unmap_dev_range(be_lun, beio,
@@ -1078,23 +1131,25 @@ ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
struct ctl_be_block_io *beio)
{
TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
- int i;
struct bio *bio;
- struct ctl_be_block_devdata *dev_data;
+ struct cdevsw *csw;
+ struct cdev *dev;
off_t cur_offset;
- int max_iosize;
+ int i, max_iosize, ref;
DPRINTF("entered\n");
-
- dev_data = &be_lun->backend.dev;
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
/*
* We have to limit our I/O size to the maximum supported by the
* backend device. Hopefully it is MAXPHYS. If the driver doesn't
* set it properly, use DFLTPHYS.
*/
- max_iosize = dev_data->cdev->si_iosize_max;
- if (max_iosize < PAGE_SIZE)
+ if (csw) {
+ max_iosize = dev->si_iosize_max;
+ if (max_iosize < PAGE_SIZE)
+ max_iosize = DFLTPHYS;
+ } else
max_iosize = DFLTPHYS;
cur_offset = beio->io_offset;
@@ -1112,13 +1167,13 @@ ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
bio->bio_cmd = beio->bio_cmd;
- bio->bio_dev = dev_data->cdev;
+ bio->bio_dev = dev;
bio->bio_caller1 = beio;
bio->bio_length = min(cur_size, max_iosize);
bio->bio_offset = cur_offset;
bio->bio_data = cur_ptr;
bio->bio_done = ctl_be_block_biodone;
- bio->bio_pblkno = cur_offset / be_lun->blocksize;
+ bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
cur_offset += bio->bio_length;
cur_ptr += bio->bio_length;
@@ -1139,23 +1194,36 @@ ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
*/
while ((bio = TAILQ_FIRST(&queue)) != NULL) {
TAILQ_REMOVE(&queue, bio, bio_queue);
- (*dev_data->csw->d_strategy)(bio);
+ if (csw)
+ csw->d_strategy(bio);
+ else {
+ bio->bio_error = ENXIO;
+ ctl_be_block_biodone(bio);
+ }
}
+ if (csw)
+ dev_relthread(dev, ref);
}
static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
- struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev;
struct diocgattr_arg arg;
- int error;
+ struct cdevsw *csw;
+ struct cdev *dev;
+ int error, ref;
- if (dev_data->csw == NULL || dev_data->csw->d_ioctl == NULL)
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ if (csw == NULL)
return (UINT64_MAX);
strlcpy(arg.name, attrname, sizeof(arg.name));
arg.len = sizeof(arg.value.off);
- error = dev_data->csw->d_ioctl(dev_data->cdev,
- DIOCGATTR, (caddr_t)&arg, FREAD, curthread);
+ if (csw->d_ioctl) {
+ error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
+ curthread);
+ } else
+ error = ENODEV;
+ dev_relthread(dev, ref);
if (error != 0)
return (UINT64_MAX);
return (arg.value.off);
@@ -1165,6 +1233,7 @@ static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
union ctl_io *io)
{
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
struct ctl_be_block_io *beio;
struct ctl_lba_len_flags *lbalen;
@@ -1172,8 +1241,8 @@ ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
- beio->io_len = lbalen->len * be_lun->blocksize;
- beio->io_offset = lbalen->lba * be_lun->blocksize;
+ beio->io_len = lbalen->len * cbe_lun->blocksize;
+ beio->io_offset = lbalen->lba * cbe_lun->blocksize;
beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
beio->bio_cmd = BIO_FLUSH;
beio->ds_trans_type = DEVSTAT_NO_DATA;
@@ -1202,6 +1271,7 @@ static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
union ctl_io *io)
{
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
struct ctl_be_block_io *beio;
struct ctl_lba_len_flags *lbalen;
uint64_t len_left, lba;
@@ -1228,8 +1298,8 @@ ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
}
if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
- beio->io_offset = lbalen->lba * be_lun->blocksize;
- beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize;
+ beio->io_offset = lbalen->lba * cbe_lun->blocksize;
+ beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
beio->bio_cmd = BIO_DELETE;
beio->ds_trans_type = DEVSTAT_FREE;
@@ -1243,27 +1313,27 @@ ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
DPRINTF("WRITE SAME at LBA %jx len %u\n",
(uintmax_t)lbalen->lba, lbalen->len);
- pb = be_lun->blocksize << be_lun->pblockexp;
- if (be_lun->pblockoff > 0)
- pbo = pb - be_lun->blocksize * be_lun->pblockoff;
+ pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
+ if (be_lun->cbe_lun.pblockoff > 0)
+ pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
else
pbo = 0;
- len_left = (uint64_t)lbalen->len * be_lun->blocksize;
+ len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
/*
* Setup the S/G entry for this chunk.
*/
seglen = MIN(CTLBLK_MAX_SEG, len_left);
- if (pb > be_lun->blocksize) {
- adj = ((lbalen->lba + lba) * be_lun->blocksize +
+ if (pb > cbe_lun->blocksize) {
+ adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
seglen - pbo) % pb;
if (seglen > adj)
seglen -= adj;
else
- seglen -= seglen % be_lun->blocksize;
+ seglen -= seglen % cbe_lun->blocksize;
} else
- seglen -= seglen % be_lun->blocksize;
+ seglen -= seglen % cbe_lun->blocksize;
beio->sg_segs[i].len = seglen;
beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
@@ -1275,16 +1345,21 @@ ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
buf = beio->sg_segs[i].addr;
end = buf + seglen;
- for (; buf < end; buf += be_lun->blocksize) {
- memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize);
+ for (; buf < end; buf += cbe_lun->blocksize) {
+ if (lbalen->flags & SWS_NDOB) {
+ memset(buf, 0, cbe_lun->blocksize);
+ } else {
+ memcpy(buf, io->scsiio.kern_data_ptr,
+ cbe_lun->blocksize);
+ }
if (lbalen->flags & SWS_LBDATA)
scsi_ulto4b(lbalen->lba + lba, buf);
lba++;
}
}
- beio->io_offset = lbalen->lba * be_lun->blocksize;
- beio->io_len = lba * be_lun->blocksize;
+ beio->io_offset = lbalen->lba * cbe_lun->blocksize;
+ beio->io_len = lba * cbe_lun->blocksize;
 /* We cannot do it all in one run. Correct and schedule a rerun. */
if (len_left > 0) {
@@ -1454,14 +1529,8 @@ ctl_be_block_next(struct ctl_be_block_io *beio)
io->io_hdr.status |= CTL_STATUS_NONE;
mtx_lock(&be_lun->queue_lock);
- /*
- * XXX KDM make sure that links is okay to use at this point.
- * Otherwise, we either need to add another field to ctl_io_hdr,
- * or deal with resource allocation here.
- */
STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
-
taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}
@@ -1469,6 +1538,7 @@ static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
union ctl_io *io)
{
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
struct ctl_be_block_io *beio;
struct ctl_be_block_softc *softc;
struct ctl_lba_len_flags *lbalen;
@@ -1523,9 +1593,9 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
lbas = CTLBLK_HALF_IO_SIZE;
else
lbas = CTLBLK_MAX_IO_SIZE;
- lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize);
- beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize;
- beio->io_len = lbas * be_lun->blocksize;
+ lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
+ beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
+ beio->io_len = lbas * cbe_lun->blocksize;
bptrlen->len += lbas;
for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
@@ -1563,7 +1633,7 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
io->scsiio.kern_data_len = beio->io_len;
io->scsiio.kern_data_resid = 0;
io->scsiio.kern_sg_entries = beio->num_segs;
- io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;
+ io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
/*
* For the read case, we need to read the data into our buffers and
@@ -1576,8 +1646,8 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
} else {
SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
- getbintime(&io->io_hdr.dma_start_bt);
-#endif
+ getbinuptime(&io->io_hdr.dma_start_bt);
+#endif
ctl_datamove(io);
}
}
@@ -1585,33 +1655,32 @@ ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
static void
ctl_be_block_worker(void *context, int pending)
{
- struct ctl_be_block_lun *be_lun;
- struct ctl_be_block_softc *softc;
+ struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
union ctl_io *io;
-
- be_lun = (struct ctl_be_block_lun *)context;
- softc = be_lun->softc;
+ struct ctl_be_block_io *beio;
DPRINTF("entered\n");
-
- mtx_lock(&be_lun->queue_lock);
+ /*
+ * Fetch and process I/Os from all queues. If we detect the
+ * CTL_LUN_FLAG_NO_MEDIA flag here, it is the result of a race,
+ * so make the response maximally opaque so as not to confuse
+ * the initiator.
+ */
for (;;) {
+ mtx_lock(&be_lun->queue_lock);
io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
if (io != NULL) {
- struct ctl_be_block_io *beio;
-
DPRINTF("datamove queue\n");
-
STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
ctl_io_hdr, links);
-
mtx_unlock(&be_lun->queue_lock);
-
beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
-
+ if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
+ ctl_set_busy(&io->scsiio);
+ ctl_complete_beio(beio);
+ return;
+ }
be_lun->dispatch(be_lun, beio);
-
- mtx_lock(&be_lun->queue_lock);
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
@@ -1620,8 +1689,12 @@ ctl_be_block_worker(void *context, int pending)
STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
ctl_io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
+ if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
+ ctl_set_busy(&io->scsiio);
+ ctl_config_write_done(io);
+ return;
+ }
ctl_be_block_cw_dispatch(be_lun, io);
- mtx_lock(&be_lun->queue_lock);
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
@@ -1630,25 +1703,26 @@ ctl_be_block_worker(void *context, int pending)
STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
ctl_io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
+ if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
+ ctl_set_busy(&io->scsiio);
+ ctl_config_read_done(io);
+ return;
+ }
ctl_be_block_cr_dispatch(be_lun, io);
- mtx_lock(&be_lun->queue_lock);
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
if (io != NULL) {
DPRINTF("input queue\n");
-
STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
ctl_io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
-
- /*
- * We must drop the lock, since this routine and
- * its children may sleep.
- */
+ if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
+ ctl_set_busy(&io->scsiio);
+ ctl_data_submit_done(io);
+ return;
+ }
ctl_be_block_dispatch(be_lun, io);
-
- mtx_lock(&be_lun->queue_lock);
continue;
}
@@ -1656,9 +1730,9 @@ ctl_be_block_worker(void *context, int pending)
* If we get here, there is no work left in the queues, so
* just break out and let the task queue go to sleep.
*/
+ mtx_unlock(&be_lun->queue_lock);
break;
}
- mtx_unlock(&be_lun->queue_lock);
}
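Condensed, each queue in the reworked loop follows the same pattern; in this sketch, queue, done and dispatch stand in for the four concrete queues and their completion/dispatch routines:

	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&queue);
		if (io == NULL) {
			mtx_unlock(&be_lun->queue_lock);
			break;				/* queues drained */
		}
		STAILQ_REMOVE(&queue, &io->io_hdr, ctl_io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);	/* dispatch may sleep */
		if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
			ctl_set_busy(&io->scsiio);	/* lost a race with eject */
			done(io);
			return;
		}
		dispatch(be_lun, io);
	}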
/*
@@ -1670,13 +1744,13 @@ static int
ctl_be_block_submit(union ctl_io *io)
{
struct ctl_be_block_lun *be_lun;
- struct ctl_be_lun *ctl_be_lun;
+ struct ctl_be_lun *cbe_lun;
DPRINTF("entered\n");
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
CTL_PRIV_BACKEND_LUN].ptr;
- be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
+ be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
/*
* Make sure we only get SCSI I/O.
@@ -1687,11 +1761,6 @@ ctl_be_block_submit(union ctl_io *io)
PRIV(io)->len = 0;
mtx_lock(&be_lun->queue_lock);
- /*
- * XXX KDM make sure that links is okay to use at this point.
- * Otherwise, we either need to add another field to ctl_io_hdr,
- * or deal with resource allocation here.
- */
STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
mtx_unlock(&be_lun->queue_lock);
taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
@@ -1746,6 +1815,7 @@ ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
+ struct ctl_be_lun *cbe_lun;
struct ctl_be_block_filedata *file_data;
struct ctl_lun_create_params *params;
char *value;
@@ -1753,7 +1823,7 @@ ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
off_t ps, pss, po, pos, us, uss, uo, uos;
int error;
- error = 0;
+ cbe_lun = &be_lun->cbe_lun;
file_data = &be_lun->backend.file;
params = &be_lun->params;
@@ -1762,6 +1832,8 @@ ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
be_lun->lun_flush = ctl_be_block_flush_file;
be_lun->get_lba_status = ctl_be_block_gls_file;
be_lun->getattr = ctl_be_block_getattr_file;
+ be_lun->unmap = NULL;
+ cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
if (error != 0) {
@@ -1786,19 +1858,11 @@ ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
}
}
-
file_data->cred = crhold(curthread->td_ucred);
if (params->lun_size_bytes != 0)
be_lun->size_bytes = params->lun_size_bytes;
else
be_lun->size_bytes = vattr.va_size;
- /*
- * We set the multi thread flag for file operations because all
- * filesystems (in theory) are capable of allowing multiple readers
- * of a file at once. So we want to get the maximum possible
- * concurrency.
- */
- be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;
/*
* For files we can use any logical block size. Prefer 512 bytes
@@ -1807,83 +1871,87 @@ ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
* logical block size -- report it as physical block size.
*/
if (params->blocksize_bytes != 0)
- be_lun->blocksize = params->blocksize_bytes;
+ cbe_lun->blocksize = params->blocksize_bytes;
+ else if (cbe_lun->lun_type == T_CDROM)
+ cbe_lun->blocksize = 2048;
else
- be_lun->blocksize = 512;
+ cbe_lun->blocksize = 512;
+ be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
+ cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
+ 0 : (be_lun->size_blocks - 1);
us = ps = vattr.va_blocksize;
uo = po = 0;
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblocksize");
+ value = ctl_get_opt(&cbe_lun->options, "pblocksize");
if (value != NULL)
ctl_expand_number(value, &ps);
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblockoffset");
+ value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
if (value != NULL)
ctl_expand_number(value, &po);
- pss = ps / be_lun->blocksize;
- pos = po / be_lun->blocksize;
- if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
- ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
- be_lun->pblockexp = fls(pss) - 1;
- be_lun->pblockoff = (pss - pos) % pss;
+ pss = ps / cbe_lun->blocksize;
+ pos = po / cbe_lun->blocksize;
+ if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
+ ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
+ cbe_lun->pblockexp = fls(pss) - 1;
+ cbe_lun->pblockoff = (pss - pos) % pss;
}
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublocksize");
+ value = ctl_get_opt(&cbe_lun->options, "ublocksize");
if (value != NULL)
ctl_expand_number(value, &us);
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublockoffset");
+ value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
if (value != NULL)
ctl_expand_number(value, &uo);
- uss = us / be_lun->blocksize;
- uos = uo / be_lun->blocksize;
- if ((uss > 0) && (uss * be_lun->blocksize == us) && (uss >= uos) &&
- ((uss & (uss - 1)) == 0) && (uos * be_lun->blocksize == uo)) {
- be_lun->ublockexp = fls(uss) - 1;
- be_lun->ublockoff = (uss - uos) % uss;
+ uss = us / cbe_lun->blocksize;
+ uos = uo / cbe_lun->blocksize;
+ if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
+ ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
+ cbe_lun->ublockexp = fls(uss) - 1;
+ cbe_lun->ublockoff = (uss - uos) % uss;
}
/*
* Sanity check. The media size has to be at least one
* sector long.
*/
- if (be_lun->size_bytes < be_lun->blocksize) {
+ if (be_lun->size_bytes < cbe_lun->blocksize) {
error = EINVAL;
snprintf(req->error_str, sizeof(req->error_str),
"file %s size %ju < block size %u", be_lun->dev_path,
- (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
+ (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
}
- be_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / be_lun->blocksize;
+ cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
return (error);
}
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
struct ctl_lun_create_params *params;
- struct vattr vattr;
+ struct cdevsw *csw;
struct cdev *dev;
- struct cdevsw *devsw;
char *value;
- int error, atomic, maxio, unmap;
- off_t ps, pss, po, pos, us, uss, uo, uos;
+ int error, atomic, maxio, ref, unmap, tmp;
+ off_t ps, pss, po, pos, us, uss, uo, uos, otmp;
params = &be_lun->params;
be_lun->dev_type = CTL_BE_BLOCK_DEV;
- be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
- be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
- &be_lun->backend.dev.dev_ref);
- if (be_lun->backend.dev.csw == NULL)
- panic("Unable to retrieve device switch");
- if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0) {
+ csw = devvn_refthread(be_lun->vn, &dev, &ref);
+ if (csw == NULL)
+ return (ENXIO);
+ if (strcmp(csw->d_name, "zvol") == 0) {
be_lun->dispatch = ctl_be_block_dispatch_zvol;
be_lun->get_lba_status = ctl_be_block_gls_zvol;
atomic = maxio = CTLBLK_MAX_IO_SIZE;
} else {
be_lun->dispatch = ctl_be_block_dispatch_dev;
+ be_lun->get_lba_status = NULL;
atomic = 0;
- maxio = be_lun->backend.dev.cdev->si_iosize_max;
+ maxio = dev->si_iosize_max;
if (maxio <= 0)
maxio = DFLTPHYS;
if (maxio > CTLBLK_MAX_IO_SIZE)
@@ -1891,28 +1959,19 @@ ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
}
be_lun->lun_flush = ctl_be_block_flush_dev;
be_lun->getattr = ctl_be_block_getattr_dev;
+ be_lun->unmap = ctl_be_block_unmap_dev;
- error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
- if (error) {
- snprintf(req->error_str, sizeof(req->error_str),
- "error getting vnode attributes for device %s",
- be_lun->dev_path);
- return (error);
- }
-
- dev = be_lun->vn->v_rdev;
- devsw = dev->si_devsw;
- if (!devsw->d_ioctl) {
+ if (!csw->d_ioctl) {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
- "no d_ioctl for device %s!",
- be_lun->dev_path);
+ "no d_ioctl for device %s!", be_lun->dev_path);
return (ENODEV);
}
- error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
- (caddr_t)&be_lun->blocksize, FREAD,
+ error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
curthread);
if (error) {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
"error %d returned for DIOCGSECTORSIZE ioctl "
"on %s!", error, be_lun->dev_path);
@@ -1925,38 +1984,33 @@ ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
* the user is asking for is an even multiple of the underlying
* device's blocksize.
*/
- if ((params->blocksize_bytes != 0)
- && (params->blocksize_bytes > be_lun->blocksize)) {
- uint32_t bs_multiple, tmp_blocksize;
-
- bs_multiple = params->blocksize_bytes / be_lun->blocksize;
-
- tmp_blocksize = bs_multiple * be_lun->blocksize;
-
- if (tmp_blocksize == params->blocksize_bytes) {
- be_lun->blocksize = params->blocksize_bytes;
+ if ((params->blocksize_bytes != 0) &&
+ (params->blocksize_bytes >= tmp)) {
+ if (params->blocksize_bytes % tmp == 0) {
+ cbe_lun->blocksize = params->blocksize_bytes;
} else {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
"requested blocksize %u is not an even "
"multiple of backing device blocksize %u",
- params->blocksize_bytes,
- be_lun->blocksize);
+ params->blocksize_bytes, tmp);
return (EINVAL);
-
}
- } else if ((params->blocksize_bytes != 0)
- && (params->blocksize_bytes != be_lun->blocksize)) {
+ } else if (params->blocksize_bytes != 0) {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
"requested blocksize %u < backing device "
- "blocksize %u", params->blocksize_bytes,
- be_lun->blocksize);
+ "blocksize %u", params->blocksize_bytes, tmp);
return (EINVAL);
- }
+ } else if (cbe_lun->lun_type == T_CDROM)
+ cbe_lun->blocksize = MAX(tmp, 2048);
+ else
+ cbe_lun->blocksize = tmp;
- error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
- (caddr_t)&be_lun->size_bytes, FREAD,
- curthread);
+ error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
+ curthread);
if (error) {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
"error %d returned for DIOCGMEDIASIZE "
" ioctl on %s!", error,
@@ -1965,61 +2019,66 @@ ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
}
if (params->lun_size_bytes != 0) {
- if (params->lun_size_bytes > be_lun->size_bytes) {
+ if (params->lun_size_bytes > otmp) {
+ dev_relthread(dev, ref);
snprintf(req->error_str, sizeof(req->error_str),
"requested LUN size %ju > backing device "
"size %ju",
(uintmax_t)params->lun_size_bytes,
- (uintmax_t)be_lun->size_bytes);
+ (uintmax_t)otmp);
return (EINVAL);
}
be_lun->size_bytes = params->lun_size_bytes;
- }
+ } else
+ be_lun->size_bytes = otmp;
+ be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
+ cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
+ 0 : (be_lun->size_blocks - 1);
- error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
- (caddr_t)&ps, FREAD, curthread);
+ error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
+ curthread);
if (error)
ps = po = 0;
else {
- error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
- (caddr_t)&po, FREAD, curthread);
+ error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
+ FREAD, curthread);
if (error)
po = 0;
}
us = ps;
uo = po;
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblocksize");
+ value = ctl_get_opt(&cbe_lun->options, "pblocksize");
if (value != NULL)
ctl_expand_number(value, &ps);
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblockoffset");
+ value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
if (value != NULL)
ctl_expand_number(value, &po);
- pss = ps / be_lun->blocksize;
- pos = po / be_lun->blocksize;
- if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
- ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
- be_lun->pblockexp = fls(pss) - 1;
- be_lun->pblockoff = (pss - pos) % pss;
+ pss = ps / cbe_lun->blocksize;
+ pos = po / cbe_lun->blocksize;
+ if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
+ ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
+ cbe_lun->pblockexp = fls(pss) - 1;
+ cbe_lun->pblockoff = (pss - pos) % pss;
}
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublocksize");
+ value = ctl_get_opt(&cbe_lun->options, "ublocksize");
if (value != NULL)
ctl_expand_number(value, &us);
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublockoffset");
+ value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
if (value != NULL)
ctl_expand_number(value, &uo);
- uss = us / be_lun->blocksize;
- uos = uo / be_lun->blocksize;
- if ((uss > 0) && (uss * be_lun->blocksize == us) && (uss >= uos) &&
- ((uss & (uss - 1)) == 0) && (uos * be_lun->blocksize == uo)) {
- be_lun->ublockexp = fls(uss) - 1;
- be_lun->ublockoff = (uss - uos) % uss;
+ uss = us / cbe_lun->blocksize;
+ uos = uo / cbe_lun->blocksize;
+ if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
+ ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
+ cbe_lun->ublockexp = fls(uss) - 1;
+ cbe_lun->ublockoff = (uss - uos) % uss;
}
- be_lun->atomicblock = atomic / be_lun->blocksize;
- be_lun->opttxferlen = maxio / be_lun->blocksize;
+ cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
+ cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
unmap = 1;
@@ -2028,44 +2087,32 @@ ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
arg.len = sizeof(arg.value.i);
- error = devsw->d_ioctl(dev, DIOCGATTR,
- (caddr_t)&arg, FREAD, curthread);
+ error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
+ curthread);
unmap = (error == 0) ? arg.value.i : 0;
}
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap");
+ value = ctl_get_opt(&cbe_lun->options, "unmap");
if (value != NULL)
unmap = (strcmp(value, "on") == 0);
if (unmap)
- be_lun->unmap = ctl_be_block_unmap_dev;
+ cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
+ else
+ cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
+ dev_relthread(dev, ref);
return (0);
}
static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
- DROP_GIANT();
- if (be_lun->vn) {
- int flags = FREAD | FWRITE;
-
- switch (be_lun->dev_type) {
- case CTL_BE_BLOCK_DEV:
- if (be_lun->backend.dev.csw) {
- dev_relthread(be_lun->backend.dev.cdev,
- be_lun->backend.dev.dev_ref);
- be_lun->backend.dev.csw = NULL;
- be_lun->backend.dev.cdev = NULL;
- }
- break;
- case CTL_BE_BLOCK_FILE:
- break;
- case CTL_BE_BLOCK_NONE:
- break;
- default:
- panic("Unexpected backend type.");
- break;
- }
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
+ int flags;
+ if (be_lun->vn) {
+ flags = FREAD;
+ if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
+ flags |= FWRITE;
(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
be_lun->vn = NULL;
@@ -2086,31 +2133,23 @@ ctl_be_block_close(struct ctl_be_block_lun *be_lun)
}
be_lun->dev_type = CTL_BE_BLOCK_NONE;
}
- PICKUP_GIANT();
-
return (0);
}
static int
-ctl_be_block_open(struct ctl_be_block_softc *softc,
- struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
+ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
+ struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
struct nameidata nd;
- int flags;
- int error;
+ char *value;
+ int error, flags;
- /*
- * XXX KDM allow a read-only option?
- */
- flags = FREAD | FWRITE;
error = 0;
-
if (rootvnode == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"Root filesystem is not mounted");
return (1);
}
-
if (!curthread->td_proc->p_fd->fd_cdir) {
curthread->td_proc->p_fd->fd_cdir = rootvnode;
VREF(rootvnode);
@@ -2124,9 +2163,30 @@ ctl_be_block_open(struct ctl_be_block_softc *softc,
VREF(rootvnode);
}
- again:
+ value = ctl_get_opt(&cbe_lun->options, "file");
+ if (value == NULL) {
+ snprintf(req->error_str, sizeof(req->error_str),
+ "no file argument specified");
+ return (1);
+ }
+ free(be_lun->dev_path, M_CTLBLK);
+ be_lun->dev_path = strdup(value, M_CTLBLK);
+
+ flags = FREAD;
+ value = ctl_get_opt(&cbe_lun->options, "readonly");
+ if (value != NULL) {
+ if (strcmp(value, "on") != 0)
+ flags |= FWRITE;
+ } else if (cbe_lun->lun_type == T_DIRECT)
+ flags |= FWRITE;
+
+again:
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
error = vn_open(&nd, &flags, 0, NULL);
+ if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
+ flags &= ~FWRITE;
+ goto again;
+ }
if (error) {
/*
* This is the only reasonable guess we can make as far as
@@ -2135,28 +2195,24 @@ ctl_be_block_open(struct ctl_be_block_softc *softc,
* full path.
*/
if (be_lun->dev_path[0] != '/') {
- char *dev_path = "/dev/";
char *dev_name;
- /* Try adding device path at beginning of name */
- dev_name = malloc(strlen(be_lun->dev_path)
- + strlen(dev_path) + 1,
- M_CTLBLK, M_WAITOK);
- if (dev_name) {
- sprintf(dev_name, "%s%s", dev_path,
- be_lun->dev_path);
- free(be_lun->dev_path, M_CTLBLK);
- be_lun->dev_path = dev_name;
- goto again;
- }
+ asprintf(&dev_name, M_CTLBLK, "/dev/%s",
+ be_lun->dev_path);
+ free(be_lun->dev_path, M_CTLBLK);
+ be_lun->dev_path = dev_name;
+ goto again;
}
snprintf(req->error_str, sizeof(req->error_str),
"error opening %s: %d", be_lun->dev_path, error);
return (error);
}
+ if (flags & FWRITE)
+ cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
+ else
+ cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
NDFREE(&nd, NDF_ONLY_PNBUF);
-
be_lun->vn = nd.ni_vp;
/* We only support disks and files. */
@@ -2171,20 +2227,25 @@ ctl_be_block_open(struct ctl_be_block_softc *softc,
}
VOP_UNLOCK(be_lun->vn, 0);
- if (error != 0) {
+ if (error != 0)
ctl_be_block_close(be_lun);
- return (error);
- }
-
- be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
- be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
-
+ cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
+ if (be_lun->dispatch != ctl_be_block_dispatch_dev)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
+ value = ctl_get_opt(&cbe_lun->options, "serseq");
+ if (value != NULL && strcmp(value, "on") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
+ else if (value != NULL && strcmp(value, "read") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
+ else if (value != NULL && strcmp(value, "off") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
return (0);
}
static int
ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
+ struct ctl_be_lun *cbe_lun;
struct ctl_be_block_lun *be_lun;
struct ctl_lun_create_params *params;
char num_thread_str[16];
@@ -2197,10 +2258,9 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
retval = 0;
req->status = CTL_LUN_OK;
- num_threads = cbb_num_threads;
-
be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
-
+ cbe_lun = &be_lun->cbe_lun;
+ cbe_lun->be_lun = be_lun;
be_lun->params = req->reqdata.create;
be_lun->softc = softc;
STAILQ_INIT(&be_lun->input_queue);
@@ -2210,12 +2270,10 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
- ctl_init_opts(&be_lun->ctl_be_lun.options,
+ ctl_init_opts(&cbe_lun->options,
req->num_be_args, req->kern_be_args);
-
be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
-
if (be_lun->lun_zone == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"error allocating UMA zone");
@@ -2223,50 +2281,45 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
}
if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
- be_lun->ctl_be_lun.lun_type = params->device_type;
+ cbe_lun->lun_type = params->device_type;
else
- be_lun->ctl_be_lun.lun_type = T_DIRECT;
+ cbe_lun->lun_type = T_DIRECT;
+ be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
+ cbe_lun->flags = 0;
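+	/* "ha_role" picks the HA role; otherwise follow the active shelf flag. */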
+ value = ctl_get_opt(&cbe_lun->options, "ha_role");
+ if (value != NULL) {
+ if (strcmp(value, "primary") == 0)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+ } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
- if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file");
- if (value == NULL) {
- snprintf(req->error_str, sizeof(req->error_str),
- "no file argument specified");
- goto bailout_error;
- }
- be_lun->dev_path = strdup(value, M_CTLBLK);
- be_lun->blocksize = 512;
- be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
-
- retval = ctl_be_block_open(softc, be_lun, req);
- if (retval != 0) {
- retval = 0;
- req->status = CTL_LUN_WARNING;
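+	/* DIRECT and CDROM LUNs carry real geometry: 2048-byte blocks for CD-ROM, 512 by default. */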
+ if (cbe_lun->lun_type == T_DIRECT ||
+ cbe_lun->lun_type == T_CDROM) {
+ be_lun->size_bytes = params->lun_size_bytes;
+ if (params->blocksize_bytes != 0)
+ cbe_lun->blocksize = params->blocksize_bytes;
+ else if (cbe_lun->lun_type == T_CDROM)
+ cbe_lun->blocksize = 2048;
+ else
+ cbe_lun->blocksize = 512;
+ be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
+ cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
+ 0 : (be_lun->size_blocks - 1);
+
+ if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
+ control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
+ retval = ctl_be_block_open(be_lun, req);
+ if (retval != 0) {
+ retval = 0;
+ req->status = CTL_LUN_WARNING;
+ }
}
+ num_threads = cbb_num_threads;
} else {
- /*
- * For processor devices, we don't have any size.
- */
- be_lun->blocksize = 0;
- be_lun->pblockexp = 0;
- be_lun->pblockoff = 0;
- be_lun->ublockexp = 0;
- be_lun->ublockoff = 0;
- be_lun->size_blocks = 0;
- be_lun->size_bytes = 0;
- be_lun->ctl_be_lun.maxlba = 0;
-
- /*
- * Default to just 1 thread for processor devices.
- */
num_threads = 1;
}
- /*
- * XXX This searching loop might be refactored to be combined with
- * the loop above,
- */
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads");
+ value = ctl_get_opt(&cbe_lun->options, "num_threads");
if (value != NULL) {
tmp_num_threads = strtol(value, NULL, 0);
@@ -2284,67 +2337,46 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
num_threads = tmp_num_threads;
}
- be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
- be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
if (be_lun->vn == NULL)
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_OFFLINE;
- if (be_lun->unmap != NULL)
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
- if (be_lun->dispatch != ctl_be_block_dispatch_dev)
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_SERSEQ_READ;
- be_lun->ctl_be_lun.be_lun = be_lun;
- be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ?
- 0 : (be_lun->size_blocks - 1);
- be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
- be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
- be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
- be_lun->ctl_be_lun.ublockexp = be_lun->ublockexp;
- be_lun->ctl_be_lun.ublockoff = be_lun->ublockoff;
- be_lun->ctl_be_lun.atomicblock = be_lun->atomicblock;
- be_lun->ctl_be_lun.opttxferlen = be_lun->opttxferlen;
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
/* Tell the user the blocksize we ended up using */
params->lun_size_bytes = be_lun->size_bytes;
- params->blocksize_bytes = be_lun->blocksize;
+ params->blocksize_bytes = cbe_lun->blocksize;
if (params->flags & CTL_LUN_FLAG_ID_REQ) {
- be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
+ cbe_lun->req_lun_id = params->req_lun_id;
+ cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
} else
- be_lun->ctl_be_lun.req_lun_id = 0;
+ cbe_lun->req_lun_id = 0;
- be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
- be_lun->ctl_be_lun.lun_config_status =
- ctl_be_block_lun_config_status;
- be_lun->ctl_be_lun.be = &ctl_be_block_driver;
+ cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
+ cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
+ cbe_lun->be = &ctl_be_block_driver;
if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
softc->num_luns);
- strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
- MIN(sizeof(be_lun->ctl_be_lun.serial_num),
- sizeof(tmpstr)));
+ strncpy((char *)cbe_lun->serial_num, tmpstr,
+ MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
/* Tell the user what we used for a serial number */
strncpy((char *)params->serial_num, tmpstr,
MIN(sizeof(params->serial_num), sizeof(tmpstr)));
} else {
- strncpy((char *)be_lun->ctl_be_lun.serial_num,
- params->serial_num,
- MIN(sizeof(be_lun->ctl_be_lun.serial_num),
+ strncpy((char *)cbe_lun->serial_num, params->serial_num,
+ MIN(sizeof(cbe_lun->serial_num),
sizeof(params->serial_num)));
}
if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
- strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
- MIN(sizeof(be_lun->ctl_be_lun.device_id),
- sizeof(tmpstr)));
+ strncpy((char *)cbe_lun->device_id, tmpstr,
+ MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
/* Tell the user what we used for a device ID */
strncpy((char *)params->device_id, tmpstr,
MIN(sizeof(params->device_id), sizeof(tmpstr)));
} else {
- strncpy((char *)be_lun->ctl_be_lun.device_id,
- params->device_id,
- MIN(sizeof(be_lun->ctl_be_lun.device_id),
+ strncpy((char *)cbe_lun->device_id, params->device_id,
+ MIN(sizeof(cbe_lun->device_id),
sizeof(params->device_id)));
}
@@ -2390,7 +2422,7 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
mtx_unlock(&softc->lock);
- retval = ctl_add_lun(&be_lun->ctl_be_lun);
+ retval = ctl_add_lun(&be_lun->cbe_lun);
if (retval != 0) {
mtx_lock(&softc->lock);
STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
@@ -2428,15 +2460,15 @@ ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
mtx_unlock(&softc->lock);
goto bailout_error;
} else {
- params->req_lun_id = be_lun->ctl_be_lun.lun_id;
+ params->req_lun_id = cbe_lun->lun_id;
}
mtx_unlock(&softc->lock);
be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
- be_lun->blocksize,
+ cbe_lun->blocksize,
DEVSTAT_ALL_SUPPORTED,
- be_lun->ctl_be_lun.lun_type
+ cbe_lun->lun_type
| DEVSTAT_TYPE_IF_OTHER,
DEVSTAT_PRIORITY_OTHER);
@@ -2452,7 +2484,7 @@ bailout_error:
free(be_lun->dev_path, M_CTLBLK);
if (be_lun->lun_zone != NULL)
uma_zdestroy(be_lun->lun_zone);
- ctl_free_opts(&be_lun->ctl_be_lun.options);
+ ctl_free_opts(&cbe_lun->options);
mtx_destroy(&be_lun->queue_lock);
mtx_destroy(&be_lun->io_lock);
free(be_lun, M_CTLBLK);
@@ -2465,38 +2497,41 @@ ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
struct ctl_lun_rm_params *params;
struct ctl_be_block_lun *be_lun;
+ struct ctl_be_lun *cbe_lun;
int retval;
params = &req->reqdata.rm;
mtx_lock(&softc->lock);
-
- be_lun = NULL;
-
STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
- if (be_lun->ctl_be_lun.lun_id == params->lun_id)
+ if (be_lun->cbe_lun.lun_id == params->lun_id)
break;
}
mtx_unlock(&softc->lock);
-
if (be_lun == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"LUN %u is not managed by the block backend",
params->lun_id);
goto bailout_error;
}
+ cbe_lun = &be_lun->cbe_lun;
- retval = ctl_disable_lun(&be_lun->ctl_be_lun);
-
+ retval = ctl_disable_lun(cbe_lun);
if (retval != 0) {
snprintf(req->error_str, sizeof(req->error_str),
"error %d returned from ctl_disable_lun() for "
"LUN %d", retval, params->lun_id);
goto bailout_error;
+ }
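+	/* Take the media offline and drain queued I/O before closing the backing store. */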
+ if (be_lun->vn != NULL) {
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_no_media(cbe_lun);
+ taskqueue_drain_all(be_lun->io_taskqueue);
+ ctl_be_block_close(be_lun);
}
- retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
+ retval = ctl_invalidate_lun(cbe_lun);
if (retval != 0) {
snprintf(req->error_str, sizeof(req->error_str),
"error %d returned from ctl_invalidate_lun() for "
@@ -2505,15 +2540,12 @@ ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
}
mtx_lock(&softc->lock);
-
be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
-
while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
if (retval == EINTR)
break;
}
-
be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
@@ -2528,102 +2560,25 @@ ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
softc->num_luns--;
mtx_unlock(&softc->lock);
- taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
-
+ taskqueue_drain_all(be_lun->io_taskqueue);
taskqueue_free(be_lun->io_taskqueue);
- ctl_be_block_close(be_lun);
-
if (be_lun->disk_stats != NULL)
devstat_remove_entry(be_lun->disk_stats);
uma_zdestroy(be_lun->lun_zone);
- ctl_free_opts(&be_lun->ctl_be_lun.options);
+ ctl_free_opts(&cbe_lun->options);
free(be_lun->dev_path, M_CTLBLK);
mtx_destroy(&be_lun->queue_lock);
mtx_destroy(&be_lun->io_lock);
free(be_lun, M_CTLBLK);
req->status = CTL_LUN_OK;
-
return (0);
bailout_error:
-
req->status = CTL_LUN_ERROR;
-
- return (0);
-}
-
-static int
-ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
- struct ctl_lun_req *req)
-{
- struct vattr vattr;
- int error;
- struct ctl_lun_create_params *params = &be_lun->params;
-
- if (params->lun_size_bytes != 0) {
- be_lun->size_bytes = params->lun_size_bytes;
- } else {
- vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
- error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
- VOP_UNLOCK(be_lun->vn, 0);
- if (error != 0) {
- snprintf(req->error_str, sizeof(req->error_str),
- "error calling VOP_GETATTR() for file %s",
- be_lun->dev_path);
- return (error);
- }
-
- be_lun->size_bytes = vattr.va_size;
- }
-
- return (0);
-}
-
-static int
-ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
- struct ctl_lun_req *req)
-{
- struct ctl_be_block_devdata *dev_data;
- int error;
- struct ctl_lun_create_params *params = &be_lun->params;
- uint64_t size_bytes;
-
- dev_data = &be_lun->backend.dev;
- if (!dev_data->csw->d_ioctl) {
- snprintf(req->error_str, sizeof(req->error_str),
- "no d_ioctl for device %s!", be_lun->dev_path);
- return (ENODEV);
- }
-
- error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGMEDIASIZE,
- (caddr_t)&size_bytes, FREAD,
- curthread);
- if (error) {
- snprintf(req->error_str, sizeof(req->error_str),
- "error %d returned for DIOCGMEDIASIZE ioctl "
- "on %s!", error, be_lun->dev_path);
- return (error);
- }
-
- if (params->lun_size_bytes != 0) {
- if (params->lun_size_bytes > size_bytes) {
- snprintf(req->error_str, sizeof(req->error_str),
- "requested LUN size %ju > backing device "
- "size %ju",
- (uintmax_t)params->lun_size_bytes,
- (uintmax_t)size_bytes);
- return (EINVAL);
- }
-
- be_lun->size_bytes = params->lun_size_bytes;
- } else {
- be_lun->size_bytes = size_bytes;
- }
-
return (0);
}
@@ -2632,72 +2587,90 @@ ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
struct ctl_lun_modify_params *params;
struct ctl_be_block_lun *be_lun;
+ struct ctl_be_lun *cbe_lun;
+ char *value;
uint64_t oldsize;
- int error;
+ int error, wasprim;
params = &req->reqdata.modify;
mtx_lock(&softc->lock);
- be_lun = NULL;
STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
- if (be_lun->ctl_be_lun.lun_id == params->lun_id)
+ if (be_lun->cbe_lun.lun_id == params->lun_id)
break;
}
mtx_unlock(&softc->lock);
-
if (be_lun == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"LUN %u is not managed by the block backend",
params->lun_id);
goto bailout_error;
}
+ cbe_lun = &be_lun->cbe_lun;
- be_lun->params.lun_size_bytes = params->lun_size_bytes;
+ if (params->lun_size_bytes != 0)
+ be_lun->params.lun_size_bytes = params->lun_size_bytes;
+ ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
- oldsize = be_lun->size_bytes;
- if (be_lun->vn == NULL)
- error = ctl_be_block_open(softc, be_lun, req);
- else if (be_lun->vn->v_type == VREG)
- error = ctl_be_block_modify_file(be_lun, req);
+ wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
+ value = ctl_get_opt(&cbe_lun->options, "ha_role");
+ if (value != NULL) {
+ if (strcmp(value, "primary") == 0)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+ else
+ cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
+ } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
else
- error = ctl_be_block_modify_dev(be_lun, req);
-
- if (error == 0 && be_lun->size_bytes != oldsize) {
- be_lun->size_blocks = be_lun->size_bytes >>
- be_lun->blocksize_shift;
-
- /*
- * The maximum LBA is the size - 1.
- *
- * XXX: Note that this field is being updated without locking,
- * which might cause problems on 32-bit architectures.
- */
- if (be_lun->unmap != NULL)
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
- be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ?
- 0 : (be_lun->size_blocks - 1);
- be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
- be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
- be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
- be_lun->ctl_be_lun.ublockexp = be_lun->ublockexp;
- be_lun->ctl_be_lun.ublockoff = be_lun->ublockoff;
- be_lun->ctl_be_lun.atomicblock = be_lun->atomicblock;
- be_lun->ctl_be_lun.opttxferlen = be_lun->opttxferlen;
- ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
- if (oldsize == 0 && be_lun->size_blocks != 0)
- ctl_lun_online(&be_lun->ctl_be_lun);
+ cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
+ if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
+ if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
+ ctl_lun_primary(cbe_lun);
+ else
+ ctl_lun_secondary(cbe_lun);
+ }
+
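+	/* Only the primary node (or any node in SER_ONLY HA mode) keeps the backing store open. */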
+ oldsize = be_lun->size_blocks;
+ if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
+ control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
+ if (be_lun->vn == NULL)
+ error = ctl_be_block_open(be_lun, req);
+ else if (vn_isdisk(be_lun->vn, &error))
+ error = ctl_be_block_open_dev(be_lun, req);
+ else if (be_lun->vn->v_type == VREG)
+ error = ctl_be_block_open_file(be_lun, req);
+ else
+ error = EINVAL;
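+		/* Sync the NO_MEDIA flag with whether a backing vnode is present. */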
+ if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
+ be_lun->vn != NULL) {
+ cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_has_media(cbe_lun);
+ } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
+ be_lun->vn == NULL) {
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_no_media(cbe_lun);
+ }
+ cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
+ } else {
+ if (be_lun->vn != NULL) {
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_no_media(cbe_lun);
+ taskqueue_drain_all(be_lun->io_taskqueue);
+ error = ctl_be_block_close(be_lun);
+ } else
+ error = 0;
}
+ if (be_lun->size_blocks != oldsize)
+ ctl_lun_capacity_changed(cbe_lun);
/* Tell the user the exact size we ended up using */
params->lun_size_bytes = be_lun->size_bytes;
req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
-
return (0);
bailout_error:
req->status = CTL_LUN_ERROR;
-
return (0);
}
@@ -2708,7 +2681,6 @@ ctl_be_block_lun_shutdown(void *be_lun)
struct ctl_be_block_softc *softc;
lun = (struct ctl_be_block_lun *)be_lun;
-
softc = lun->softc;
mtx_lock(&softc->lock);
@@ -2716,7 +2688,6 @@ ctl_be_block_lun_shutdown(void *be_lun)
if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
wakeup(lun);
mtx_unlock(&softc->lock);
-
}
static void
@@ -2738,9 +2709,9 @@ ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
/*
* We successfully added the LUN, attempt to enable it.
*/
- if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
+ if (ctl_enable_lun(&lun->cbe_lun) != 0) {
printf("%s: ctl_enable_lun() failed!\n", __func__);
- if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
+ if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
printf("%s: ctl_invalidate_lun() failed!\n",
__func__);
}
@@ -2762,17 +2733,16 @@ static int
ctl_be_block_config_write(union ctl_io *io)
{
struct ctl_be_block_lun *be_lun;
- struct ctl_be_lun *ctl_be_lun;
+ struct ctl_be_lun *cbe_lun;
int retval;
- retval = 0;
-
DPRINTF("entered\n");
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
CTL_PRIV_BACKEND_LUN].ptr;
- be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
+ be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
+ retval = 0;
switch (io->scsiio.cdb[0]) {
case SYNCHRONIZE_CACHE:
case SYNCHRONIZE_CACHE_16:
@@ -2795,40 +2765,46 @@ ctl_be_block_config_write(union ctl_io *io)
break;
case START_STOP_UNIT: {
struct scsi_start_stop_unit *cdb;
+ struct ctl_lun_req req;
cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
-
- if (cdb->how & SSS_START)
- retval = ctl_start_lun(ctl_be_lun);
- else {
- retval = ctl_stop_lun(ctl_be_lun);
- /*
- * XXX KDM Copan-specific offline behavior.
- * Figure out a reasonable way to port this?
- */
-#ifdef NEEDTOPORT
- if ((retval == 0)
- && (cdb->byte2 & SSS_ONOFFLINE))
- retval = ctl_lun_offline(ctl_be_lun);
-#endif
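+		/* Requests that only change power conditions succeed without media action. */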
+ if ((cdb->how & SSS_PC_MASK) != 0) {
+ ctl_set_success(&io->scsiio);
+ ctl_config_write_done(io);
+ break;
}
-
- /*
- * In general, the above routines should not fail. They
- * just set state for the LUN. So we've got something
- * pretty wrong here if we can't start or stop the LUN.
- */
- if (retval != 0) {
- ctl_set_internal_failure(&io->scsiio,
- /*sks_valid*/ 1,
- /*retry_count*/ 0xf051);
- retval = CTL_RETVAL_COMPLETE;
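+		/* A start with LOEJ (re)loads the media by reopening the backing store. */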
+ if (cdb->how & SSS_START) {
+ if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
+ retval = ctl_be_block_open(be_lun, &req);
+ cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
+ if (retval == 0) {
+ cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_has_media(cbe_lun);
+ } else {
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
+ ctl_lun_no_media(cbe_lun);
+ }
+ }
+ ctl_start_lun(cbe_lun);
} else {
- ctl_set_success(&io->scsiio);
+ ctl_stop_lun(cbe_lun);
+ if (cdb->how & SSS_LOEJ) {
+ cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
+ cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
+ ctl_lun_ejected(cbe_lun);
+ if (be_lun->vn != NULL)
+ ctl_be_block_close(be_lun);
+ }
}
+
+ ctl_set_success(&io->scsiio);
ctl_config_write_done(io);
break;
}
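+	/* PREVENT ALLOW MEDIUM REMOVAL is accepted but not enforced. */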
+ case PREVENT_ALLOW:
+ ctl_set_success(&io->scsiio);
+ ctl_config_write_done(io);
+ break;
default:
ctl_set_invalid_opcode(&io->scsiio);
ctl_config_write_done(io);
@@ -2843,14 +2819,14 @@ static int
ctl_be_block_config_read(union ctl_io *io)
{
struct ctl_be_block_lun *be_lun;
- struct ctl_be_lun *ctl_be_lun;
+ struct ctl_be_lun *cbe_lun;
int retval = 0;
DPRINTF("entered\n");
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
CTL_PRIV_BACKEND_LUN].ptr;
- be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
+ be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
switch (io->scsiio.cdb[0]) {
case SERVICE_ACTION_IN:
@@ -2890,22 +2866,16 @@ ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
int retval;
lun = (struct ctl_be_block_lun *)be_lun;
- retval = 0;
retval = sbuf_printf(sb, "\t<num_threads>");
-
if (retval != 0)
goto bailout;
-
retval = sbuf_printf(sb, "%d", lun->num_threads);
-
if (retval != 0)
goto bailout;
-
retval = sbuf_printf(sb, "</num_threads>\n");
bailout:
-
return (retval);
}
@@ -2931,7 +2901,6 @@ ctl_be_block_init(void)
mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- STAILQ_INIT(&softc->disk_list);
STAILQ_INIT(&softc->lun_list);
return (retval);
diff --git a/sys/cam/ctl/ctl_backend_block.h b/sys/cam/ctl/ctl_backend_block.h
deleted file mode 100644
index e5781d5..0000000
--- a/sys/cam/ctl/ctl_backend_block.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*-
- * Copyright (c) 2003 Silicon Graphics International Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- *
- * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.h#1 $
- * $FreeBSD$
- */
-/*
- * CAM Target Layer driver backend interface for block devices.
- *
- * Author: Ken Merry <ken@FreeBSD.org>
- */
-
-#ifndef _CTL_BACKEND_BLOCK_H_
-#define _CTL_BACKEND_BLOCK_H_
-
-struct ctl_block_disk {
- uint32_t version; /* interface version */
- uint32_t disknum; /* returned device number */
- STAILQ_ENTRY(ctl_block_disk) links; /* linked list pointer */
- char disk_name[MAXPATHLEN]; /* name of this device */
- int allocated; /* disk is allocated to a LUN */
- uint64_t size_blocks; /* disk size in blocks */
- uint64_t size_bytes; /* disk size in bytes */
-};
-
-typedef enum {
- CTL_BLOCK_DEVLIST_MORE,
- CTL_BLOCK_DEVLIST_DONE
-} ctl_block_devlist_status;
-
-struct ctl_block_devlist {
- uint32_t version; /* interface version */
- uint32_t buf_len; /* passed in, buffer length */
- uint32_t ctl_disk_size; /* size of adddev, passed in */
- struct ctl_block_disk *devbuf; /* buffer passed in/filled out*/
- uint32_t num_bufs; /* number passed out */
- uint32_t buf_used; /* bytes passed out */
- uint32_t total_disks; /* number of disks in system */
- ctl_block_devlist_status status; /* did we get the whole list? */
-};
-
-#define CTL_BLOCK_ADDDEV _IOWR(COPAN_ARRAY_BE_BLOCK, 0x00, struct ctl_block_disk)
-#define CTL_BLOCK_DEVLIST _IOWR(COPAN_ARRAY_BE_BLOCK, 0x01, struct ctl_block_devlist)
-#define CTL_BLOCK_RMDEV _IOW(COPAN_ARRAY_BE_BLOCK, 0x02, struct ctl_block_disk)
-
-#endif /* _CTL_BACKEND_BLOCK_H_ */
diff --git a/sys/cam/ctl/ctl_backend_ramdisk.c b/sys/cam/ctl/ctl_backend_ramdisk.c
index ad90241..d170446 100644
--- a/sys/cam/ctl/ctl_backend_ramdisk.c
+++ b/sys/cam/ctl/ctl_backend_ramdisk.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 2003, 2008 Silicon Graphics International Corp.
* Copyright (c) 2012 The FreeBSD Foundation
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Portions of this software were developed by Edward Tomasz Napierala
@@ -56,15 +57,18 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/module.h>
+#include <sys/sysctl.h>
#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_backend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_ha.h>
+#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>
typedef enum {
@@ -74,13 +78,14 @@ typedef enum {
} ctl_be_ramdisk_lun_flags;
struct ctl_be_ramdisk_lun {
+ struct ctl_lun_create_params params;
char lunname[32];
uint64_t size_bytes;
uint64_t size_blocks;
struct ctl_be_ramdisk_softc *softc;
ctl_be_ramdisk_lun_flags flags;
STAILQ_ENTRY(ctl_be_ramdisk_lun) links;
- struct ctl_be_lun ctl_be_lun;
+ struct ctl_be_lun cbe_lun;
struct taskqueue *io_taskqueue;
struct task io_task;
STAILQ_HEAD(, ctl_io_hdr) cont_queue;
@@ -101,6 +106,7 @@ struct ctl_be_ramdisk_softc {
};
static struct ctl_be_ramdisk_softc rd_softc;
+extern struct ctl_softc *control_softc;
int ctl_backend_ramdisk_init(void);
void ctl_backend_ramdisk_shutdown(void);
@@ -112,7 +118,7 @@ static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd,
static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
struct ctl_lun_req *req);
static int ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
- struct ctl_lun_req *req, int do_wait);
+ struct ctl_lun_req *req);
static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
struct ctl_lun_req *req);
static void ctl_backend_ramdisk_worker(void *context, int pending);
@@ -140,18 +146,13 @@ CTL_BACKEND_DECLARE(cbr, ctl_be_ramdisk_driver);
int
ctl_backend_ramdisk_init(void)
{
- struct ctl_be_ramdisk_softc *softc;
+ struct ctl_be_ramdisk_softc *softc = &rd_softc;
#ifdef CTL_RAMDISK_PAGES
int i;
#endif
-
- softc = &rd_softc;
-
memset(softc, 0, sizeof(*softc));
-
mtx_init(&softc->lock, "ctlramdisk", NULL, MTX_DEF);
-
STAILQ_INIT(&softc->lun_list);
softc->rd_size = 1024 * 1024;
#ifdef CTL_RAMDISK_PAGES
@@ -172,31 +173,22 @@ ctl_backend_ramdisk_init(void)
void
ctl_backend_ramdisk_shutdown(void)
{
- struct ctl_be_ramdisk_softc *softc;
+ struct ctl_be_ramdisk_softc *softc = &rd_softc;
struct ctl_be_ramdisk_lun *lun, *next_lun;
#ifdef CTL_RAMDISK_PAGES
int i;
#endif
- softc = &rd_softc;
-
mtx_lock(&softc->lock);
- for (lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; lun = next_lun){
- /*
- * Grab the next LUN. The current LUN may get removed by
- * ctl_invalidate_lun(), which will call our LUN shutdown
- * routine, if there is no outstanding I/O for this LUN.
- */
- next_lun = STAILQ_NEXT(lun, links);
-
+ STAILQ_FOREACH_SAFE(lun, &softc->lun_list, links, next_lun) {
/*
* Drop our lock here. Since ctl_invalidate_lun() can call
* back into us, this could potentially lead to a recursive
* lock of the same mutex, which would cause a hang.
*/
mtx_unlock(&softc->lock);
- ctl_disable_lun(&lun->ctl_be_lun);
- ctl_invalidate_lun(&lun->ctl_be_lun);
+ ctl_disable_lun(&lun->cbe_lun);
+ ctl_invalidate_lun(&lun->cbe_lun);
mtx_lock(&softc->lock);
}
mtx_unlock(&softc->lock);
@@ -219,22 +211,22 @@ ctl_backend_ramdisk_shutdown(void)
static int
ctl_backend_ramdisk_move_done(union ctl_io *io)
{
- struct ctl_be_lun *ctl_be_lun;
+ struct ctl_be_lun *cbe_lun;
struct ctl_be_ramdisk_lun *be_lun;
#ifdef CTL_TIME_IO
struct bintime cur_bt;
#endif
CTL_DEBUG_PRINT(("ctl_backend_ramdisk_move_done\n"));
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
CTL_PRIV_BACKEND_LUN].ptr;
- be_lun = (struct ctl_be_ramdisk_lun *)ctl_be_lun->be_lun;
+ be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun->be_lun;
#ifdef CTL_TIME_IO
- getbintime(&cur_bt);
+ getbinuptime(&cur_bt);
bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
bintime_add(&io->io_hdr.dma_bt, &cur_bt);
- io->io_hdr.num_dmas++;
#endif
+ io->io_hdr.num_dmas++;
if (io->scsiio.kern_sg_entries > 0)
free(io->scsiio.kern_data_ptr, M_RAMDISK);
io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
@@ -275,10 +267,10 @@ ctl_backend_ramdisk_move_done(union ctl_io *io)
static int
ctl_backend_ramdisk_submit(union ctl_io *io)
{
- struct ctl_be_lun *ctl_be_lun;
+ struct ctl_be_lun *cbe_lun;
struct ctl_lba_len_flags *lbalen;
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
CTL_PRIV_BACKEND_LUN].ptr;
lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
if (lbalen->flags & CTL_LLF_VERIFY) {
@@ -287,7 +279,7 @@ ctl_backend_ramdisk_submit(union ctl_io *io)
return (CTL_RETVAL_COMPLETE);
}
io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer =
- lbalen->len * ctl_be_lun->blocksize;
+ lbalen->len * cbe_lun->blocksize;
ctl_backend_ramdisk_continue(io);
return (CTL_RETVAL_COMPLETE);
}
@@ -316,7 +308,6 @@ ctl_backend_ramdisk_continue(union ctl_io *io)
sg_entries[i].len = MIN(PAGE_SIZE, len - len_filled);
len_filled += sg_entries[i].len;
}
- io->io_hdr.flags |= CTL_FLAG_KDPTR_SGLIST;
} else {
sg_filled = 0;
len_filled = len;
@@ -335,7 +326,7 @@ ctl_backend_ramdisk_continue(union ctl_io *io)
io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer -= len_filled;
#ifdef CTL_TIME_IO
- getbintime(&io->io_hdr.dma_start_bt);
+ getbinuptime(&io->io_hdr.dma_start_bt);
#endif
ctl_datamove(io);
}
@@ -343,12 +334,10 @@ ctl_backend_ramdisk_continue(union ctl_io *io)
static void
ctl_backend_ramdisk_worker(void *context, int pending)
{
- struct ctl_be_ramdisk_softc *softc;
struct ctl_be_ramdisk_lun *be_lun;
union ctl_io *io;
be_lun = (struct ctl_be_ramdisk_lun *)context;
- softc = be_lun->softc;
mtx_lock(&be_lun->queue_lock);
for (;;) {
@@ -356,11 +345,8 @@ ctl_backend_ramdisk_worker(void *context, int pending)
if (io != NULL) {
STAILQ_REMOVE(&be_lun->cont_queue, &io->io_hdr,
ctl_io_hdr, links);
-
mtx_unlock(&be_lun->queue_lock);
-
ctl_backend_ramdisk_continue(io);
-
mtx_lock(&be_lun->queue_lock);
continue;
}
@@ -378,22 +364,17 @@ static int
ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
int flag, struct thread *td)
{
- struct ctl_be_ramdisk_softc *softc;
+ struct ctl_be_ramdisk_softc *softc = &rd_softc;
+ struct ctl_lun_req *lun_req;
int retval;
retval = 0;
- softc = &rd_softc;
-
switch (cmd) {
- case CTL_LUN_REQ: {
- struct ctl_lun_req *lun_req;
-
+ case CTL_LUN_REQ:
lun_req = (struct ctl_lun_req *)addr;
-
switch (lun_req->reqtype) {
case CTL_LUNREQ_CREATE:
- retval = ctl_backend_ramdisk_create(softc, lun_req,
- /*do_wait*/ 1);
+ retval = ctl_backend_ramdisk_create(softc, lun_req);
break;
case CTL_LUNREQ_RM:
retval = ctl_backend_ramdisk_rm(softc, lun_req);
@@ -409,7 +390,6 @@ ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
break;
}
break;
- }
default:
retval = ENOTTY;
break;
@@ -426,20 +406,13 @@ ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
struct ctl_lun_rm_params *params;
int retval;
-
- retval = 0;
params = &req->reqdata.rm;
-
- be_lun = NULL;
-
mtx_lock(&softc->lock);
-
STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
- if (be_lun->ctl_be_lun.lun_id == params->lun_id)
+ if (be_lun->cbe_lun.lun_id == params->lun_id)
break;
}
mtx_unlock(&softc->lock);
-
if (be_lun == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: LUN %u is not managed by the ramdisk backend",
@@ -447,8 +420,7 @@ ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
goto bailout_error;
}
- retval = ctl_disable_lun(&be_lun->ctl_be_lun);
-
+ retval = ctl_disable_lun(&be_lun->cbe_lun);
if (retval != 0) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: error %d returned from ctl_disable_lun() for "
@@ -468,7 +440,7 @@ ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
be_lun->flags |= CTL_BE_RAMDISK_LUN_WAITING;
mtx_unlock(&softc->lock);
- retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
+ retval = ctl_invalidate_lun(&be_lun->cbe_lun);
if (retval != 0) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: error %d returned from ctl_invalidate_lun() for "
@@ -480,10 +452,9 @@ ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
}
mtx_lock(&softc->lock);
-
while ((be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) == 0) {
retval = msleep(be_lun, &softc->lock, PCATCH, "ctlram", 0);
- if (retval == EINTR)
+ if (retval == EINTR)
break;
}
be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING;
@@ -503,141 +474,133 @@ ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc,
mtx_unlock(&softc->lock);
if (retval == 0) {
- taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
+ taskqueue_drain_all(be_lun->io_taskqueue);
taskqueue_free(be_lun->io_taskqueue);
- ctl_free_opts(&be_lun->ctl_be_lun.options);
+ ctl_free_opts(&be_lun->cbe_lun.options);
mtx_destroy(&be_lun->queue_lock);
free(be_lun, M_RAMDISK);
}
req->status = CTL_LUN_OK;
-
return (retval);
bailout_error:
req->status = CTL_LUN_ERROR;
-
return (0);
}
static int
ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
- struct ctl_lun_req *req, int do_wait)
+ struct ctl_lun_req *req)
{
struct ctl_be_ramdisk_lun *be_lun;
+ struct ctl_be_lun *cbe_lun;
struct ctl_lun_create_params *params;
- uint32_t blocksize;
char *value;
char tmpstr[32];
- int retval, unmap;
+ int retval;
retval = 0;
params = &req->reqdata.create;
- if (params->blocksize_bytes != 0)
- blocksize = params->blocksize_bytes;
- else
- blocksize = 512;
-
- be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | (do_wait ?
- M_WAITOK : M_NOWAIT));
- if (be_lun == NULL) {
- snprintf(req->error_str, sizeof(req->error_str),
- "%s: error allocating %zd bytes", __func__,
- sizeof(*be_lun));
- goto bailout_error;
- }
+ be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | M_WAITOK);
+ cbe_lun = &be_lun->cbe_lun;
+ cbe_lun->be_lun = be_lun;
+ be_lun->params = req->reqdata.create;
+ be_lun->softc = softc;
sprintf(be_lun->lunname, "cram%d", softc->num_luns);
- ctl_init_opts(&be_lun->ctl_be_lun.options,
- req->num_be_args, req->kern_be_args);
+ ctl_init_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
- be_lun->ctl_be_lun.lun_type = params->device_type;
+ cbe_lun->lun_type = params->device_type;
else
- be_lun->ctl_be_lun.lun_type = T_DIRECT;
-
- if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
-
- if (params->lun_size_bytes < blocksize) {
+ cbe_lun->lun_type = T_DIRECT;
+ be_lun->flags = CTL_BE_RAMDISK_LUN_UNCONFIGURED;
+ cbe_lun->flags = 0;
+ value = ctl_get_opt(&cbe_lun->options, "ha_role");
+ if (value != NULL) {
+ if (strcmp(value, "primary") == 0)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+ } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+
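+	/* Same blocksize policy as the block backend: 2048 for CD-ROM, 512 for disks. */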
+ if (cbe_lun->lun_type == T_DIRECT ||
+ cbe_lun->lun_type == T_CDROM) {
+ if (params->blocksize_bytes != 0)
+ cbe_lun->blocksize = params->blocksize_bytes;
+ else if (cbe_lun->lun_type == T_CDROM)
+ cbe_lun->blocksize = 2048;
+ else
+ cbe_lun->blocksize = 512;
+ if (params->lun_size_bytes < cbe_lun->blocksize) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: LUN size %ju < blocksize %u", __func__,
- params->lun_size_bytes, blocksize);
+ params->lun_size_bytes, cbe_lun->blocksize);
goto bailout_error;
}
-
- be_lun->size_blocks = params->lun_size_bytes / blocksize;
- be_lun->size_bytes = be_lun->size_blocks * blocksize;
-
- be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
- be_lun->ctl_be_lun.atomicblock = UINT32_MAX;
- be_lun->ctl_be_lun.opttxferlen = softc->rd_size / blocksize;
- } else {
- be_lun->ctl_be_lun.maxlba = 0;
- blocksize = 0;
- be_lun->size_bytes = 0;
- be_lun->size_blocks = 0;
+ be_lun->size_blocks = params->lun_size_bytes / cbe_lun->blocksize;
+ be_lun->size_bytes = be_lun->size_blocks * cbe_lun->blocksize;
+ cbe_lun->maxlba = be_lun->size_blocks - 1;
+ cbe_lun->atomicblock = UINT32_MAX;
+ cbe_lun->opttxferlen = softc->rd_size / cbe_lun->blocksize;
}
- be_lun->ctl_be_lun.blocksize = blocksize;
-
/* Tell the user the blocksize we ended up using */
- params->blocksize_bytes = blocksize;
-
- /* Tell the user the exact size we ended up using */
+ params->blocksize_bytes = cbe_lun->blocksize;
params->lun_size_bytes = be_lun->size_bytes;
- be_lun->softc = softc;
-
- unmap = 1;
- value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap");
+ value = ctl_get_opt(&cbe_lun->options, "unmap");
if (value != NULL && strcmp(value, "on") == 0)
- unmap = (strcmp(value, "on") == 0);
-
- be_lun->flags = CTL_BE_RAMDISK_LUN_UNCONFIGURED;
- be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
- if (unmap)
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
- be_lun->ctl_be_lun.be_lun = be_lun;
+ cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
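+	/* Non-direct LUN types are read-only unless the "readonly" option says otherwise. */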
+ value = ctl_get_opt(&cbe_lun->options, "readonly");
+ if (value != NULL) {
+ if (strcmp(value, "on") == 0)
+ cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
+ } else if (cbe_lun->lun_type != T_DIRECT)
+ cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
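+	/* "serseq" selects command serialization: on, read, or off. */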
+ cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
+ value = ctl_get_opt(&cbe_lun->options, "serseq");
+ if (value != NULL && strcmp(value, "on") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
+ else if (value != NULL && strcmp(value, "read") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
+ else if (value != NULL && strcmp(value, "off") == 0)
+ cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
if (params->flags & CTL_LUN_FLAG_ID_REQ) {
- be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
- be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
+ cbe_lun->req_lun_id = params->req_lun_id;
+ cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
} else
- be_lun->ctl_be_lun.req_lun_id = 0;
+ cbe_lun->req_lun_id = 0;
- be_lun->ctl_be_lun.lun_shutdown = ctl_backend_ramdisk_lun_shutdown;
- be_lun->ctl_be_lun.lun_config_status =
- ctl_backend_ramdisk_lun_config_status;
- be_lun->ctl_be_lun.be = &ctl_be_ramdisk_driver;
+ cbe_lun->lun_shutdown = ctl_backend_ramdisk_lun_shutdown;
+ cbe_lun->lun_config_status = ctl_backend_ramdisk_lun_config_status;
+ cbe_lun->be = &ctl_be_ramdisk_driver;
if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
softc->num_luns);
- strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
- MIN(sizeof(be_lun->ctl_be_lun.serial_num),
- sizeof(tmpstr)));
+ strncpy((char *)cbe_lun->serial_num, tmpstr,
+ MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
/* Tell the user what we used for a serial number */
strncpy((char *)params->serial_num, tmpstr,
MIN(sizeof(params->serial_num), sizeof(tmpstr)));
} else {
- strncpy((char *)be_lun->ctl_be_lun.serial_num,
- params->serial_num,
- MIN(sizeof(be_lun->ctl_be_lun.serial_num),
+ strncpy((char *)cbe_lun->serial_num, params->serial_num,
+ MIN(sizeof(cbe_lun->serial_num),
sizeof(params->serial_num)));
}
if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
- strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
- MIN(sizeof(be_lun->ctl_be_lun.device_id),
- sizeof(tmpstr)));
+ strncpy((char *)cbe_lun->device_id, tmpstr,
+ MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
/* Tell the user what we used for a device ID */
strncpy((char *)params->device_id, tmpstr,
MIN(sizeof(params->device_id), sizeof(tmpstr)));
} else {
- strncpy((char *)be_lun->ctl_be_lun.device_id,
- params->device_id,
- MIN(sizeof(be_lun->ctl_be_lun.device_id),
+ strncpy((char *)cbe_lun->device_id, params->device_id,
+ MIN(sizeof(cbe_lun->device_id),
sizeof(params->device_id)));
}
@@ -665,10 +628,9 @@ ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
mtx_lock(&softc->lock);
softc->num_luns++;
STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
-
mtx_unlock(&softc->lock);
- retval = ctl_add_lun(&be_lun->ctl_be_lun);
+ retval = ctl_add_lun(&be_lun->cbe_lun);
if (retval != 0) {
mtx_lock(&softc->lock);
STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_ramdisk_lun,
@@ -682,9 +644,6 @@ ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
goto bailout_error;
}
- if (do_wait == 0)
- return (retval);
-
mtx_lock(&softc->lock);
/*
@@ -710,12 +669,11 @@ ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc,
mtx_unlock(&softc->lock);
goto bailout_error;
} else {
- params->req_lun_id = be_lun->ctl_be_lun.lun_id;
+ params->req_lun_id = cbe_lun->lun_id;
}
mtx_unlock(&softc->lock);
req->status = CTL_LUN_OK;
-
return (retval);
bailout_error:
@@ -724,11 +682,10 @@ bailout_error:
if (be_lun->io_taskqueue != NULL) {
taskqueue_free(be_lun->io_taskqueue);
}
- ctl_free_opts(&be_lun->ctl_be_lun.options);
+ ctl_free_opts(&cbe_lun->options);
mtx_destroy(&be_lun->queue_lock);
free(be_lun, M_RAMDISK);
}
-
return (retval);
}
@@ -737,65 +694,70 @@ ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc,
struct ctl_lun_req *req)
{
struct ctl_be_ramdisk_lun *be_lun;
+ struct ctl_be_lun *cbe_lun;
struct ctl_lun_modify_params *params;
+ char *value;
uint32_t blocksize;
+ int wasprim;
params = &req->reqdata.modify;
- be_lun = NULL;
-
mtx_lock(&softc->lock);
STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
- if (be_lun->ctl_be_lun.lun_id == params->lun_id)
+ if (be_lun->cbe_lun.lun_id == params->lun_id)
break;
}
mtx_unlock(&softc->lock);
-
if (be_lun == NULL) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: LUN %u is not managed by the ramdisk backend",
__func__, params->lun_id);
goto bailout_error;
}
-
- if (params->lun_size_bytes == 0) {
- snprintf(req->error_str, sizeof(req->error_str),
- "%s: LUN size \"auto\" not supported "
- "by the ramdisk backend", __func__);
- goto bailout_error;
+ cbe_lun = &be_lun->cbe_lun;
+
+ if (params->lun_size_bytes != 0)
+ be_lun->params.lun_size_bytes = params->lun_size_bytes;
+ ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
+
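+	/* Re-evaluate the HA role and notify CTL if it changed. */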
+ wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
+ value = ctl_get_opt(&cbe_lun->options, "ha_role");
+ if (value != NULL) {
+ if (strcmp(value, "primary") == 0)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+ else
+ cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
+ } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
+ cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
+ else
+ cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
+ if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
+ if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
+ ctl_lun_primary(cbe_lun);
+ else
+ ctl_lun_secondary(cbe_lun);
}
- blocksize = be_lun->ctl_be_lun.blocksize;
-
- if (params->lun_size_bytes < blocksize) {
+ blocksize = be_lun->cbe_lun.blocksize;
+ if (be_lun->params.lun_size_bytes < blocksize) {
snprintf(req->error_str, sizeof(req->error_str),
"%s: LUN size %ju < blocksize %u", __func__,
- params->lun_size_bytes, blocksize);
+ be_lun->params.lun_size_bytes, blocksize);
goto bailout_error;
}
-
- be_lun->size_blocks = params->lun_size_bytes / blocksize;
+ be_lun->size_blocks = be_lun->params.lun_size_bytes / blocksize;
be_lun->size_bytes = be_lun->size_blocks * blocksize;
-
- /*
- * The maximum LBA is the size - 1.
- *
- * XXX: Note that this field is being updated without locking,
- * which might cause problems on 32-bit architectures.
- */
- be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
- ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
+ be_lun->cbe_lun.maxlba = be_lun->size_blocks - 1;
+ ctl_lun_capacity_changed(&be_lun->cbe_lun);
/* Tell the user the exact size we ended up using */
params->lun_size_bytes = be_lun->size_bytes;
req->status = CTL_LUN_OK;
-
return (0);
bailout_error:
req->status = CTL_LUN_ERROR;
-
return (0);
}
@@ -811,9 +773,7 @@ ctl_backend_ramdisk_lun_shutdown(void *be_lun)
do_free = 0;
mtx_lock(&softc->lock);
-
lun->flags |= CTL_BE_RAMDISK_LUN_UNCONFIGURED;
-
if (lun->flags & CTL_BE_RAMDISK_LUN_WAITING) {
wakeup(lun);
} else {
@@ -822,7 +782,6 @@ ctl_backend_ramdisk_lun_shutdown(void *be_lun)
softc->num_luns--;
do_free = 1;
}
-
mtx_unlock(&softc->lock);
if (do_free != 0)
@@ -849,9 +808,9 @@ ctl_backend_ramdisk_lun_config_status(void *be_lun,
/*
* We successfully added the LUN, attempt to enable it.
*/
- if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
+ if (ctl_enable_lun(&lun->cbe_lun) != 0) {
printf("%s: ctl_enable_lun() failed!\n", __func__);
- if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
+ if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
printf("%s: ctl_invalidate_lun() failed!\n",
__func__);
}
@@ -883,12 +842,12 @@ ctl_backend_ramdisk_lun_config_status(void *be_lun,
static int
ctl_backend_ramdisk_config_write(union ctl_io *io)
{
- struct ctl_be_ramdisk_softc *softc;
+ struct ctl_be_lun *cbe_lun;
int retval;
+ cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
+ CTL_PRIV_BACKEND_LUN].ptr;
retval = 0;
- softc = &rd_softc;
-
switch (io->scsiio.cdb[0]) {
case SYNCHRONIZE_CACHE:
case SYNCHRONIZE_CACHE_16:
@@ -912,42 +871,27 @@ ctl_backend_ramdisk_config_write(union ctl_io *io)
break;
case START_STOP_UNIT: {
struct scsi_start_stop_unit *cdb;
- struct ctl_be_lun *ctl_be_lun;
- struct ctl_be_ramdisk_lun *be_lun;
cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
-
- ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
- CTL_PRIV_BACKEND_LUN].ptr;
- be_lun = (struct ctl_be_ramdisk_lun *)ctl_be_lun->be_lun;
-
- if (cdb->how & SSS_START)
- retval = ctl_start_lun(ctl_be_lun);
- else {
- retval = ctl_stop_lun(ctl_be_lun);
-#ifdef NEEDTOPORT
- if ((retval == 0)
- && (cdb->byte2 & SSS_ONOFFLINE))
- retval = ctl_lun_offline(ctl_be_lun);
-#endif
+ if ((cdb->how & SSS_PC_MASK) != 0) {
+ ctl_set_success(&io->scsiio);
+ ctl_config_write_done(io);
+ break;
}
-
- /*
- * In general, the above routines should not fail. They
- * just set state for the LUN. So we've got something
- * pretty wrong here if we can't start or stop the LUN.
- */
- if (retval != 0) {
- ctl_set_internal_failure(&io->scsiio,
- /*sks_valid*/ 1,
- /*retry_count*/ 0xf051);
- retval = CTL_RETVAL_COMPLETE;
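+		/* For a ramdisk, load/eject only toggles the reported media state. */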
+ if (cdb->how & SSS_START) {
+ if (cdb->how & SSS_LOEJ)
+ ctl_lun_has_media(cbe_lun);
+ ctl_start_lun(cbe_lun);
} else {
- ctl_set_success(&io->scsiio);
+ ctl_stop_lun(cbe_lun);
+ if (cdb->how & SSS_LOEJ)
+ ctl_lun_ejected(cbe_lun);
}
+ ctl_set_success(&io->scsiio);
ctl_config_write_done(io);
break;
}
+ case PREVENT_ALLOW:
case WRITE_SAME_10:
case WRITE_SAME_16:
case UNMAP:
diff --git a/sys/cam/ctl/ctl_cmd_table.c b/sys/cam/ctl/ctl_cmd_table.c
index 08ff88a..498f7c5 100644
--- a/sys/cam/ctl/ctl_cmd_table.c
+++ b/sys/cam/ctl/ctl_cmd_table.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2003, 2004, 2005, 2009 Silicon Graphics International Corp.
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,7 +53,6 @@
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_backend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_private.h>
@@ -68,10 +68,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5e[32] =
/* 00 READ KEYS */
{ctl_persistent_reserve_in, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -80,10 +78,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5e[32] =
/* 01 READ RESERVATION */
{ctl_persistent_reserve_in, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -92,10 +88,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5e[32] =
/* 02 REPORT CAPABILITIES */
{ctl_persistent_reserve_in, CTL_SERIDX_INQ, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -104,10 +98,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5e[32] =
/* 03 READ FULL STATUS */
{ctl_persistent_reserve_in, CTL_SERIDX_INQ, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -122,10 +114,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 00 REGISTER */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -134,10 +124,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 01 RESERVE */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -146,10 +134,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 02 RELEASE */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -158,10 +144,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 03 CLEAR */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -170,10 +154,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 04 PREEMPT */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -182,10 +164,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 05 PREEMPT AND ABORT */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -194,10 +174,8 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] =
/* 06 REGISTER AND IGNORE EXISTING KEY */
{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -214,12 +192,14 @@ const struct ctl_cmd_entry ctl_cmd_table_83[32] =
{
/* 00 EXTENDED COPY (LID1) */
{ctl_extended_copy_lid1, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE,
16, { 0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 01 EXTENDED COPY (LID4) */
{ctl_extended_copy_lid4, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE,
16, { 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
@@ -267,7 +247,7 @@ const struct ctl_cmd_entry ctl_cmd_table_83[32] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 10 POPULATE TOKEN */
-{ctl_populate_token, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_populate_token, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE,
@@ -275,7 +255,7 @@ const struct ctl_cmd_entry ctl_cmd_table_83[32] =
0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 11 WRITE USING TOKEN */
-{ctl_write_using_token, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_write_using_token, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE,
16, { 0x11, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff,
@@ -313,6 +293,7 @@ const struct ctl_cmd_entry ctl_cmd_table_83[32] =
/* 1C COPY OPERATION ABORT */
{ctl_copy_operation_abort, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_NONE,
CTL_LUN_PAT_NONE,
16, { 0x1c, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x07}},
@@ -324,6 +305,7 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 00 RECEIVE COPY STATUS (LID1) */
{ctl_receive_copy_status_lid1, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -338,9 +320,8 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 03 RECEIVE COPY OPERATING PARAMETERS */
{ctl_receive_copy_operating_parameters, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -349,6 +330,7 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 04 RECEIVE COPY FAILURE DETAILS (LID1) */
{ctl_receive_copy_failure_details, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -357,6 +339,7 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 05 RECEIVE COPY STATUS (LID4) */
{ctl_receive_copy_status_lid4, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -368,6 +351,7 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 07 RECEIVE ROD TOKEN INFORMATION */
{ctl_receive_rod_token_information, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -376,6 +360,7 @@ const struct ctl_cmd_entry ctl_cmd_table_84[32] =
/* 08 REPORT ALL ROD TOKENS */
{ctl_report_all_rod_tokens, CTL_SERIDX_RD_CAP,
CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -434,10 +419,7 @@ const struct ctl_cmd_entry ctl_cmd_table_9e[32] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 10 READ CAPACITY(16) */
-{ctl_read_capacity_16, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_SLUN |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+{ctl_read_capacity_16, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_READCAP,
@@ -447,7 +429,7 @@ const struct ctl_cmd_entry ctl_cmd_table_9e[32] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 12 GET LBA STATUS */
-{ctl_get_lba_status, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_get_lba_status, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
@@ -492,24 +474,22 @@ const struct ctl_cmd_entry ctl_cmd_table_a3[32] =
/* 0A REPORT TARGET PORT GROUPS */
{ctl_report_tagret_port_groups, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
- 12, {0x0a, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
+ 12, {0xea, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 0B */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 0C REPORT SUPPORTED_OPCODES */
{ctl_report_supported_opcodes, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -517,10 +497,9 @@ const struct ctl_cmd_entry ctl_cmd_table_a3[32] =
/* 0D REPORT SUPPORTED_TASK MANAGEMENT FUNCTIONS */
{ctl_report_supported_tmf, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -531,10 +510,9 @@ const struct ctl_cmd_entry ctl_cmd_table_a3[32] =
/* 0F REPORT TIMESTAMP */
{ctl_report_timestamp, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE,
@@ -559,19 +537,19 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 03 REQUEST SENSE */
{ctl_request_sense, CTL_SERIDX_RQ_SNS, CTL_FLAG_DATA_IN |
- CTL_CMD_FLAG_OK_ON_ALL_LUNS |
+ CTL_CMD_FLAG_OK_ON_NO_LUN |
+ CTL_CMD_FLAG_OK_ON_BOTH |
CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_NO_SENSE |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
- CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV |
+ CTL_CMD_FLAG_RUN_HERE,
CTL_LUN_PAT_NONE, 6, {0x01, 0, 0, 0xff, 0x07}},
/* 04 FORMAT UNIT */
-{ctl_format, CTL_SERIDX_FORMAT, CTL_CMD_FLAG_OK_ON_SLUN |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
+{ctl_format, CTL_SERIDX_FORMAT, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE, 6, {0xff, 0, 0, 0, 0x07}},
@@ -585,7 +563,7 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 08 READ(6) */
-{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE, 6, {0x1f, 0xff, 0xff, 0xff, 0x07}},
@@ -594,7 +572,7 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 0A WRITE(6) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE, 6, {0x1f, 0xff, 0xff, 0xff, 0x07}},
@@ -620,13 +598,13 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 12 INQUIRY */
-{ctl_inquiry, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_ALL_LUNS |
+{ctl_inquiry, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_NO_LUN |
+ CTL_CMD_FLAG_OK_ON_BOTH |
CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_NO_SENSE |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE, 6, {0xe1, 0xff, 0xff, 0xff, 0x07}},
@@ -639,30 +617,24 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 15 MODE SELECT(6) */
{ctl_mode_select, CTL_SERIDX_MD_SEL, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE, 6, {0x11, 0, 0, 0xff, 0x07}},
/* 16 RESERVE(6) */
{ctl_scsi_reserve, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE, 6, {0, 0, 0, 0, 0x07}},
/* 17 RELEASE(6) */
{ctl_scsi_release, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_NONE,
CTL_LUN_PAT_NONE, 6, {0, 0, 0, 0, 0x07}},
@@ -674,22 +646,19 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 1A MODE SENSE(6) */
{ctl_mode_sense, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE, 6, {0x08, 0xff, 0xff, 0xff, 0x07}},
/* 1B START STOP UNIT */
-{ctl_start_stop, CTL_SERIDX_START, CTL_CMD_FLAG_OK_ON_SLUN |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
+{ctl_start_stop, CTL_SERIDX_START, CTL_CMD_FLAG_OK_ON_DIRECT |
+ CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
CTL_FLAG_DATA_NONE |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
- CTL_LUN_PAT_NONE, 6, {0x01, 0, 0, 0x03, 0x07}},
+ CTL_LUN_PAT_NONE, 6, {0x01, 0, 0x0f, 0xf7, 0x07}},
/* 1C RECEIVE DIAGNOSTIC RESULTS */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -698,7 +667,11 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 1E PREVENT ALLOW MEDIUM REMOVAL */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+{ctl_prevent_allow, CTL_SERIDX_START, CTL_CMD_FLAG_OK_ON_DIRECT |
+ CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_FLAG_DATA_NONE,
+ CTL_LUN_PAT_NONE, 6, {0x01, 0, 0, 0x03, 0x07}},
/* 1F */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -719,10 +692,8 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 25 READ CAPACITY(10) */
-{ctl_read_capacity, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_SLUN|
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+{ctl_read_capacity, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_DIRECT |
+ CTL_CMD_FLAG_OK_ON_CDROM |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_READCAP, 10, {0, 0, 0, 0, 0, 0, 0, 0, 0x07}},
@@ -734,7 +705,8 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 28 READ(10) */
-{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
+ CTL_CMD_FLAG_OK_ON_CDROM |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
@@ -744,7 +716,7 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 2A WRITE(10) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
10, {0x1a, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0x07}},
@@ -758,12 +730,12 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 2E WRITE AND VERIFY(10) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
10, {0x12, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0x07}},
/* 2F VERIFY(10) */
-{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
@@ -785,16 +757,16 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 35 SYNCHRONIZE CACHE(10) */
-{ctl_sync_cache, CTL_SERIDX_SYNC, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_sync_cache, CTL_SERIDX_SYNC, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_NONE,
- CTL_LUN_PAT_NONE,
- 10, {0x02, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0x07}},
+ CTL_LUN_PAT_WRITE,
+ 10, {0x06, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0x07}},
/* 36 LOCK UNLOCK CACHE(10) */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 37 READ DEFECT DATA(10) */
-{ctl_read_defect, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_read_defect, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE,
@@ -811,18 +783,16 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 3B WRITE BUFFER */
{ctl_write_buffer, CTL_SERIDX_MD_SEL, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE,
10, {0x1f, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07}},
/* 3C READ BUFFER */
{ctl_read_buffer, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE,
@@ -841,18 +811,22 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 41 WRITE SAME(10) */
-{ctl_write_same, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_write_same, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
10, {0x1a, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0x07}},
/* 42 READ SUB-CHANNEL / UNMAP */
-{ctl_unmap, CTL_SERIDX_UNMAP, CTL_CMD_FLAG_OK_ON_SLUN | CTL_FLAG_DATA_OUT,
+{ctl_unmap, CTL_SERIDX_UNMAP, CTL_CMD_FLAG_OK_ON_DIRECT | CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE,
10, {1, 0, 0, 0, 0, 0, 0xff, 0xff, 0x07}},
/* 43 READ TOC/PMA/ATIP */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+{ctl_read_toc, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_ALLOW_ON_PR_WRESV |
+ CTL_FLAG_DATA_IN,
+ CTL_LUN_PAT_NONE,
+ 10, {0x02, 0x01, 0, 0, 0, 0xff, 0xff, 0xff, 0x07}},
/* 44 REPORT DENSITY SUPPORT */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -861,7 +835,12 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 46 GET CONFIGURATION */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+{ctl_get_config, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV |
+ CTL_FLAG_DATA_IN,
+ CTL_LUN_PAT_NONE,
+ 10, {0x03, 0xff, 0xff, 0, 0, 0, 0xff, 0xff, 0x07}},
/* 47 PLAY AUDIO MSF */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -873,7 +852,12 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 4A GET EVENT STATUS NOTIFICATION */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+{ctl_get_event_status, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV |
+ CTL_FLAG_DATA_IN,
+ CTL_LUN_PAT_NONE,
+ 10, {0x02, 0x01, 0, 0, 0, 0xff, 0xff, 0xff, 0x07}},
/* 4B PAUSE/RESUME */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -882,7 +866,9 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 4D LOG SENSE */
-{ctl_log_sense, CTL_SERIDX_LOG_SNS, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_log_sense, CTL_SERIDX_LOG_SNS, CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
CTL_LUN_PAT_NONE, 10, {0, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0x07} },
@@ -910,32 +896,26 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 55 MODE SELECT(10) */
{ctl_mode_select, CTL_SERIDX_MD_SEL, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_NONE, 10, {0x11, 0, 0, 0, 0, 0, 0xff, 0xff, 0x07} },
/* 56 RESERVE(10) */
{ctl_scsi_reserve, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
- CTL_LUN_PAT_NONE, 10, {0x02, 0, 0xff, 0, 0, 0, 0xff, 0xff, 0x07} },
+ CTL_LUN_PAT_NONE, 10, {0, 0, 0, 0, 0, 0, 0, 0, 0x07} },
/* 57 RELEASE(10) */
{ctl_scsi_release, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_OUT,
- CTL_LUN_PAT_NONE, 10, {0x02, 0, 0xff, 0, 0, 0, 0xff, 0xff, 0x07} },
+ CTL_LUN_PAT_NONE, 10, {0, 0, 0, 0, 0, 0, 0, 0, 0x07}},
/* 58 REPAIR TRACK */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -945,10 +925,8 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 5A MODE SENSE(10) */
{ctl_mode_sense, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_BOTH |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE, 10, {0x18, 0xff, 0xff, 0, 0, 0, 0xff, 0xff, 0x07} },
@@ -1093,20 +1071,20 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 88 READ(16) */
-{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN | CTL_FLAG_DATA_IN |
+{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT | CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
16, {0x1a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 89 COMPARE AND WRITE */
-{ctl_cnw, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_cnw, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
16, {0x18, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0, 0, 0, 0xff, 0, 0x07}},
/* 8A WRITE(16) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
16, {0x1a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
@@ -1121,13 +1099,13 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 8E WRITE AND VERIFY(16) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
16, {0x12, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 8F VERIFY(16) */
-{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
@@ -1138,17 +1116,17 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 91 SYNCHRONIZE CACHE(16) */
-{ctl_sync_cache, CTL_SERIDX_SYNC, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_sync_cache, CTL_SERIDX_SYNC, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_NONE,
- CTL_LUN_PAT_NONE,
- 16, {0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ CTL_LUN_PAT_WRITE,
+ 16, {0x06, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 92 LOCK UNLOCK CACHE(16) */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* 93 WRITE SAME(16) */
-{ctl_write_same, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_write_same, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
16, {0x1b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
@@ -1175,11 +1153,18 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* 9A */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* 9B */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+/* 9B READ BUFFER(16) */
+{ctl_read_buffer, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_BOTH |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_FLAG_DATA_IN |
+ CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
+ CTL_LUN_PAT_NONE,
+ 16, {0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* 9C WRITE ATOMIC (16) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
16, {0x18, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0, 0, 0xff, 0xff, 0, 0x07}},
@@ -1195,15 +1180,16 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* A0 REPORT LUNS */
-{ctl_report_luns, CTL_SERIDX_INQ, CTL_CMD_FLAG_OK_ON_ALL_LUNS |
+{ctl_report_luns, CTL_SERIDX_INQ, CTL_FLAG_DATA_IN |
+ CTL_CMD_FLAG_OK_ON_NO_LUN |
+ CTL_CMD_FLAG_OK_ON_BOTH |
CTL_CMD_FLAG_ALLOW_ON_RESV |
CTL_CMD_FLAG_NO_SENSE |
- CTL_CMD_FLAG_OK_ON_STOPPED |
- CTL_CMD_FLAG_OK_ON_INOPERABLE |
- CTL_CMD_FLAG_OK_ON_OFFLINE |
- CTL_CMD_FLAG_OK_ON_SECONDARY |
- CTL_FLAG_DATA_IN |
- CTL_CMD_FLAG_ALLOW_ON_PR_RESV,
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_OK_ON_STANDBY |
+ CTL_CMD_FLAG_OK_ON_UNAVAIL |
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV |
+ CTL_CMD_FLAG_RUN_HERE,
CTL_LUN_PAT_NONE,
12, {0, 0xff, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
@@ -1230,8 +1216,10 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* A8 READ(12) */
-{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN | CTL_FLAG_DATA_IN |
- CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
+{ctl_read_write, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
+ CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_FLAG_DATA_IN |
+ CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
12, {0x1a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
@@ -1239,7 +1227,7 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* AA WRITE(12) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
12, {0x1a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
@@ -1253,12 +1241,12 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* AE WRITE AND VERIFY(12) */
-{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT,
+{ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_DIRECT| CTL_FLAG_DATA_OUT,
CTL_LUN_PAT_WRITE | CTL_LUN_PAT_RANGE,
12, {0x12, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}},
/* AF VERIFY(12) */
-{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_verify, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_OUT |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE,
@@ -1286,7 +1274,7 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* B7 READ DEFECT DATA(12) */
-{ctl_read_defect, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_SLUN |
+{ctl_read_defect, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_DIRECT |
CTL_FLAG_DATA_IN |
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV,
CTL_LUN_PAT_NONE,
@@ -1307,8 +1295,13 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* BC SPARE IN */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* BD SPARE OUT */
-{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
+/* BD SPARE OUT / MECHANISM STATUS */
+{ctl_mechanism_status, CTL_SERIDX_RD_CAP, CTL_CMD_FLAG_OK_ON_CDROM |
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA |
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV |
+ CTL_FLAG_DATA_IN,
+ CTL_LUN_PAT_NONE,
+ 12, {0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0x07}},
/* BE VOLUME SET IN */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
@@ -1316,33 +1309,17 @@ const struct ctl_cmd_entry ctl_cmd_table[256] =
/* BF VOLUME SET OUT */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* C0 - ISC_SEND_MSG_SHORT */
-//{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE},
-{ctl_isc, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_PROC | CTL_FLAG_DATA_NONE,
- CTL_LUN_PAT_NONE,
- 16, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
+/* C0 */
+{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* C1 - ISC_SEND_MSG */
-//{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE},
-{ctl_isc, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_PROC | CTL_FLAG_DATA_OUT,
- CTL_LUN_PAT_NONE,
- 16, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
+/* C1 */
+{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* C2 - ISC_WRITE */
-//{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE},
-{ctl_isc, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_PROC | CTL_FLAG_DATA_OUT,
- CTL_LUN_PAT_NONE,
- 16, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
+/* C2 */
+{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
-/* C3 - ISC_READ */
-//{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE},
-{ctl_isc, CTL_SERIDX_READ, CTL_CMD_FLAG_OK_ON_PROC | CTL_FLAG_DATA_IN,
- CTL_LUN_PAT_NONE,
- 16, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
+/* C3 */
+{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
/* C4 */
{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE},
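
The table rewrite above replaces the old per-command state flags
(CTL_CMD_FLAG_OK_ON_STOPPED/INOPERABLE/OFFLINE/SECONDARY) with flags keyed
to media presence and target-port access state
(CTL_CMD_FLAG_OK_ON_NO_MEDIA/STANDBY/UNAVAIL), and splits the old
"OK on storage LUN" flag into CTL_CMD_FLAG_OK_ON_DIRECT and
CTL_CMD_FLAG_OK_ON_CDROM for the two backed device types. A minimal
standalone sketch of the gating idea, using illustrative flag values and
state fields rather than the kernel's definitions:

	#include <stdint.h>

	#define OK_ON_NO_MEDIA	0x01	/* command legal with no medium */
	#define OK_ON_STANDBY	0x02	/* legal while port is in standby */
	#define OK_ON_UNAVAIL	0x04	/* legal while port is unavailable */

	struct cmd_entry { uint32_t flags; };
	struct lun_state { int no_media, standby, unavail; };

	/* Return non-zero if the opcode may run in the LUN's current
	 * state; a real implementation would instead select a matching
	 * NOT READY sense code for the rejected command. */
	static int
	cmd_allowed(const struct cmd_entry *e, const struct lun_state *ls)
	{
		if (ls->no_media && (e->flags & OK_ON_NO_MEDIA) == 0)
			return (0);
		if (ls->standby && (e->flags & OK_ON_STANDBY) == 0)
			return (0);
		if (ls->unavail && (e->flags & OK_ON_UNAVAIL) == 0)
			return (0);
		return (1);
	}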
diff --git a/sys/cam/ctl/ctl_error.c b/sys/cam/ctl/ctl_error.c
index d4d7f79..61ec4ec 100644
--- a/sys/cam/ctl/ctl_error.c
+++ b/sys/cam/ctl/ctl_error.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 2003-2009 Silicon Graphics International Corp.
* Copyright (c) 2011 Spectra Logic Corporation
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,7 +58,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_error.h>
@@ -366,62 +366,35 @@ ctl_set_ua(struct ctl_scsiio *ctsio, int asc, int ascq)
SSD_ELEM_NONE);
}
-ctl_ua_type
-ctl_build_ua(struct ctl_lun *lun, uint32_t initidx,
- struct scsi_sense_data *sense, scsi_sense_data_type sense_format)
+static void
+ctl_ua_to_acsq(struct ctl_lun *lun, ctl_ua_type ua_to_build, int *asc,
+ int *ascq, ctl_ua_type *ua_to_clear, uint8_t **info)
{
- ctl_ua_type *ua;
- ctl_ua_type ua_to_build, ua_to_clear;
- int asc, ascq;
- uint32_t p, i;
-
- mtx_assert(&lun->lun_lock, MA_OWNED);
- p = initidx / CTL_MAX_INIT_PER_PORT;
- if ((ua = lun->pending_ua[p]) == NULL) {
- mtx_unlock(&lun->lun_lock);
- ua = malloc(sizeof(ctl_ua_type) * CTL_MAX_INIT_PER_PORT,
- M_CTL, M_WAITOK);
- mtx_lock(&lun->lun_lock);
- if (lun->pending_ua[p] == NULL) {
- lun->pending_ua[p] = ua;
- for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++)
- ua[i] = CTL_UA_POWERON;
- } else {
- free(ua, M_CTL);
- ua = lun->pending_ua[p];
- }
- }
- i = initidx % CTL_MAX_INIT_PER_PORT;
- if (ua[i] == CTL_UA_NONE)
- return (CTL_UA_NONE);
-
- ua_to_build = (1 << (ffs(ua[i]) - 1));
- ua_to_clear = ua_to_build;
switch (ua_to_build) {
case CTL_UA_POWERON:
/* 29h/01h POWER ON OCCURRED */
- asc = 0x29;
- ascq = 0x01;
- ua_to_clear = ~0;
+ *asc = 0x29;
+ *ascq = 0x01;
+ *ua_to_clear = ~0;
break;
case CTL_UA_BUS_RESET:
/* 29h/02h SCSI BUS RESET OCCURRED */
- asc = 0x29;
- ascq = 0x02;
- ua_to_clear = ~0;
+ *asc = 0x29;
+ *ascq = 0x02;
+ *ua_to_clear = ~0;
break;
case CTL_UA_TARG_RESET:
/* 29h/03h BUS DEVICE RESET FUNCTION OCCURRED*/
- asc = 0x29;
- ascq = 0x03;
- ua_to_clear = ~0;
+ *asc = 0x29;
+ *ascq = 0x03;
+ *ua_to_clear = ~0;
break;
case CTL_UA_I_T_NEXUS_LOSS:
/* 29h/07h I_T NEXUS LOSS OCCURRED */
- asc = 0x29;
- ascq = 0x07;
- ua_to_clear = ~0;
+ *asc = 0x29;
+ *ascq = 0x07;
+ *ua_to_clear = ~0;
break;
case CTL_UA_LUN_RESET:
/* 29h/00h POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
@@ -429,76 +402,143 @@ ctl_build_ua(struct ctl_lun *lun, uint32_t initidx,
* Since we don't have a specific ASC/ASCQ pair for a LUN
* reset, just return the generic reset code.
*/
- asc = 0x29;
- ascq = 0x00;
+ *asc = 0x29;
+ *ascq = 0x00;
break;
case CTL_UA_LUN_CHANGE:
/* 3Fh/0Eh REPORTED LUNS DATA HAS CHANGED */
- asc = 0x3F;
- ascq = 0x0E;
+ *asc = 0x3F;
+ *ascq = 0x0E;
break;
case CTL_UA_MODE_CHANGE:
/* 2Ah/01h MODE PARAMETERS CHANGED */
- asc = 0x2A;
- ascq = 0x01;
+ *asc = 0x2A;
+ *ascq = 0x01;
break;
case CTL_UA_LOG_CHANGE:
/* 2Ah/02h LOG PARAMETERS CHANGED */
- asc = 0x2A;
- ascq = 0x02;
- break;
- case CTL_UA_LVD:
- /* 29h/06h TRANSCEIVER MODE CHANGED TO LVD */
- asc = 0x29;
- ascq = 0x06;
+ *asc = 0x2A;
+ *ascq = 0x02;
break;
- case CTL_UA_SE:
- /* 29h/05h TRANSCEIVER MODE CHANGED TO SINGLE-ENDED */
- asc = 0x29;
- ascq = 0x05;
+ case CTL_UA_INQ_CHANGE:
+ /* 3Fh/03h INQUIRY DATA HAS CHANGED */
+ *asc = 0x3F;
+ *ascq = 0x03;
break;
case CTL_UA_RES_PREEMPT:
/* 2Ah/03h RESERVATIONS PREEMPTED */
- asc = 0x2A;
- ascq = 0x03;
+ *asc = 0x2A;
+ *ascq = 0x03;
break;
case CTL_UA_RES_RELEASE:
/* 2Ah/04h RESERVATIONS RELEASED */
- asc = 0x2A;
- ascq = 0x04;
+ *asc = 0x2A;
+ *ascq = 0x04;
break;
case CTL_UA_REG_PREEMPT:
/* 2Ah/05h REGISTRATIONS PREEMPTED */
- asc = 0x2A;
- ascq = 0x05;
+ *asc = 0x2A;
+ *ascq = 0x05;
break;
case CTL_UA_ASYM_ACC_CHANGE:
- /* 2Ah/06n ASYMMETRIC ACCESS STATE CHANGED */
- asc = 0x2A;
- ascq = 0x06;
+ /* 2Ah/06h ASYMMETRIC ACCESS STATE CHANGED */
+ *asc = 0x2A;
+ *ascq = 0x06;
break;
- case CTL_UA_CAPACITY_CHANGED:
- /* 2Ah/09n CAPACITY DATA HAS CHANGED */
- asc = 0x2A;
- ascq = 0x09;
+ case CTL_UA_CAPACITY_CHANGE:
+ /* 2Ah/09h CAPACITY DATA HAS CHANGED */
+ *asc = 0x2A;
+ *ascq = 0x09;
break;
case CTL_UA_THIN_PROV_THRES:
- /* 38h/07n THIN PROVISIONING SOFT THRESHOLD REACHED */
- asc = 0x38;
- ascq = 0x07;
+ /* 38h/07h THIN PROVISIONING SOFT THRESHOLD REACHED */
+ *asc = 0x38;
+ *ascq = 0x07;
+ *info = lun->ua_tpt_info;
+ break;
+ case CTL_UA_MEDIUM_CHANGE:
+ /* 28h/00h NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
+ *asc = 0x28;
+ *ascq = 0x00;
break;
default:
- panic("ctl_build_ua: Unknown UA %x", ua_to_build);
+ panic("%s: Unknown UA %x", __func__, ua_to_build);
}
+}
- ctl_set_sense_data(sense,
- /*lun*/ NULL,
- sense_format,
- /*current_error*/ 1,
- /*sense_key*/ SSD_KEY_UNIT_ATTENTION,
- asc,
- ascq,
- SSD_ELEM_NONE);
+ctl_ua_type
+ctl_build_qae(struct ctl_lun *lun, uint32_t initidx, uint8_t *resp)
+{
+ ctl_ua_type ua;
+ ctl_ua_type ua_to_build, ua_to_clear;
+ uint8_t *info;
+ int asc, ascq;
+ uint32_t p, i;
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+ p = initidx / CTL_MAX_INIT_PER_PORT;
+ i = initidx % CTL_MAX_INIT_PER_PORT;
+ if (lun->pending_ua[p] == NULL)
+ ua = CTL_UA_POWERON;
+ else
+ ua = lun->pending_ua[p][i];
+ if (ua == CTL_UA_NONE)
+ return (CTL_UA_NONE);
+
+ ua_to_build = (1 << (ffs(ua) - 1));
+ ua_to_clear = ua_to_build;
+ info = NULL;
+ ctl_ua_to_acsq(lun, ua_to_build, &asc, &ascq, &ua_to_clear, &info);
+
+ resp[0] = SSD_KEY_UNIT_ATTENTION;
+ if (ua_to_build == ua)
+ resp[0] |= 0x10;
+ else
+ resp[0] |= 0x20;
+ resp[1] = asc;
+ resp[2] = ascq;
+ return (ua);
+}
+
+ctl_ua_type
+ctl_build_ua(struct ctl_lun *lun, uint32_t initidx,
+ struct scsi_sense_data *sense, scsi_sense_data_type sense_format)
+{
+ ctl_ua_type *ua;
+ ctl_ua_type ua_to_build, ua_to_clear;
+ uint8_t *info;
+ int asc, ascq;
+ uint32_t p, i;
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+ p = initidx / CTL_MAX_INIT_PER_PORT;
+ if ((ua = lun->pending_ua[p]) == NULL) {
+ mtx_unlock(&lun->lun_lock);
+ ua = malloc(sizeof(ctl_ua_type) * CTL_MAX_INIT_PER_PORT,
+ M_CTL, M_WAITOK);
+ mtx_lock(&lun->lun_lock);
+ if (lun->pending_ua[p] == NULL) {
+ lun->pending_ua[p] = ua;
+ for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++)
+ ua[i] = CTL_UA_POWERON;
+ } else {
+ free(ua, M_CTL);
+ ua = lun->pending_ua[p];
+ }
+ }
+ i = initidx % CTL_MAX_INIT_PER_PORT;
+ if (ua[i] == CTL_UA_NONE)
+ return (CTL_UA_NONE);
+
+ ua_to_build = (1 << (ffs(ua[i]) - 1));
+ ua_to_clear = ua_to_build;
+ info = NULL;
+ ctl_ua_to_acsq(lun, ua_to_build, &asc, &ascq, &ua_to_clear, &info);
+
+ ctl_set_sense_data(sense, lun, sense_format, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_UNIT_ATTENTION, asc, ascq,
+ ((info != NULL) ? SSD_ELEM_INFO : SSD_ELEM_SKIP), 8, info,
+ SSD_ELEM_NONE);
/* We're reporting this UA, so clear it */
ua[i] &= ~ua_to_clear;
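
The refactoring above pulls the UA-to-ASC/ASCQ mapping out into
ctl_ua_to_acsq() so it can serve two callers: ctl_build_ua(), which formats
full sense data and clears the reported unit attention, and the new
ctl_build_qae(), which only peeks at the lowest-numbered pending unit
attention for an initiator and packs it into a three-byte key/ASC/ASCQ
tuple, leaving it queued. Bit 0x10 in the first byte means that UA is the
only one pending for the initiator; 0x20 means more remain. A small decode
sketch, reading the layout straight off the code above (the helper itself
is hypothetical, not part of this commit):

	#include <stdint.h>
	#include <stdio.h>

	/* Decode the 3-byte buffer filled in by ctl_build_qae(). */
	static void
	print_qae(const uint8_t resp[3])
	{
		printf("sense key %#x, %s, ASC/ASCQ %02x/%02x\n",
		    resp[0] & 0x0f,	/* SSD_KEY_UNIT_ATTENTION */
		    (resp[0] & 0x10) ? "only UA pending" : "more UAs pending",
		    resp[1], resp[2]);
	}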
@@ -572,11 +612,8 @@ ctl_set_invalid_field(struct ctl_scsiio *ctsio, int sks_valid, int command,
void
ctl_set_invalid_opcode(struct ctl_scsiio *ctsio)
{
- struct scsi_sense_data *sense;
uint8_t sks[3];
- sense = &ctsio->sense_data;
-
sks[0] = SSD_SCS_VALID | SSD_FIELDPTR_CMD;
scsi_ulto2b(0, &sks[1]);
@@ -653,9 +690,9 @@ ctl_set_internal_failure(struct ctl_scsiio *ctsio, int sks_valid,
}
void
-ctl_set_medium_error(struct ctl_scsiio *ctsio)
+ctl_set_medium_error(struct ctl_scsiio *ctsio, int read)
{
- if ((ctsio->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
+ if (read) {
/* "Unrecovered read error" */
ctl_set_sense(ctsio,
/*current_error*/ 1,
@@ -710,7 +747,7 @@ ctl_set_lun_stopped(struct ctl_scsiio *ctsio)
}
void
-ctl_set_lun_not_ready(struct ctl_scsiio *ctsio)
+ctl_set_lun_int_reqd(struct ctl_scsiio *ctsio)
{
/* "Logical unit not ready, manual intervention required" */
ctl_set_sense(ctsio,
@@ -722,6 +759,30 @@ ctl_set_lun_not_ready(struct ctl_scsiio *ctsio)
}
void
+ctl_set_lun_ejected(struct ctl_scsiio *ctsio)
+{
+ /* "Medium not present - tray open" */
+ ctl_set_sense(ctsio,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_NOT_READY,
+ /*asc*/ 0x3A,
+ /*ascq*/ 0x02,
+ SSD_ELEM_NONE);
+}
+
+void
+ctl_set_lun_no_media(struct ctl_scsiio *ctsio)
+{
+ /* "Medium not present - tray closed" */
+ ctl_set_sense(ctsio,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_NOT_READY,
+ /*asc*/ 0x3A,
+ /*ascq*/ 0x01,
+ SSD_ELEM_NONE);
+}
+
+void
ctl_set_illegal_pr_release(struct ctl_scsiio *ctsio)
{
/* "Invalid release of persistent reservation" */
@@ -734,6 +795,18 @@ ctl_set_illegal_pr_release(struct ctl_scsiio *ctsio)
}
void
+ctl_set_lun_transit(struct ctl_scsiio *ctsio)
+{
+ /* "Logical unit not ready, asymmetric access state transition" */
+ ctl_set_sense(ctsio,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_NOT_READY,
+ /*asc*/ 0x04,
+ /*ascq*/ 0x0a,
+ SSD_ELEM_NONE);
+}
+
+void
ctl_set_lun_standby(struct ctl_scsiio *ctsio)
{
/* "Logical unit not ready, target port in standby state" */
@@ -746,6 +819,18 @@ ctl_set_lun_standby(struct ctl_scsiio *ctsio)
}
void
+ctl_set_lun_unavail(struct ctl_scsiio *ctsio)
+{
+ /* "Logical unit not ready, target port in unavailable state" */
+ ctl_set_sense(ctsio,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_NOT_READY,
+ /*asc*/ 0x04,
+ /*ascq*/ 0x0c,
+ SSD_ELEM_NONE);
+}
+
+void
ctl_set_medium_format_corrupted(struct ctl_scsiio *ctsio)
{
/* "Medium format corrupted" */
@@ -830,6 +915,18 @@ ctl_set_task_aborted(struct ctl_scsiio *ctsio)
}
void
+ctl_set_hw_write_protected(struct ctl_scsiio *ctsio)
+{
+ /* "Hardware write protected" */
+ ctl_set_sense(ctsio,
+ /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_DATA_PROTECT,
+ /*asc*/ 0x27,
+ /*ascq*/ 0x01,
+ SSD_ELEM_NONE);
+}
+
+void
ctl_set_space_alloc_fail(struct ctl_scsiio *ctsio)
{
/* "Space allocation failed write protect" */
diff --git a/sys/cam/ctl/ctl_error.h b/sys/cam/ctl/ctl_error.h
index 8430eef..9870643 100644
--- a/sys/cam/ctl/ctl_error.h
+++ b/sys/cam/ctl/ctl_error.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2003 Silicon Graphics International Corp.
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,6 +58,7 @@ void ctl_sense_to_desc(struct scsi_sense_data_fixed *sense_src,
void ctl_sense_to_fixed(struct scsi_sense_data_desc *sense_src,
struct scsi_sense_data_fixed *sense_dest);
void ctl_set_ua(struct ctl_scsiio *ctsio, int asc, int ascq);
+ctl_ua_type ctl_build_qae(struct ctl_lun *lun, uint32_t initidx, uint8_t *resp);
ctl_ua_type ctl_build_ua(struct ctl_lun *lun, uint32_t initidx,
struct scsi_sense_data *sense, scsi_sense_data_type sense_format);
void ctl_set_overlapped_cmd(struct ctl_scsiio *ctsio);
@@ -67,14 +69,18 @@ void ctl_set_invalid_opcode(struct ctl_scsiio *ctsio);
void ctl_set_param_len_error(struct ctl_scsiio *ctsio);
void ctl_set_already_locked(struct ctl_scsiio *ctsio);
void ctl_set_unsupported_lun(struct ctl_scsiio *ctsio);
+void ctl_set_lun_transit(struct ctl_scsiio *ctsio);
void ctl_set_lun_standby(struct ctl_scsiio *ctsio);
+void ctl_set_lun_unavail(struct ctl_scsiio *ctsio);
void ctl_set_internal_failure(struct ctl_scsiio *ctsio, int sks_valid,
uint16_t retry_count);
-void ctl_set_medium_error(struct ctl_scsiio *ctsio);
+void ctl_set_medium_error(struct ctl_scsiio *ctsio, int read);
void ctl_set_aborted(struct ctl_scsiio *ctsio);
void ctl_set_lba_out_of_range(struct ctl_scsiio *ctsio);
void ctl_set_lun_stopped(struct ctl_scsiio *ctsio);
-void ctl_set_lun_not_ready(struct ctl_scsiio *ctsio);
+void ctl_set_lun_int_reqd(struct ctl_scsiio *ctsio);
+void ctl_set_lun_ejected(struct ctl_scsiio *ctsio);
+void ctl_set_lun_no_media(struct ctl_scsiio *ctsio);
void ctl_set_illegal_pr_release(struct ctl_scsiio *ctsio);
void ctl_set_medium_format_corrupted(struct ctl_scsiio *ctsio);
void ctl_set_medium_magazine_inaccessible(struct ctl_scsiio *ctsio);
@@ -83,6 +89,7 @@ void ctl_set_reservation_conflict(struct ctl_scsiio *ctsio);
void ctl_set_queue_full(struct ctl_scsiio *ctsio);
void ctl_set_busy(struct ctl_scsiio *ctsio);
void ctl_set_task_aborted(struct ctl_scsiio *ctsio);
+void ctl_set_hw_write_protected(struct ctl_scsiio *ctsio);
void ctl_set_space_alloc_fail(struct ctl_scsiio *ctsio);
void ctl_set_success(struct ctl_scsiio *ctsio);
diff --git a/sys/cam/ctl/ctl_frontend.c b/sys/cam/ctl/ctl_frontend.c
index e22b9d4..87ed9d5 100644
--- a/sys/cam/ctl/ctl_frontend.c
+++ b/sys/cam/ctl/ctl_frontend.c
@@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_backend.h>
/* XXX KDM move defines from ctl_ioctl.h to somewhere else */
#include <cam/ctl/ctl_ioctl.h>
@@ -141,19 +140,22 @@ int
ctl_port_register(struct ctl_port *port)
{
struct ctl_softc *softc = control_softc;
+ struct ctl_port *tport, *nport;
void *pool;
int port_num;
int retval;
- retval = 0;
-
KASSERT(softc != NULL, ("CTL is not initialized"));
+ port->ctl_softc = softc;
mtx_lock(&softc->ctl_lock);
- port_num = ctl_ffz(softc->ctl_port_mask, CTL_MAX_PORTS);
- if ((port_num == -1)
- || (ctl_set_mask(softc->ctl_port_mask, port_num) == -1)) {
- port->targ_port = -1;
+ if (port->targ_port >= 0)
+ port_num = port->targ_port;
+ else
+ port_num = ctl_ffz(softc->ctl_port_mask,
+ softc->port_min, softc->port_max);
+ if ((port_num < 0) ||
+ (ctl_set_mask(softc->ctl_port_mask, port_num) < 0)) {
mtx_unlock(&softc->ctl_lock);
return (1);
}
@@ -196,10 +198,17 @@ error:
STAILQ_INIT(&port->options);
mtx_lock(&softc->ctl_lock);
- port->targ_port = port_num + softc->port_offset;
+ port->targ_port = port_num;
STAILQ_INSERT_TAIL(&port->frontend->port_list, port, fe_links);
- STAILQ_INSERT_TAIL(&softc->port_list, port, links);
- softc->ctl_ports[port_num] = port;
+ for (tport = NULL, nport = STAILQ_FIRST(&softc->port_list);
+ nport != NULL && nport->targ_port < port_num;
+ tport = nport, nport = STAILQ_NEXT(tport, links)) {
+ }
+ if (tport)
+ STAILQ_INSERT_AFTER(&softc->port_list, tport, port, links);
+ else
+ STAILQ_INSERT_HEAD(&softc->port_list, port, links);
+ softc->ctl_ports[port->targ_port] = port;
mtx_unlock(&softc->ctl_lock);
return (retval);
@@ -208,9 +217,9 @@ error:
int
ctl_port_deregister(struct ctl_port *port)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
struct ctl_io_pool *pool;
- int port_num, retval, i;
+ int retval, i;
retval = 0;
@@ -225,10 +234,8 @@ ctl_port_deregister(struct ctl_port *port)
STAILQ_REMOVE(&softc->port_list, port, ctl_port, links);
STAILQ_REMOVE(&port->frontend->port_list, port, ctl_port, fe_links);
softc->num_ports--;
- port_num = (port->targ_port < CTL_MAX_PORTS) ? port->targ_port :
- port->targ_port - CTL_MAX_PORTS;
- ctl_clear_mask(softc->ctl_port_mask, port_num);
- softc->ctl_ports[port_num] = NULL;
+ ctl_clear_mask(softc->ctl_port_mask, port->targ_port);
+ softc->ctl_ports[port->targ_port] = NULL;
mtx_unlock(&softc->ctl_lock);
ctl_pool_free(pool);
@@ -301,45 +308,71 @@ ctl_port_set_wwns(struct ctl_port *port, int wwnn_valid, uint64_t wwnn,
void
ctl_port_online(struct ctl_port *port)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
struct ctl_lun *lun;
uint32_t l;
- if (port->lun_map) {
- for (l = 0; l < CTL_MAX_LUNS; l++) {
- if (ctl_lun_map_from_port(port, l) >= CTL_MAX_LUNS)
- continue;
- port->lun_enable(port->targ_lun_arg, l);
+ if (port->lun_enable != NULL) {
+ if (port->lun_map) {
+ for (l = 0; l < CTL_MAX_LUNS; l++) {
+ if (ctl_lun_map_from_port(port, l) >=
+ CTL_MAX_LUNS)
+ continue;
+ port->lun_enable(port->targ_lun_arg, l);
+ }
+ } else {
+ STAILQ_FOREACH(lun, &softc->lun_list, links)
+ port->lun_enable(port->targ_lun_arg, lun->lun);
}
- } else {
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- port->lun_enable(port->targ_lun_arg, lun->lun);
}
- port->port_online(port->onoff_arg);
- /* XXX KDM need a lock here? */
+ if (port->port_online != NULL)
+ port->port_online(port->onoff_arg);
+ mtx_lock(&softc->ctl_lock);
port->status |= CTL_PORT_STATUS_ONLINE;
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
+ continue;
+ mtx_lock(&lun->lun_lock);
+ ctl_est_ua_all(lun, -1, CTL_UA_INQ_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ }
+ mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_port(port);
}
void
ctl_port_offline(struct ctl_port *port)
{
- struct ctl_softc *softc = control_softc;
+ struct ctl_softc *softc = port->ctl_softc;
struct ctl_lun *lun;
uint32_t l;
- port->port_offline(port->onoff_arg);
- if (port->lun_map) {
- for (l = 0; l < CTL_MAX_LUNS; l++) {
- if (ctl_lun_map_from_port(port, l) >= CTL_MAX_LUNS)
- continue;
- port->lun_disable(port->targ_lun_arg, l);
+ if (port->port_offline != NULL)
+ port->port_offline(port->onoff_arg);
+ if (port->lun_disable != NULL) {
+ if (port->lun_map) {
+ for (l = 0; l < CTL_MAX_LUNS; l++) {
+ if (ctl_lun_map_from_port(port, l) >=
+ CTL_MAX_LUNS)
+ continue;
+ port->lun_disable(port->targ_lun_arg, l);
+ }
+ } else {
+ STAILQ_FOREACH(lun, &softc->lun_list, links)
+ port->lun_disable(port->targ_lun_arg, lun->lun);
}
- } else {
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- port->lun_disable(port->targ_lun_arg, lun->lun);
}
- /* XXX KDM need a lock here? */
+ mtx_lock(&softc->ctl_lock);
port->status &= ~CTL_PORT_STATUS_ONLINE;
+ STAILQ_FOREACH(lun, &softc->lun_list, links) {
+ if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS)
+ continue;
+ mtx_lock(&lun->lun_lock);
+ ctl_est_ua_all(lun, -1, CTL_UA_INQ_CHANGE);
+ mtx_unlock(&lun->lun_lock);
+ }
+ mtx_unlock(&softc->ctl_lock);
+ ctl_isc_announce_port(port);
}
/*
diff --git a/sys/cam/ctl/ctl_frontend.h b/sys/cam/ctl/ctl_frontend.h
index 470d7a7..9a1a4ee 100644
--- a/sys/cam/ctl/ctl_frontend.h
+++ b/sys/cam/ctl/ctl_frontend.h
@@ -125,12 +125,12 @@ struct ctl_wwpn_iid {
* port_online(): This function is called, with onoff_arg as its
* argument, by the CTL layer when it wants the FETD
* to start responding to selections on the specified
- * target ID. (targ_target)
+ * target ID.
*
* port_offline(): This function is called, with onoff_arg as its
* argument, by the CTL layer when it wants the FETD
* to stop responding to selection on the specified
- * target ID. (targ_target)
+ * target ID.
*
* onoff_arg: This is supplied as an argument to port_online()
* and port_offline(). This is specified by the
@@ -211,6 +211,7 @@ struct ctl_wwpn_iid {
* shouldn't touch this field.
*/
struct ctl_port {
+ struct ctl_softc *ctl_softc;
struct ctl_frontend *frontend;
ctl_port_type port_type; /* passed to CTL */
int num_requested_ctl_io; /* passed to CTL */
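
Taken together, the frontend changes mean a port now carries its own
ctl_softc back-pointer, may pre-select its target-port number
(targ_port >= 0) or pass -1 to have ctl_port_register() allocate one from
the softc's port_min/port_max range, and may leave the
port_online/port_offline and lun_enable/lun_disable callbacks NULL, since
ctl_port_online()/ctl_port_offline() now check them before calling. A
minimal registration sketch under those rules; the my_* names are
placeholders for a hypothetical frontend driver, not code from this commit,
and error handling is omitted:

	static struct ctl_port my_port;

	static int
	my_fe_init(void)
	{
		memset(&my_port, 0, sizeof(my_port));
		my_port.frontend = &my_frontend;  /* this driver's ctl_frontend */
		my_port.port_type = CTL_PORT_INTERNAL;
		my_port.port_name = "example";
		my_port.onoff_arg = &my_softc;
		my_port.port_online = NULL;	/* optional since this change */
		my_port.port_offline = NULL;
		my_port.fe_datamove = my_datamove;
		my_port.fe_done = my_done;
		my_port.targ_port = -1;		/* let CTL pick the port number */
		return (ctl_port_register(&my_port));
	}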
diff --git a/sys/cam/ctl/ctl_frontend_cam_sim.c b/sys/cam/ctl/ctl_frontend_cam_sim.c
index 3abc572..428cf44 100644
--- a/sys/cam/ctl/ctl_frontend_cam_sim.c
+++ b/sys/cam/ctl/ctl_frontend_cam_sim.c
@@ -64,7 +64,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_debug.h>
#define io_ptr spriv_ptr1
@@ -99,8 +98,6 @@ int cfcs_init(void);
static void cfcs_poll(struct cam_sim *sim);
static void cfcs_online(void *arg);
static void cfcs_offline(void *arg);
-static int cfcs_lun_enable(void *arg, int lun_id);
-static int cfcs_lun_disable(void *arg, int lun_id);
static void cfcs_datamove(union ctl_io *io);
static void cfcs_done(union ctl_io *io);
void cfcs_action(struct cam_sim *sim, union ccb *ccb);
@@ -133,13 +130,9 @@ cfcs_init(void)
struct cfcs_softc *softc;
struct ccb_setasync csa;
struct ctl_port *port;
-#ifdef NEEDTOPORT
- char wwnn[8];
-#endif
int retval;
softc = &cfcs_softc;
- retval = 0;
bzero(softc, sizeof(*softc));
mtx_init(&softc->lock, "ctl2cam", NULL, MTX_DEF);
port = &softc->port;
@@ -153,9 +146,6 @@ cfcs_init(void)
port->port_online = cfcs_online;
port->port_offline = cfcs_offline;
port->onoff_arg = softc;
- port->lun_enable = cfcs_lun_enable;
- port->lun_disable = cfcs_lun_disable;
- port->targ_lun_arg = softc;
port->fe_datamove = cfcs_datamove;
port->fe_done = cfcs_done;
@@ -163,6 +153,7 @@ cfcs_init(void)
/* XXX These should probably be fetched from CTL. */
port->max_targets = 1;
port->max_target_id = 15;
+ port->targ_port = -1;
retval = ctl_port_register(port);
if (retval != 0) {
@@ -173,15 +164,6 @@ cfcs_init(void)
}
/*
- * Get the WWNN out of the database, and create a WWPN as well.
- */
-#ifdef NEEDTOPORT
- ddb_GetWWNN((char *)wwnn);
- softc->wwnn = be64dec(wwnn);
- softc->wwpn = softc->wwnn + (softc->port.targ_port & 0xff);
-#endif
-
- /*
* If the CTL frontend didn't tell us what our WWNN/WWPN is, go
* ahead and set something random.
*/
@@ -302,17 +284,6 @@ cfcs_offline(void *arg)
cfcs_onoffline(arg, /*online*/ 0);
}
-static int
-cfcs_lun_enable(void *arg, int lun_id)
-{
- return (0);
-}
-static int
-cfcs_lun_disable(void *arg, int lun_id)
-{
- return (0);
-}
-
/*
* This function is very similar to ctl_ioctl_do_datamove(). Is there a
* way to combine the functionality?
@@ -451,6 +422,14 @@ cfcs_datamove(union ctl_io *io)
io->scsiio.ext_data_filled += len_copied;
+ if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) {
+ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = NULL;
+ io->io_hdr.flags |= CTL_FLAG_STATUS_SENT;
+ ccb->ccb_h.status &= ~CAM_STATUS_MASK;
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ xpt_done(ccb);
+ }
+
io->scsiio.be_move_done(io);
}
@@ -474,12 +453,13 @@ cfcs_done(union ctl_io *io)
/*
* Translate CTL status to CAM status.
*/
+ ccb->ccb_h.status &= ~CAM_STATUS_MASK;
switch (io->io_hdr.status & CTL_STATUS_MASK) {
case CTL_SUCCESS:
- ccb->ccb_h.status = CAM_REQ_CMP;
+ ccb->ccb_h.status |= CAM_REQ_CMP;
break;
case CTL_SCSI_ERROR:
- ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_AUTOSNS_VALID;
+ ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR | CAM_AUTOSNS_VALID;
ccb->csio.scsi_status = io->scsiio.scsi_status;
bcopy(&io->scsiio.sense_data, &ccb->csio.sense_data,
min(io->scsiio.sense_len, ccb->csio.sense_len));
@@ -495,14 +475,18 @@ cfcs_done(union ctl_io *io)
}
break;
case CTL_CMD_ABORTED:
- ccb->ccb_h.status = CAM_REQ_ABORTED;
+ ccb->ccb_h.status |= CAM_REQ_ABORTED;
break;
case CTL_ERROR:
default:
- ccb->ccb_h.status = CAM_REQ_CMP_ERR;
+ ccb->ccb_h.status |= CAM_REQ_CMP_ERR;
break;
}
-
+ if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP &&
+ (ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
+ xpt_freeze_devq(ccb->ccb_h.path, 1);
+ ccb->ccb_h.status |= CAM_DEV_QFRZN;
+ }
xpt_done(ccb);
ctl_free_io(io);
}
@@ -563,12 +547,8 @@ cfcs_action(struct cam_sim *sim, union ccb *ccb)
* down via the XPT_RESET_BUS/LUN CCBs below.
*/
io->io_hdr.io_type = CTL_IO_SCSI;
- io->io_hdr.nexus.initid.id = 1;
+ io->io_hdr.nexus.initid = 1;
io->io_hdr.nexus.targ_port = softc->port.targ_port;
- /*
- * XXX KDM how do we handle target IDs?
- */
- io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id;
io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun;
/*
* This tag scheme isn't the best, since we could in theory
@@ -656,9 +636,8 @@ cfcs_action(struct cam_sim *sim, union ccb *ccb)
ccb->ccb_h.io_ptr = io;
io->io_hdr.io_type = CTL_IO_TASK;
- io->io_hdr.nexus.initid.id = 1;
+ io->io_hdr.nexus.initid = 1;
io->io_hdr.nexus.targ_port = softc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id;
io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun;
io->taskio.task_action = CTL_TASK_ABORT_TASK;
io->taskio.tag_num = abort_ccb->csio.tag_id;
@@ -752,9 +731,8 @@ cfcs_action(struct cam_sim *sim, union ccb *ccb)
ccb->ccb_h.io_ptr = io;
io->io_hdr.io_type = CTL_IO_TASK;
- io->io_hdr.nexus.initid.id = 0;
+ io->io_hdr.nexus.initid = 1;
io->io_hdr.nexus.targ_port = softc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = ccb->ccb_h.target_id;
io->io_hdr.nexus.targ_lun = ccb->ccb_h.target_lun;
if (ccb->ccb_h.func_code == XPT_RESET_BUS)
io->taskio.task_action = CTL_TASK_BUS_RESET;
diff --git a/sys/cam/ctl/ctl_frontend_internal.c b/sys/cam/ctl/ctl_frontend_internal.c
deleted file mode 100644
index 4768292..0000000
--- a/sys/cam/ctl/ctl_frontend_internal.c
+++ /dev/null
@@ -1,1612 +0,0 @@
-/*-
- * Copyright (c) 2004, 2005 Silicon Graphics International Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- *
- * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_frontend_internal.c#5 $
- */
-/*
- * CTL kernel internal frontend target driver. This allows kernel-level
- * clients to send commands into CTL.
- *
- * This has elements of a FETD (e.g. it has to set tag numbers, initiator,
- * port, target, and LUN) and elements of an initiator (LUN discovery and
- * probing, error recovery, command initiation). Even though this has some
- * initiator type elements, this is not intended to be a full fledged
- * initiator layer. It is only intended to send a limited number of
- * commands to a well known target layer.
- *
- * To be able to fulfill the role of a full initiator layer, it would need
- * a whole lot more functionality.
- *
- * Author: Ken Merry <ken@FreeBSD.org>
- *
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/condvar.h>
-#include <sys/queue.h>
-#include <sys/sbuf.h>
-#include <sys/sysctl.h>
-#include <vm/uma.h>
-#include <cam/scsi/scsi_all.h>
-#include <cam/scsi/scsi_da.h>
-#include <cam/ctl/ctl_io.h>
-#include <cam/ctl/ctl.h>
-#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
-#include <cam/ctl/ctl_backend.h>
-#include <cam/ctl/ctl_ioctl.h>
-#include <cam/ctl/ctl_util.h>
-#include <cam/ctl/ctl_ha.h>
-#include <cam/ctl/ctl_private.h>
-#include <cam/ctl/ctl_debug.h>
-#include <cam/ctl/ctl_scsi_all.h>
-#include <cam/ctl/ctl_error.h>
-
-/*
- * Task structure:
- * - overall metatask, different potential metatask types (e.g. forced
- * shutdown, gentle shutdown)
- * - forced shutdown metatask:
- * - states: report luns, pending, done?
- * - list of luns pending, with the relevant I/O for that lun attached.
- * This would allow moving ahead on LUNs with no errors, and going
- * into error recovery on LUNs with problems. Per-LUN states might
- * include inquiry, stop/offline, done.
- *
- * Use LUN enable for LUN list instead of getting it manually? We'd still
- * need inquiry data for each LUN.
- *
- * How to handle processor LUN w.r.t. found/stopped counts?
- */
-#ifdef oldapi
-typedef enum {
- CFI_TASK_NONE,
- CFI_TASK_SHUTDOWN,
- CFI_TASK_STARTUP
-} cfi_tasktype;
-
-struct cfi_task_startstop {
- int total_luns;
- int luns_complete;
- int luns_failed;
- cfi_cb_t callback;
- void *callback_arg;
- /* XXX KDM add more fields here */
-};
-
-union cfi_taskinfo {
- struct cfi_task_startstop startstop;
-};
-
-struct cfi_metatask {
- cfi_tasktype tasktype;
- cfi_mt_status status;
- union cfi_taskinfo taskinfo;
- void *cfi_context;
- STAILQ_ENTRY(cfi_metatask) links;
-};
-#endif
-
-typedef enum {
- CFI_ERR_RETRY = 0x000,
- CFI_ERR_FAIL = 0x001,
- CFI_ERR_LUN_RESET = 0x002,
- CFI_ERR_MASK = 0x0ff,
- CFI_ERR_NO_DECREMENT = 0x100
-} cfi_error_action;
-
-typedef enum {
- CFI_ERR_SOFT,
- CFI_ERR_HARD
-} cfi_error_policy;
-
-typedef enum {
- CFI_LUN_INQUIRY,
- CFI_LUN_READCAPACITY,
- CFI_LUN_READCAPACITY_16,
- CFI_LUN_READY
-} cfi_lun_state;
-
-struct cfi_lun {
- int lun_id;
- struct scsi_inquiry_data inq_data;
- uint64_t num_blocks;
- uint32_t blocksize;
- int blocksize_powerof2;
- uint32_t cur_tag_num;
- cfi_lun_state state;
- struct cfi_softc *softc;
- STAILQ_HEAD(, cfi_lun_io) io_list;
- STAILQ_ENTRY(cfi_lun) links;
-};
-
-struct cfi_lun_io {
- struct cfi_lun *lun;
- struct cfi_metatask *metatask;
- cfi_error_policy policy;
- void (*done_function)(union ctl_io *io);
- union ctl_io *ctl_io;
- struct cfi_lun_io *orig_lun_io;
- STAILQ_ENTRY(cfi_lun_io) links;
-};
-
-typedef enum {
- CFI_NONE = 0x00,
- CFI_ONLINE = 0x01,
-} cfi_flags;
-
-struct cfi_softc {
- struct ctl_port port;
- char fe_name[40];
- struct mtx lock;
- cfi_flags flags;
- STAILQ_HEAD(, cfi_lun) lun_list;
- STAILQ_HEAD(, cfi_metatask) metatask_list;
-};
-
-MALLOC_DEFINE(M_CTL_CFI, "ctlcfi", "CTL CFI");
-
-static uma_zone_t cfi_lun_zone;
-static uma_zone_t cfi_metatask_zone;
-
-static struct cfi_softc fetd_internal_softc;
-
-int cfi_init(void);
-void cfi_shutdown(void) __unused;
-static void cfi_online(void *arg);
-static void cfi_offline(void *arg);
-static int cfi_lun_enable(void *arg, int lun_id);
-static int cfi_lun_disable(void *arg, int lun_id);
-static void cfi_datamove(union ctl_io *io);
-static cfi_error_action cfi_checkcond_parse(union ctl_io *io,
- struct cfi_lun_io *lun_io);
-static cfi_error_action cfi_error_parse(union ctl_io *io,
- struct cfi_lun_io *lun_io);
-static void cfi_init_io(union ctl_io *io, struct cfi_lun *lun,
- struct cfi_metatask *metatask, cfi_error_policy policy,
- int retries, struct cfi_lun_io *orig_lun_io,
- void (*done_function)(union ctl_io *io));
-static void cfi_done(union ctl_io *io);
-static void cfi_lun_probe_done(union ctl_io *io);
-static void cfi_lun_probe(struct cfi_lun *lun, int have_lock);
-static void cfi_metatask_done(struct cfi_softc *softc,
- struct cfi_metatask *metatask);
-static void cfi_metatask_bbr_errorparse(struct cfi_metatask *metatask,
- union ctl_io *io);
-static void cfi_metatask_io_done(union ctl_io *io);
-static void cfi_err_recovery_done(union ctl_io *io);
-static void cfi_lun_io_done(union ctl_io *io);
-
-static struct ctl_frontend cfi_frontend =
-{
- .name = "kernel",
- .init = cfi_init,
- .shutdown = cfi_shutdown,
-};
-CTL_FRONTEND_DECLARE(ctlcfi, cfi_frontend);
-
-int
-cfi_init(void)
-{
- struct cfi_softc *softc;
- struct ctl_port *port;
- int retval;
-
- softc = &fetd_internal_softc;
-
- port = &softc->port;
-
- retval = 0;
-
- if (sizeof(struct cfi_lun_io) > CTL_PORT_PRIV_SIZE) {
- printf("%s: size of struct cfi_lun_io %zd > "
- "CTL_PORT_PRIV_SIZE %d\n", __func__,
- sizeof(struct cfi_lun_io),
- CTL_PORT_PRIV_SIZE);
- }
- memset(softc, 0, sizeof(*softc));
-
- mtx_init(&softc->lock, "CTL frontend mutex", NULL, MTX_DEF);
- STAILQ_INIT(&softc->lun_list);
- STAILQ_INIT(&softc->metatask_list);
- sprintf(softc->fe_name, "kernel");
- port->frontend = &cfi_frontend;
- port->port_type = CTL_PORT_INTERNAL;
- port->num_requested_ctl_io = 100;
- port->port_name = softc->fe_name;
- port->port_online = cfi_online;
- port->port_offline = cfi_offline;
- port->onoff_arg = softc;
- port->lun_enable = cfi_lun_enable;
- port->lun_disable = cfi_lun_disable;
- port->targ_lun_arg = softc;
- port->fe_datamove = cfi_datamove;
- port->fe_done = cfi_done;
- port->max_targets = 15;
- port->max_target_id = 15;
-
- if (ctl_port_register(port) != 0)
- {
- printf("%s: internal frontend registration failed\n", __func__);
- return (0);
- }
-
- cfi_lun_zone = uma_zcreate("cfi_lun", sizeof(struct cfi_lun),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- cfi_metatask_zone = uma_zcreate("cfi_metatask", sizeof(struct cfi_metatask),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-
- return (0);
-}
-
-void
-cfi_shutdown(void)
-{
- struct cfi_softc *softc;
-
- softc = &fetd_internal_softc;
-
- /*
- * XXX KDM need to clear out any I/O pending on each LUN.
- */
- if (ctl_port_deregister(&softc->port) != 0)
- printf("%s: ctl_frontend_deregister() failed\n", __func__);
-
- uma_zdestroy(cfi_lun_zone);
- uma_zdestroy(cfi_metatask_zone);
-}
-
-static void
-cfi_online(void *arg)
-{
- struct cfi_softc *softc;
- struct cfi_lun *lun;
-
- softc = (struct cfi_softc *)arg;
-
- softc->flags |= CFI_ONLINE;
-
- /*
- * Go through and kick off the probe for each lun. Should we check
- * the LUN flags here to determine whether or not to probe it?
- */
- mtx_lock(&softc->lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links)
- cfi_lun_probe(lun, /*have_lock*/ 1);
- mtx_unlock(&softc->lock);
-}
-
-static void
-cfi_offline(void *arg)
-{
- struct cfi_softc *softc;
-
- softc = (struct cfi_softc *)arg;
-
- softc->flags &= ~CFI_ONLINE;
-}
-
-static int
-cfi_lun_enable(void *arg, int lun_id)
-{
- struct cfi_softc *softc;
- struct cfi_lun *lun;
- int found;
-
- softc = (struct cfi_softc *)arg;
-
- found = 0;
- mtx_lock(&softc->lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- if (lun->lun_id == lun_id) {
- found = 1;
- break;
- }
- }
- mtx_unlock(&softc->lock);
-
- /*
- * If we already have this target/LUN, there is no reason to add
- * it to our lists again.
- */
- if (found != 0)
- return (0);
-
- lun = uma_zalloc(cfi_lun_zone, M_NOWAIT | M_ZERO);
- if (lun == NULL) {
- printf("%s: unable to allocate LUN structure\n", __func__);
- return (1);
- }
-
- lun->lun_id = lun_id;
- lun->cur_tag_num = 0;
- lun->state = CFI_LUN_INQUIRY;
- lun->softc = softc;
- STAILQ_INIT(&lun->io_list);
-
- mtx_lock(&softc->lock);
- STAILQ_INSERT_TAIL(&softc->lun_list, lun, links);
- mtx_unlock(&softc->lock);
-
- cfi_lun_probe(lun, /*have_lock*/ 0);
-
- return (0);
-}
-
-static int
-cfi_lun_disable(void *arg, int lun_id)
-{
- struct cfi_softc *softc;
- struct cfi_lun *lun;
- int found;
-
- softc = (struct cfi_softc *)arg;
-
- found = 0;
-
- /*
- * XXX KDM need to do an invalidate and then a free when any
- * pending I/O has completed. Or do we? CTL won't free a LUN
- * while any I/O is pending. So we won't get this notification
- * unless any I/O we have pending on a LUN has completed.
- */
- mtx_lock(&softc->lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- if (lun->lun_id == lun_id) {
- found = 1;
- break;
- }
- }
- if (found != 0)
- STAILQ_REMOVE(&softc->lun_list, lun, cfi_lun, links);
-
- mtx_unlock(&softc->lock);
-
- if (found == 0) {
- printf("%s: can't find lun %d\n", __func__, lun_id);
- return (1);
- }
-
- uma_zfree(cfi_lun_zone, lun);
-
- return (0);
-}
-
-static void
-cfi_datamove(union ctl_io *io)
-{
- struct ctl_sg_entry *ext_sglist, *kern_sglist;
- struct ctl_sg_entry ext_entry, kern_entry;
- int ext_sglen, ext_sg_entries, kern_sg_entries;
- int ext_sg_start, ext_offset;
- int len_to_copy, len_copied;
- int kern_watermark, ext_watermark;
- int ext_sglist_malloced;
- struct ctl_scsiio *ctsio;
- int i, j;
-
- ext_sglist_malloced = 0;
- ext_sg_start = 0;
- ext_offset = 0;
- ext_sglist = NULL;
-
- CTL_DEBUG_PRINT(("%s\n", __func__));
-
- ctsio = &io->scsiio;
-
- /*
-	 * If there is no external data pointer, we're probably doing a
-	 * BBR read and don't actually need to transfer the data.  This
-	 * effectively bit-buckets the data.
- */
- if (ctsio->ext_data_ptr == NULL)
- goto bailout;
-
- /*
- * To simplify things here, if we have a single buffer, stick it in
- * a S/G entry and just make it a single entry S/G list.
- */
- if (ctsio->io_hdr.flags & CTL_FLAG_EDPTR_SGLIST) {
- int len_seen;
-
- ext_sglen = ctsio->ext_sg_entries * sizeof(*ext_sglist);
-
- ext_sglist = (struct ctl_sg_entry *)malloc(ext_sglen, M_CTL_CFI,
- M_WAITOK);
- ext_sglist_malloced = 1;
-		/*
-		 * memcpy() returns its destination pointer, not an error
-		 * code, so there is no failure case to check for here.
-		 */
-		memcpy(ext_sglist, ctsio->ext_data_ptr, ext_sglen);
- ext_sg_entries = ctsio->ext_sg_entries;
- len_seen = 0;
- for (i = 0; i < ext_sg_entries; i++) {
- if ((len_seen + ext_sglist[i].len) >=
- ctsio->ext_data_filled) {
- ext_sg_start = i;
- ext_offset = ctsio->ext_data_filled - len_seen;
- break;
- }
- len_seen += ext_sglist[i].len;
- }
- } else {
- ext_sglist = &ext_entry;
- ext_sglist->addr = ctsio->ext_data_ptr;
- ext_sglist->len = ctsio->ext_data_len;
- ext_sg_entries = 1;
- ext_sg_start = 0;
- ext_offset = ctsio->ext_data_filled;
- }
-
- if (ctsio->kern_sg_entries > 0) {
- kern_sglist = (struct ctl_sg_entry *)ctsio->kern_data_ptr;
- kern_sg_entries = ctsio->kern_sg_entries;
- } else {
- kern_sglist = &kern_entry;
- kern_sglist->addr = ctsio->kern_data_ptr;
- kern_sglist->len = ctsio->kern_data_len;
- kern_sg_entries = 1;
- }
-
- kern_watermark = 0;
- ext_watermark = ext_offset;
- len_copied = 0;
- for (i = ext_sg_start, j = 0;
- i < ext_sg_entries && j < kern_sg_entries;) {
- uint8_t *ext_ptr, *kern_ptr;
-
- len_to_copy = MIN(ext_sglist[i].len - ext_watermark,
- kern_sglist[j].len - kern_watermark);
-
- ext_ptr = (uint8_t *)ext_sglist[i].addr;
- ext_ptr = ext_ptr + ext_watermark;
- if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
- /*
- * XXX KDM fix this!
- */
- panic("need to implement bus address support");
-#if 0
- kern_ptr = bus_to_virt(kern_sglist[j].addr);
-#endif
- } else
- kern_ptr = (uint8_t *)kern_sglist[j].addr;
- kern_ptr = kern_ptr + kern_watermark;
-
- kern_watermark += len_to_copy;
- ext_watermark += len_to_copy;
-
- if ((ctsio->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
- CTL_FLAG_DATA_IN) {
- CTL_DEBUG_PRINT(("%s: copying %d bytes to user\n",
- __func__, len_to_copy));
- CTL_DEBUG_PRINT(("%s: from %p to %p\n", __func__,
- kern_ptr, ext_ptr));
- memcpy(ext_ptr, kern_ptr, len_to_copy);
- } else {
- CTL_DEBUG_PRINT(("%s: copying %d bytes from user\n",
- __func__, len_to_copy));
- CTL_DEBUG_PRINT(("%s: from %p to %p\n", __func__,
- ext_ptr, kern_ptr));
- memcpy(kern_ptr, ext_ptr, len_to_copy);
- }
-
- len_copied += len_to_copy;
-
- if (ext_sglist[i].len == ext_watermark) {
- i++;
- ext_watermark = 0;
- }
-
- if (kern_sglist[j].len == kern_watermark) {
- j++;
- kern_watermark = 0;
- }
- }
-
- ctsio->ext_data_filled += len_copied;
-
- CTL_DEBUG_PRINT(("%s: ext_sg_entries: %d, kern_sg_entries: %d\n",
- __func__, ext_sg_entries, kern_sg_entries));
- CTL_DEBUG_PRINT(("%s: ext_data_len = %d, kern_data_len = %d\n",
- __func__, ctsio->ext_data_len, ctsio->kern_data_len));
-
- /* XXX KDM set residual?? */
-bailout:
-
- if (ext_sglist_malloced != 0)
- free(ext_sglist, M_CTL_CFI);
-
- io->scsiio.be_move_done(io);
-
- return;
-}
-
-/*
- * For any sort of check condition, busy, etc., we just retry. We do not
- * decrement the retry count for unit attention type errors. These are
- * normal, and we want to save the retry count for "real" errors. Otherwise,
- * we could end up with situations where a command will succeed in some
- * situations and fail in others, depending on whether a unit attention is
- * pending. Also, some of our error recovery actions, most notably the
- * LUN reset action, will cause a unit attention.
- *
- * We can add more detail here later if necessary.
- */
-static cfi_error_action
-cfi_checkcond_parse(union ctl_io *io, struct cfi_lun_io *lun_io)
-{
- cfi_error_action error_action;
- int error_code, sense_key, asc, ascq;
-
- /*
- * Default to retrying the command.
- */
- error_action = CFI_ERR_RETRY;
-
- scsi_extract_sense_len(&io->scsiio.sense_data,
- io->scsiio.sense_len,
- &error_code,
- &sense_key,
- &asc,
- &ascq,
- /*show_errors*/ 1);
-
- switch (error_code) {
- case SSD_DEFERRED_ERROR:
- case SSD_DESC_DEFERRED_ERROR:
- error_action |= CFI_ERR_NO_DECREMENT;
- break;
- case SSD_CURRENT_ERROR:
- case SSD_DESC_CURRENT_ERROR:
- default: {
- switch (sense_key) {
- case SSD_KEY_UNIT_ATTENTION:
- error_action |= CFI_ERR_NO_DECREMENT;
- break;
- case SSD_KEY_HARDWARE_ERROR:
- /*
- * This is our generic "something bad happened"
- * error code. It often isn't recoverable.
- */
- if ((asc == 0x44) && (ascq == 0x00))
- error_action = CFI_ERR_FAIL;
- break;
- case SSD_KEY_NOT_READY:
- /*
- * If the LUN is powered down, there likely isn't
- * much point in retrying right now.
- */
- if ((asc == 0x04) && (ascq == 0x02))
- error_action = CFI_ERR_FAIL;
- /*
- * If the LUN is offline, there probably isn't much
- * point in retrying, either.
- */
- if ((asc == 0x04) && (ascq == 0x03))
- error_action = CFI_ERR_FAIL;
- break;
- }
- }
- }
-
- return (error_action);
-}
-
-static cfi_error_action
-cfi_error_parse(union ctl_io *io, struct cfi_lun_io *lun_io)
-{
- cfi_error_action error_action;
-
- error_action = CFI_ERR_RETRY;
-
- switch (io->io_hdr.io_type) {
- case CTL_IO_SCSI:
- switch (io->io_hdr.status & CTL_STATUS_MASK) {
- case CTL_SCSI_ERROR:
- switch (io->scsiio.scsi_status) {
- case SCSI_STATUS_RESERV_CONFLICT:
- /*
- * For a reservation conflict, we'll usually
- * want the hard error recovery policy, so
- * we'll reset the LUN.
- */
-				if (lun_io->policy == CFI_ERR_HARD)
-					error_action = CFI_ERR_LUN_RESET;
-				else
-					error_action = CFI_ERR_RETRY;
- break;
- case SCSI_STATUS_CHECK_COND:
- default:
- error_action = cfi_checkcond_parse(io, lun_io);
- break;
- }
- break;
- default:
- error_action = CFI_ERR_RETRY;
- break;
- }
- break;
- case CTL_IO_TASK:
- /*
- * In theory task management commands shouldn't fail...
- */
- error_action = CFI_ERR_RETRY;
- break;
- default:
- printf("%s: invalid ctl_io type %d\n", __func__,
- io->io_hdr.io_type);
- panic("%s: invalid ctl_io type %d\n", __func__,
- io->io_hdr.io_type);
- break;
- }
-
- return (error_action);
-}
-
-static void
-cfi_init_io(union ctl_io *io, struct cfi_lun *lun,
- struct cfi_metatask *metatask, cfi_error_policy policy, int retries,
- struct cfi_lun_io *orig_lun_io,
- void (*done_function)(union ctl_io *io))
-{
- struct cfi_lun_io *lun_io;
-
- io->io_hdr.nexus.initid.id = 7;
- io->io_hdr.nexus.targ_port = lun->softc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = 0;
- io->io_hdr.nexus.targ_lun = lun->lun_id;
- io->io_hdr.retries = retries;
- lun_io = (struct cfi_lun_io *)io->io_hdr.port_priv;
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = lun_io;
- lun_io->lun = lun;
- lun_io->metatask = metatask;
- lun_io->ctl_io = io;
- lun_io->policy = policy;
- lun_io->orig_lun_io = orig_lun_io;
- lun_io->done_function = done_function;
- /*
- * We only set the tag number for SCSI I/Os. For task management
- * commands, the tag number is only really needed for aborts, so
- * the caller can set it if necessary.
- */
- switch (io->io_hdr.io_type) {
- case CTL_IO_SCSI:
- io->scsiio.tag_num = lun->cur_tag_num++;
- break;
- case CTL_IO_TASK:
- default:
- break;
- }
-}
-
-static void
-cfi_done(union ctl_io *io)
-{
- struct cfi_lun_io *lun_io;
- struct cfi_softc *softc;
- struct cfi_lun *lun;
-
- lun_io = (struct cfi_lun_io *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
-
- lun = lun_io->lun;
- softc = lun->softc;
-
- /*
- * Very minimal retry logic. We basically retry if we got an error
- * back, and the retry count is greater than 0. If we ever want
- * more sophisticated initiator type behavior, the CAM error
- * recovery code in ../common might be helpful.
- */
- if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)
- && (io->io_hdr.retries > 0)) {
- ctl_io_status old_status;
- cfi_error_action error_action;
-
- error_action = cfi_error_parse(io, lun_io);
-
- switch (error_action & CFI_ERR_MASK) {
- case CFI_ERR_FAIL:
- goto done;
- break; /* NOTREACHED */
- case CFI_ERR_LUN_RESET: {
- union ctl_io *new_io;
- struct cfi_lun_io *new_lun_io;
-
- new_io = ctl_alloc_io(softc->port.ctl_pool_ref);
- ctl_zero_io(new_io);
-
- new_io->io_hdr.io_type = CTL_IO_TASK;
- new_io->taskio.task_action = CTL_TASK_LUN_RESET;
-
- cfi_init_io(new_io,
- /*lun*/ lun_io->lun,
- /*metatask*/ NULL,
- /*policy*/ CFI_ERR_SOFT,
- /*retries*/ 0,
- /*orig_lun_io*/lun_io,
- /*done_function*/ cfi_err_recovery_done);
-
- new_lun_io = (struct cfi_lun_io *)
- new_io->io_hdr.port_priv;
-
- mtx_lock(&lun->softc->lock);
- STAILQ_INSERT_TAIL(&lun->io_list, new_lun_io, links);
- mtx_unlock(&lun->softc->lock);
-
- io = new_io;
- break;
- }
- case CFI_ERR_RETRY:
- default:
- if ((error_action & CFI_ERR_NO_DECREMENT) == 0)
- io->io_hdr.retries--;
- break;
- }
-
- old_status = io->io_hdr.status;
- io->io_hdr.status = CTL_STATUS_NONE;
-#if 0
- io->io_hdr.flags &= ~CTL_FLAG_ALREADY_DONE;
-#endif
- io->io_hdr.flags &= ~CTL_FLAG_ABORT;
- io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC;
-
- if (ctl_queue(io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n",
- __func__);
- io->io_hdr.status = old_status;
- } else
- return;
- }
-done:
- lun_io->done_function(io);
-}
-
-static void
-cfi_lun_probe_done(union ctl_io *io)
-{
- struct cfi_lun *lun;
- struct cfi_lun_io *lun_io;
-
- lun_io = (struct cfi_lun_io *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
- lun = lun_io->lun;
-
- switch (lun->state) {
- case CFI_LUN_INQUIRY: {
- if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) {
- printf("%s: LUN %d probe failed because inquiry "
- "failed\n", __func__, lun->lun_id);
- ctl_io_error_print(io, NULL);
- } else {
- if (SID_TYPE(&lun->inq_data) != T_DIRECT) {
- char path_str[40];
-
- lun->state = CFI_LUN_READY;
- ctl_scsi_path_string(io, path_str,
- sizeof(path_str));
- printf("%s", path_str);
- scsi_print_inquiry(&lun->inq_data);
- } else {
- lun->state = CFI_LUN_READCAPACITY;
- cfi_lun_probe(lun, /*have_lock*/ 0);
- }
- }
- mtx_lock(&lun->softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&lun->softc->lock);
- ctl_free_io(io);
- break;
- }
- case CFI_LUN_READCAPACITY:
- case CFI_LUN_READCAPACITY_16: {
- uint64_t maxlba;
- uint32_t blocksize;
-
- maxlba = 0;
- blocksize = 0;
-
- if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) {
- printf("%s: LUN %d probe failed because READ CAPACITY "
- "failed\n", __func__, lun->lun_id);
- ctl_io_error_print(io, NULL);
- } else {
- if (lun->state == CFI_LUN_READCAPACITY) {
- struct scsi_read_capacity_data *rdcap;
-
- rdcap = (struct scsi_read_capacity_data *)
- io->scsiio.ext_data_ptr;
-
- maxlba = scsi_4btoul(rdcap->addr);
- blocksize = scsi_4btoul(rdcap->length);
- if (blocksize == 0) {
- printf("%s: LUN %d has invalid "
- "blocksize 0, probe aborted\n",
- __func__, lun->lun_id);
- } else if (maxlba == 0xffffffff) {
- lun->state = CFI_LUN_READCAPACITY_16;
- cfi_lun_probe(lun, /*have_lock*/ 0);
- } else
- lun->state = CFI_LUN_READY;
- } else {
- struct scsi_read_capacity_data_long *rdcap_long;
-
- rdcap_long = (struct
- scsi_read_capacity_data_long *)
- io->scsiio.ext_data_ptr;
- maxlba = scsi_8btou64(rdcap_long->addr);
- blocksize = scsi_4btoul(rdcap_long->length);
-
- if (blocksize == 0) {
- printf("%s: LUN %d has invalid "
- "blocksize 0, probe aborted\n",
- __func__, lun->lun_id);
- } else
- lun->state = CFI_LUN_READY;
- }
- }
-
- if (lun->state == CFI_LUN_READY) {
- char path_str[40];
-
- lun->num_blocks = maxlba + 1;
- lun->blocksize = blocksize;
-
- /*
- * If this is true, the blocksize is a power of 2.
- * We already checked for 0 above.
- */
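-			/*
-			 * For instance, blocksize 512 is 0x200: 511 & 512
-			 * is 0, and the loop below finds bit 9 set, so
-			 * blocksize_powerof2 becomes 9.  A non-power-of-2
-			 * blocksize such as 520 fails the test, since
-			 * 519 & 520 is nonzero.
-			 */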
- if (((blocksize - 1) & blocksize) == 0) {
- int i;
-
- for (i = 0; i < 32; i++) {
- if ((blocksize & (1 << i)) != 0) {
- lun->blocksize_powerof2 = i;
- break;
- }
- }
- }
-			ctl_scsi_path_string(io, path_str, sizeof(path_str));
- printf("%s", path_str);
- scsi_print_inquiry(&lun->inq_data);
- printf("%s %ju blocks, blocksize %d\n", path_str,
- (uintmax_t)maxlba + 1, blocksize);
- }
- mtx_lock(&lun->softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&lun->softc->lock);
- free(io->scsiio.ext_data_ptr, M_CTL_CFI);
- ctl_free_io(io);
- break;
- }
- case CFI_LUN_READY:
- default:
- mtx_lock(&lun->softc->lock);
- /* How did we get here?? */
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&lun->softc->lock);
- ctl_free_io(io);
- break;
- }
-}
-
-static void
-cfi_lun_probe(struct cfi_lun *lun, int have_lock)
-{
-
- if (have_lock == 0)
- mtx_lock(&lun->softc->lock);
- if ((lun->softc->flags & CFI_ONLINE) == 0) {
- if (have_lock == 0)
- mtx_unlock(&lun->softc->lock);
- return;
- }
- if (have_lock == 0)
- mtx_unlock(&lun->softc->lock);
-
- switch (lun->state) {
- case CFI_LUN_INQUIRY: {
- struct cfi_lun_io *lun_io;
- union ctl_io *io;
-
- io = ctl_alloc_io(lun->softc->port.ctl_pool_ref);
- ctl_scsi_inquiry(io,
- /*data_ptr*/(uint8_t *)&lun->inq_data,
- /*data_len*/ sizeof(lun->inq_data),
- /*byte2*/ 0,
- /*page_code*/ 0,
- /*tag_type*/ CTL_TAG_SIMPLE,
- /*control*/ 0);
-
- cfi_init_io(io,
- /*lun*/ lun,
- /*metatask*/ NULL,
- /*policy*/ CFI_ERR_SOFT,
- /*retries*/ 5,
- /*orig_lun_io*/ NULL,
- /*done_function*/
- cfi_lun_probe_done);
-
- lun_io = (struct cfi_lun_io *)io->io_hdr.port_priv;
-
- if (have_lock == 0)
- mtx_lock(&lun->softc->lock);
- STAILQ_INSERT_TAIL(&lun->io_list, lun_io, links);
- if (have_lock == 0)
- mtx_unlock(&lun->softc->lock);
-
- if (ctl_queue(io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n",
- __func__);
- STAILQ_REMOVE(&lun->io_list, lun_io,
- cfi_lun_io, links);
- ctl_free_io(io);
- }
- break;
- }
- case CFI_LUN_READCAPACITY:
- case CFI_LUN_READCAPACITY_16: {
- struct cfi_lun_io *lun_io;
- uint8_t *dataptr;
- union ctl_io *io;
-
- io = ctl_alloc_io(lun->softc->port.ctl_pool_ref);
-
- dataptr = malloc(sizeof(struct scsi_read_capacity_data_long),
- M_CTL_CFI, M_NOWAIT);
- if (dataptr == NULL) {
- printf("%s: unable to allocate SCSI read capacity "
- "buffer for lun %d\n", __func__, lun->lun_id);
- return;
- }
- if (lun->state == CFI_LUN_READCAPACITY) {
- ctl_scsi_read_capacity(io,
- /*data_ptr*/ dataptr,
- /*data_len*/
- sizeof(struct scsi_read_capacity_data_long),
- /*addr*/ 0,
- /*reladr*/ 0,
- /*pmi*/ 0,
- /*tag_type*/ CTL_TAG_SIMPLE,
- /*control*/ 0);
- } else {
- ctl_scsi_read_capacity_16(io,
- /*data_ptr*/ dataptr,
- /*data_len*/
- sizeof(struct scsi_read_capacity_data_long),
- /*addr*/ 0,
- /*reladr*/ 0,
- /*pmi*/ 0,
- /*tag_type*/ CTL_TAG_SIMPLE,
- /*control*/ 0);
- }
- cfi_init_io(io,
- /*lun*/ lun,
- /*metatask*/ NULL,
- /*policy*/ CFI_ERR_SOFT,
- /*retries*/ 7,
- /*orig_lun_io*/ NULL,
- /*done_function*/ cfi_lun_probe_done);
-
- lun_io = (struct cfi_lun_io *)io->io_hdr.port_priv;
-
- if (have_lock == 0)
- mtx_lock(&lun->softc->lock);
- STAILQ_INSERT_TAIL(&lun->io_list, lun_io, links);
- if (have_lock == 0)
- mtx_unlock(&lun->softc->lock);
-
- if (ctl_queue(io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n",
- __func__);
- STAILQ_REMOVE(&lun->io_list, lun_io,
- cfi_lun_io, links);
- free(dataptr, M_CTL_CFI);
- ctl_free_io(io);
- }
- break;
- }
- case CFI_LUN_READY:
- default:
- /* Why were we called? */
- break;
- }
-}
-
-static void
-cfi_metatask_done(struct cfi_softc *softc, struct cfi_metatask *metatask)
-{
- mtx_lock(&softc->lock);
- STAILQ_REMOVE(&softc->metatask_list, metatask, cfi_metatask, links);
- mtx_unlock(&softc->lock);
-
- /*
- * Return status to the caller. Caller allocated storage, and is
- * responsible for calling cfi_free_metatask to release it once
- * they've seen the status.
- */
- metatask->callback(metatask->callback_arg, metatask);
-}
-
-static void
-cfi_metatask_bbr_errorparse(struct cfi_metatask *metatask, union ctl_io *io)
-{
- int error_code, sense_key, asc, ascq;
-
- if (metatask->tasktype != CFI_TASK_BBRREAD)
- return;
-
- if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) {
- metatask->status = CFI_MT_SUCCESS;
- metatask->taskinfo.bbrread.status = CFI_BBR_SUCCESS;
- return;
- }
-
- if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SCSI_ERROR) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_ERROR;
- return;
- }
-
- metatask->taskinfo.bbrread.scsi_status = io->scsiio.scsi_status;
- memcpy(&metatask->taskinfo.bbrread.sense_data, &io->scsiio.sense_data,
- MIN(sizeof(metatask->taskinfo.bbrread.sense_data),
- sizeof(io->scsiio.sense_data)));
-
- if (io->scsiio.scsi_status == SCSI_STATUS_RESERV_CONFLICT) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_RESERV_CONFLICT;
- return;
- }
-
- if (io->scsiio.scsi_status != SCSI_STATUS_CHECK_COND) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_SCSI_ERROR;
- return;
- }
-
- scsi_extract_sense_len(&io->scsiio.sense_data,
- io->scsiio.sense_len,
- &error_code,
- &sense_key,
- &asc,
- &ascq,
- /*show_errors*/ 1);
-
- switch (error_code) {
- case SSD_DEFERRED_ERROR:
- case SSD_DESC_DEFERRED_ERROR:
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_SCSI_ERROR;
- break;
- case SSD_CURRENT_ERROR:
- case SSD_DESC_CURRENT_ERROR:
- default: {
- struct scsi_sense_data *sense;
-
- sense = &io->scsiio.sense_data;
-
- if ((asc == 0x04) && (ascq == 0x02)) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_LUN_STOPPED;
- } else if ((asc == 0x04) && (ascq == 0x03)) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_LUN_OFFLINE_CTL;
- } else if ((asc == 0x44) && (ascq == 0x00)) {
-#ifdef NEEDTOPORT
- if (sense->sense_key_spec[0] & SSD_SCS_VALID) {
- uint16_t retry_count;
-
- retry_count = sense->sense_key_spec[1] << 8 |
- sense->sense_key_spec[2];
- if (((retry_count & 0xf000) == CSC_RAIDCORE)
- && ((retry_count & 0x0f00) == CSC_SHELF_SW)
- && ((retry_count & 0xff) ==
- RC_STS_DEVICE_OFFLINE)) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_LUN_OFFLINE_RC;
- } else {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_SCSI_ERROR;
- }
- } else {
-#endif /* NEEDTOPORT */
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_SCSI_ERROR;
-#ifdef NEEDTOPORT
- }
-#endif
- } else {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_SCSI_ERROR;
- }
- break;
- }
- }
-}
-
-static void
-cfi_metatask_io_done(union ctl_io *io)
-{
- struct cfi_lun_io *lun_io;
- struct cfi_metatask *metatask;
- struct cfi_softc *softc;
- struct cfi_lun *lun;
-
- lun_io = (struct cfi_lun_io *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
-
- lun = lun_io->lun;
- softc = lun->softc;
-
- metatask = lun_io->metatask;
-
- switch (metatask->tasktype) {
- case CFI_TASK_STARTUP:
- case CFI_TASK_SHUTDOWN: {
- int failed, done, is_start;
-
- failed = 0;
- done = 0;
- if (metatask->tasktype == CFI_TASK_STARTUP)
- is_start = 1;
- else
- is_start = 0;
-
- mtx_lock(&softc->lock);
- if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)
- metatask->taskinfo.startstop.luns_complete++;
- else {
- metatask->taskinfo.startstop.luns_failed++;
- failed = 1;
- }
- if ((metatask->taskinfo.startstop.luns_complete +
- metatask->taskinfo.startstop.luns_failed) >=
- metatask->taskinfo.startstop.total_luns)
- done = 1;
-
- mtx_unlock(&softc->lock);
-
- if (failed != 0) {
- printf("%s: LUN %d %s request failed\n", __func__,
- lun_io->lun->lun_id, (is_start == 1) ? "start" :
- "stop");
- ctl_io_error_print(io, &lun_io->lun->inq_data);
- }
- if (done != 0) {
- if (metatask->taskinfo.startstop.luns_failed > 0)
- metatask->status = CFI_MT_ERROR;
- else
- metatask->status = CFI_MT_SUCCESS;
- cfi_metatask_done(softc, metatask);
- }
- mtx_lock(&softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&softc->lock);
-
- ctl_free_io(io);
- break;
- }
- case CFI_TASK_BBRREAD: {
- /*
- * Translate the SCSI error into an enumeration.
- */
- cfi_metatask_bbr_errorparse(metatask, io);
-
- mtx_lock(&softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&softc->lock);
-
- ctl_free_io(io);
-
- cfi_metatask_done(softc, metatask);
- break;
- }
- default:
- /*
- * This shouldn't happen.
- */
- mtx_lock(&softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&softc->lock);
-
- ctl_free_io(io);
- break;
- }
-}
-
-static void
-cfi_err_recovery_done(union ctl_io *io)
-{
- struct cfi_lun_io *lun_io, *orig_lun_io;
- struct cfi_lun *lun;
- union ctl_io *orig_io;
-
- lun_io = (struct cfi_lun_io *)io->io_hdr.port_priv;
- orig_lun_io = lun_io->orig_lun_io;
- orig_io = orig_lun_io->ctl_io;
- lun = lun_io->lun;
-
- if (io->io_hdr.status != CTL_SUCCESS) {
- printf("%s: error recovery action failed. Original "
- "error:\n", __func__);
-
- ctl_io_error_print(orig_lun_io->ctl_io, &lun->inq_data);
-
- printf("%s: error from error recovery action:\n", __func__);
-
- ctl_io_error_print(io, &lun->inq_data);
-
- printf("%s: trying original command again...\n", __func__);
- }
-
- mtx_lock(&lun->softc->lock);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- mtx_unlock(&lun->softc->lock);
- ctl_free_io(io);
-
- orig_io->io_hdr.retries--;
- orig_io->io_hdr.status = CTL_STATUS_NONE;
-
- if (ctl_queue(orig_io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n", __func__);
- STAILQ_REMOVE(&lun->io_list, orig_lun_io,
- cfi_lun_io, links);
- ctl_free_io(orig_io);
- }
-}
-
-static void
-cfi_lun_io_done(union ctl_io *io)
-{
- struct cfi_lun *lun;
- struct cfi_lun_io *lun_io;
-
- lun_io = (struct cfi_lun_io *)
- io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
- lun = lun_io->lun;
-
- if (lun_io->metatask == NULL) {
- printf("%s: I/O has no metatask pointer, discarding\n",
- __func__);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- ctl_free_io(io);
- return;
- }
- cfi_metatask_io_done(io);
-}
-
-void
-cfi_action(struct cfi_metatask *metatask)
-{
- struct cfi_softc *softc;
-
- softc = &fetd_internal_softc;
-
- mtx_lock(&softc->lock);
-
- STAILQ_INSERT_TAIL(&softc->metatask_list, metatask, links);
-
- if ((softc->flags & CFI_ONLINE) == 0) {
- mtx_unlock(&softc->lock);
- metatask->status = CFI_MT_PORT_OFFLINE;
- cfi_metatask_done(softc, metatask);
- return;
- } else
- mtx_unlock(&softc->lock);
-
- switch (metatask->tasktype) {
- case CFI_TASK_STARTUP:
- case CFI_TASK_SHUTDOWN: {
- union ctl_io *io;
- int da_luns, ios_allocated, do_start;
- struct cfi_lun *lun;
- STAILQ_HEAD(, ctl_io_hdr) tmp_io_list;
-
- da_luns = 0;
- ios_allocated = 0;
- STAILQ_INIT(&tmp_io_list);
-
- if (metatask->tasktype == CFI_TASK_STARTUP)
- do_start = 1;
- else
- do_start = 0;
-
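-		/*
-		 * Two passes over the LUN list: the first counts the ready
-		 * direct access LUNs and preallocates a ctl_io for each on
-		 * tmp_io_list, and the second pulls those I/Os back off the
-		 * list and dispatches a START STOP UNIT to each LUN.
-		 */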
- mtx_lock(&softc->lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- if (lun->state != CFI_LUN_READY)
- continue;
-
- if (SID_TYPE(&lun->inq_data) != T_DIRECT)
- continue;
- da_luns++;
- io = ctl_alloc_io_nowait(softc->port.ctl_pool_ref);
- if (io != NULL) {
- ios_allocated++;
- STAILQ_INSERT_TAIL(&tmp_io_list, &io->io_hdr,
- links);
- }
- }
-
- if (ios_allocated < da_luns) {
- printf("%s: error allocating ctl_io for %s\n",
- __func__, (do_start == 1) ? "startup" :
- "shutdown");
- da_luns = ios_allocated;
- }
-
- metatask->taskinfo.startstop.total_luns = da_luns;
-
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- struct cfi_lun_io *lun_io;
-
- if (lun->state != CFI_LUN_READY)
- continue;
-
- if (SID_TYPE(&lun->inq_data) != T_DIRECT)
- continue;
-
- io = (union ctl_io *)STAILQ_FIRST(&tmp_io_list);
- if (io == NULL)
- break;
-
- STAILQ_REMOVE(&tmp_io_list, &io->io_hdr, ctl_io_hdr,
- links);
-
- ctl_scsi_start_stop(io,
- /*start*/ do_start,
- /*load_eject*/ 0,
- /*immediate*/ 0,
- /*power_conditions*/
- SSS_PC_START_VALID,
- /*onoffline*/ 1,
- /*ctl_tag_type*/ CTL_TAG_ORDERED,
- /*control*/ 0);
-
- cfi_init_io(io,
- /*lun*/ lun,
- /*metatask*/ metatask,
- /*policy*/ CFI_ERR_HARD,
- /*retries*/ 3,
- /*orig_lun_io*/ NULL,
- /*done_function*/ cfi_lun_io_done);
-
- lun_io = (struct cfi_lun_io *) io->io_hdr.port_priv;
-
- STAILQ_INSERT_TAIL(&lun->io_list, lun_io, links);
-
- if (ctl_queue(io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n",
- __func__);
- STAILQ_REMOVE(&lun->io_list, lun_io,
- cfi_lun_io, links);
- ctl_free_io(io);
- metatask->taskinfo.startstop.total_luns--;
- }
- }
-
- if (STAILQ_FIRST(&tmp_io_list) != NULL) {
- printf("%s: error: tmp_io_list != NULL\n", __func__);
- for (io = (union ctl_io *)STAILQ_FIRST(&tmp_io_list);
- io != NULL;
- io = (union ctl_io *)STAILQ_FIRST(&tmp_io_list)) {
- STAILQ_REMOVE(&tmp_io_list, &io->io_hdr,
- ctl_io_hdr, links);
- ctl_free_io(io);
- }
- }
- mtx_unlock(&softc->lock);
-
- break;
- }
- case CFI_TASK_BBRREAD: {
- union ctl_io *io;
- struct cfi_lun *lun;
- struct cfi_lun_io *lun_io;
- cfi_bbrread_status status;
- int req_lun_num;
- uint32_t num_blocks;
-
- status = CFI_BBR_SUCCESS;
-
- req_lun_num = metatask->taskinfo.bbrread.lun_num;
-
- mtx_lock(&softc->lock);
- STAILQ_FOREACH(lun, &softc->lun_list, links) {
- if (lun->lun_id != req_lun_num)
- continue;
- if (lun->state != CFI_LUN_READY) {
- status = CFI_BBR_LUN_UNCONFIG;
- break;
- } else
- break;
- }
-
- if (lun == NULL)
- status = CFI_BBR_NO_LUN;
-
- if (status != CFI_BBR_SUCCESS) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = status;
- mtx_unlock(&softc->lock);
- cfi_metatask_done(softc, metatask);
- break;
- }
-
- /*
- * Convert the number of bytes given into blocks and check
- * that the number of bytes is a multiple of the blocksize.
- * CTL will verify that the LBA is okay.
- */
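-		/*
-		 * For example, a len of 65536 with a 512 byte blocksize
-		 * (blocksize_powerof2 == 9) yields num_blocks =
-		 * 65536 >> 9 = 128, while a len of 65000 would fail the
-		 * multiple-of-blocksize check.
-		 */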
- if (lun->blocksize_powerof2 != 0) {
- if ((metatask->taskinfo.bbrread.len &
- (lun->blocksize - 1)) != 0) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_BAD_LEN;
- cfi_metatask_done(softc, metatask);
- break;
- }
-
- num_blocks = metatask->taskinfo.bbrread.len >>
- lun->blocksize_powerof2;
- } else {
-			/*
-			 * XXX KDM this is an integer modulo (not floating
-			 * point division), but it is still more expensive
-			 * than the power-of-2 shift path above.
-			 */
- if ((metatask->taskinfo.bbrread.len %
- lun->blocksize) != 0) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status =
- CFI_BBR_BAD_LEN;
- cfi_metatask_done(softc, metatask);
- break;
- }
-
-			/*
-			 * XXX KDM as above, this is an integer division,
-			 * just a potentially expensive one.
-			 */
- num_blocks = metatask->taskinfo.bbrread.len /
- lun->blocksize;
-
- }
-
- io = ctl_alloc_io_nowait(softc->port.ctl_pool_ref);
- if (io == NULL) {
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_NO_MEM;
- mtx_unlock(&softc->lock);
- cfi_metatask_done(softc, metatask);
- break;
- }
-
- /*
- * XXX KDM need to do a read capacity to get the blocksize
- * for this device.
- */
- ctl_scsi_read_write(io,
- /*data_ptr*/ NULL,
- /*data_len*/ metatask->taskinfo.bbrread.len,
- /*read_op*/ 1,
- /*byte2*/ 0,
- /*minimum_cdb_size*/ 0,
- /*lba*/ metatask->taskinfo.bbrread.lba,
- /*num_blocks*/ num_blocks,
- /*tag_type*/ CTL_TAG_SIMPLE,
- /*control*/ 0);
-
- cfi_init_io(io,
- /*lun*/ lun,
- /*metatask*/ metatask,
- /*policy*/ CFI_ERR_SOFT,
- /*retries*/ 3,
- /*orig_lun_io*/ NULL,
- /*done_function*/ cfi_lun_io_done);
-
- lun_io = (struct cfi_lun_io *)io->io_hdr.port_priv;
-
- STAILQ_INSERT_TAIL(&lun->io_list, lun_io, links);
-
- if (ctl_queue(io) != CTL_RETVAL_COMPLETE) {
- printf("%s: error returned from ctl_queue()!\n",
- __func__);
- STAILQ_REMOVE(&lun->io_list, lun_io, cfi_lun_io, links);
- ctl_free_io(io);
- metatask->status = CFI_MT_ERROR;
- metatask->taskinfo.bbrread.status = CFI_BBR_ERROR;
- mtx_unlock(&softc->lock);
- cfi_metatask_done(softc, metatask);
- break;
- }
-
- mtx_unlock(&softc->lock);
- break;
- }
- default:
- panic("invalid metatask type %d", metatask->tasktype);
- break; /* NOTREACHED */
- }
-}
-
-struct cfi_metatask *
-cfi_alloc_metatask(int can_wait)
-{
- struct cfi_metatask *metatask;
- struct cfi_softc *softc;
-
- softc = &fetd_internal_softc;
-
- metatask = uma_zalloc(cfi_metatask_zone,
- (can_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
- if (metatask == NULL)
- return (NULL);
-
- metatask->status = CFI_MT_NONE;
-
- return (metatask);
-}
-
-void
-cfi_free_metatask(struct cfi_metatask *metatask)
-{
-
- uma_zfree(cfi_metatask_zone, metatask);
-}
-
-/*
- * vim: ts=8
- */
diff --git a/sys/cam/ctl/ctl_frontend_internal.h b/sys/cam/ctl/ctl_frontend_internal.h
deleted file mode 100644
index cb00dc6..0000000
--- a/sys/cam/ctl/ctl_frontend_internal.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*-
- * Copyright (c) 2004 Silicon Graphics International Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- *
- * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_frontend_internal.h#1 $
- * $FreeBSD$
- */
-/*
- * CTL kernel internal frontend target driver. This allows kernel-level
- * clients to send commands into CTL.
- *
- * Author: Ken Merry <ken@FreeBSD.org>
- */
-
-#ifndef _CTL_FRONTEND_INTERNAL_H_
-#define _CTL_FRONTEND_INTERNAL_H_
-
-/*
- * These are general metatask error codes. If the error code is CFI_MT_ERROR,
- * check any metatask-specific status codes for more detail on the problem.
- */
-typedef enum {
- CFI_MT_NONE,
- CFI_MT_PORT_OFFLINE,
- CFI_MT_ERROR,
- CFI_MT_SUCCESS
-} cfi_mt_status;
-
-typedef enum {
- CFI_TASK_NONE,
- CFI_TASK_SHUTDOWN,
- CFI_TASK_STARTUP,
- CFI_TASK_BBRREAD
-} cfi_tasktype;
-
-struct cfi_task_startstop {
- int total_luns;
- int luns_complete;
- int luns_failed;
-};
-
-/*
- * Error code description:
- * CFI_BBR_SUCCESS - the read was successful
- * CFI_BBR_LUN_UNCONFIG - CFI probe for this lun hasn't completed
- * CFI_BBR_NO_LUN - this lun doesn't exist, as far as CFI knows
- * CFI_BBR_NO_MEM - memory allocation error
- * CFI_BBR_BAD_LEN - data length isn't a multiple of the blocksize
- * CFI_BBR_RESERV_CONFLICT - another initiator has this lun reserved, so
- * we can't issue I/O at all.
- * CFI_BBR_LUN_STOPPED - the lun is powered off.
- * CFI_BBR_LUN_OFFLINE_CTL - the lun is offline from a CTL standpoint
- * CFI_BBR_LUN_OFFLINE_RC - the lun is offline from a RAIDCore standpoint.
- * This is bad, because it basically means we've
- * had a double failure on the LUN.
- * CFI_BBR_SCSI_ERROR - generic SCSI error, see status byte and sense
- * data for more resolution if you want it.
- * CFI_BBR_ERROR - the catch-all error code.
- */
-typedef enum {
- CFI_BBR_SUCCESS,
- CFI_BBR_LUN_UNCONFIG,
- CFI_BBR_NO_LUN,
- CFI_BBR_NO_MEM,
- CFI_BBR_BAD_LEN,
- CFI_BBR_RESERV_CONFLICT,
- CFI_BBR_LUN_STOPPED,
- CFI_BBR_LUN_OFFLINE_CTL,
- CFI_BBR_LUN_OFFLINE_RC,
- CFI_BBR_SCSI_ERROR,
- CFI_BBR_ERROR,
-} cfi_bbrread_status;
-
-struct cfi_task_bbrread {
- int lun_num; /* lun number */
- uint64_t lba; /* logical block address */
- int len; /* length in bytes */
- cfi_bbrread_status status; /* BBR status */
- uint8_t scsi_status; /* SCSI status */
- struct scsi_sense_data sense_data; /* SCSI sense data */
-};
-
-union cfi_taskinfo {
- struct cfi_task_startstop startstop;
- struct cfi_task_bbrread bbrread;
-};
-
-struct cfi_metatask;
-
-typedef void (*cfi_cb_t)(void *arg, struct cfi_metatask *metatask);
-
-struct cfi_metatask {
- cfi_tasktype tasktype; /* passed to CFI */
- cfi_mt_status status; /* returned from CFI */
- union cfi_taskinfo taskinfo; /* returned from CFI */
-	struct ctl_mem_element *element;	/* used by CFI, don't touch */
- cfi_cb_t callback; /* passed to CFI */
- void *callback_arg; /* passed to CFI */
-	STAILQ_ENTRY(cfi_metatask) links;	/* used by CFI, don't touch */
-};
-
-#ifdef _KERNEL
-
-MALLOC_DECLARE(M_CTL_CFI);
-
-/*
- * This is the API for sending meta commands (commands that are sent to more
- * than one LUN) to the internal frontend:
- * - Allocate a metatask using cfi_alloc_metatask(). can_wait == 0 means
- * that you're calling from an interrupt context. can_wait == 1 means
- * that you're calling from a thread context and don't mind waiting to
- * allocate memory.
- * - Setup the task type, callback and callback argument.
- * - Call cfi_action().
- * - When the callback comes, note the status and any per-command status
- * (see the taskinfo union) and then free the metatask with
- * cfi_free_metatask().
- */
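-/*
- * A minimal usage sketch (illustrative only; my_startup_done() and
- * my_startup() are hypothetical caller-side names, not part of this API):
- *
- *	static void
- *	my_startup_done(void *arg, struct cfi_metatask *metatask)
- *	{
- *
- *		if (metatask->status != CFI_MT_SUCCESS)
- *			printf("%d of %d LUNs failed to start\n",
- *			    metatask->taskinfo.startstop.luns_failed,
- *			    metatask->taskinfo.startstop.total_luns);
- *		cfi_free_metatask(metatask);
- *	}
- *
- *	static void
- *	my_startup(void)
- *	{
- *		struct cfi_metatask *metatask;
- *
- *		metatask = cfi_alloc_metatask(1);	(can_wait == 1)
- *		metatask->tasktype = CFI_TASK_STARTUP;
- *		metatask->callback = my_startup_done;
- *		metatask->callback_arg = NULL;
- *		cfi_action(metatask);
- *	}
- */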
-struct cfi_metatask *cfi_alloc_metatask(int can_wait);
-void cfi_free_metatask(struct cfi_metatask *metatask);
-void cfi_action(struct cfi_metatask *metatask);
-
-#endif /* _KERNEL */
-
-#endif /* _CTL_FRONTEND_INTERNAL_H_ */
-
-/*
- * vim: ts=8
- */
diff --git a/sys/cam/ctl/ctl_frontend_ioctl.c b/sys/cam/ctl/ctl_frontend_ioctl.c
new file mode 100644
index 0000000..6ef2f66
--- /dev/null
+++ b/sys/cam/ctl/ctl_frontend_ioctl.c
@@ -0,0 +1,439 @@
+/*-
+ * Copyright (c) 2003-2009 Silicon Graphics International Corp.
+ * Copyright (c) 2012 The FreeBSD Foundation
+ * Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include <cam/cam.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_da.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_frontend.h>
+#include <cam/ctl/ctl_util.h>
+#include <cam/ctl/ctl_backend.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_ha.h>
+#include <cam/ctl/ctl_private.h>
+#include <cam/ctl/ctl_debug.h>
+#include <cam/ctl/ctl_error.h>
+
+typedef enum {
+ CTL_IOCTL_INPROG,
+ CTL_IOCTL_DATAMOVE,
+ CTL_IOCTL_DONE
+} ctl_fe_ioctl_state;
+
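+/*
+ * Per-I/O state shared between cfi_submit_wait(), which sleeps on the
+ * condition variable below, and the cfi_datamove()/cfi_done() callbacks,
+ * which advance the state and broadcast a wakeup.
+ */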
+struct ctl_fe_ioctl_params {
+ struct cv sem;
+ struct mtx ioctl_mtx;
+ ctl_fe_ioctl_state state;
+};
+
+struct cfi_softc {
+ uint32_t cur_tag_num;
+ struct ctl_port port;
+};
+
+static struct cfi_softc cfi_softc;
+
+static int cfi_init(void);
+static void cfi_shutdown(void);
+static void cfi_datamove(union ctl_io *io);
+static void cfi_done(union ctl_io *io);
+
+static struct ctl_frontend cfi_frontend =
+{
+ .name = "ioctl",
+ .init = cfi_init,
+ .shutdown = cfi_shutdown,
+};
+CTL_FRONTEND_DECLARE(ctlioctl, cfi_frontend);
+
+static int
+cfi_init(void)
+{
+ struct cfi_softc *isoftc = &cfi_softc;
+ struct ctl_port *port;
+
+ memset(isoftc, 0, sizeof(*isoftc));
+
+ port = &isoftc->port;
+ port->frontend = &cfi_frontend;
+ port->port_type = CTL_PORT_IOCTL;
+ port->num_requested_ctl_io = 100;
+ port->port_name = "ioctl";
+ port->fe_datamove = cfi_datamove;
+ port->fe_done = cfi_done;
+ port->max_targets = 1;
+ port->max_target_id = 0;
+ port->targ_port = -1;
+ port->max_initiators = 1;
+
+ if (ctl_port_register(port) != 0) {
+ printf("%s: ioctl port registration failed\n", __func__);
+ return (0);
+ }
+ ctl_port_online(port);
+ return (0);
+}
+
+void
+cfi_shutdown(void)
+{
+ struct cfi_softc *isoftc = &cfi_softc;
+ struct ctl_port *port;
+
+ port = &isoftc->port;
+ ctl_port_offline(port);
+ if (ctl_port_deregister(&isoftc->port) != 0)
+		printf("%s: ctl_port_deregister() failed\n", __func__);
+}
+
+/*
+ * Data movement routine for the CTL ioctl frontend port.
+ */
+static int
+ctl_ioctl_do_datamove(struct ctl_scsiio *ctsio)
+{
+ struct ctl_sg_entry *ext_sglist, *kern_sglist;
+ struct ctl_sg_entry ext_entry, kern_entry;
+ int ext_sglen, ext_sg_entries, kern_sg_entries;
+ int ext_sg_start, ext_offset;
+ int len_to_copy, len_copied;
+ int kern_watermark, ext_watermark;
+ int ext_sglist_malloced;
+ int i, j;
+
+ ext_sglist_malloced = 0;
+ ext_sg_start = 0;
+ ext_offset = 0;
+
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove\n"));
+
+ /*
+	 * If CTL_FLAG_NO_DATAMOVE is set, fake the data transfer.
+ */
+ if (ctsio->io_hdr.flags & CTL_FLAG_NO_DATAMOVE) {
+ ctsio->ext_data_filled = ctsio->ext_data_len;
+ goto bailout;
+ }
+
+ /*
+ * To simplify things here, if we have a single buffer, stick it in
+ * a S/G entry and just make it a single entry S/G list.
+ */
+ if (ctsio->ext_sg_entries > 0) {
+ int len_seen;
+
+ ext_sglen = ctsio->ext_sg_entries * sizeof(*ext_sglist);
+
+ ext_sglist = (struct ctl_sg_entry *)malloc(ext_sglen, M_CTL,
+ M_WAITOK);
+ ext_sglist_malloced = 1;
+ if (copyin(ctsio->ext_data_ptr, ext_sglist, ext_sglen) != 0) {
+ ctsio->io_hdr.port_status = 31343;
+ goto bailout;
+ }
+ ext_sg_entries = ctsio->ext_sg_entries;
+ len_seen = 0;
+ for (i = 0; i < ext_sg_entries; i++) {
+ if ((len_seen + ext_sglist[i].len) >=
+ ctsio->ext_data_filled) {
+ ext_sg_start = i;
+ ext_offset = ctsio->ext_data_filled - len_seen;
+ break;
+ }
+ len_seen += ext_sglist[i].len;
+ }
+ } else {
+ ext_sglist = &ext_entry;
+ ext_sglist->addr = ctsio->ext_data_ptr;
+ ext_sglist->len = ctsio->ext_data_len;
+ ext_sg_entries = 1;
+ ext_sg_start = 0;
+ ext_offset = ctsio->ext_data_filled;
+ }
+
+ if (ctsio->kern_sg_entries > 0) {
+ kern_sglist = (struct ctl_sg_entry *)ctsio->kern_data_ptr;
+ kern_sg_entries = ctsio->kern_sg_entries;
+ } else {
+ kern_sglist = &kern_entry;
+ kern_sglist->addr = ctsio->kern_data_ptr;
+ kern_sglist->len = ctsio->kern_data_len;
+ kern_sg_entries = 1;
+ }
+
+ kern_watermark = 0;
+ ext_watermark = ext_offset;
+ len_copied = 0;
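+	/*
+	 * Walk the external (user) and kernel S/G lists in parallel.  The
+	 * watermarks track how much of the current entry on each side has
+	 * been consumed; once an entry is exhausted, move on to the next
+	 * one and reset its watermark.
+	 */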
+ for (i = ext_sg_start, j = 0;
+ i < ext_sg_entries && j < kern_sg_entries;) {
+ uint8_t *ext_ptr, *kern_ptr;
+
+ len_to_copy = MIN(ext_sglist[i].len - ext_watermark,
+ kern_sglist[j].len - kern_watermark);
+
+ ext_ptr = (uint8_t *)ext_sglist[i].addr;
+ ext_ptr = ext_ptr + ext_watermark;
+ if (ctsio->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
+ /*
+ * XXX KDM fix this!
+ */
+ panic("need to implement bus address support");
+#if 0
+ kern_ptr = bus_to_virt(kern_sglist[j].addr);
+#endif
+ } else
+ kern_ptr = (uint8_t *)kern_sglist[j].addr;
+ kern_ptr = kern_ptr + kern_watermark;
+
+ kern_watermark += len_to_copy;
+ ext_watermark += len_to_copy;
+
+ if ((ctsio->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
+ CTL_FLAG_DATA_IN) {
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: copying %d "
+ "bytes to user\n", len_to_copy));
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: from %p "
+ "to %p\n", kern_ptr, ext_ptr));
+ if (copyout(kern_ptr, ext_ptr, len_to_copy) != 0) {
+ ctsio->io_hdr.port_status = 31344;
+ goto bailout;
+ }
+ } else {
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: copying %d "
+ "bytes from user\n", len_to_copy));
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: from %p "
+ "to %p\n", ext_ptr, kern_ptr));
+			if (copyin(ext_ptr, kern_ptr, len_to_copy) != 0) {
+ ctsio->io_hdr.port_status = 31345;
+ goto bailout;
+ }
+ }
+
+ len_copied += len_to_copy;
+
+ if (ext_sglist[i].len == ext_watermark) {
+ i++;
+ ext_watermark = 0;
+ }
+
+ if (kern_sglist[j].len == kern_watermark) {
+ j++;
+ kern_watermark = 0;
+ }
+ }
+
+ ctsio->ext_data_filled += len_copied;
+
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: ext_sg_entries: %d, "
+ "kern_sg_entries: %d\n", ext_sg_entries,
+ kern_sg_entries));
+ CTL_DEBUG_PRINT(("ctl_ioctl_do_datamove: ext_data_len = %d, "
+ "kern_data_len = %d\n", ctsio->ext_data_len,
+ ctsio->kern_data_len));
+
+ /* XXX KDM set residual?? */
+bailout:
+
+ if (ext_sglist_malloced != 0)
+ free(ext_sglist, M_CTL);
+
+ return (CTL_RETVAL_COMPLETE);
+}
+
+static void
+cfi_datamove(union ctl_io *io)
+{
+ struct ctl_fe_ioctl_params *params;
+
+ params = (struct ctl_fe_ioctl_params *)
+ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
+
+ mtx_lock(&params->ioctl_mtx);
+ params->state = CTL_IOCTL_DATAMOVE;
+ cv_broadcast(&params->sem);
+ mtx_unlock(&params->ioctl_mtx);
+}
+
+static void
+cfi_done(union ctl_io *io)
+{
+ struct ctl_fe_ioctl_params *params;
+
+ params = (struct ctl_fe_ioctl_params *)
+ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
+
+ mtx_lock(&params->ioctl_mtx);
+ params->state = CTL_IOCTL_DONE;
+ cv_broadcast(&params->sem);
+ mtx_unlock(&params->ioctl_mtx);
+}
+
+static int
+cfi_submit_wait(union ctl_io *io)
+{
+ struct ctl_fe_ioctl_params params;
+ ctl_fe_ioctl_state last_state;
+ int done, retval;
+
+ bzero(&params, sizeof(params));
+ mtx_init(&params.ioctl_mtx, "ctliocmtx", NULL, MTX_DEF);
+ cv_init(&params.sem, "ctlioccv");
+ params.state = CTL_IOCTL_INPROG;
+ last_state = params.state;
+
+ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = &params;
+
+ CTL_DEBUG_PRINT(("cfi_submit_wait\n"));
+
+ /* This shouldn't happen */
+ if ((retval = ctl_queue(io)) != CTL_RETVAL_COMPLETE)
+ return (retval);
+
+ done = 0;
+
+ do {
+ mtx_lock(&params.ioctl_mtx);
+ /*
+ * Check the state here, and don't sleep if the state has
+		 * already changed (i.e. wakeup has already occurred, but we
+ * weren't waiting yet).
+ */
+ if (params.state == last_state) {
+ /* XXX KDM cv_wait_sig instead? */
+ cv_wait(&params.sem, &params.ioctl_mtx);
+ }
+ last_state = params.state;
+
+ switch (params.state) {
+ case CTL_IOCTL_INPROG:
+ /* Why did we wake up? */
+ /* XXX KDM error here? */
+ mtx_unlock(&params.ioctl_mtx);
+ break;
+ case CTL_IOCTL_DATAMOVE:
+ CTL_DEBUG_PRINT(("got CTL_IOCTL_DATAMOVE\n"));
+
+ /*
+			 * Change last_state back to INPROG to avoid
+ * deadlock on subsequent data moves.
+ */
+ params.state = last_state = CTL_IOCTL_INPROG;
+
+ mtx_unlock(&params.ioctl_mtx);
+ ctl_ioctl_do_datamove(&io->scsiio);
+ /*
+ * Note that in some cases, most notably writes,
+ * this will queue the I/O and call us back later.
+ * In other cases, generally reads, this routine
+ * will immediately call back and wake us up,
+ * probably using our own context.
+ */
+ io->scsiio.be_move_done(io);
+ break;
+ case CTL_IOCTL_DONE:
+ mtx_unlock(&params.ioctl_mtx);
+ CTL_DEBUG_PRINT(("got CTL_IOCTL_DONE\n"));
+ done = 1;
+ break;
+ default:
+ mtx_unlock(&params.ioctl_mtx);
+ /* XXX KDM error here? */
+ break;
+ }
+ } while (done == 0);
+
+ mtx_destroy(&params.ioctl_mtx);
+ cv_destroy(&params.sem);
+
+ return (CTL_RETVAL_COMPLETE);
+}
+
+int
+ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
+ struct thread *td)
+{
+ union ctl_io *io;
+ void *pool_tmp;
+ int retval = 0;
+
+ /*
+ * If we haven't been "enabled", don't allow any SCSI I/O
+ * to this FETD.
+ */
+ if ((cfi_softc.port.status & CTL_PORT_STATUS_ONLINE) == 0)
+ return (EPERM);
+
+ io = ctl_alloc_io(cfi_softc.port.ctl_pool_ref);
+
+ /*
+ * Need to save the pool reference so it doesn't get
+	 * overwritten when the user's ctl_io is copied in over it.
+ */
+ pool_tmp = io->io_hdr.pool;
+ memcpy(io, (void *)addr, sizeof(*io));
+ io->io_hdr.pool = pool_tmp;
+
+ /*
+ * No status yet, so make sure the status is set properly.
+ */
+ io->io_hdr.status = CTL_STATUS_NONE;
+
+ /*
+ * The user sets the initiator ID, target and LUN IDs.
+ */
+ io->io_hdr.nexus.targ_port = cfi_softc.port.targ_port;
+ io->io_hdr.flags |= CTL_FLAG_USER_REQ;
+ if ((io->io_hdr.io_type == CTL_IO_SCSI) &&
+ (io->scsiio.tag_type != CTL_TAG_UNTAGGED))
+ io->scsiio.tag_num = cfi_softc.cur_tag_num++;
+
+ retval = cfi_submit_wait(io);
+ if (retval == 0)
+ memcpy((void *)addr, io, sizeof(*io));
+ ctl_free_io(io);
+ return (retval);
+}
diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c
index e19852a..778a6ba 100644
--- a/sys/cam/ctl/ctl_frontend_iscsi.c
+++ b/sys/cam/ctl/ctl_frontend_iscsi.c
@@ -61,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>
@@ -150,8 +149,6 @@ int cfiscsi_init(void);
static void cfiscsi_online(void *arg);
static void cfiscsi_offline(void *arg);
static int cfiscsi_info(void *arg, struct sbuf *sb);
-static int cfiscsi_lun_enable(void *arg, int lun_id);
-static int cfiscsi_lun_disable(void *arg, int lun_id);
static int cfiscsi_ioctl(struct cdev *dev,
u_long cmd, caddr_t addr, int flag, struct thread *td);
static void cfiscsi_datamove(union ctl_io *io);
@@ -174,7 +171,6 @@ static void cfiscsi_target_release(struct cfiscsi_target *ct);
static void cfiscsi_session_delete(struct cfiscsi_session *cs);
static struct cfiscsi_softc cfiscsi_softc;
-extern struct ctl_softc *control_softc;
static struct ctl_frontend cfiscsi_frontend =
{
@@ -462,6 +458,7 @@ cfiscsi_decode_lun(uint64_t encoded)
break;
}
result = (lun[1] << 16) + (lun[2] << 8) + lun[3];
+ break;
default:
CFISCSI_WARN("unsupported LUN format 0x%jx",
(uintmax_t)encoded);
@@ -564,9 +561,8 @@ cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request)
ctl_zero_io(io);
io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = request;
io->io_hdr.io_type = CTL_IO_SCSI;
- io->io_hdr.nexus.initid.id = cs->cs_ctl_initid;
+ io->io_hdr.nexus.initid = cs->cs_ctl_initid;
io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
- io->io_hdr.nexus.targ_target.id = 0;
io->io_hdr.nexus.targ_lun = cfiscsi_decode_lun(bhssc->bhssc_lun);
io->scsiio.tag_num = bhssc->bhssc_initiator_task_tag;
switch ((bhssc->bhssc_flags & BHSSC_FLAGS_ATTR)) {
@@ -621,9 +617,8 @@ cfiscsi_pdu_handle_task_request(struct icl_pdu *request)
ctl_zero_io(io);
io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = request;
io->io_hdr.io_type = CTL_IO_TASK;
- io->io_hdr.nexus.initid.id = cs->cs_ctl_initid;
+ io->io_hdr.nexus.initid = cs->cs_ctl_initid;
io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
- io->io_hdr.nexus.targ_target.id = 0;
io->io_hdr.nexus.targ_lun = cfiscsi_decode_lun(bhstmr->bhstmr_lun);
io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */
@@ -641,6 +636,12 @@ cfiscsi_pdu_handle_task_request(struct icl_pdu *request)
#endif
io->taskio.task_action = CTL_TASK_ABORT_TASK_SET;
break;
+ case BHSTMR_FUNCTION_CLEAR_TASK_SET:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_CLEAR_TASK_SET");
+#endif
+ io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET;
+ break;
case BHSTMR_FUNCTION_LOGICAL_UNIT_RESET:
#if 0
CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_LOGICAL_UNIT_RESET");
@@ -653,6 +654,37 @@ cfiscsi_pdu_handle_task_request(struct icl_pdu *request)
#endif
io->taskio.task_action = CTL_TASK_TARGET_RESET;
break;
+ case BHSTMR_FUNCTION_TARGET_COLD_RESET:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_COLD_RESET");
+#endif
+ io->taskio.task_action = CTL_TASK_TARGET_RESET;
+ break;
+ case BHSTMR_FUNCTION_QUERY_TASK:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK");
+#endif
+ io->taskio.task_action = CTL_TASK_QUERY_TASK;
+ io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag;
+ break;
+ case BHSTMR_FUNCTION_QUERY_TASK_SET:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK_SET");
+#endif
+ io->taskio.task_action = CTL_TASK_QUERY_TASK_SET;
+ break;
+ case BHSTMR_FUNCTION_I_T_NEXUS_RESET:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_I_T_NEXUS_RESET");
+#endif
+ io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
+ break;
+ case BHSTMR_FUNCTION_QUERY_ASYNC_EVENT:
+#if 0
+ CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_ASYNC_EVENT");
+#endif
+ io->taskio.task_action = CTL_TASK_QUERY_ASYNC_EVENT;
+ break;
default:
CFISCSI_SESSION_DEBUG(cs, "unsupported function 0x%x",
bhstmr->bhstmr_function & ~0x80);
@@ -931,6 +963,7 @@ cfiscsi_pdu_handle_data_out(struct icl_pdu *request)
done = (io->scsiio.ext_data_filled != cdw->cdw_r2t_end ||
io->scsiio.ext_data_filled == io->scsiio.kern_data_len);
uma_zfree(cfiscsi_data_wait_zone, cdw);
+ io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
if (done)
io->scsiio.be_move_done(io);
else
@@ -1081,9 +1114,8 @@ cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs)
ctl_zero_io(io);
io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = cs;
io->io_hdr.io_type = CTL_IO_TASK;
- io->io_hdr.nexus.initid.id = cs->cs_ctl_initid;
+ io->io_hdr.nexus.initid = cs->cs_ctl_initid;
io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
- io->io_hdr.nexus.targ_target.id = 0;
io->io_hdr.nexus.targ_lun = 0;
io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */
io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
@@ -1105,6 +1137,7 @@ cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs)
* assuming that the data transfer actually succeeded
* and writing uninitialized data to disk.
*/
+ cdw->cdw_ctl_io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
cdw->cdw_ctl_io->scsiio.io_hdr.port_status = 42;
cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io);
uma_zfree(cfiscsi_data_wait_zone, cdw);
@@ -1299,10 +1332,8 @@ int
cfiscsi_init(void)
{
struct cfiscsi_softc *softc;
- int retval;
softc = &cfiscsi_softc;
- retval = 0;
bzero(softc, sizeof(*softc));
mtx_init(&softc->lock, "cfiscsi", NULL, MTX_DEF);
@@ -1532,6 +1563,16 @@ restart:
if (cs2 != cs && cs2->cs_tasks_aborted == false &&
cs->cs_target == cs2->cs_target &&
strcmp(cs->cs_initiator_id, cs2->cs_initiator_id) == 0) {
+ if (strcmp(cs->cs_initiator_addr,
+ cs2->cs_initiator_addr) != 0) {
+ CFISCSI_SESSION_WARN(cs2,
+ "session reinstatement from "
+ "different address %s",
+ cs->cs_initiator_addr);
+ } else {
+ CFISCSI_SESSION_DEBUG(cs2,
+ "session reinstatement");
+ }
cfiscsi_session_terminate(cs2);
mtx_unlock(&softc->lock);
pause("cfiscsi_reinstate", 1);
@@ -1652,41 +1693,40 @@ cfiscsi_ioctl_list(struct ctl_iscsi *ci)
}
static void
-cfiscsi_ioctl_terminate(struct ctl_iscsi *ci)
+cfiscsi_ioctl_logout(struct ctl_iscsi *ci)
{
struct icl_pdu *response;
struct iscsi_bhs_asynchronous_message *bhsam;
- struct ctl_iscsi_terminate_params *citp;
+ struct ctl_iscsi_logout_params *cilp;
struct cfiscsi_session *cs;
struct cfiscsi_softc *softc;
int found = 0;
- citp = (struct ctl_iscsi_terminate_params *)&(ci->data);
+ cilp = (struct ctl_iscsi_logout_params *)&(ci->data);
softc = &cfiscsi_softc;
mtx_lock(&softc->lock);
TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
- if (citp->all == 0 && cs->cs_id != citp->connection_id &&
- strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 &&
- strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0)
+ if (cilp->all == 0 && cs->cs_id != cilp->connection_id &&
+ strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 &&
+ strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0)
continue;
response = icl_pdu_new(cs->cs_conn, M_NOWAIT);
if (response == NULL) {
- /*
- * Oh well. Just terminate the connection.
- */
- } else {
- bhsam = (struct iscsi_bhs_asynchronous_message *)
- response->ip_bhs;
- bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE;
- bhsam->bhsam_flags = 0x80;
- bhsam->bhsam_0xffffffff = 0xffffffff;
- bhsam->bhsam_async_event =
- BHSAM_EVENT_TARGET_TERMINATES_SESSION;
- cfiscsi_pdu_queue(response);
+ ci->status = CTL_ISCSI_ERROR;
+ snprintf(ci->error_str, sizeof(ci->error_str),
+ "Unable to allocate memory");
+ mtx_unlock(&softc->lock);
+ return;
}
- cfiscsi_session_terminate(cs);
+ bhsam =
+ (struct iscsi_bhs_asynchronous_message *)response->ip_bhs;
+ bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE;
+ bhsam->bhsam_flags = 0x80;
+ bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT;
+ bhsam->bhsam_parameter3 = htons(10);
+ cfiscsi_pdu_queue(response);
found++;
}
mtx_unlock(&softc->lock);
@@ -1702,40 +1742,41 @@ cfiscsi_ioctl_terminate(struct ctl_iscsi *ci)
}
static void
-cfiscsi_ioctl_logout(struct ctl_iscsi *ci)
+cfiscsi_ioctl_terminate(struct ctl_iscsi *ci)
{
struct icl_pdu *response;
struct iscsi_bhs_asynchronous_message *bhsam;
- struct ctl_iscsi_logout_params *cilp;
+ struct ctl_iscsi_terminate_params *citp;
struct cfiscsi_session *cs;
struct cfiscsi_softc *softc;
int found = 0;
- cilp = (struct ctl_iscsi_logout_params *)&(ci->data);
+ citp = (struct ctl_iscsi_terminate_params *)&(ci->data);
softc = &cfiscsi_softc;
mtx_lock(&softc->lock);
TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
- if (cilp->all == 0 && cs->cs_id != cilp->connection_id &&
- strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 &&
- strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0)
+ if (citp->all == 0 && cs->cs_id != citp->connection_id &&
+ strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 &&
+ strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0)
continue;
response = icl_pdu_new(cs->cs_conn, M_NOWAIT);
if (response == NULL) {
- ci->status = CTL_ISCSI_ERROR;
- snprintf(ci->error_str, sizeof(ci->error_str),
- "Unable to allocate memory");
- mtx_unlock(&softc->lock);
- return;
+ /*
+ * Oh well. Just terminate the connection.
+ */
+ } else {
+ bhsam = (struct iscsi_bhs_asynchronous_message *)
+ response->ip_bhs;
+ bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE;
+ bhsam->bhsam_flags = 0x80;
+ bhsam->bhsam_0xffffffff = 0xffffffff;
+ bhsam->bhsam_async_event =
+ BHSAM_EVENT_TARGET_TERMINATES_SESSION;
+ cfiscsi_pdu_queue(response);
}
- bhsam =
- (struct iscsi_bhs_asynchronous_message *)response->ip_bhs;
- bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE;
- bhsam->bhsam_flags = 0x80;
- bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT;
- bhsam->bhsam_parameter3 = htons(10);
- cfiscsi_pdu_queue(response);
+ cfiscsi_session_terminate(cs);
found++;
}
mtx_unlock(&softc->lock);
@@ -2032,9 +2073,6 @@ cfiscsi_ioctl_port_create(struct ctl_req *req)
port->port_offline = cfiscsi_offline;
port->port_info = cfiscsi_info;
port->onoff_arg = ct;
- port->lun_enable = cfiscsi_lun_enable;
- port->lun_disable = cfiscsi_lun_disable;
- port->targ_lun_arg = ct;
port->fe_datamove = cfiscsi_datamove;
port->fe_done = cfiscsi_done;
@@ -2042,6 +2080,7 @@ cfiscsi_ioctl_port_create(struct ctl_req *req)
/* XXX These should probably be fetched from CTL. */
port->max_targets = 1;
port->max_target_id = 15;
+ port->targ_port = -1;
port->options = opts;
STAILQ_INIT(&opts);
@@ -2170,12 +2209,12 @@ cfiscsi_ioctl(struct cdev *dev,
case CTL_ISCSI_LIST:
cfiscsi_ioctl_list(ci);
break;
- case CTL_ISCSI_TERMINATE:
- cfiscsi_ioctl_terminate(ci);
- break;
case CTL_ISCSI_LOGOUT:
cfiscsi_ioctl_logout(ci);
break;
+ case CTL_ISCSI_TERMINATE:
+ cfiscsi_ioctl_terminate(ci);
+ break;
#ifdef ICL_KERNEL_PROXY
case CTL_ISCSI_LISTEN:
cfiscsi_ioctl_listen(ci);
@@ -2298,20 +2337,6 @@ cfiscsi_target_find_or_create(struct cfiscsi_softc *softc, const char *name,
return (newct);
}
-static int
-cfiscsi_lun_enable(void *arg, int lun_id)
-{
-
- return (0);
-}
-
-static int
-cfiscsi_lun_disable(void *arg, int lun_id)
-{
-
- return (0);
-}
-
static void
cfiscsi_datamove_in(union ctl_io *io)
{
@@ -2650,6 +2675,7 @@ cfiscsi_datamove_out(union ctl_io *io)
cfiscsi_session_terminate(cs);
return;
}
+ io->io_hdr.flags |= CTL_FLAG_DMA_INPROG;
bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T;
bhsr2t->bhsr2t_flags = 0x80;
@@ -2791,7 +2817,9 @@ cfiscsi_task_management_done(union ctl_io *io)
struct iscsi_bhs_task_management_request *bhstmr;
struct iscsi_bhs_task_management_response *bhstmr2;
struct cfiscsi_data_wait *cdw, *tmpcdw;
- struct cfiscsi_session *cs;
+ struct cfiscsi_session *cs, *tcs;
+ struct cfiscsi_softc *softc;
+ int cold_reset = 0;
request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
cs = PDU_SESSION(request);
@@ -2824,34 +2852,55 @@ cfiscsi_task_management_done(union ctl_io *io)
#endif
TAILQ_REMOVE(&cs->cs_waiting_for_data_out,
cdw, cdw_next);
+ io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
+ cdw->cdw_ctl_io->scsiio.io_hdr.port_status = 43;
cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io);
uma_zfree(cfiscsi_data_wait_zone, cdw);
}
CFISCSI_SESSION_UNLOCK(cs);
}
+ if ((bhstmr->bhstmr_function & ~0x80) ==
+ BHSTMR_FUNCTION_TARGET_COLD_RESET &&
+ io->io_hdr.status == CTL_SUCCESS)
+ cold_reset = 1;
response = cfiscsi_pdu_new_response(request, M_WAITOK);
bhstmr2 = (struct iscsi_bhs_task_management_response *)
response->ip_bhs;
bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE;
bhstmr2->bhstmr_flags = 0x80;
- if (io->io_hdr.status == CTL_SUCCESS) {
+ switch (io->taskio.task_status) {
+ case CTL_TASK_FUNCTION_COMPLETE:
bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_COMPLETE;
- } else {
- /*
- * XXX: How to figure out what exactly went wrong? iSCSI spec
- * expects us to provide detailed error, e.g. "Task does
- * not exist" or "LUN does not exist".
- */
- CFISCSI_SESSION_DEBUG(cs, "BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED");
- bhstmr2->bhstmr_response =
- BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED;
+ break;
+ case CTL_TASK_FUNCTION_SUCCEEDED:
+ bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_SUCCEEDED;
+ break;
+ case CTL_TASK_LUN_DOES_NOT_EXIST:
+ bhstmr2->bhstmr_response = BHSTMR_RESPONSE_LUN_DOES_NOT_EXIST;
+ break;
+ case CTL_TASK_FUNCTION_NOT_SUPPORTED:
+ default:
+ bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED;
+ break;
}
+ memcpy(bhstmr2->bhstmr_additional_reponse_information,
+ io->taskio.task_resp, sizeof(io->taskio.task_resp));
bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag;
ctl_free_io(io);
icl_pdu_free(request);
cfiscsi_pdu_queue(response);
+
+ if (cold_reset) {
+ softc = cs->cs_target->ct_softc;
+ mtx_lock(&softc->lock);
+ TAILQ_FOREACH(tcs, &softc->sessions, cs_next) {
+ if (tcs->cs_target == cs->cs_target)
+ cfiscsi_session_terminate(tcs);
+ }
+ mtx_unlock(&softc->lock);
+ }
}
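
[The switch above replaces the old binary success/not-supported report: CTL's new task_status lets the target answer FUNCTION SUCCEEDED (for the QUERY TASK family) and LUN DOES NOT EXIST precisely, and the three bytes of additional response information are copied through from taskio.task_resp. Note the ordering for TARGET COLD RESET: the flag is latched before the response PDU is queued, and only afterwards are all sessions to the same target terminated, so the initiator still receives its response. The mapping in isolation (a sketch for readability, not part of the patch):

	/* Sketch: CTL task status to iSCSI TMF response code, as in the
	 * switch above. */
	static uint8_t
	cfiscsi_tmf_response(uint8_t task_status)
	{

		switch (task_status) {
		case CTL_TASK_FUNCTION_COMPLETE:
			return (BHSTMR_RESPONSE_FUNCTION_COMPLETE);
		case CTL_TASK_FUNCTION_SUCCEEDED:
			return (BHSTMR_RESPONSE_FUNCTION_SUCCEEDED);
		case CTL_TASK_LUN_DOES_NOT_EXIST:
			return (BHSTMR_RESPONSE_LUN_DOES_NOT_EXIST);
		default:
			return (BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED);
		}
	}
]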
static void
diff --git a/sys/cam/ctl/ctl_ha.c b/sys/cam/ctl/ctl_ha.c
new file mode 100644
index 0000000..5e31b41
--- /dev/null
+++ b/sys/cam/ctl/ctl_ha.c
@@ -0,0 +1,1029 @@
+/*-
+ * Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/types.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <vm/uma.h>
+
+#include <cam/cam.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_da.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_frontend.h>
+#include <cam/ctl/ctl_util.h>
+#include <cam/ctl/ctl_backend.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_ha.h>
+#include <cam/ctl/ctl_private.h>
+#include <cam/ctl/ctl_debug.h>
+#include <cam/ctl/ctl_error.h>
+
+#if (__FreeBSD_version < 1100000)
+struct mbufq {
+ struct mbuf *head;
+ struct mbuf *tail;
+};
+
+static void
+mbufq_init(struct mbufq *q, int limit)
+{
+
+ q->head = q->tail = NULL;
+}
+
+static void
+mbufq_drain(struct mbufq *q)
+{
+ struct mbuf *m;
+
+ while ((m = q->head) != NULL) {
+ q->head = m->m_nextpkt;
+ m_freem(m);
+ }
+ q->tail = NULL;
+}
+
+static struct mbuf *
+mbufq_dequeue(struct mbufq *q)
+{
+ struct mbuf *m;
+
+ m = q->head;
+ if (m) {
+ if (q->tail == m)
+ q->tail = NULL;
+ q->head = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ }
+ return (m);
+}
+
+static void
+mbufq_enqueue(struct mbufq *q, struct mbuf *m)
+{
+
+ m->m_nextpkt = NULL;
+ if (q->tail)
+ q->tail->m_nextpkt = m;
+ else
+ q->head = m;
+ q->tail = m;
+}
+
+static u_int
+sbavail(struct sockbuf *sb)
+{
+ return (sb->sb_cc);
+}
+
+#if (__FreeBSD_version < 1000000)
+#define mtodo(m, o) ((void *)(((m)->m_data) + (o)))
+#endif
+#endif
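
[The block above is a build compatibility shim: struct mbufq and sbavail() first appeared in FreeBSD 11, and mtodo() in FreeBSD 10, so minimal stand-ins are supplied when building on older kernels (the shim ignores the queue limit argument). Usage matches the native API:

	/* Hedged usage sketch of the queue API relied on below. */
	struct mbufq q;
	struct mbuf *m;

	mbufq_init(&q, INT_MAX);		/* limit unused by the shim */
	mbufq_enqueue(&q, m);			/* append one packet */
	while ((m = mbufq_dequeue(&q)) != NULL)
		m_freem(m);			/* or hand it to sosend() */
	mbufq_drain(&q);			/* free anything left over */
]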
+
+struct ha_msg_wire {
+ uint32_t channel;
+ uint32_t length;
+};
+
+struct ha_dt_msg_wire {
+ ctl_ha_dt_cmd command;
+ uint32_t size;
+ uint8_t *local;
+ uint8_t *remote;
+};
+
+struct ha_softc {
+ struct ctl_softc *ha_ctl_softc;
+ ctl_evt_handler ha_handler[CTL_HA_CHAN_MAX];
+ char ha_peer[128];
+ struct sockaddr_in ha_peer_in;
+ struct socket *ha_lso;
+ struct socket *ha_so;
+ struct mbufq ha_sendq;
+ struct mbuf *ha_sending;
+ struct mtx ha_lock;
+ int ha_connect;
+ int ha_listen;
+ int ha_connected;
+ int ha_receiving;
+ int ha_wakeup;
+ int ha_disconnect;
+ int ha_shutdown;
+ eventhandler_tag ha_shutdown_eh;
+ TAILQ_HEAD(, ctl_ha_dt_req) ha_dts;
+} ha_softc;
+
+static void
+ctl_ha_conn_wake(struct ha_softc *softc)
+{
+
+ mtx_lock(&softc->ha_lock);
+ softc->ha_wakeup = 1;
+ mtx_unlock(&softc->ha_lock);
+ wakeup(&softc->ha_wakeup);
+}
+
+static int
+ctl_ha_lupcall(struct socket *so, void *arg, int waitflag)
+{
+ struct ha_softc *softc = arg;
+
+ ctl_ha_conn_wake(softc);
+ return (SU_OK);
+}
+
+static int
+ctl_ha_rupcall(struct socket *so, void *arg, int waitflag)
+{
+ struct ha_softc *softc = arg;
+
+ wakeup(&softc->ha_receiving);
+ return (SU_OK);
+}
+
+static int
+ctl_ha_supcall(struct socket *so, void *arg, int waitflag)
+{
+ struct ha_softc *softc = arg;
+
+ ctl_ha_conn_wake(softc);
+ return (SU_OK);
+}
+
+static void
+ctl_ha_evt(struct ha_softc *softc, ctl_ha_channel ch, ctl_ha_event evt,
+ int param)
+{
+ int i;
+
+ if (ch < CTL_HA_CHAN_MAX) {
+ if (softc->ha_handler[ch])
+ softc->ha_handler[ch](ch, evt, param);
+ return;
+ }
+ for (i = 0; i < CTL_HA_CHAN_MAX; i++) {
+ if (softc->ha_handler[i])
+ softc->ha_handler[i](i, evt, param);
+ }
+}
+
+static void
+ctl_ha_close(struct ha_softc *softc)
+{
+ struct socket *so = softc->ha_so;
+ int report = 0;
+
+ if (softc->ha_connected || softc->ha_disconnect) {
+ softc->ha_connected = 0;
+ mbufq_drain(&softc->ha_sendq);
+ m_freem(softc->ha_sending);
+ softc->ha_sending = NULL;
+ report = 1;
+ }
+ if (so) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ soupcall_clear(so, SO_RCV);
+ while (softc->ha_receiving) {
+ wakeup(&softc->ha_receiving);
+ msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv),
+ 0, "ha_rx exit", 0);
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ soupcall_clear(so, SO_SND);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ softc->ha_so = NULL;
+ if (softc->ha_connect)
+ pause("reconnect", hz / 2);
+ soclose(so);
+ }
+ if (report) {
+ ctl_ha_evt(softc, CTL_HA_CHAN_MAX, CTL_HA_EVT_LINK_CHANGE,
+ (softc->ha_connect || softc->ha_listen) ?
+ CTL_HA_LINK_UNKNOWN : CTL_HA_LINK_OFFLINE);
+ }
+}
+
+static void
+ctl_ha_lclose(struct ha_softc *softc)
+{
+
+ if (softc->ha_lso) {
+ SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
+ soupcall_clear(softc->ha_lso, SO_RCV);
+ SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
+ soclose(softc->ha_lso);
+ softc->ha_lso = NULL;
+ }
+}
+
+static void
+ctl_ha_rx_thread(void *arg)
+{
+ struct ha_softc *softc = arg;
+ struct socket *so = softc->ha_so;
+ struct ha_msg_wire wire_hdr;
+ struct uio uio;
+ struct iovec iov;
+ int error, flags, next;
+
+ bzero(&wire_hdr, sizeof(wire_hdr));
+ while (1) {
+ if (wire_hdr.length > 0)
+ next = wire_hdr.length;
+ else
+ next = sizeof(wire_hdr);
+ SOCKBUF_LOCK(&so->so_rcv);
+ while (sbavail(&so->so_rcv) < next || softc->ha_disconnect) {
+ if (softc->ha_connected == 0 || softc->ha_disconnect ||
+ so->so_error ||
+ (so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
+ goto errout;
+ }
+ so->so_rcv.sb_lowat = next;
+ msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv),
+ 0, "-", 0);
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
+
+ if (wire_hdr.length == 0) {
+ iov.iov_base = &wire_hdr;
+ iov.iov_len = sizeof(wire_hdr);
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_rw = UIO_READ;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_td = curthread;
+ uio.uio_resid = sizeof(wire_hdr);
+ flags = MSG_DONTWAIT;
+ error = soreceive(softc->ha_so, NULL, &uio, NULL,
+ NULL, &flags);
+ if (error != 0) {
+ printf("%s: header receive error %d\n",
+ __func__, error);
+ SOCKBUF_LOCK(&so->so_rcv);
+ goto errout;
+ }
+ } else {
+ ctl_ha_evt(softc, wire_hdr.channel,
+ CTL_HA_EVT_MSG_RECV, wire_hdr.length);
+ wire_hdr.length = 0;
+ }
+ }
+
+errout:
+ softc->ha_receiving = 0;
+ wakeup(&softc->ha_receiving);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ ctl_ha_conn_wake(softc);
+ kthread_exit();
+}
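
[The receive path is a two-phase framing loop: sleep until sizeof(struct ha_msg_wire) bytes are buffered and read the header, then sleep until the announced payload is buffered and deliver CTL_HA_EVT_MSG_RECV to the owning channel with the length as the parameter; the handler runs synchronously on this thread and pulls the payload itself with ctl_ha_msg_recv(). A handler therefore has roughly this shape (sketch; struct my_msg is hypothetical):

	static void
	my_chan_handler(ctl_ha_channel ch, ctl_ha_event event, int param)
	{
		struct my_msg msg;	/* hypothetical wire format */

		if (event == CTL_HA_EVT_MSG_RECV) {
			/* param is the payload length from the wire header. */
			if (ctl_ha_msg_recv(ch, &msg, min(sizeof(msg), param),
			    M_WAITOK) != CTL_HA_STATUS_SUCCESS)
				return;		/* link is being torn down */
			/* ... act on msg ... */
		} else if (event == CTL_HA_EVT_LINK_CHANGE) {
			/* param is a ctl_ha_link_state value. */
		}
	}
]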
+
+static void
+ctl_ha_send(struct ha_softc *softc)
+{
+ struct socket *so = softc->ha_so;
+ int error;
+
+ while (1) {
+ if (softc->ha_sending == NULL) {
+ mtx_lock(&softc->ha_lock);
+ softc->ha_sending = mbufq_dequeue(&softc->ha_sendq);
+ mtx_unlock(&softc->ha_lock);
+ if (softc->ha_sending == NULL) {
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
+ break;
+ }
+ }
+ SOCKBUF_LOCK(&so->so_snd);
+ if (sbspace(&so->so_snd) < softc->ha_sending->m_pkthdr.len) {
+ so->so_snd.sb_lowat = softc->ha_sending->m_pkthdr.len;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ break;
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ error = sosend(softc->ha_so, NULL, NULL, softc->ha_sending,
+ NULL, MSG_DONTWAIT, curthread);
+ softc->ha_sending = NULL;
+ if (error != 0) {
+ printf("%s: sosend() error %d\n", __func__, error);
+ return;
+ }
+ };
+}
+
+static void
+ctl_ha_sock_setup(struct ha_softc *softc)
+{
+ struct sockopt opt;
+ struct socket *so = softc->ha_so;
+ int error, val;
+
+ val = 1024 * 1024;
+ error = soreserve(so, val, val);
+ if (error)
+ printf("%s: soreserve failed %d\n", __func__, error);
+
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_lowat = sizeof(struct ha_msg_wire);
+ soupcall_set(so, SO_RCV, ctl_ha_rupcall, softc);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_lowat = sizeof(struct ha_msg_wire);
+ soupcall_set(so, SO_SND, ctl_ha_supcall, softc);
+ SOCKBUF_UNLOCK(&so->so_snd);
+
+ bzero(&opt, sizeof(struct sockopt));
+ opt.sopt_dir = SOPT_SET;
+ opt.sopt_level = SOL_SOCKET;
+ opt.sopt_name = SO_KEEPALIVE;
+ opt.sopt_val = &val;
+ opt.sopt_valsize = sizeof(val);
+ val = 1;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: KEEPALIVE setting failed %d\n", __func__, error);
+
+ opt.sopt_level = IPPROTO_TCP;
+ opt.sopt_name = TCP_NODELAY;
+ val = 1;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: NODELAY setting failed %d\n", __func__, error);
+
+ opt.sopt_name = TCP_KEEPINIT;
+ val = 3;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: KEEPINIT setting failed %d\n", __func__, error);
+
+ opt.sopt_name = TCP_KEEPIDLE;
+ val = 1;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: KEEPIDLE setting failed %d\n", __func__, error);
+
+ opt.sopt_name = TCP_KEEPINTVL;
+ val = 1;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: KEEPINTVL setting failed %d\n", __func__, error);
+
+ opt.sopt_name = TCP_KEEPCNT;
+ val = 5;
+ error = sosetopt(so, &opt);
+ if (error)
+ printf("%s: KEEPCNT setting failed %d\n", __func__, error);
+}
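
[The socket is tuned for fast failure detection rather than raw throughput: 1 MB buffers, TCP_NODELAY, and aggressive keepalives (3 s connect timeout, first probe after 1 s idle, 1 s between probes, 5 probes), so a dead peer is declared within a few seconds. The repeated sosetopt() boilerplate could be folded into a helper along these lines (sketch, not part of the patch):

	/* Sketch: set one integer socket option the way the code above
	 * does, e.g. ctl_ha_setopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1). */
	static void
	ctl_ha_setopt_int(struct socket *so, int level, int name, int value)
	{
		struct sockopt opt;
		int val = value;

		bzero(&opt, sizeof(opt));
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = level;
		opt.sopt_name = name;
		opt.sopt_val = &val;
		opt.sopt_valsize = sizeof(val);
		if (sosetopt(so, &opt) != 0)
			printf("%s: option %d/%d failed\n",
			    __func__, level, name);
	}
]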
+
+static int
+ctl_ha_connect(struct ha_softc *softc)
+{
+ struct thread *td = curthread;
+ struct sockaddr_in sa;
+ struct socket *so;
+ int error;
+
+ /* Create the socket */
+ error = socreate(PF_INET, &so, SOCK_STREAM,
+ IPPROTO_TCP, td->td_ucred, td);
+ if (error != 0) {
+ printf("%s: socreate() error %d\n", __func__, error);
+ return (error);
+ }
+ softc->ha_so = so;
+ ctl_ha_sock_setup(softc);
+
+ memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
+ error = soconnect(so, (struct sockaddr *)&sa, td);
+ if (error != 0) {
+ printf("%s: soconnect() error %d\n", __func__, error);
+ goto out;
+ }
+ return (0);
+
+out:
+ ctl_ha_close(softc);
+ return (error);
+}
+
+static int
+ctl_ha_accept(struct ha_softc *softc)
+{
+ struct socket *so;
+ struct sockaddr *sap;
+ int error;
+
+ ACCEPT_LOCK();
+ if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
+ softc->ha_lso->so_error = ECONNABORTED;
+ if (softc->ha_lso->so_error) {
+ error = softc->ha_lso->so_error;
+ softc->ha_lso->so_error = 0;
+ ACCEPT_UNLOCK();
+ printf("%s: socket error %d\n", __func__, error);
+ goto out;
+ }
+ so = TAILQ_FIRST(&softc->ha_lso->so_comp);
+ if (so == NULL) {
+ ACCEPT_UNLOCK();
+ return (EWOULDBLOCK);
+ }
+ KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
+ KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
+
+ /*
+ * Before changing the flags on the socket, we have to bump the
+ * reference count. Otherwise, if the protocol calls sofree(),
+ * the socket will be released due to a zero refcount.
+ */
+ SOCK_LOCK(so); /* soref() and so_state update */
+ soref(so); /* file descriptor reference */
+
+ TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
+ softc->ha_lso->so_qlen--;
+ so->so_state |= SS_NBIO;
+ so->so_qstate &= ~SQ_COMP;
+ so->so_head = NULL;
+
+ SOCK_UNLOCK(so);
+ ACCEPT_UNLOCK();
+
+ sap = NULL;
+ error = soaccept(so, &sap);
+ if (error != 0) {
+ printf("%s: soaccept() error %d\n", __func__, error);
+ if (sap != NULL)
+ free(sap, M_SONAME);
+ goto out;
+ }
+ if (sap != NULL)
+ free(sap, M_SONAME);
+ softc->ha_so = so;
+ ctl_ha_sock_setup(softc);
+ return (0);
+
+out:
+ ctl_ha_lclose(softc);
+ return (error);
+}
+
+static int
+ctl_ha_listen(struct ha_softc *softc)
+{
+ struct thread *td = curthread;
+ struct sockaddr_in sa;
+ struct sockopt opt;
+ int error, val;
+
+ /* Create the socket */
+ if (softc->ha_lso == NULL) {
+ error = socreate(PF_INET, &softc->ha_lso, SOCK_STREAM,
+ IPPROTO_TCP, td->td_ucred, td);
+ if (error != 0) {
+ printf("%s: socreate() error %d\n", __func__, error);
+ return (error);
+ }
+ bzero(&opt, sizeof(struct sockopt));
+ opt.sopt_dir = SOPT_SET;
+ opt.sopt_level = SOL_SOCKET;
+ opt.sopt_name = SO_REUSEADDR;
+ opt.sopt_val = &val;
+ opt.sopt_valsize = sizeof(val);
+ val = 1;
+ error = sosetopt(softc->ha_lso, &opt);
+ if (error) {
+ printf("%s: REUSEADDR setting failed %d\n",
+ __func__, error);
+ }
+ bzero(&opt, sizeof(struct sockopt));
+ opt.sopt_dir = SOPT_SET;
+ opt.sopt_level = SOL_SOCKET;
+ opt.sopt_name = SO_REUSEPORT;
+ opt.sopt_val = &val;
+ opt.sopt_valsize = sizeof(val);
+ val = 1;
+ error = sosetopt(softc->ha_lso, &opt);
+ if (error) {
+ printf("%s: REUSEPORT setting failed %d\n",
+ __func__, error);
+ }
+ SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
+ soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
+ SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
+ }
+
+ memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
+ error = sobind(softc->ha_lso, (struct sockaddr *)&sa, td);
+ if (error != 0) {
+ printf("%s: sobind() error %d\n", __func__, error);
+ goto out;
+ }
+ error = solisten(softc->ha_lso, 1, td);
+ if (error != 0) {
+ printf("%s: solisten() error %d\n", __func__, error);
+ goto out;
+ }
+ return (0);
+
+out:
+ ctl_ha_lclose(softc);
+ return (error);
+}
+
+static void
+ctl_ha_conn_thread(void *arg)
+{
+ struct ha_softc *softc = arg;
+ int error;
+
+ while (1) {
+ if (softc->ha_disconnect || softc->ha_shutdown) {
+ ctl_ha_close(softc);
+ if (softc->ha_disconnect == 2 || softc->ha_shutdown)
+ ctl_ha_lclose(softc);
+ softc->ha_disconnect = 0;
+ if (softc->ha_shutdown)
+ break;
+ } else if (softc->ha_so != NULL &&
+ (softc->ha_so->so_error ||
+ softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE))
+ ctl_ha_close(softc);
+ if (softc->ha_so == NULL) {
+ if (softc->ha_lso != NULL)
+ ctl_ha_accept(softc);
+ else if (softc->ha_listen)
+ ctl_ha_listen(softc);
+ else if (softc->ha_connect)
+ ctl_ha_connect(softc);
+ }
+ if (softc->ha_so != NULL) {
+ if (softc->ha_connected == 0 &&
+ softc->ha_so->so_error == 0 &&
+ (softc->ha_so->so_state & SS_ISCONNECTING) == 0) {
+ softc->ha_connected = 1;
+ ctl_ha_evt(softc, CTL_HA_CHAN_MAX,
+ CTL_HA_EVT_LINK_CHANGE,
+ CTL_HA_LINK_ONLINE);
+ softc->ha_receiving = 1;
+ error = kproc_kthread_add(ctl_ha_rx_thread,
+ softc, &softc->ha_ctl_softc->ctl_proc,
+ NULL, 0, 0, "ctl", "ha_rx");
+ if (error != 0) {
+ printf("Error creating CTL HA rx thread!\n");
+ softc->ha_receiving = 0;
+ softc->ha_disconnect = 1;
+ }
+ }
+ ctl_ha_send(softc);
+ }
+ mtx_lock(&softc->ha_lock);
+ if (softc->ha_so != NULL &&
+ (softc->ha_so->so_error ||
+ softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE))
+ ;
+ else if (!softc->ha_wakeup)
+ msleep(&softc->ha_wakeup, &softc->ha_lock, 0, "-", hz);
+ softc->ha_wakeup = 0;
+ mtx_unlock(&softc->ha_lock);
+ }
+ mtx_lock(&softc->ha_lock);
+ softc->ha_shutdown = 2;
+ wakeup(&softc->ha_wakeup);
+ mtx_unlock(&softc->ha_lock);
+ kthread_exit();
+}
+
+static int
+ctl_ha_peer_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct ha_softc *softc = (struct ha_softc *)arg1;
+ struct sockaddr_in *sa;
+ int error, b1, b2, b3, b4, p, num;
+ char buf[128];
+
+ strlcpy(buf, softc->ha_peer, sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if ((error != 0) || (req->newptr == NULL) ||
+ strncmp(buf, softc->ha_peer, sizeof(buf)) == 0)
+ return (error);
+
+ sa = &softc->ha_peer_in;
+ mtx_lock(&softc->ha_lock);
+ if ((num = sscanf(buf, "connect %d.%d.%d.%d:%d",
+ &b1, &b2, &b3, &b4, &p)) >= 4) {
+ softc->ha_connect = 1;
+ softc->ha_listen = 0;
+ } else if ((num = sscanf(buf, "listen %d.%d.%d.%d:%d",
+ &b1, &b2, &b3, &b4, &p)) >= 4) {
+ softc->ha_connect = 0;
+ softc->ha_listen = 1;
+ } else {
+ softc->ha_connect = 0;
+ softc->ha_listen = 0;
+ if (buf[0] != 0) {
+ buf[0] = 0;
+ error = EINVAL;
+ }
+ }
+ strlcpy(softc->ha_peer, buf, sizeof(softc->ha_peer));
+ if (softc->ha_connect || softc->ha_listen) {
+ memset(sa, 0, sizeof(*sa));
+ sa->sin_len = sizeof(struct sockaddr_in);
+ sa->sin_family = AF_INET;
+ sa->sin_port = htons((num >= 5) ? p : 999);
+ sa->sin_addr.s_addr =
+ htonl((b1 << 24) + (b2 << 16) + (b3 << 8) + b4);
+ }
+ softc->ha_disconnect = 2;
+ softc->ha_wakeup = 1;
+ mtx_unlock(&softc->ha_lock);
+ wakeup(&softc->ha_wakeup);
+ return (error);
+}
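
[The handler accepts "connect a.b.c.d[:p]" or "listen a.b.c.d[:p]", defaulting to port 999 when omitted; any other non-empty string is rejected with EINVAL, an empty string disables HA, and every change forces a full disconnect (ha_disconnect = 2 also closes the listening socket) before the connection thread re-applies the new role. Assuming the sysctl lands under kern.cam.ctl as registered in ctl_ha_msg_init() below, two nodes would be paired roughly like:

	kern.cam.ctl.ha_peer="listen 10.0.0.1:7777"	(first node)
	kern.cam.ctl.ha_peer="connect 10.0.0.1:7777"	(second node)
]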
+
+ctl_ha_status
+ctl_ha_msg_register(ctl_ha_channel channel, ctl_evt_handler handler)
+{
+ struct ha_softc *softc = &ha_softc;
+
+ KASSERT(channel < CTL_HA_CHAN_MAX,
+ ("Wrong CTL HA channel %d", channel));
+ softc->ha_handler[channel] = handler;
+ return (CTL_HA_STATUS_SUCCESS);
+}
+
+ctl_ha_status
+ctl_ha_msg_deregister(ctl_ha_channel channel)
+{
+ struct ha_softc *softc = &ha_softc;
+
+ KASSERT(channel < CTL_HA_CHAN_MAX,
+ ("Wrong CTL HA channel %d", channel));
+ softc->ha_handler[channel] = NULL;
+ return (CTL_HA_STATUS_SUCCESS);
+}
+
+/*
+ * Receive a message of the specified size.
+ */
+ctl_ha_status
+ctl_ha_msg_recv(ctl_ha_channel channel, void *addr, size_t len,
+ int wait)
+{
+ struct ha_softc *softc = &ha_softc;
+ struct uio uio;
+ struct iovec iov;
+ int error, flags;
+
+ if (!softc->ha_connected)
+ return (CTL_HA_STATUS_DISCONNECT);
+
+ iov.iov_base = addr;
+ iov.iov_len = len;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_rw = UIO_READ;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_td = curthread;
+ uio.uio_resid = len;
+ flags = wait ? 0 : MSG_DONTWAIT;
+ error = soreceive(softc->ha_so, NULL, &uio, NULL, NULL, &flags);
+ if (error == 0)
+ return (CTL_HA_STATUS_SUCCESS);
+
+ /* Consider all errors fatal for HA sanity. */
+ mtx_lock(&softc->ha_lock);
+ if (softc->ha_connected) {
+ softc->ha_disconnect = 1;
+ softc->ha_wakeup = 1;
+ wakeup(&softc->ha_wakeup);
+ }
+ mtx_unlock(&softc->ha_lock);
+ return (CTL_HA_STATUS_ERROR);
+}
+
+/*
+ * Send a message of the specified size.
+ */
+ctl_ha_status
+ctl_ha_msg_send2(ctl_ha_channel channel, const void *addr, size_t len,
+ const void *addr2, size_t len2, int wait)
+{
+ struct ha_softc *softc = &ha_softc;
+ struct mbuf *mb, *newmb;
+ struct ha_msg_wire hdr;
+ size_t copylen, off;
+
+ if (!softc->ha_connected)
+ return (CTL_HA_STATUS_DISCONNECT);
+
+ newmb = m_getm2(NULL, sizeof(hdr) + len + len2, wait, MT_DATA,
+ M_PKTHDR);
+ if (newmb == NULL) {
+ /* Consider all errors fatal for HA sanity. */
+ mtx_lock(&softc->ha_lock);
+ if (softc->ha_connected) {
+ softc->ha_disconnect = 1;
+ softc->ha_wakeup = 1;
+ wakeup(&softc->ha_wakeup);
+ }
+ mtx_unlock(&softc->ha_lock);
+ printf("%s: Can't allocate mbuf chain\n", __func__);
+ return (CTL_HA_STATUS_ERROR);
+ }
+ hdr.channel = channel;
+ hdr.length = len + len2;
+ mb = newmb;
+ memcpy(mtodo(mb, 0), &hdr, sizeof(hdr));
+ mb->m_len += sizeof(hdr);
+ off = 0;
+ for (; mb != NULL && off < len; mb = mb->m_next) {
+ copylen = min(M_TRAILINGSPACE(mb), len - off);
+ memcpy(mtodo(mb, mb->m_len), (const char *)addr + off, copylen);
+ mb->m_len += copylen;
+ off += copylen;
+ if (off == len)
+ break;
+ }
+ KASSERT(off == len, ("%s: off (%zu) != len (%zu)", __func__,
+ off, len));
+ off = 0;
+ for (; mb != NULL && off < len2; mb = mb->m_next) {
+ copylen = min(M_TRAILINGSPACE(mb), len2 - off);
+ memcpy(mtodo(mb, mb->m_len), (const char *)addr2 + off, copylen);
+ mb->m_len += copylen;
+ off += copylen;
+ }
+ KASSERT(off == len2, ("%s: off (%zu) != len2 (%zu)", __func__,
+ off, len2));
+ newmb->m_pkthdr.len = sizeof(hdr) + len + len2;
+
+ mtx_lock(&softc->ha_lock);
+ if (!softc->ha_connected) {
+ mtx_unlock(&softc->ha_lock);
+ m_freem(newmb);
+ return (CTL_HA_STATUS_DISCONNECT);
+ }
+ mbufq_enqueue(&softc->ha_sendq, newmb);
+ softc->ha_wakeup = 1;
+ mtx_unlock(&softc->ha_lock);
+ wakeup(&softc->ha_wakeup);
+ return (CTL_HA_STATUS_SUCCESS);
+}
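
[ctl_ha_msg_send2() packs the wire header and up to two caller buffers into a single mbuf chain, so header and payload always travel as one framed message; the chain is only enqueued here, and the connection thread performs the actual sosend(). A caller sketch (hdr, buf, and len are hypothetical):

	/* Sketch: header-plus-blob send on the CTL channel. */
	static int
	example_send(struct ctl_ha_msg_hdr *hdr, void *buf, size_t len)
	{

		if (ctl_ha_msg_send2(CTL_HA_CHAN_CTL, hdr, sizeof(*hdr),
		    buf, len, M_WAITOK) != CTL_HA_STATUS_SUCCESS)
			return (EIO);	/* peer lost; teardown in progress */
		return (0);
	}
]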
+
+ctl_ha_status
+ctl_ha_msg_send(ctl_ha_channel channel, const void *addr, size_t len,
+ int wait)
+{
+
+ return (ctl_ha_msg_send2(channel, addr, len, NULL, 0, wait));
+}
+
+ctl_ha_status
+ctl_ha_msg_abort(ctl_ha_channel channel)
+{
+ struct ha_softc *softc = &ha_softc;
+
+ mtx_lock(&softc->ha_lock);
+ softc->ha_disconnect = 1;
+ softc->ha_wakeup = 1;
+ mtx_unlock(&softc->ha_lock);
+ wakeup(&softc->ha_wakeup);
+ return (CTL_HA_STATUS_SUCCESS);
+}
+
+/*
+ * Allocate a data transfer request structure.
+ */
+struct ctl_ha_dt_req *
+ctl_dt_req_alloc(void)
+{
+
+ return (malloc(sizeof(struct ctl_ha_dt_req), M_CTL, M_WAITOK | M_ZERO));
+}
+
+/*
+ * Free a data transfer request structure.
+ */
+void
+ctl_dt_req_free(struct ctl_ha_dt_req *req)
+{
+
+ free(req, M_CTL);
+}
+
+/*
+ * Issue a DMA request for a single buffer.
+ */
+ctl_ha_status
+ctl_dt_single(struct ctl_ha_dt_req *req)
+{
+ struct ha_softc *softc = &ha_softc;
+ struct ha_dt_msg_wire wire_dt;
+ ctl_ha_status status;
+
+ wire_dt.command = req->command;
+ wire_dt.size = req->size;
+ wire_dt.local = req->local;
+ wire_dt.remote = req->remote;
+ if (req->command == CTL_HA_DT_CMD_READ && req->callback != NULL) {
+ mtx_lock(&softc->ha_lock);
+ TAILQ_INSERT_TAIL(&softc->ha_dts, req, links);
+ mtx_unlock(&softc->ha_lock);
+ ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt),
+ M_WAITOK);
+ return (CTL_HA_STATUS_WAIT);
+ }
+ if (req->command == CTL_HA_DT_CMD_READ) {
+ status = ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt,
+ sizeof(wire_dt), M_WAITOK);
+ } else {
+ status = ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt,
+ sizeof(wire_dt), req->local, req->size, M_WAITOK);
+ }
+ return (status);
+}
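
[The data-transfer protocol built on this: a READ with a callback is parked on ha_dts and a request is sent to the peer; the peer flips the command to WRITE, swaps the local/remote pointer tokens, and streams the buffer back, and the reply is matched to the parked request by comparing its local pointer against the echoed remote one (see ctl_dt_event_handler() below). Requesting a remote-to-local transfer then looks like this (sketch; local_buf, remote_token, len, and done_cb are hypothetical):

	/* Sketch: an asynchronous remote read. */
	struct ctl_ha_dt_req *req;

	req = ctl_dt_req_alloc();
	req->command = CTL_HA_DT_CMD_READ;
	req->size = len;
	req->local = local_buf;		/* filled when the peer writes back */
	req->remote = remote_token;	/* peer-side pointer, exchanged earlier */
	req->callback = done_cb;	/* fired from the DATA channel handler */
	(void)ctl_dt_single(req);	/* returns CTL_HA_STATUS_WAIT */
]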
+
+static void
+ctl_dt_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
+{
+ struct ha_softc *softc = &ha_softc;
+ struct ctl_ha_dt_req *req;
+ ctl_ha_status isc_status;
+
+ if (event == CTL_HA_EVT_MSG_RECV) {
+ struct ha_dt_msg_wire wire_dt;
+ uint8_t *tmp;
+ int size;
+
+ size = min(sizeof(wire_dt), param);
+ isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA, &wire_dt,
+ size, M_WAITOK);
+ if (isc_status != CTL_HA_STATUS_SUCCESS) {
+ printf("%s: Error receiving message: %d\n",
+ __func__, isc_status);
+ return;
+ }
+
+ if (wire_dt.command == CTL_HA_DT_CMD_READ) {
+ wire_dt.command = CTL_HA_DT_CMD_WRITE;
+ tmp = wire_dt.local;
+ wire_dt.local = wire_dt.remote;
+ wire_dt.remote = tmp;
+ ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt,
+ sizeof(wire_dt), wire_dt.local, wire_dt.size,
+ M_WAITOK);
+ } else if (wire_dt.command == CTL_HA_DT_CMD_WRITE) {
+ isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA,
+ wire_dt.remote, wire_dt.size, M_WAITOK);
+ mtx_lock(&softc->ha_lock);
+ TAILQ_FOREACH(req, &softc->ha_dts, links) {
+ if (req->local == wire_dt.remote) {
+ TAILQ_REMOVE(&softc->ha_dts, req, links);
+ break;
+ }
+ }
+ mtx_unlock(&softc->ha_lock);
+ if (req) {
+ req->ret = isc_status;
+ req->callback(req);
+ }
+ }
+ } else if (event == CTL_HA_EVT_LINK_CHANGE) {
+ CTL_DEBUG_PRINT(("%s: Link state change to %d\n", __func__,
+ param));
+ if (param != CTL_HA_LINK_ONLINE) {
+ mtx_lock(&softc->ha_lock);
+ while ((req = TAILQ_FIRST(&softc->ha_dts)) != NULL) {
+ TAILQ_REMOVE(&softc->ha_dts, req, links);
+ mtx_unlock(&softc->ha_lock);
+ req->ret = CTL_HA_STATUS_DISCONNECT;
+ req->callback(req);
+ mtx_lock(&softc->ha_lock);
+ }
+ mtx_unlock(&softc->ha_lock);
+ }
+ } else {
+ printf("%s: Unknown event %d\n", __func__, event);
+ }
+}
+
+
+ctl_ha_status
+ctl_ha_msg_init(struct ctl_softc *ctl_softc)
+{
+ struct ha_softc *softc = &ha_softc;
+ int error;
+
+ softc->ha_ctl_softc = ctl_softc;
+ mtx_init(&softc->ha_lock, "CTL HA mutex", NULL, MTX_DEF);
+ mbufq_init(&softc->ha_sendq, INT_MAX);
+ TAILQ_INIT(&softc->ha_dts);
+ error = kproc_kthread_add(ctl_ha_conn_thread, softc,
+ &ctl_softc->ctl_proc, NULL, 0, 0, "ctl", "ha_tx");
+ if (error != 0) {
+ printf("error creating CTL HA connection thread!\n");
+ mtx_destroy(&softc->ha_lock);
+ return (CTL_HA_STATUS_ERROR);
+ }
+ softc->ha_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+ ctl_ha_msg_shutdown, ctl_softc, SHUTDOWN_PRI_FIRST);
+ SYSCTL_ADD_PROC(&ctl_softc->sysctl_ctx,
+ SYSCTL_CHILDREN(ctl_softc->sysctl_tree),
+ OID_AUTO, "ha_peer", CTLTYPE_STRING | CTLFLAG_RWTUN,
+ softc, 0, ctl_ha_peer_sysctl, "A", "HA peer connection method");
+
+ if (ctl_ha_msg_register(CTL_HA_CHAN_DATA, ctl_dt_event_handler)
+ != CTL_HA_STATUS_SUCCESS) {
+ printf("%s: ctl_ha_msg_register failed.\n", __func__);
+ }
+
+ return (CTL_HA_STATUS_SUCCESS);
+};
+
+void
+ctl_ha_msg_shutdown(struct ctl_softc *ctl_softc)
+{
+ struct ha_softc *softc = &ha_softc;
+
+ /* Disconnect and shutdown threads. */
+ mtx_lock(&softc->ha_lock);
+ if (softc->ha_shutdown < 2) {
+ softc->ha_shutdown = 1;
+ softc->ha_wakeup = 1;
+ wakeup(&softc->ha_wakeup);
+ while (softc->ha_shutdown < 2) {
+ msleep(&softc->ha_wakeup, &softc->ha_lock, 0,
+ "shutdown", hz);
+ }
+ }
+ mtx_unlock(&softc->ha_lock);
+};
+
+ctl_ha_status
+ctl_ha_msg_destroy(struct ctl_softc *ctl_softc)
+{
+ struct ha_softc *softc = &ha_softc;
+
+ if (softc->ha_shutdown_eh != NULL) {
+ EVENTHANDLER_DEREGISTER(shutdown_pre_sync,
+ softc->ha_shutdown_eh);
+ softc->ha_shutdown_eh = NULL;
+ }
+
+ ctl_ha_msg_shutdown(ctl_softc); /* Just in case. */
+
+ if (ctl_ha_msg_deregister(CTL_HA_CHAN_DATA) != CTL_HA_STATUS_SUCCESS)
+ printf("%s: ctl_ha_msg_deregister failed.\n", __func__);
+
+ mtx_destroy(&softc->ha_lock);
+ return (CTL_HA_STATUS_SUCCESS);
+};
diff --git a/sys/cam/ctl/ctl_ha.h b/sys/cam/ctl/ctl_ha.h
index 0c004b3..f38f640 100644
--- a/sys/cam/ctl/ctl_ha.h
+++ b/sys/cam/ctl/ctl_ha.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 2003-2009 Silicon Graphics International Corp.
* Copyright (c) 2011 Spectra Logic Corporation
+ * Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,80 +39,27 @@
/*
* CTL High Availability Modes:
*
- * CTL_HA_MODE_ACT_STBY: One side is in Active state and processing commands,
- * the other side is in Standby state, returning errors.
- * CTL_HA_MODE_SER_ONLY: Commands are serialized to the other side. Write
- * mirroring and read re-direction are assumed to
- * happen in the back end.
- * CTL_HA_MODE_XFER: Commands are serialized and data is transferred
- * for write mirroring and read re-direction.
+ * CTL_HA_MODE_ACT_STBY: Commands are serialized to the master side.
+ * No media access commands on slave side (Standby).
+ * CTL_HA_MODE_SER_ONLY: Commands are serialized to the master side.
+ * Media can be accessed on both sides.
+ * CTL_HA_MODE_XFER: Commands and data are forwarded to the
+ * master side for execution.
*/
-
typedef enum {
CTL_HA_MODE_ACT_STBY,
CTL_HA_MODE_SER_ONLY,
CTL_HA_MODE_XFER
} ctl_ha_mode;
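
[These modes replace the old stubbed semantics: in ACT_STBY only the master side may touch the media and the standby side fails media access commands; in SER_ONLY commands are still serialized through the master but both sides access the media; in XFER both commands and data are forwarded to the master for execution. Assuming the ha_id/ha_mode tunables this implementation exposes elsewhere (the names are an assumption; they are not defined in this header), a two-node pair might be configured as:

	kern.cam.ctl.ha_id=1	# 1 on one node, 2 on the other
	kern.cam.ctl.ha_mode=2	# 0=ACT_STBY, 1=SER_ONLY, 2=XFER
]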
-
-/*
- * This is a stubbed out High Availability interface. It assumes two nodes
- * staying in sync.
- *
- * The reason this interface is here, and stubbed out, is that CTL was
- * originally written with support for Copan's (now SGI) high availability
- * framework. That framework was not released by SGI, and would not have
- * been generally applicable to FreeBSD anyway.
- *
- * The idea here is to show the kind of API that would need to be in place
- * in a HA framework to work with CTL's HA hooks. This API is very close
- * to the Copan/SGI API, so that the code using it could stay in place
- * as-is.
- *
- * So, in summary, this is a shell without real substance, and much more
- * work would be needed to actually make HA work. The implementation
- * inside CTL will also need to change to fit the eventual implementation.
- * The additional pieces we would need are:
- *
- * - HA "Supervisor" framework that can startup the components of the
- * system, and initiate failover (i.e. active/active to single mode)
- * and failback (single to active/active mode) state transitions.
- * This framework would be able to recognize when an event happens
- * that requires it to initiate state transitions in the components it
- * manages.
- *
- * - HA communication framework. This framework should have the following
- * features:
- * - Separate channels for separate system components. The CTL
- * instance on one node should communicate with the CTL instance
- * on another node.
- * - Short message passing. These messages would be fixed length, so
- * they could be preallocated and easily passed between the nodes.
- * i.e. conceptually like an ethernet packet.
- * - DMA/large buffer capability. This would require some negotiation
- * with the other node to define the destination. It could
- * allow for "push" (i.e. initiated by the requesting node) DMA or
- * "pull" (i.e. initiated by the target controller) DMA or both.
- * - Communication channel status change notification.
- * - HA capability in other portions of the storage stack. Having two CTL
- * instances communicate is just one part of an overall HA solution.
- * State needs to be synchronized at multiple levels of the system in
- * order for failover to actually work. For instance, if CTL is using a
- * file on a ZFS filesystem as its backing store, the ZFS array state
- * should be synchronized with the other node, so that the other node
- * can immediately take over if the node that is primary for a particular
- * array fails.
- */
-
/*
* Communication channel IDs for various system components. This is to
* make sure one CTL instance talks with another, one ZFS instance talks
* with another, etc.
*/
typedef enum {
- CTL_HA_CHAN_NONE,
CTL_HA_CHAN_CTL,
- CTL_HA_CHAN_ZFS,
+ CTL_HA_CHAN_DATA,
CTL_HA_CHAN_MAX
} ctl_ha_channel;
@@ -120,18 +68,12 @@ typedef enum {
* HA communication subsystem.
*
* CTL_HA_EVT_MSG_RECV: Message received by the other node.
- * CTL_HA_EVT_MSG_SENT: Message sent to the other node.
- * CTL_HA_EVT_DISCONNECT: Communication channel disconnected.
- * CTL_HA_EVT_DMA_SENT: DMA successfully sent to other node (push).
- * CTL_HA_EVT_DMA_RECEIVED: DMA successfully received by other node (pull).
+ * CTL_HA_EVT_LINK_CHANGE: Communication channel status changed.
*/
typedef enum {
CTL_HA_EVT_NONE,
CTL_HA_EVT_MSG_RECV,
- CTL_HA_EVT_MSG_SENT,
- CTL_HA_EVT_DISCONNECT,
- CTL_HA_EVT_DMA_SENT,
- CTL_HA_EVT_DMA_RECEIVED,
+ CTL_HA_EVT_LINK_CHANGE,
CTL_HA_EVT_MAX
} ctl_ha_event;
@@ -146,12 +88,6 @@ typedef enum {
} ctl_ha_status;
typedef enum {
- CTL_HA_DATA_CTL,
- CTL_HA_DATA_ZFS,
- CTL_HA_DATA_MAX
-} ctl_ha_dtid;
-
-typedef enum {
CTL_HA_DT_CMD_READ,
CTL_HA_DT_CMD_WRITE,
} ctl_ha_dt_cmd;
@@ -164,110 +100,42 @@ struct ctl_ha_dt_req {
ctl_ha_dt_cmd command;
void *context;
ctl_ha_dt_cb callback;
- ctl_ha_dtid id;
int ret;
uint32_t size;
uint8_t *local;
uint8_t *remote;
+ TAILQ_ENTRY(ctl_ha_dt_req) links;
};
+struct ctl_softc;
+ctl_ha_status ctl_ha_msg_init(struct ctl_softc *softc);
+void ctl_ha_msg_shutdown(struct ctl_softc *softc);
+ctl_ha_status ctl_ha_msg_destroy(struct ctl_softc *softc);
+
typedef void (*ctl_evt_handler)(ctl_ha_channel channel, ctl_ha_event event,
int param);
void ctl_ha_register_evthandler(ctl_ha_channel channel,
ctl_evt_handler handler);
-static inline ctl_ha_status
-ctl_ha_msg_create(ctl_ha_channel channel, ctl_evt_handler handler)
-{
- return (CTL_HA_STATUS_SUCCESS);
-}
-
-/*
- * Receive a message of the specified size.
- */
-static inline ctl_ha_status
-ctl_ha_msg_recv(ctl_ha_channel channel, void *buffer, unsigned int size,
- int wait)
-{
- return (CTL_HA_STATUS_SUCCESS);
-}
-
-/*
- * Send a message of the specified size.
- */
-static inline ctl_ha_status
-ctl_ha_msg_send(ctl_ha_channel channel, void *buffer, unsigned int size,
- int wait)
-{
- return (CTL_HA_STATUS_SUCCESS);
-}
+ctl_ha_status ctl_ha_msg_register(ctl_ha_channel channel,
+ ctl_evt_handler handler);
+ctl_ha_status ctl_ha_msg_recv(ctl_ha_channel channel, void *addr,
+ size_t len, int wait);
+ctl_ha_status ctl_ha_msg_send(ctl_ha_channel channel, const void *addr,
+ size_t len, int wait);
+ctl_ha_status ctl_ha_msg_send2(ctl_ha_channel channel, const void *addr,
+ size_t len, const void *addr2, size_t len2, int wait);
+ctl_ha_status ctl_ha_msg_abort(ctl_ha_channel channel);
+ctl_ha_status ctl_ha_msg_deregister(ctl_ha_channel channel);
+
+struct ctl_ha_dt_req * ctl_dt_req_alloc(void);
+void ctl_dt_req_free(struct ctl_ha_dt_req *req);
+ctl_ha_status ctl_dt_single(struct ctl_ha_dt_req *req);
-/*
- * Allocate a data transfer request structure.
- */
-static inline struct ctl_ha_dt_req *
-ctl_dt_req_alloc(void)
-{
- return (NULL);
-}
-
-/*
- * Free a data transfer request structure.
- */
-static inline void
-ctl_dt_req_free(struct ctl_ha_dt_req *req)
-{
- return;
-}
-
-/*
- * Issue a DMA request for a single buffer.
- */
-static inline ctl_ha_status
-ctl_dt_single(struct ctl_ha_dt_req *req)
-{
- return (CTL_HA_STATUS_WAIT);
-}
-
-/*
- * SINGLE: One node
- * HA: Two nodes (Active/Active implied)
- * SLAVE/MASTER: The component can set these flags to indicate which side
- * is in control. It has no effect on the HA framework.
- */
typedef enum {
- CTL_HA_STATE_UNKNOWN = 0x00,
- CTL_HA_STATE_SINGLE = 0x01,
- CTL_HA_STATE_HA = 0x02,
- CTL_HA_STATE_MASK = 0x0F,
- CTL_HA_STATE_SLAVE = 0x10,
- CTL_HA_STATE_MASTER = 0x20
-} ctl_ha_state;
-
-typedef enum {
- CTL_HA_COMP_STATUS_OK,
- CTL_HA_COMP_STATUS_FAILED,
- CTL_HA_COMP_STATUS_ERROR
-} ctl_ha_comp_status;
-
-struct ctl_ha_component;
-
-typedef ctl_ha_comp_status (*ctl_hacmp_init_t)(struct ctl_ha_component *);
-typedef ctl_ha_comp_status (*ctl_hacmp_start_t)(struct ctl_ha_component *,
- ctl_ha_state);
-
-struct ctl_ha_component {
- char *name;
- ctl_ha_state state;
- ctl_ha_comp_status status;
- ctl_hacmp_init_t init;
- ctl_hacmp_start_t start;
- ctl_hacmp_init_t quiesce;
-};
-
-#define CTL_HA_STATE_IS_SINGLE(state) ((state & CTL_HA_STATE_MASK) == \
- CTL_HA_STATE_SINGLE)
-#define CTL_HA_STATE_IS_HA(state) ((state & CTL_HA_STATE_MASK) == \
- CTL_HA_STATE_HA)
+ CTL_HA_LINK_OFFLINE = 0x00,
+ CTL_HA_LINK_UNKNOWN = 0x01,
+ CTL_HA_LINK_ONLINE = 0x02
+} ctl_ha_link_state;
#endif /* _CTL_HA_H_ */
diff --git a/sys/cam/ctl/ctl_io.h b/sys/cam/ctl/ctl_io.h
index 2c24591..21db94a 100644
--- a/sys/cam/ctl/ctl_io.h
+++ b/sys/cam/ctl/ctl_io.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2003 Silicon Graphics International Corp.
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,13 +59,12 @@ EXTERN(int ctl_time_io_secs, CTL_TIME_IO_DEFAULT_SECS);
#endif
/*
- * Uncomment these next two lines to enable the CTL I/O delay feature. You
+ * Uncomment this next line to enable the CTL I/O delay feature. You
* can delay I/O at two different points -- datamove and done. This is
* useful for diagnosing abort conditions (for hosts that send an abort on a
* timeout), and for determining how long a host's timeout is.
*/
-#define CTL_IO_DELAY
-#define CTL_TIMER_BYTES sizeof(struct callout)
+//#define CTL_IO_DELAY
typedef enum {
CTL_STATUS_NONE, /* No status */
@@ -89,17 +89,13 @@ typedef enum {
CTL_FLAG_DATA_OUT = 0x00000002, /* DATA OUT */
CTL_FLAG_DATA_NONE = 0x00000003, /* no data */
CTL_FLAG_DATA_MASK = 0x00000003,
- CTL_FLAG_KDPTR_SGLIST = 0x00000008, /* kern_data_ptr is S/G list*/
- CTL_FLAG_EDPTR_SGLIST = 0x00000010, /* ext_data_ptr is S/G list */
CTL_FLAG_DO_AUTOSENSE = 0x00000020, /* grab sense info */
CTL_FLAG_USER_REQ = 0x00000040, /* request came from userland */
- CTL_FLAG_CONTROL_DEV = 0x00000080, /* processor device */
CTL_FLAG_ALLOCATED = 0x00000100, /* data space allocated */
CTL_FLAG_BLOCKED = 0x00000200, /* on the blocked queue */
CTL_FLAG_ABORT_STATUS = 0x00000400, /* return TASK ABORTED status */
CTL_FLAG_ABORT = 0x00000800, /* this I/O should be aborted */
CTL_FLAG_DMA_INPROG = 0x00001000, /* DMA in progress */
- CTL_FLAG_NO_DATASYNC = 0x00002000, /* don't cache flush data */
CTL_FLAG_DELAY_DONE = 0x00004000, /* delay injection done */
CTL_FLAG_INT_COPY = 0x00008000, /* internal copy, no done call*/
CTL_FLAG_SENT_2OTHER_SC = 0x00010000,
@@ -109,9 +105,6 @@ typedef enum {
addresses, not virtual ones*/
CTL_FLAG_IO_CONT = 0x00100000, /* Continue I/O instead of
completing */
- CTL_FLAG_AUTO_MIRROR = 0x00200000, /* Automatically use memory
- from the RC cache mirrored
- address area. */
#if 0
CTL_FLAG_ALREADY_DONE = 0x00200000 /* I/O already completed */
#endif
@@ -119,15 +112,10 @@ typedef enum {
CTL_FLAG_DMA_QUEUED = 0x00800000, /* DMA queued but not started*/
CTL_FLAG_STATUS_QUEUED = 0x01000000, /* Status queued but not sent*/
- CTL_FLAG_REDIR_DONE = 0x02000000, /* Redirection has already
- been done. */
CTL_FLAG_FAILOVER = 0x04000000, /* Killed by a failover */
CTL_FLAG_IO_ACTIVE = 0x08000000, /* I/O active on this SC */
- CTL_FLAG_RDMA_MASK = CTL_FLAG_NO_DATASYNC | CTL_FLAG_BUS_ADDR |
- CTL_FLAG_AUTO_MIRROR | CTL_FLAG_REDIR_DONE,
- /* Flags we care about for
- remote DMA */
- CTL_FLAG_STATUS_SENT = 0x10000000 /* Status sent by datamove */
+ CTL_FLAG_STATUS_SENT = 0x10000000, /* Status sent by datamove */
+ CTL_FLAG_SERSEQ_DONE = 0x20000000 /* All storage I/O started */
} ctl_io_flags;
@@ -173,46 +161,17 @@ union ctl_priv {
#define CTL_PRIV_MODEPAGE 1 /* Modepage info for config write */
#define CTL_PRIV_BACKEND 2 /* Reserved for block, RAIDCore */
#define CTL_PRIV_BACKEND_LUN 3 /* Backend LUN pointer */
-#define CTL_PRIV_FRONTEND 4 /* LSI driver, ioctl front end */
-#define CTL_PRIV_USER 5 /* Userland use */
+#define CTL_PRIV_FRONTEND 4 /* Frontend storage */
+#define CTL_PRIV_FRONTEND2 5 /* Another frontend storage */
#define CTL_INVALID_PORTNAME 0xFF
#define CTL_UNMAPPED_IID 0xFF
-/*
- * XXX KDM this size is for the port_priv variable in struct ctl_io_hdr
- * below. This should be defined in terms of the size of struct
- * ctlfe_lun_cmd_info at the moment:
- * struct ctlfe_lun_cmd_info {
- * int cur_transfer_index;
- * ctlfe_cmd_flags flags;
- * bus_dma_segment_t cam_sglist[32];
- * };
- *
- * This isn't really the way I'd prefer to do it, but it does make some
- * sense, AS LONG AS we can guarantee that there will always only be one
- * outstanding DMA request per ctl_io. If that assumption isn't valid,
- * then we've got problems.
- *
- * At some point it may be nice switch CTL over to using CCBs for
- * everything. At that point we can probably use the ATIO/CTIO model, so
- * that multiple simultaneous DMAs per command will just work.
- *
- * Also note that the current size, 600, is appropriate for 64-bit
- * architectures, but is overkill for 32-bit architectures. Need a way to
- * figure out the size at compile time, or just get rid of this altogether.
- */
-#define CTL_PORT_PRIV_SIZE 600
struct ctl_sg_entry {
void *addr;
size_t len;
};
-struct ctl_id {
- uint32_t id;
- uint64_t wwid[2];
-};
-
typedef enum {
CTL_IO_NONE,
CTL_IO_SCSI,
@@ -220,9 +179,8 @@ typedef enum {
} ctl_io_type;
struct ctl_nexus {
- struct ctl_id initid; /* Initiator ID */
+ uint32_t initid; /* Initiator ID */
uint32_t targ_port; /* Target port, filled in by PORT */
- struct ctl_id targ_target; /* Destination target */
uint32_t targ_lun; /* Destination lun */
uint32_t targ_mapped_lun; /* Destination lun CTL-wide */
};
@@ -234,15 +192,19 @@ typedef enum {
CTL_MSG_BAD_JUJU,
CTL_MSG_MANAGE_TASKS,
CTL_MSG_PERS_ACTION,
- CTL_MSG_SYNC_FE,
CTL_MSG_DATAMOVE,
- CTL_MSG_DATAMOVE_DONE
+ CTL_MSG_DATAMOVE_DONE,
+ CTL_MSG_UA, /* Set/clear UA on secondary. */
+ CTL_MSG_PORT_SYNC, /* Information about port. */
+ CTL_MSG_LUN_SYNC, /* Information about LUN. */
+ CTL_MSG_IID_SYNC, /* Information about initiator. */
+ CTL_MSG_LOGIN, /* Information about HA peer. */
+ CTL_MSG_MODE_SYNC, /* Mode page current content. */
+ CTL_MSG_FAILOVER /* Fake, never sent through the wire */
} ctl_msg_type;
struct ctl_scsiio;
-#define CTL_NUM_SG_ENTRIES 9
-
struct ctl_io_hdr {
uint32_t version; /* interface version XXX */
ctl_io_type io_type; /* task I/O, SCSI I/O, etc. */
@@ -255,24 +217,21 @@ struct ctl_io_hdr {
uint32_t timeout; /* timeout in ms */
uint32_t retries; /* retry count */
#ifdef CTL_IO_DELAY
- uint8_t timer_bytes[CTL_TIMER_BYTES]; /* timer kludge */
+ struct callout delay_callout;
#endif /* CTL_IO_DELAY */
#ifdef CTL_TIME_IO
time_t start_time; /* I/O start time */
struct bintime start_bt; /* Timer start ticks */
struct bintime dma_start_bt; /* DMA start ticks */
struct bintime dma_bt; /* DMA total ticks */
- uint32_t num_dmas; /* Number of DMAs */
#endif /* CTL_TIME_IO */
+ uint32_t num_dmas; /* Number of DMAs */
union ctl_io *original_sc;
union ctl_io *serializing_sc;
void *pool; /* I/O pool */
union ctl_priv ctl_private[CTL_NUM_PRIV];/* CTL private area */
- uint8_t port_priv[CTL_PORT_PRIV_SIZE];/* PORT private area*/
- struct ctl_sg_entry remote_sglist[CTL_NUM_SG_ENTRIES];
- struct ctl_sg_entry remote_dma_sglist[CTL_NUM_SG_ENTRIES];
- struct ctl_sg_entry local_sglist[CTL_NUM_SG_ENTRIES];
- struct ctl_sg_entry local_dma_sglist[CTL_NUM_SG_ENTRIES];
+ struct ctl_sg_entry *remote_sglist;
+ struct ctl_sg_entry *local_sglist;
STAILQ_ENTRY(ctl_io_hdr) links; /* linked list pointer */
TAILQ_ENTRY(ctl_io_hdr) ooa_links;
TAILQ_ENTRY(ctl_io_hdr) blocked_links;
@@ -372,9 +331,20 @@ typedef enum {
CTL_TASK_TARGET_RESET,
CTL_TASK_BUS_RESET,
CTL_TASK_PORT_LOGIN,
- CTL_TASK_PORT_LOGOUT
+ CTL_TASK_PORT_LOGOUT,
+ CTL_TASK_QUERY_TASK,
+ CTL_TASK_QUERY_TASK_SET,
+ CTL_TASK_QUERY_ASYNC_EVENT
} ctl_task_type;
+typedef enum {
+ CTL_TASK_FUNCTION_COMPLETE,
+ CTL_TASK_FUNCTION_SUCCEEDED,
+ CTL_TASK_FUNCTION_REJECTED,
+ CTL_TASK_LUN_DOES_NOT_EXIST,
+ CTL_TASK_FUNCTION_NOT_SUPPORTED
+} ctl_task_status;
+
/*
* Task management I/O structure. Aborts, bus resets, etc., are sent using
* this structure.
@@ -387,6 +357,27 @@ struct ctl_taskio {
ctl_task_type task_action; /* Target Reset, Abort, etc. */
uint32_t tag_num; /* tag number */
ctl_tag_type tag_type; /* simple, ordered, etc. */
+ uint8_t task_status; /* Complete, Succeeded, etc. */
+ uint8_t task_resp[3];/* Response information */
+};
+
+
+/*
+ * HA link messages.
+ */
+#define CTL_HA_VERSION 1
+
+/*
+ * Used for CTL_MSG_LOGIN.
+ */
+struct ctl_ha_msg_login {
+ ctl_msg_type msg_type;
+ int version;
+ int ha_mode;
+ int ha_id;
+ int max_luns;
+ int max_ports;
+ int max_init_per_port;
};
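
[CTL_MSG_LOGIN is the first message exchanged once the link is up: each node announces its interface version, HA mode, node ID, and sizing limits, and a receiver should drop the link on any mismatch, since every later message assumes identical structure layout on both nodes (the framing appears to be host-endian, and pointer fields are carried as opaque tokens). A sketch of the receiver-side check (the real one lives in ctl.c, outside this diff; my_mode/my_id stand in for the local configuration):

	/* Sketch: reject an incompatible HA peer at login time. */
	static int
	ha_login_ok(const struct ctl_ha_msg_login *msg, int my_mode, int my_id)
	{

		return (msg->version == CTL_HA_VERSION &&
		    msg->ha_mode == my_mode &&
		    msg->ha_id != my_id);	/* the two nodes must differ */
	}
]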
typedef enum {
@@ -414,14 +405,14 @@ struct ctl_pr_info {
struct ctl_ha_msg_hdr {
ctl_msg_type msg_type;
+ uint32_t status; /* transaction status */
union ctl_io *original_sc;
union ctl_io *serializing_sc;
struct ctl_nexus nexus; /* Initiator, port, target, lun */
- uint32_t status; /* transaction status */
- TAILQ_ENTRY(ctl_ha_msg_hdr) links;
};
#define CTL_HA_MAX_SG_ENTRIES 16
+#define CTL_HA_DATAMOVE_SEGMENT 131072
/*
* Used for CTL_MSG_PERS_ACTION.
@@ -432,6 +423,17 @@ struct ctl_ha_msg_pr {
};
/*
+ * Used for CTL_MSG_UA.
+ */
+struct ctl_ha_msg_ua {
+ struct ctl_ha_msg_hdr hdr;
+ int ua_all;
+ int ua_set;
+ int ua_type;
+ uint8_t ua_info[8];
+};
+
+/*
* The S/G handling here is a little different than the standard ctl_scsiio
* structure, because we can't pass data by reference in between controllers.
* The S/G list in the ctl_scsiio struct is normally passed in the
@@ -463,17 +465,17 @@ struct ctl_ha_msg_dt {
*/
struct ctl_ha_msg_scsi {
struct ctl_ha_msg_hdr hdr;
- uint8_t cdb[CTL_MAX_CDBLEN]; /* CDB */
uint32_t tag_num; /* tag number */
ctl_tag_type tag_type; /* simple, ordered, etc. */
+ uint8_t cdb[CTL_MAX_CDBLEN]; /* CDB */
+ uint8_t cdb_len; /* CDB length */
uint8_t scsi_status; /* SCSI status byte */
- struct scsi_sense_data sense_data; /* sense data */
uint8_t sense_len; /* Returned sense length */
uint8_t sense_residual; /* sense residual length */
uint32_t residual; /* data residual length */
uint32_t fetd_status; /* trans status, set by FETD,
0 = good*/
- struct ctl_lba_len lbalen; /* used for stats */
+ struct scsi_sense_data sense_data; /* sense data */
};
/*
@@ -486,22 +488,83 @@ struct ctl_ha_msg_task {
ctl_tag_type tag_type; /* simple, ordered, etc. */
};
+/*
+ * Used for CTL_MSG_PORT_SYNC.
+ */
+struct ctl_ha_msg_port {
+ struct ctl_ha_msg_hdr hdr;
+ int port_type;
+ int physical_port;
+ int virtual_port;
+ int status;
+ int name_len;
+ int lun_map_len;
+ int port_devid_len;
+ int target_devid_len;
+ int init_devid_len;
+ uint8_t data[];
+};
+
+/*
+ * Used for CTL_MSG_LUN_SYNC.
+ */
+struct ctl_ha_msg_lun {
+ struct ctl_ha_msg_hdr hdr;
+ int flags;
+ unsigned int pr_generation;
+ uint32_t pr_res_idx;
+ uint8_t pr_res_type;
+ int lun_devid_len;
+ int pr_key_count;
+ uint8_t data[];
+};
+
+struct ctl_ha_msg_lun_pr_key {
+ uint32_t pr_iid;
+ uint64_t pr_key;
+};
+
+/*
+ * Used for CTL_MSG_IID_SYNC.
+ */
+struct ctl_ha_msg_iid {
+ struct ctl_ha_msg_hdr hdr;
+ int in_use;
+ int name_len;
+ uint64_t wwpn;
+ uint8_t data[];
+};
+
+/*
+ * Used for CTL_MSG_MODE_SYNC.
+ */
+struct ctl_ha_msg_mode {
+ struct ctl_ha_msg_hdr hdr;
+ uint8_t page_code;
+ uint8_t subpage;
+ uint16_t page_len;
+ uint8_t data[];
+};
+
union ctl_ha_msg {
struct ctl_ha_msg_hdr hdr;
struct ctl_ha_msg_task task;
struct ctl_ha_msg_scsi scsi;
struct ctl_ha_msg_dt dt;
struct ctl_ha_msg_pr pr;
+ struct ctl_ha_msg_ua ua;
+ struct ctl_ha_msg_port port;
+ struct ctl_ha_msg_lun lun;
+ struct ctl_ha_msg_iid iid;
+ struct ctl_ha_msg_login login;
+ struct ctl_ha_msg_mode mode;
};
-
struct ctl_prio {
struct ctl_io_hdr io_hdr;
struct ctl_ha_msg_pr pr_msg;
};
-
-
union ctl_io {
struct ctl_io_hdr io_hdr; /* common to all I/O types */
struct ctl_scsiio scsiio; /* Normal SCSI commands */
@@ -515,7 +578,6 @@ union ctl_io *ctl_alloc_io(void *pool_ref);
union ctl_io *ctl_alloc_io_nowait(void *pool_ref);
void ctl_free_io(union ctl_io *io);
void ctl_zero_io(union ctl_io *io);
-void ctl_copy_io(union ctl_io *src, union ctl_io *dest);
#endif /* _KERNEL */
diff --git a/sys/cam/ctl/ctl_ioctl.h b/sys/cam/ctl/ctl_ioctl.h
index 532953f..347f931 100644
--- a/sys/cam/ctl/ctl_ioctl.h
+++ b/sys/cam/ctl/ctl_ioctl.h
@@ -81,35 +81,6 @@
#define CTL_MINOR 225
typedef enum {
- CTL_OOA_INVALID_LUN,
- CTL_OOA_SUCCESS
-} ctl_ooa_status;
-
-struct ctl_ooa_info {
- uint32_t target_id; /* Passed in to CTL */
- uint32_t lun_id; /* Passed in to CTL */
- uint32_t num_entries; /* Returned from CTL */
- ctl_ooa_status status; /* Returned from CTL */
-};
-
-struct ctl_hard_startstop_info {
- cfi_mt_status status;
- int total_luns;
- int luns_complete;
- int luns_failed;
-};
-
-struct ctl_bbrread_info {
- int lun_num; /* Passed in to CTL */
- uint64_t lba; /* Passed in to CTL */
- int len; /* Passed in to CTL */
- cfi_mt_status status; /* Returned from CTL */
- cfi_bbrread_status bbr_status; /* Returned from CTL */
- uint8_t scsi_status; /* Returned from CTL */
- struct scsi_sense_data sense_data; /* Returned from CTL */
-};
-
-typedef enum {
CTL_DELAY_TYPE_NONE,
CTL_DELAY_TYPE_CONT,
CTL_DELAY_TYPE_ONESHOT
@@ -131,7 +102,6 @@ typedef enum {
} ctl_delay_status;
struct ctl_io_delay_info {
- uint32_t target_id;
uint32_t lun_id;
ctl_delay_type delay_type;
ctl_delay_location delay_loc;
@@ -140,23 +110,6 @@ struct ctl_io_delay_info {
};
typedef enum {
- CTL_GS_SYNC_NONE,
- CTL_GS_SYNC_OK,
- CTL_GS_SYNC_NO_LUN
-} ctl_gs_sync_status;
-
-/*
- * The target and LUN id specify which device to modify. The sync interval
- * means that we will let through every N SYNCHRONIZE CACHE commands.
- */
-struct ctl_sync_info {
- uint32_t target_id; /* passed to kernel */
- uint32_t lun_id; /* passed to kernel */
- int sync_interval; /* depends on whether get/set */
- ctl_gs_sync_status status; /* passed from kernel */
-};
-
-typedef enum {
CTL_STATS_NO_IO,
CTL_STATS_READ,
CTL_STATS_WRITE
@@ -279,7 +232,6 @@ struct ctl_error_desc_cmd {
/*
* Error injection descriptor.
*
- * target_id: Target ID to act on.
* lun_id LUN to act on.
* lun_error: The type of error to inject. See above for descriptions.
* error_pattern: What kind of command to act on. See above.
@@ -290,7 +242,6 @@ struct ctl_error_desc_cmd {
* links: Kernel use only.
*/
struct ctl_error_desc {
- uint32_t target_id; /* To kernel */
uint32_t lun_id; /* To kernel */
ctl_lun_error lun_error; /* To kernel */
ctl_lun_error_pattern error_pattern; /* To kernel */
@@ -344,23 +295,6 @@ struct ctl_ooa {
};
typedef enum {
- CTL_PORT_LIST_NONE,
- CTL_PORT_LIST_OK,
- CTL_PORT_LIST_NEED_MORE_SPACE,
- CTL_PORT_LIST_ERROR
-} ctl_port_list_status;
-
-struct ctl_port_list {
- uint32_t alloc_len; /* passed to kernel */
- uint32_t alloc_num; /* passed to kernel */
- struct ctl_port_entry *entries; /* filled in kernel */
- uint32_t fill_len; /* passed to userland */
- uint32_t fill_num; /* passed to userland */
- uint32_t dropped_num; /* passed to userland */
- ctl_port_list_status status; /* passed to userland */
-};
-
-typedef enum {
CTL_LUN_NOSTATUS,
CTL_LUN_OK,
CTL_LUN_ERROR,
@@ -814,23 +748,11 @@ struct ctl_lun_map {
#define CTL_IO _IOWR(CTL_MINOR, 0x00, union ctl_io)
#define CTL_ENABLE_PORT _IOW(CTL_MINOR, 0x04, struct ctl_port_entry)
#define CTL_DISABLE_PORT _IOW(CTL_MINOR, 0x05, struct ctl_port_entry)
-#define CTL_DUMP_OOA _IO(CTL_MINOR, 0x06)
-#define CTL_CHECK_OOA _IOWR(CTL_MINOR, 0x07, struct ctl_ooa_info)
-#define CTL_HARD_STOP _IOR(CTL_MINOR, 0x08, \
- struct ctl_hard_startstop_info)
-#define CTL_HARD_START _IOR(CTL_MINOR, 0x09, \
- struct ctl_hard_startstop_info)
#define CTL_DELAY_IO _IOWR(CTL_MINOR, 0x10, struct ctl_io_delay_info)
-#define CTL_REALSYNC_GET _IOR(CTL_MINOR, 0x11, int)
-#define CTL_REALSYNC_SET _IOW(CTL_MINOR, 0x12, int)
-#define CTL_SETSYNC _IOWR(CTL_MINOR, 0x13, struct ctl_sync_info)
-#define CTL_GETSYNC _IOWR(CTL_MINOR, 0x14, struct ctl_sync_info)
#define CTL_GETSTATS _IOWR(CTL_MINOR, 0x15, struct ctl_stats)
#define CTL_ERROR_INJECT _IOWR(CTL_MINOR, 0x16, struct ctl_error_desc)
-#define CTL_BBRREAD _IOWR(CTL_MINOR, 0x17, struct ctl_bbrread_info)
#define CTL_GET_OOA _IOWR(CTL_MINOR, 0x18, struct ctl_ooa)
#define CTL_DUMP_STRUCTS _IO(CTL_MINOR, 0x19)
-#define CTL_GET_PORT_LIST _IOWR(CTL_MINOR, 0x20, struct ctl_port_list)
#define CTL_LUN_REQ _IOWR(CTL_MINOR, 0x21, struct ctl_lun_req)
#define CTL_LUN_LIST _IOWR(CTL_MINOR, 0x22, struct ctl_lun_list)
#define CTL_ERROR_INJECT_DELETE _IOW(CTL_MINOR, 0x23, struct ctl_error_desc)
diff --git a/sys/cam/ctl/ctl_private.h b/sys/cam/ctl/ctl_private.h
index a038552..f9b29ff 100644
--- a/sys/cam/ctl/ctl_private.h
+++ b/sys/cam/ctl/ctl_private.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2003, 2004, 2005, 2008 Silicon Graphics International Corp.
+ * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,38 +40,19 @@
#ifndef _CTL_PRIVATE_H_
#define _CTL_PRIVATE_H_
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_cd.h>
+#include <cam/scsi/scsi_da.h>
+
/*
* SCSI vendor and product names.
*/
#define CTL_VENDOR "FREEBSD "
#define CTL_DIRECT_PRODUCT "CTLDISK "
#define CTL_PROCESSOR_PRODUCT "CTLPROCESSOR "
+#define CTL_CDROM_PRODUCT "CTLCDROM "
#define CTL_UNKNOWN_PRODUCT "CTLDEVICE "
-struct ctl_fe_ioctl_startstop_info {
- struct cv sem;
- struct ctl_hard_startstop_info hs_info;
-};
-
-struct ctl_fe_ioctl_bbrread_info {
- struct cv sem;
- struct ctl_bbrread_info *bbr_info;
- int wakeup_done;
- struct mtx *lock;
-};
-
-typedef enum {
- CTL_IOCTL_INPROG,
- CTL_IOCTL_DATAMOVE,
- CTL_IOCTL_DONE
-} ctl_fe_ioctl_state;
-
-struct ctl_fe_ioctl_params {
- struct cv sem;
- struct mtx ioctl_mtx;
- ctl_fe_ioctl_state state;
-};
-
#define CTL_POOL_ENTRIES_OTHER_SC 200
struct ctl_io_pool {
@@ -81,18 +63,6 @@ struct ctl_io_pool {
};
typedef enum {
- CTL_IOCTL_FLAG_NONE = 0x00,
- CTL_IOCTL_FLAG_ENABLED = 0x01
-} ctl_ioctl_flags;
-
-struct ctl_ioctl_info {
- ctl_ioctl_flags flags;
- uint32_t cur_tag_num;
- struct ctl_port port;
- char port_name[24];
-};
-
-typedef enum {
CTL_SER_BLOCK,
CTL_SER_BLOCKOPT,
CTL_SER_EXTENT,
@@ -115,25 +85,26 @@ typedef enum {
* WARNING: Keep the bottom nibble here free, we OR in the data direction
* flags for each command.
*
- * Note: "OK_ON_ALL_LUNS" == we don't have to have a lun configured
+ * Note: "OK_ON_NO_LUN" == we don't have to have a lun configured
* "OK_ON_BOTH" == we have to have a lun configured
* "SA5" == command has 5-bit service action at byte 1
*/
typedef enum {
CTL_CMD_FLAG_NONE = 0x0000,
CTL_CMD_FLAG_NO_SENSE = 0x0010,
- CTL_CMD_FLAG_OK_ON_ALL_LUNS = 0x0020,
- CTL_CMD_FLAG_ALLOW_ON_RESV = 0x0040,
+ CTL_CMD_FLAG_ALLOW_ON_RESV = 0x0020,
+ CTL_CMD_FLAG_ALLOW_ON_PR_RESV = 0x0040,
CTL_CMD_FLAG_ALLOW_ON_PR_WRESV = 0x0080,
CTL_CMD_FLAG_OK_ON_PROC = 0x0100,
- CTL_CMD_FLAG_OK_ON_SLUN = 0x0200,
- CTL_CMD_FLAG_OK_ON_BOTH = 0x0300,
- CTL_CMD_FLAG_OK_ON_STOPPED = 0x0400,
- CTL_CMD_FLAG_OK_ON_INOPERABLE = 0x0800,
- CTL_CMD_FLAG_OK_ON_OFFLINE = 0x1000,
- CTL_CMD_FLAG_OK_ON_SECONDARY = 0x2000,
- CTL_CMD_FLAG_ALLOW_ON_PR_RESV = 0x4000,
- CTL_CMD_FLAG_SA5 = 0x8000
+ CTL_CMD_FLAG_OK_ON_DIRECT = 0x0200,
+ CTL_CMD_FLAG_OK_ON_CDROM = 0x0400,
+ CTL_CMD_FLAG_OK_ON_BOTH = 0x0700,
+ CTL_CMD_FLAG_OK_ON_NO_LUN = 0x0800,
+ CTL_CMD_FLAG_OK_ON_NO_MEDIA = 0x1000,
+ CTL_CMD_FLAG_OK_ON_STANDBY = 0x2000,
+ CTL_CMD_FLAG_OK_ON_UNAVAIL = 0x4000,
+ CTL_CMD_FLAG_SA5 = 0x8000,
+ CTL_CMD_FLAG_RUN_HERE = 0x10000
} ctl_cmd_flags;
typedef enum {
@@ -176,21 +147,17 @@ typedef enum {
CTL_LUN_DISABLED = 0x008,
CTL_LUN_MALLOCED = 0x010,
CTL_LUN_STOPPED = 0x020,
- CTL_LUN_INOPERABLE = 0x040,
- CTL_LUN_OFFLINE = 0x080,
+ CTL_LUN_NO_MEDIA = 0x040,
+ CTL_LUN_EJECTED = 0x080,
CTL_LUN_PR_RESERVED = 0x100,
CTL_LUN_PRIMARY_SC = 0x200,
CTL_LUN_SENSE_DESC = 0x400,
- CTL_LUN_READONLY = 0x800
+ CTL_LUN_READONLY = 0x800,
+ CTL_LUN_PEER_SC_PRIMARY = 0x1000,
+ CTL_LUN_REMOVABLE = 0x2000
} ctl_lun_flags;
typedef enum {
- CTL_LUN_SERSEQ_OFF,
- CTL_LUN_SERSEQ_READ,
- CTL_LUN_SERSEQ_ON
-} ctl_lun_serseq;
-
-typedef enum {
CTLBLOCK_FLAG_NONE = 0x00,
CTLBLOCK_FLAG_INVALID = 0x01
} ctlblock_flags;
@@ -271,7 +238,10 @@ typedef int ctl_modesel_handler(struct ctl_scsiio *ctsio,
typedef enum {
CTL_PAGE_FLAG_NONE = 0x00,
- CTL_PAGE_FLAG_DISK_ONLY = 0x01
+ CTL_PAGE_FLAG_DIRECT = 0x01,
+ CTL_PAGE_FLAG_PROC = 0x02,
+ CTL_PAGE_FLAG_CDROM = 0x04,
+ CTL_PAGE_FLAG_ALL = 0x07
} ctl_page_flags;
struct ctl_page_index {
@@ -302,22 +272,29 @@ struct ctl_logical_block_provisioning_page {
static const struct ctl_page_index page_index_template[] = {
{SMS_RW_ERROR_RECOVERY_PAGE, 0, sizeof(struct scsi_da_rw_recovery_page), NULL,
- CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL},
+ CTL_PAGE_FLAG_DIRECT | CTL_PAGE_FLAG_CDROM, NULL, NULL},
{SMS_FORMAT_DEVICE_PAGE, 0, sizeof(struct scsi_format_page), NULL,
- CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL},
+ CTL_PAGE_FLAG_DIRECT, NULL, NULL},
{SMS_RIGID_DISK_PAGE, 0, sizeof(struct scsi_rigid_disk_page), NULL,
- CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL},
+ CTL_PAGE_FLAG_DIRECT, NULL, NULL},
{SMS_CACHING_PAGE, 0, sizeof(struct scsi_caching_page), NULL,
- CTL_PAGE_FLAG_DISK_ONLY, NULL, ctl_caching_sp_handler},
+ CTL_PAGE_FLAG_DIRECT | CTL_PAGE_FLAG_CDROM,
+ NULL, ctl_caching_sp_handler},
{SMS_CONTROL_MODE_PAGE, 0, sizeof(struct scsi_control_page), NULL,
- CTL_PAGE_FLAG_NONE, NULL, ctl_control_page_handler},
+ CTL_PAGE_FLAG_ALL, NULL, ctl_control_page_handler},
+ {SMS_CONTROL_MODE_PAGE | SMPH_SPF, 0x01,
+ sizeof(struct scsi_control_ext_page), NULL,
+ CTL_PAGE_FLAG_ALL, NULL, NULL},
{SMS_INFO_EXCEPTIONS_PAGE, 0, sizeof(struct scsi_info_exceptions_page), NULL,
- CTL_PAGE_FLAG_NONE, NULL, NULL},
+ CTL_PAGE_FLAG_ALL, NULL, NULL},
{SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, 0x02,
sizeof(struct ctl_logical_block_provisioning_page), NULL,
- CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL},
+ CTL_PAGE_FLAG_DIRECT, NULL, NULL},
+ {SMS_CDDVD_CAPS_PAGE, 0,
+ sizeof(struct scsi_cddvd_capabilities_page), NULL,
+ CTL_PAGE_FLAG_CDROM, NULL, NULL},
{SMS_VENDOR_SPECIFIC_PAGE | SMPH_SPF, DBGCNF_SUBPAGE_CODE,
- sizeof(struct copan_debugconf_subpage), NULL, CTL_PAGE_FLAG_NONE,
+ sizeof(struct copan_debugconf_subpage), NULL, CTL_PAGE_FLAG_ALL,
ctl_debugconf_sp_sense_handler, ctl_debugconf_sp_select_handler},
};
@@ -330,21 +307,23 @@ struct ctl_mode_pages {
struct scsi_rigid_disk_page rigid_disk_page[4];
struct scsi_caching_page caching_page[4];
struct scsi_control_page control_page[4];
+ struct scsi_control_ext_page control_ext_page[4];
struct scsi_info_exceptions_page ie_page[4];
struct ctl_logical_block_provisioning_page lbp_page[4];
+ struct scsi_cddvd_capabilities_page cddvd_page[4];
struct copan_debugconf_subpage debugconf_subpage[4];
struct ctl_page_index index[CTL_NUM_MODE_PAGES];
};
static const struct ctl_page_index log_page_index_template[] = {
{SLS_SUPPORTED_PAGES_PAGE, 0, 0, NULL,
- CTL_PAGE_FLAG_NONE, NULL, NULL},
+ CTL_PAGE_FLAG_ALL, NULL, NULL},
{SLS_SUPPORTED_PAGES_PAGE, SLS_SUPPORTED_SUBPAGES_SUBPAGE, 0, NULL,
- CTL_PAGE_FLAG_NONE, NULL, NULL},
+ CTL_PAGE_FLAG_ALL, NULL, NULL},
{SLS_LOGICAL_BLOCK_PROVISIONING, 0, 0, NULL,
- CTL_PAGE_FLAG_NONE, ctl_lbp_log_sense_handler, NULL},
+ CTL_PAGE_FLAG_DIRECT, ctl_lbp_log_sense_handler, NULL},
{SLS_STAT_AND_PERF, 0, 0, NULL,
- CTL_PAGE_FLAG_NONE, ctl_sap_log_sense_handler, NULL},
+ CTL_PAGE_FLAG_ALL, ctl_sap_log_sense_handler, NULL},
};
#define CTL_NUM_LOG_PAGES sizeof(log_page_index_template)/ \
@@ -369,17 +348,6 @@ struct ctl_lun_delay_info {
uint32_t done_delay;
};
-typedef enum {
- CTL_ERR_INJ_NONE = 0x00,
- CTL_ERR_INJ_ABORTED = 0x01
-} ctl_err_inject_flags;
-
-typedef enum {
- CTL_PR_FLAG_NONE = 0x00,
- CTL_PR_FLAG_REGISTERED = 0x01,
- CTL_PR_FLAG_ACTIVE_RES = 0x02
-} ctl_per_res_flags;
-
#define CTL_PR_ALL_REGISTRANTS 0xFFFFFFFF
#define CTL_PR_NO_RESERVATION 0xFFFFFFF0
@@ -400,16 +368,12 @@ struct ctl_lun {
struct mtx lun_lock;
uint64_t lun;
ctl_lun_flags flags;
- ctl_lun_serseq serseq;
STAILQ_HEAD(,ctl_error_desc) error_list;
uint64_t error_serial;
struct ctl_softc *ctl_softc;
struct ctl_be_lun *be_lun;
struct ctl_backend_driver *backend;
- int io_count;
struct ctl_lun_delay_info delay_info;
- int sync_interval;
- int sync_count;
#ifdef CTL_TIME_IO
sbintime_t idle_time;
sbintime_t last_busy;
@@ -417,29 +381,30 @@ struct ctl_lun {
TAILQ_HEAD(ctl_ooaq, ctl_io_hdr) ooa_queue;
TAILQ_HEAD(ctl_blockq,ctl_io_hdr) blocked_queue;
STAILQ_ENTRY(ctl_lun) links;
- STAILQ_ENTRY(ctl_lun) run_links;
#ifdef CTL_WITH_CA
uint32_t have_ca[CTL_MAX_INITIATORS >> 5];
struct scsi_sense_data pending_sense[CTL_MAX_INITIATORS];
#endif
ctl_ua_type *pending_ua[CTL_MAX_PORTS];
+ uint8_t ua_tpt_info[8];
time_t lasttpt;
struct ctl_mode_pages mode_pages;
struct ctl_log_pages log_pages;
struct ctl_lun_io_stats stats;
uint32_t res_idx;
- unsigned int PRGeneration;
- uint64_t *pr_keys[2 * CTL_MAX_PORTS];
+ uint32_t pr_generation;
+ uint64_t *pr_keys[CTL_MAX_PORTS];
int pr_key_count;
uint32_t pr_res_idx;
- uint8_t res_type;
+ uint8_t pr_res_type;
+ int prevent_count;
+ uint32_t prevent[(CTL_MAX_INITIATORS+31)/32];
uint8_t *write_buffer;
struct ctl_devid *lun_devid;
TAILQ_HEAD(tpc_lists, tpc_list) tpc_lists;
};
typedef enum {
- CTL_FLAG_REAL_SYNC = 0x02,
CTL_FLAG_ACTIVE_SHELF = 0x04
} ctl_gen_flags;
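The new prevent[] array in struct ctl_lun tracks PREVENT ALLOW MEDIUM REMOVAL state with one bit per initiator, 32 to a word, alongside the prevent_count total. A standalone sketch of the index arithmetic; the helper names are invented for illustration:

#include <assert.h>
#include <stdint.h>

#define MAX_INITIATORS	2048	/* stand-in for CTL_MAX_INITIATORS */

static uint32_t prevent[(MAX_INITIATORS + 31) / 32];

static void
prevent_set(int idx)
{
	prevent[idx / 32] |= 1u << (idx % 32);
}

static int
prevent_test(int idx)
{
	return ((prevent[idx / 32] >> (idx % 32)) & 1);
}

int
main(void)
{
	prevent_set(37);
	assert(prevent_test(37) && !prevent_test(36));
	return (0);
}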
@@ -459,23 +424,21 @@ struct tpc_token;
struct ctl_softc {
struct mtx ctl_lock;
struct cdev *dev;
- int open_count;
- int num_disks;
int num_luns;
ctl_gen_flags flags;
ctl_ha_mode ha_mode;
int ha_id;
- int ha_state;
int is_single;
- int port_offset;
- int persis_offset;
- int inquiry_pq_no_lun;
+ ctl_ha_link_state ha_link;
+ int port_min;
+ int port_max;
+ int port_cnt;
+ int init_min;
+ int init_max;
struct sysctl_ctx_list sysctl_ctx;
struct sysctl_oid *sysctl_tree;
- struct ctl_ioctl_info ioctl_info;
void *othersc_pool;
struct proc *ctl_proc;
- int targ_online;
uint32_t ctl_lun_mask[(CTL_MAX_LUNS + 31) / 32];
struct ctl_lun *ctl_luns[CTL_MAX_LUNS];
uint32_t ctl_port_mask[(CTL_MAX_PORTS + 31) / 32];
@@ -501,8 +464,6 @@ struct ctl_softc {
extern const struct ctl_cmd_entry ctl_cmd_table[256];
uint32_t ctl_get_initindex(struct ctl_nexus *nexus);
-uint32_t ctl_get_resindex(struct ctl_nexus *nexus);
-uint32_t ctl_port_idx(int port_num);
int ctl_lun_map_init(struct ctl_port *port);
int ctl_lun_map_deinit(struct ctl_port *port);
int ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun);
@@ -515,6 +476,7 @@ void ctl_pool_free(struct ctl_io_pool *pool);
int ctl_scsi_release(struct ctl_scsiio *ctsio);
int ctl_scsi_reserve(struct ctl_scsiio *ctsio);
int ctl_start_stop(struct ctl_scsiio *ctsio);
+int ctl_prevent_allow(struct ctl_scsiio *ctsio);
int ctl_sync_cache(struct ctl_scsiio *ctsio);
int ctl_format(struct ctl_scsiio *ctsio);
int ctl_read_buffer(struct ctl_scsiio *ctsio);
@@ -527,6 +489,7 @@ int ctl_log_sense(struct ctl_scsiio *ctsio);
int ctl_read_capacity(struct ctl_scsiio *ctsio);
int ctl_read_capacity_16(struct ctl_scsiio *ctsio);
int ctl_read_defect(struct ctl_scsiio *ctsio);
+int ctl_read_toc(struct ctl_scsiio *ctsio);
int ctl_read_write(struct ctl_scsiio *ctsio);
int ctl_cnw(struct ctl_scsiio *ctsio);
int ctl_report_luns(struct ctl_scsiio *ctsio);
@@ -534,13 +497,15 @@ int ctl_request_sense(struct ctl_scsiio *ctsio);
int ctl_tur(struct ctl_scsiio *ctsio);
int ctl_verify(struct ctl_scsiio *ctsio);
int ctl_inquiry(struct ctl_scsiio *ctsio);
+int ctl_get_config(struct ctl_scsiio *ctsio);
+int ctl_get_event_status(struct ctl_scsiio *ctsio);
+int ctl_mechanism_status(struct ctl_scsiio *ctsio);
int ctl_persistent_reserve_in(struct ctl_scsiio *ctsio);
int ctl_persistent_reserve_out(struct ctl_scsiio *ctsio);
int ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio);
int ctl_report_supported_opcodes(struct ctl_scsiio *ctsio);
int ctl_report_supported_tmf(struct ctl_scsiio *ctsio);
int ctl_report_timestamp(struct ctl_scsiio *ctsio);
-int ctl_isc(struct ctl_scsiio *ctsio);
int ctl_get_lba_status(struct ctl_scsiio *ctsio);
void ctl_tpc_init(struct ctl_softc *softc);
diff --git a/sys/cam/ctl/ctl_scsi_all.c b/sys/cam/ctl/ctl_scsi_all.c
index 815e383..acac6ea 100644
--- a/sys/cam/ctl/ctl_scsi_all.c
+++ b/sys/cam/ctl/ctl_scsi_all.c
@@ -114,7 +114,7 @@ ctl_scsi_path_string(union ctl_io *io, char *path_str, int len)
{
snprintf(path_str, len, "(%u:%u:%u/%u): ",
- io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_port,
+ io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port,
io->io_hdr.nexus.targ_lun, io->io_hdr.nexus.targ_mapped_lun);
}
diff --git a/sys/cam/ctl/ctl_ser_table.c b/sys/cam/ctl/ctl_ser_table.c
index 8d5d6bc..ef0ee2f 100644
--- a/sys/cam/ctl/ctl_ser_table.c
+++ b/sys/cam/ctl/ctl_ser_table.c
@@ -61,7 +61,7 @@
#define xO CTL_SER_EXTENTOPT /* Optional extent check */
#define xS CTL_SER_EXTENTSEQ /* Sequential extent check */
-static ctl_serialize_action
+static const ctl_serialize_action
ctl_serialize_table[CTL_SERIDX_COUNT][CTL_SERIDX_COUNT] = {
/**>IDX_ :: 2nd:TUR RD WRT UNM SYN MDSN MDSL RQSN INQ RDCP RES LSNS FMT STR*/
/*TUR */{ pS, pS, pS, pS, pS, bK, bK, bK, pS, pS, bK, pS, bK, bK},
diff --git a/sys/cam/ctl/ctl_tpc.c b/sys/cam/ctl/ctl_tpc.c
index 662ee3d..d21b5e3 100644
--- a/sys/cam/ctl/ctl_tpc.c
+++ b/sys/cam/ctl/ctl_tpc.c
@@ -47,7 +47,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
@@ -395,8 +394,7 @@ ctl_inquiry_evpd_tpc(struct ctl_scsiio *ctsio, int alloc_len)
scsi_ulto2b(0, rtfb_ptr->optimal_length_granularity);
scsi_u64to8b(0, rtfb_ptr->maximum_bytes);
scsi_u64to8b(0, rtfb_ptr->optimal_bytes);
- scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
- rtfb_ptr->optimal_bytes_to_token_per_segment);
+ scsi_u64to8b(UINT64_MAX, rtfb_ptr->optimal_bytes_to_token_per_segment);
scsi_u64to8b(TPC_MAX_IOCHUNK_SIZE,
rtfb_ptr->optimal_bytes_from_token_per_segment);
@@ -535,7 +533,7 @@ ctl_receive_copy_status_lid1(struct ctl_scsiio *ctsio)
list_id = cdb->list_identifier;
mtx_lock(&lun->lun_lock);
list = tpc_find_list(lun, list_id,
- ctl_get_resindex(&ctsio->io_hdr.nexus));
+ ctl_get_initindex(&ctsio->io_hdr.nexus));
if (list == NULL) {
mtx_unlock(&lun->lun_lock);
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
@@ -617,7 +615,7 @@ ctl_receive_copy_failure_details(struct ctl_scsiio *ctsio)
list_id = cdb->list_identifier;
mtx_lock(&lun->lun_lock);
list = tpc_find_list(lun, list_id,
- ctl_get_resindex(&ctsio->io_hdr.nexus));
+ ctl_get_initindex(&ctsio->io_hdr.nexus));
if (list == NULL || !list->completed) {
mtx_unlock(&lun->lun_lock);
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
@@ -689,7 +687,7 @@ ctl_receive_copy_status_lid4(struct ctl_scsiio *ctsio)
list_id = scsi_4btoul(cdb->list_identifier);
mtx_lock(&lun->lun_lock);
list = tpc_find_list(lun, list_id,
- ctl_get_resindex(&ctsio->io_hdr.nexus));
+ ctl_get_initindex(&ctsio->io_hdr.nexus));
if (list == NULL) {
mtx_unlock(&lun->lun_lock);
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
@@ -772,7 +770,7 @@ ctl_copy_operation_abort(struct ctl_scsiio *ctsio)
list_id = scsi_4btoul(cdb->list_identifier);
mtx_lock(&lun->lun_lock);
list = tpc_find_list(lun, list_id,
- ctl_get_resindex(&ctsio->io_hdr.nexus));
+ ctl_get_initindex(&ctsio->io_hdr.nexus));
if (list == NULL) {
mtx_unlock(&lun->lun_lock);
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
@@ -822,7 +820,9 @@ tpc_process_b2b(struct tpc_list *list)
off_t srclba, dstlba, numbytes, donebytes, roundbytes;
int numlba;
uint32_t srcblock, dstblock, pb, pbo, adj;
+ uint8_t csi[4];
+ scsi_ulto4b(list->curseg, csi);
if (list->stage == 1) {
while ((tior = TAILQ_FIRST(&list->allio)) != NULL) {
TAILQ_REMOVE(&list->allio, tior, links);
@@ -836,7 +836,9 @@ tpc_process_b2b(struct tpc_list *list)
} else if (list->error) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x0d, /*ascq*/ 0x01, SSD_ELEM_NONE);
+ /*asc*/ 0x0d, /*ascq*/ 0x01,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
}
list->cursectors += list->segsectors;
@@ -851,7 +853,9 @@ tpc_process_b2b(struct tpc_list *list)
if (sl >= CTL_MAX_LUNS || dl >= CTL_MAX_LUNS) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x08, /*ascq*/ 0x04, SSD_ELEM_NONE);
+ /*asc*/ 0x08, /*ascq*/ 0x04,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
}
if (pbo > 0)
@@ -880,7 +884,9 @@ tpc_process_b2b(struct tpc_list *list)
if (numbytes % srcblock != 0 || numbytes % dstblock != 0) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x26, /*ascq*/ 0x0A, SSD_ELEM_NONE);
+ /*asc*/ 0x26, /*ascq*/ 0x0A,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
}
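Throughout ctl_tpc.c the error paths now attach the failing segment number to the sense data as a COMMAND information descriptor; scsi_ulto4b() stores the 32-bit value big-endian. A userland sketch of the equivalent packing (the kernel helper itself lives in scsi_all.c):

#include <assert.h>
#include <stdint.h>

/* Userland equivalent of scsi_ulto4b(): 32-bit value to big-endian bytes. */
static void
ulto4b(uint32_t val, uint8_t *bytes)
{
	bytes[0] = (val >> 24) & 0xff;
	bytes[1] = (val >> 16) & 0xff;
	bytes[2] = (val >> 8) & 0xff;
	bytes[3] = val & 0xff;
}

int
main(void)
{
	uint8_t csi[4];

	ulto4b(3, csi);		/* segment 3 failed */
	assert(csi[0] == 0 && csi[3] == 3);
	return (0);
}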
@@ -964,7 +970,9 @@ tpc_process_verify(struct tpc_list *list)
struct scsi_ec_segment_verify *seg;
struct tpc_io *tio;
uint64_t sl;
+ uint8_t csi[4];
+ scsi_ulto4b(list->curseg, csi);
if (list->stage == 1) {
while ((tio = TAILQ_FIRST(&list->allio)) != NULL) {
TAILQ_REMOVE(&list->allio, tio, links);
@@ -977,7 +985,9 @@ tpc_process_verify(struct tpc_list *list)
} else if (list->error) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x0d, /*ascq*/ 0x01, SSD_ELEM_NONE);
+ /*asc*/ 0x0d, /*ascq*/ 0x01,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
} else
return (CTL_RETVAL_COMPLETE);
@@ -989,7 +999,9 @@ tpc_process_verify(struct tpc_list *list)
if (sl >= CTL_MAX_LUNS) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x08, /*ascq*/ 0x04, SSD_ELEM_NONE);
+ /*asc*/ 0x08, /*ascq*/ 0x04,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
}
@@ -1021,7 +1033,9 @@ tpc_process_register_key(struct tpc_list *list)
struct tpc_io *tio;
uint64_t dl;
int datalen;
+ uint8_t csi[4];
+ scsi_ulto4b(list->curseg, csi);
if (list->stage == 1) {
while ((tio = TAILQ_FIRST(&list->allio)) != NULL) {
TAILQ_REMOVE(&list->allio, tio, links);
@@ -1035,7 +1049,9 @@ tpc_process_register_key(struct tpc_list *list)
} else if (list->error) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x0d, /*ascq*/ 0x01, SSD_ELEM_NONE);
+ /*asc*/ 0x0d, /*ascq*/ 0x01,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
} else
return (CTL_RETVAL_COMPLETE);
@@ -1047,7 +1063,9 @@ tpc_process_register_key(struct tpc_list *list)
if (dl >= CTL_MAX_LUNS) {
ctl_set_sense(list->ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x08, /*ascq*/ 0x04, SSD_ELEM_NONE);
+ /*asc*/ 0x08, /*ascq*/ 0x04,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
return (CTL_RETVAL_ERROR);
}
@@ -1110,7 +1128,7 @@ static int
tpc_process_wut(struct tpc_list *list)
{
struct tpc_io *tio, *tior, *tiow;
- struct runl run, *prun;
+ struct runl run;
int drange, srange;
off_t doffset, soffset;
off_t srclba, dstlba, numbytes, donebytes, roundbytes;
@@ -1190,8 +1208,7 @@ tpc_process_wut(struct tpc_list *list)
// srclba, dstlba);
donebytes = 0;
TAILQ_INIT(&run);
- prun = &run;
- list->tbdio = 1;
+ list->tbdio = 0;
TAILQ_INIT(&list->allio);
while (donebytes < numbytes) {
roundbytes = numbytes - donebytes;
@@ -1244,8 +1261,8 @@ tpc_process_wut(struct tpc_list *list)
tiow->io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = tiow;
TAILQ_INSERT_TAIL(&tior->run, tiow, rlinks);
- TAILQ_INSERT_TAIL(prun, tior, rlinks);
- prun = &tior->run;
+ TAILQ_INSERT_TAIL(&run, tior, rlinks);
+ list->tbdio++;
donebytes += roundbytes;
srclba += roundbytes / srcblock;
dstlba += roundbytes / dstblock;
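The old code threaded each read's private run list onto the previous one via prun; now every tior lands on the single run list and tbdio counts outstanding I/Os directly. A standalone <sys/queue.h> sketch of the flattened bookkeeping; struct xfer is an invented stand-in:

#include <assert.h>
#include <stdlib.h>
#include <sys/queue.h>

struct xfer {
	TAILQ_ENTRY(xfer) rlinks;
};
TAILQ_HEAD(runl, xfer);

int
main(void)
{
	struct runl run;
	struct xfer *x;
	int tbdio = 0, i;

	TAILQ_INIT(&run);
	for (i = 0; i < 4; i++) {
		x = calloc(1, sizeof(*x));
		TAILQ_INSERT_TAIL(&run, x, rlinks);
		tbdio++;		/* one outstanding I/O per entry */
	}
	assert(tbdio == 4);
	while ((x = TAILQ_FIRST(&run)) != NULL) {
		TAILQ_REMOVE(&run, x, rlinks);
		free(x);
	}
	return (0);
}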
@@ -1277,7 +1294,6 @@ complete:
ctl_free_io(tio->io);
free(tio, M_CTL);
}
- free(list->buf, M_CTL);
if (list->abort) {
ctl_set_task_aborted(list->ctsio);
return (CTL_RETVAL_ERROR);
@@ -1293,7 +1309,6 @@ complete:
}
dstblock = list->lun->be_lun->blocksize;
- list->buf = malloc(dstblock, M_CTL, M_WAITOK | M_ZERO);
TAILQ_INIT(&run);
prun = &run;
list->tbdio = 1;
@@ -1310,9 +1325,9 @@ complete:
TAILQ_INSERT_TAIL(&list->allio, tiow, links);
tiow->io = tpcl_alloc_io();
ctl_scsi_write_same(tiow->io,
- /*data_ptr*/ list->buf,
- /*data_len*/ dstblock,
- /*byte2*/ 0,
+ /*data_ptr*/ NULL,
+ /*data_len*/ 0,
+ /*byte2*/ SWS_NDOB,
/*lba*/ scsi_8btou64(list->range[r].lba),
/*num_blocks*/ len,
/*tag_type*/ CTL_TAG_SIMPLE,
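Passing SWS_NDOB (no data-out buffer) asks the target to write zeroes without any data transfer, which is why the one-block list->buf allocation above could be dropped. A standalone sketch of the WRITE SAME(16) CDB layout, assuming the SBC-3 field positions:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Minimal WRITE SAME(16) CDB sketch; field layout per SBC-3. */
struct ws16_cdb {
	uint8_t opcode;		/* 0x93 */
	uint8_t byte2;
#define NDOB	0x01		/* mirrors SWS_NDOB: no data-out buffer */
	uint8_t lba[8];
	uint8_t num_blocks[4];
	uint8_t group;
	uint8_t control;
};

int
main(void)
{
	struct ws16_cdb cdb;

	memset(&cdb, 0, sizeof(cdb));
	cdb.opcode = 0x93;
	cdb.byte2 |= NDOB;	/* zero the blocks, transfer no data */
	assert(sizeof(cdb) == 16);
	return (0);
}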
@@ -1348,6 +1363,7 @@ tpc_process(struct tpc_list *list)
struct scsi_ec_segment *seg;
struct ctl_scsiio *ctsio = list->ctsio;
int retval = CTL_RETVAL_COMPLETE;
+ uint8_t csi[4];
if (list->service_action == EC_WUT) {
if (list->token != NULL)
@@ -1375,9 +1391,12 @@ tpc_process(struct tpc_list *list)
retval = tpc_process_register_key(list);
break;
default:
+ scsi_ulto4b(list->curseg, csi);
ctl_set_sense(ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_COPY_ABORTED,
- /*asc*/ 0x26, /*ascq*/ 0x09, SSD_ELEM_NONE);
+ /*asc*/ 0x26, /*ascq*/ 0x09,
+ SSD_ELEM_COMMAND, sizeof(csi), csi,
+ SSD_ELEM_NONE);
goto done;
}
if (retval == CTL_RETVAL_QUEUED)
@@ -1591,6 +1610,10 @@ ctl_extended_copy_lid1(struct ctl_scsiio *ctsio)
cdb = (struct scsi_extended_copy *)ctsio->cdb;
len = scsi_4btoul(cdb->length);
+ if (len == 0) {
+ ctl_set_success(ctsio);
+ goto done;
+ }
if (len < sizeof(struct scsi_extended_copy_lid1_data) ||
len > sizeof(struct scsi_extended_copy_lid1_data) +
TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1621,20 +1644,22 @@ ctl_extended_copy_lid1(struct ctl_scsiio *ctsio)
lencscd = scsi_2btoul(data->cscd_list_length);
lenseg = scsi_4btoul(data->segment_list_length);
leninl = scsi_4btoul(data->inline_data_length);
- if (len < sizeof(struct scsi_extended_copy_lid1_data) +
- lencscd + lenseg + leninl ||
- leninl > TPC_MAX_INLINE) {
- ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
- /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
- goto done;
- }
if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
ctl_set_sense(ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
/*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
goto done;
}
- if (lencscd + lenseg > TPC_MAX_LIST) {
+ if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+ ctl_set_sense(ctsio, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+ goto done;
+ }
+ if (lencscd + lenseg > TPC_MAX_LIST ||
+ leninl > TPC_MAX_INLINE ||
+ len < sizeof(struct scsi_extended_copy_lid1_data) +
+ lencscd + lenseg + leninl) {
ctl_set_param_len_error(ctsio);
goto done;
}
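Reordering the checks means an oversized CSCD or segment list now draws its specific ILLEGAL REQUEST sense (ASC 0x26, ASCQ 0x06 or 0x08) before the generic parameter-length test runs. A standalone sketch of that ordering, with made-up limits standing in for the TPC_MAX_* constants:

#include <stdio.h>

#define MAX_CSCD_BYTES	512
#define MAX_SEG_BYTES	1024
#define MAX_LIST	2048
#define MAX_INLINE	256

static const char *
validate(int len, int hdr, int lencscd, int lenseg, int leninl)
{
	if (lencscd > MAX_CSCD_BYTES)
		return ("too many CSCD descriptors");	/* ASC 26/06 */
	if (lenseg > MAX_SEG_BYTES)
		return ("too many segment descriptors");/* ASC 26/08 */
	if (lencscd + lenseg > MAX_LIST || leninl > MAX_INLINE ||
	    len < hdr + lencscd + lenseg + leninl)
		return ("parameter list length error");
	return (NULL);
}

int
main(void)
{
	printf("%s\n", validate(16, 64, 9999, 0, 0));
	return (0);
}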
@@ -1646,7 +1671,7 @@ ctl_extended_copy_lid1(struct ctl_scsiio *ctsio)
list->init_port = -1;
else
list->init_port = ctsio->io_hdr.nexus.targ_port;
- list->init_idx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ list->init_idx = ctl_get_initindex(&ctsio->io_hdr.nexus);
list->list_id = data->list_identifier;
list->flags = data->flags;
list->params = ctsio->kern_data_ptr;
@@ -1718,6 +1743,10 @@ ctl_extended_copy_lid4(struct ctl_scsiio *ctsio)
cdb = (struct scsi_extended_copy *)ctsio->cdb;
len = scsi_4btoul(cdb->length);
+ if (len == 0) {
+ ctl_set_success(ctsio);
+ goto done;
+ }
if (len < sizeof(struct scsi_extended_copy_lid4_data) ||
len > sizeof(struct scsi_extended_copy_lid4_data) +
TPC_MAX_LIST + TPC_MAX_INLINE) {
@@ -1748,20 +1777,22 @@ ctl_extended_copy_lid4(struct ctl_scsiio *ctsio)
lencscd = scsi_2btoul(data->cscd_list_length);
lenseg = scsi_2btoul(data->segment_list_length);
leninl = scsi_2btoul(data->inline_data_length);
- if (len < sizeof(struct scsi_extended_copy_lid4_data) +
- lencscd + lenseg + leninl ||
- leninl > TPC_MAX_INLINE) {
- ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0,
- /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0);
- goto done;
- }
if (lencscd > TPC_MAX_CSCDS * sizeof(struct scsi_ec_cscd)) {
ctl_set_sense(ctsio, /*current_error*/ 1,
/*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
/*asc*/ 0x26, /*ascq*/ 0x06, SSD_ELEM_NONE);
goto done;
}
- if (lencscd + lenseg > TPC_MAX_LIST) {
+ if (lenseg > TPC_MAX_SEGS * sizeof(struct scsi_ec_segment)) {
+ ctl_set_sense(ctsio, /*current_error*/ 1,
+ /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST,
+ /*asc*/ 0x26, /*ascq*/ 0x08, SSD_ELEM_NONE);
+ goto done;
+ }
+ if (lencscd + lenseg > TPC_MAX_LIST ||
+ leninl > TPC_MAX_INLINE ||
+	    len < sizeof(struct scsi_extended_copy_lid4_data) +
+ lencscd + lenseg + leninl) {
ctl_set_param_len_error(ctsio);
goto done;
}
@@ -1773,7 +1804,7 @@ ctl_extended_copy_lid4(struct ctl_scsiio *ctsio)
list->init_port = -1;
else
list->init_port = ctsio->io_hdr.nexus.targ_port;
- list->init_idx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ list->init_idx = ctl_get_initindex(&ctsio->io_hdr.nexus);
list->list_id = scsi_4btoul(data->list_identifier);
list->flags = data->flags;
list->params = ctsio->kern_data_ptr;
@@ -1891,7 +1922,7 @@ ctl_populate_token(struct ctl_scsiio *ctsio)
lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr;
softc = lun->ctl_softc;
- port = softc->ctl_ports[ctl_port_idx(ctsio->io_hdr.nexus.targ_port)];
+ port = softc->ctl_ports[ctsio->io_hdr.nexus.targ_port];
cdb = (struct scsi_populate_token *)ctsio->cdb;
len = scsi_4btoul(cdb->length);
@@ -1945,7 +1976,7 @@ ctl_populate_token(struct ctl_scsiio *ctsio)
list = malloc(sizeof(struct tpc_list), M_CTL, M_WAITOK | M_ZERO);
list->service_action = cdb->service_action;
list->init_port = ctsio->io_hdr.nexus.targ_port;
- list->init_idx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ list->init_idx = ctl_get_initindex(&ctsio->io_hdr.nexus);
list->list_id = scsi_4btoul(cdb->list_identifier);
list->flags = data->flags;
list->ctsio = ctsio;
@@ -2071,7 +2102,7 @@ ctl_write_using_token(struct ctl_scsiio *ctsio)
list = malloc(sizeof(struct tpc_list), M_CTL, M_WAITOK | M_ZERO);
list->service_action = cdb->service_action;
list->init_port = ctsio->io_hdr.nexus.targ_port;
- list->init_idx = ctl_get_resindex(&ctsio->io_hdr.nexus);
+ list->init_idx = ctl_get_initindex(&ctsio->io_hdr.nexus);
list->list_id = scsi_4btoul(cdb->list_identifier);
list->flags = data->flags;
list->params = ctsio->kern_data_ptr;
@@ -2163,7 +2194,7 @@ ctl_receive_rod_token_information(struct ctl_scsiio *ctsio)
list_id = scsi_4btoul(cdb->list_identifier);
mtx_lock(&lun->lun_lock);
list = tpc_find_list(lun, list_id,
- ctl_get_resindex(&ctsio->io_hdr.nexus));
+ ctl_get_initindex(&ctsio->io_hdr.nexus));
if (list == NULL) {
mtx_unlock(&lun->lun_lock);
ctl_set_invalid_field(ctsio, /*sks_valid*/ 1,
diff --git a/sys/cam/ctl/ctl_tpc_local.c b/sys/cam/ctl/ctl_tpc_local.c
index d0319ee..4f368f9 100644
--- a/sys/cam/ctl/ctl_tpc_local.c
+++ b/sys/cam/ctl/ctl_tpc_local.c
@@ -47,7 +47,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_frontend.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
@@ -67,10 +66,6 @@ static struct tpcl_softc tpcl_softc;
static int tpcl_init(void);
static void tpcl_shutdown(void);
-static void tpcl_online(void *arg);
-static void tpcl_offline(void *arg);
-static int tpcl_lun_enable(void *arg, int lun_id);
-static int tpcl_lun_disable(void *arg, int lun_id);
static void tpcl_datamove(union ctl_io *io);
static void tpcl_done(union ctl_io *io);
@@ -98,21 +93,15 @@ tpcl_init(void)
port->port_type = CTL_PORT_INTERNAL;
port->num_requested_ctl_io = 100;
port->port_name = "tpc";
- port->port_online = tpcl_online;
- port->port_offline = tpcl_offline;
- port->onoff_arg = tsoftc;
- port->lun_enable = tpcl_lun_enable;
- port->lun_disable = tpcl_lun_disable;
- port->targ_lun_arg = tsoftc;
port->fe_datamove = tpcl_datamove;
port->fe_done = tpcl_done;
port->max_targets = 1;
port->max_target_id = 0;
+ port->targ_port = -1;
port->max_initiators = 1;
- if (ctl_port_register(port) != 0)
- {
- printf("%s: tpc frontend registration failed\n", __func__);
+ if (ctl_port_register(port) != 0) {
+		printf("%s: ctl_port_register() failed\n", __func__);
return (0);
}
@@ -142,30 +131,6 @@ tpcl_shutdown(void)
}
static void
-tpcl_online(void *arg)
-{
-}
-
-static void
-tpcl_offline(void *arg)
-{
-}
-
-static int
-tpcl_lun_enable(void *arg, int lun_id)
-{
-
- return (0);
-}
-
-static int
-tpcl_lun_disable(void *arg, int lun_id)
-{
-
- return (0);
-}
-
-static void
tpcl_datamove(union ctl_io *io)
{
struct ctl_sg_entry *ext_sglist, *kern_sglist;
@@ -177,10 +142,6 @@ tpcl_datamove(union ctl_io *io)
struct ctl_scsiio *ctsio;
int i, j;
- ext_sg_start = 0;
- ext_offset = 0;
- ext_sglist = NULL;
-
CTL_DEBUG_PRINT(("%s\n", __func__));
ctsio = &io->scsiio;
@@ -197,7 +158,7 @@ tpcl_datamove(union ctl_io *io)
* To simplify things here, if we have a single buffer, stick it in
* a S/G entry and just make it a single entry S/G list.
*/
- if (ctsio->io_hdr.flags & CTL_FLAG_EDPTR_SGLIST) {
+ if (ctsio->ext_sg_entries > 0) {
int len_seen;
ext_sglist = (struct ctl_sg_entry *)ctsio->ext_data_ptr;
@@ -316,13 +277,15 @@ tpcl_resolve(struct ctl_softc *softc, int init_port,
struct ctl_lun *lun;
uint64_t lunid = UINT64_MAX;
- if (cscd->type_code != EC_CSCD_ID)
+ if (cscd->type_code != EC_CSCD_ID ||
+ (cscd->luidt_pdt & EC_LUIDT_MASK) != EC_LUIDT_LUN ||
+ (cscd->luidt_pdt & EC_NUL) != 0)
return (lunid);
cscdid = (struct scsi_ec_cscd_id *)cscd;
mtx_lock(&softc->ctl_lock);
if (init_port >= 0)
- port = softc->ctl_ports[ctl_port_idx(init_port)];
+ port = softc->ctl_ports[init_port];
else
port = NULL;
STAILQ_FOREACH(lun, &softc->lun_list, links) {
@@ -363,9 +326,8 @@ tpcl_queue(union ctl_io *io, uint64_t lun)
{
struct tpcl_softc *tsoftc = &tpcl_softc;
- io->io_hdr.nexus.initid.id = 0;
+ io->io_hdr.nexus.initid = 0;
io->io_hdr.nexus.targ_port = tsoftc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = 0;
io->io_hdr.nexus.targ_lun = lun;
io->scsiio.tag_num = atomic_fetchadd_int(&tsoftc->cur_tag_num, 1);
io->scsiio.ext_data_filled = 0;
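Tag numbers still come from atomic_fetchadd_int(), which returns the pre-increment value, so concurrent submitters can never observe the same tag. A C11 userland analogue:

#include <assert.h>
#include <stdatomic.h>

static atomic_uint cur_tag_num;

/* Userland analogue of atomic_fetchadd_int(&tsoftc->cur_tag_num, 1). */
static unsigned
next_tag(void)
{
	return (atomic_fetch_add(&cur_tag_num, 1));
}

int
main(void)
{
	assert(next_tag() == 0);
	assert(next_tag() == 1);	/* strictly increasing, never reused */
	return (0);
}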
diff --git a/sys/cam/ctl/ctl_util.c b/sys/cam/ctl/ctl_util.c
index a991cfb..6fcec03 100644
--- a/sys/cam/ctl/ctl_util.c
+++ b/sys/cam/ctl/ctl_util.c
@@ -89,7 +89,10 @@ static struct ctl_task_desc ctl_task_table[] = {
{CTL_TASK_TARGET_RESET, "Target Reset"},
{CTL_TASK_BUS_RESET, "Bus Reset"},
{CTL_TASK_PORT_LOGIN, "Port Login"},
- {CTL_TASK_PORT_LOGOUT, "Port Logout"}
+ {CTL_TASK_PORT_LOGOUT, "Port Logout"},
+ {CTL_TASK_QUERY_TASK, "Query Task"},
+ {CTL_TASK_QUERY_TASK_SET, "Query Task Set"},
+ {CTL_TASK_QUERY_ASYNC_EVENT, "Query Async Event"}
};
void
@@ -486,8 +489,7 @@ ctl_scsi_mode_sense(union ctl_io *io, uint8_t *data_ptr, uint32_t data_len,
void
ctl_scsi_start_stop(union ctl_io *io, int start, int load_eject, int immediate,
- int power_conditions, int onoffline __unused,
- ctl_tag_type tag_type, uint8_t control)
+ int power_conditions, ctl_tag_type tag_type, uint8_t control)
{
struct scsi_start_stop_unit *cdb;
@@ -498,10 +500,6 @@ ctl_scsi_start_stop(union ctl_io *io, int start, int load_eject, int immediate,
cdb->opcode = START_STOP_UNIT;
if (immediate)
cdb->byte2 |= SSS_IMMED;
-#ifdef NEEDTOPORT
- if (onoffline)
- cdb->byte2 |= SSS_ONOFFLINE;
-#endif
cdb->how = power_conditions;
if (load_eject)
cdb->how |= SSS_LOEJ;
@@ -679,7 +677,7 @@ ctl_scsi_maintenance_in(union ctl_io *io, uint8_t *data_ptr, uint32_t data_len,
#ifndef _KERNEL
union ctl_io *
-ctl_scsi_alloc_io(struct ctl_id initid)
+ctl_scsi_alloc_io(uint32_t initid)
{
union ctl_io *io;
@@ -846,24 +844,8 @@ void
ctl_io_error_print(union ctl_io *io, struct scsi_inquiry_data *inq_data)
{
char str[512];
-#ifdef NEEDTOPORT
- char *message;
- char *line;
-
- message = io_error_string(io, inq_data, str, sizeof(str));
-
- for (line = strsep(&message, "\n"); line != NULL;
- line = strsep(&message, "\n")) {
- csevent_log(CSC_CTL | CSC_SHELF_SW | CTL_ERROR_REPORT,
- csevent_LogType_Trace,
- csevent_Severity_Information,
- csevent_AlertLevel_Green,
- csevent_FRU_Firmware,
- csevent_FRU_Unknown, "%s", line);
- }
-#else
+
printf("%s", ctl_io_error_string(io, inq_data, str, sizeof(str)));
-#endif
}
@@ -879,7 +861,7 @@ ctl_data_print(union ctl_io *io)
return;
if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR)
return;
- if (io->io_hdr.flags & CTL_FLAG_EDPTR_SGLIST) /* XXX: Implement */
+ if (io->scsiio.ext_sg_entries > 0) /* XXX: Implement */
return;
ctl_scsi_path_string(io, path_str, sizeof(path_str));
len = min(io->scsiio.kern_data_len, 4096);
diff --git a/sys/cam/ctl/ctl_util.h b/sys/cam/ctl/ctl_util.h
index af5e55f..2966b49 100644
--- a/sys/cam/ctl/ctl_util.h
+++ b/sys/cam/ctl/ctl_util.h
@@ -77,7 +77,7 @@ void ctl_scsi_mode_sense(union ctl_io *io, uint8_t *data_ptr,
int minimum_cdb_size, ctl_tag_type tag_type,
uint8_t control);
void ctl_scsi_start_stop(union ctl_io *io, int start, int load_eject,
- int immediate, int power_conditions, int onoffline,
+ int immediate, int power_conditions,
ctl_tag_type tag_type, uint8_t control);
void ctl_scsi_sync_cache(union ctl_io *io, int immed, int reladr,
int minimum_cdb_size, uint64_t starting_lba,
@@ -94,7 +94,7 @@ void ctl_scsi_maintenance_in(union ctl_io *io, uint8_t *data_ptr,
uint32_t data_len, uint8_t action,
ctl_tag_type tag_type, uint8_t control);
#ifndef _KERNEL
-union ctl_io *ctl_scsi_alloc_io(struct ctl_id initid);
+union ctl_io *ctl_scsi_alloc_io(uint32_t initid);
void ctl_scsi_free_io(union ctl_io *io);
#endif /* !_KERNEL */
void ctl_scsi_zero_io(union ctl_io *io);
diff --git a/sys/cam/ctl/scsi_ctl.c b/sys/cam/ctl/scsi_ctl.c
index 6bd0196..25c745b 100644
--- a/sys/cam/ctl/scsi_ctl.c
+++ b/sys/cam/ctl/scsi_ctl.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
struct ctlfe_softc {
struct ctl_port port;
path_id_t path_id;
+ target_id_t target_id;
u_int maxio;
struct cam_sim *sim;
char port_name[DEV_IDLEN];
@@ -118,11 +119,7 @@ typedef enum {
CTLFE_CMD_PIECEWISE = 0x01
} ctlfe_cmd_flags;
-/*
- * The size limit of this structure is CTL_PORT_PRIV_SIZE, from ctl_io.h.
- * Currently that is 600 bytes.
- */
-struct ctlfe_lun_cmd_info {
+struct ctlfe_cmd_info {
int cur_transfer_index;
size_t cur_transfer_off;
ctlfe_cmd_flags flags;
@@ -134,7 +131,6 @@ struct ctlfe_lun_cmd_info {
#define CTLFE_MAX_SEGS 32
bus_dma_segment_t cam_sglist[CTLFE_MAX_SEGS];
};
-CTASSERT(sizeof(struct ctlfe_lun_cmd_info) <= CTL_PORT_PRIV_SIZE);
/*
* When we register the adapter/bus, request that this many ctl_ios be
@@ -357,6 +353,7 @@ ctlfeasync(void *callback_arg, uint32_t code, struct cam_path *path, void *arg)
}
softc->path_id = cpi->ccb_h.path_id;
+ softc->target_id = cpi->initiator_id;
softc->sim = xpt_path_sim(path);
if (cpi->maxio != 0)
softc->maxio = cpi->maxio;
@@ -403,6 +400,7 @@ ctlfeasync(void *callback_arg, uint32_t code, struct cam_path *path, void *arg)
*/
port->max_targets = cpi->max_target;
port->max_target_id = cpi->max_target;
+ port->targ_port = -1;
/*
* XXX KDM need to figure out whether we're the master or
@@ -531,6 +529,7 @@ ctlferegister(struct cam_periph *periph, void *arg)
for (i = 0; i < CTLFE_ATIO_PER_LUN; i++) {
union ccb *new_ccb;
union ctl_io *new_io;
+ struct ctlfe_cmd_info *cmd_info;
new_ccb = (union ccb *)malloc(sizeof(*new_ccb), M_CTLFE,
M_ZERO|M_NOWAIT);
@@ -544,6 +543,15 @@ ctlferegister(struct cam_periph *periph, void *arg)
status = CAM_RESRC_UNAVAIL;
break;
}
+ cmd_info = malloc(sizeof(*cmd_info), M_CTLFE,
+ M_ZERO | M_NOWAIT);
+ if (cmd_info == NULL) {
+ ctl_free_io(new_io);
+ free(new_ccb, M_CTLFE);
+ status = CAM_RESRC_UNAVAIL;
+ break;
+ }
+ new_io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr = cmd_info;
softc->atios_alloced++;
new_ccb->ccb_h.io_ptr = new_io;
@@ -554,6 +562,7 @@ ctlferegister(struct cam_periph *periph, void *arg)
xpt_action(new_ccb);
status = new_ccb->ccb_h.status;
if ((status & CAM_STATUS_MASK) != CAM_REQ_INPROG) {
+ free(cmd_info, M_CTLFE);
ctl_free_io(new_io);
free(new_ccb, M_CTLFE);
break;
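Each ATIO now carries a separately allocated ctlfe_cmd_info, so every later failure path has to release everything acquired so far, newest first. A standalone sketch of that unwind pattern, with invented types:

#include <stdlib.h>

struct io { int dummy; };
struct cmd_info { int dummy; };

static int
setup_one(void)
{
	struct io *io;
	struct cmd_info *ci;

	io = calloc(1, sizeof(*io));
	if (io == NULL)
		return (-1);
	ci = calloc(1, sizeof(*ci));
	if (ci == NULL) {
		free(io);		/* unwind the earlier allocation */
		return (-1);
	}
	/* ... on a later failure, free both, newest first ... */
	free(ci);
	free(io);
	return (0);
}

int
main(void)
{
	return (setup_one());
}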
@@ -684,13 +693,13 @@ ctlfedata(struct ctlfe_lun_softc *softc, union ctl_io *io,
u_int16_t *sglist_cnt)
{
struct ctlfe_softc *bus_softc;
- struct ctlfe_lun_cmd_info *cmd_info;
+ struct ctlfe_cmd_info *cmd_info;
struct ctl_sg_entry *ctl_sglist;
bus_dma_segment_t *cam_sglist;
size_t off;
int i, idx;
- cmd_info = (struct ctlfe_lun_cmd_info *)io->io_hdr.port_priv;
+ cmd_info = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr;
bus_softc = softc->parent_softc;
/*
@@ -766,7 +775,7 @@ static void
ctlfestart(struct cam_periph *periph, union ccb *start_ccb)
{
struct ctlfe_lun_softc *softc;
- struct ctlfe_lun_cmd_info *cmd_info;
+ struct ctlfe_cmd_info *cmd_info;
struct ccb_hdr *ccb_h;
struct ccb_accept_tio *atio;
struct ccb_scsiio *csio;
@@ -794,7 +803,7 @@ ctlfestart(struct cam_periph *periph, union ccb *start_ccb)
flags = atio->ccb_h.flags &
(CAM_DIS_DISCONNECT|CAM_TAG_ACTION_VALID|CAM_DIR_MASK);
- cmd_info = (struct ctlfe_lun_cmd_info *)io->io_hdr.port_priv;
+ cmd_info = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr;
cmd_info->cur_transfer_index = 0;
cmd_info->cur_transfer_off = 0;
cmd_info->flags = 0;
@@ -966,12 +975,17 @@ static void
ctlfe_free_ccb(struct cam_periph *periph, union ccb *ccb)
{
struct ctlfe_lun_softc *softc;
+ union ctl_io *io;
+ struct ctlfe_cmd_info *cmd_info;
softc = (struct ctlfe_lun_softc *)periph->softc;
+ io = ccb->ccb_h.io_ptr;
switch (ccb->ccb_h.func_code) {
case XPT_ACCEPT_TARGET_IO:
softc->atios_freed++;
+ cmd_info = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr;
+ free(cmd_info, M_CTLFE);
break;
case XPT_IMMEDIATE_NOTIFY:
case XPT_NOTIFY_ACKNOWLEDGE:
@@ -981,7 +995,7 @@ ctlfe_free_ccb(struct cam_periph *periph, union ccb *ccb)
break;
}
- ctl_free_io(ccb->ccb_h.io_ptr);
+ ctl_free_io(io);
free(ccb, M_CTLFE);
KASSERT(softc->atios_freed <= softc->atios_alloced, ("%s: "
@@ -1054,7 +1068,6 @@ ctlfe_adjust_cdb(struct ccb_accept_tio *atio, uint32_t offset)
}
case READ_16:
case WRITE_16:
- case WRITE_ATOMIC_16:
{
struct scsi_rw_16 *cdb = (struct scsi_rw_16 *)cmdbyt;
lba = scsi_8btou64(cdb->addr);
@@ -1076,6 +1089,7 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
{
struct ctlfe_lun_softc *softc;
struct ctlfe_softc *bus_softc;
+ struct ctlfe_cmd_info *cmd_info;
struct ccb_accept_tio *atio = NULL;
union ctl_io *io = NULL;
struct mtx *mtx;
@@ -1137,10 +1151,12 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
*/
mtx_unlock(mtx);
io = done_ccb->ccb_h.io_ptr;
+ cmd_info = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr;
ctl_zero_io(io);
/* Save pointers on both sides */
io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = done_ccb;
+ io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr = cmd_info;
done_ccb->ccb_h.io_ptr = io;
/*
@@ -1148,9 +1164,8 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
* down the immediate notify path below.
*/
io->io_hdr.io_type = CTL_IO_SCSI;
- io->io_hdr.nexus.initid.id = atio->init_id;
+ io->io_hdr.nexus.initid = atio->init_id;
io->io_hdr.nexus.targ_port = bus_softc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = atio->ccb_h.target_id;
io->io_hdr.nexus.targ_lun = atio->ccb_h.target_lun;
io->scsiio.tag_num = atio->tag_id;
switch (atio->tag_action) {
@@ -1184,10 +1199,9 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
io->scsiio.cdb_len);
#ifdef CTLFEDEBUG
- printf("%s: %ju:%d:%ju:%d: tag %04x CDB %02x\n", __func__,
- (uintmax_t)io->io_hdr.nexus.initid.id,
+ printf("%s: %u:%u:%u: tag %04x CDB %02x\n", __func__,
+ io->io_hdr.nexus.initid,
io->io_hdr.nexus.targ_port,
- (uintmax_t)io->io_hdr.nexus.targ_target.id,
io->io_hdr.nexus.targ_lun,
io->scsiio.tag_num, io->scsiio.cdb[0]);
#endif
@@ -1289,12 +1303,11 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
return;
}
} else {
- struct ctlfe_lun_cmd_info *cmd_info;
+ struct ctlfe_cmd_info *cmd_info;
struct ccb_scsiio *csio;
csio = &done_ccb->csio;
- cmd_info = (struct ctlfe_lun_cmd_info *)
- io->io_hdr.port_priv;
+ cmd_info = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr;
io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
@@ -1425,9 +1438,8 @@ ctlfedone(struct cam_periph *periph, union ccb *done_ccb)
io->io_hdr.io_type = CTL_IO_TASK;
io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr =done_ccb;
inot->ccb_h.io_ptr = io;
- io->io_hdr.nexus.initid.id = inot->initiator_id;
+ io->io_hdr.nexus.initid = inot->initiator_id;
io->io_hdr.nexus.targ_port = bus_softc->port.targ_port;
- io->io_hdr.nexus.targ_target.id = inot->ccb_h.target_id;
io->io_hdr.nexus.targ_lun = inot->ccb_h.target_lun;
/* XXX KDM should this be the tag_id? */
io->taskio.tag_num = inot->seq_id;
@@ -1557,6 +1569,8 @@ ctlfe_onoffline(void *arg, int online)
}
ccb = xpt_alloc_ccb();
xpt_setup_ccb(&ccb->ccb_h, path, CAM_PRIORITY_NONE);
+ ccb->ccb_h.func_code = XPT_GET_SIM_KNOB;
+ xpt_action(ccb);
/*
* Copan WWN format:
@@ -1570,15 +1584,7 @@ ctlfe_onoffline(void *arg, int online)
* 3 == NL-Port
* Bits 7-0: 0 == Node Name, >0 == Port Number
*/
-
if (online != 0) {
-
- ccb->ccb_h.func_code = XPT_GET_SIM_KNOB;
-
-
- xpt_action(ccb);
-
-
if ((ccb->knob.xport_specific.valid & KNOB_VALID_ADDRESS) != 0){
#ifdef RANDOM_WWNN
uint64_t random_bits;
@@ -1677,9 +1683,9 @@ ctlfe_onoffline(void *arg, int online)
ccb->knob.xport_specific.valid |= KNOB_VALID_ADDRESS;
if (online != 0)
- ccb->knob.xport_specific.fc.role = KNOB_ROLE_TARGET;
+ ccb->knob.xport_specific.fc.role |= KNOB_ROLE_TARGET;
else
- ccb->knob.xport_specific.fc.role = KNOB_ROLE_NONE;
+ ccb->knob.xport_specific.fc.role &= ~KNOB_ROLE_TARGET;
xpt_action(ccb);
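Switching the role assignment from plain `=` to `|=` and `&= ~` preserves whatever other role bits (for example, initiator mode) the SIM already advertises instead of clobbering them. A tiny sketch of the read-modify-write:

#include <assert.h>

#define ROLE_INITIATOR	0x01	/* stand-ins for the KNOB_ROLE_* bits */
#define ROLE_TARGET	0x02

int
main(void)
{
	unsigned role = ROLE_INITIATOR;

	role |= ROLE_TARGET;	/* go online: add target role only */
	assert(role == (ROLE_INITIATOR | ROLE_TARGET));
	role &= ~ROLE_TARGET;	/* go offline: initiator role survives */
	assert(role == ROLE_INITIATOR);
	return (0);
}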
@@ -1809,7 +1815,7 @@ ctlfe_lun_enable(void *arg, int lun_id)
bus_softc = (struct ctlfe_softc *)arg;
status = xpt_create_path(&path, /*periph*/ NULL,
- bus_softc->path_id, 0, lun_id);
+ bus_softc->path_id, bus_softc->target_id, lun_id);
/* XXX KDM need some way to return status to CTL here? */
if (status != CAM_REQ_CMP) {
printf("%s: could not create path, status %#x\n", __func__,
diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c
index 4013ff8..023103b 100644
--- a/sys/cam/scsi/scsi_all.c
+++ b/sys/cam/scsi/scsi_all.c
@@ -175,7 +175,7 @@ static struct op_table_entry scsi_op_codes[] = {
*
* SCSI Operation Codes
* Numeric Sorted Listing
- * as of 3/11/08
+ * as of 5/26/15
*
* D - DIRECT ACCESS DEVICE (SBC-2) device column key
* .T - SEQUENTIAL ACCESS DEVICE (SSC-2) -----------------
@@ -501,16 +501,22 @@ static struct op_table_entry scsi_op_codes[] = {
{ 0x93, D, "WRITE SAME(16)" },
/* 93 M ERASE(16) */
{ 0x93, T, "ERASE(16)" },
- /* 94 [usage proposed by SCSI Socket Services project] */
- /* 95 [usage proposed by SCSI Socket Services project] */
- /* 96 [usage proposed by SCSI Socket Services project] */
- /* 97 [usage proposed by SCSI Socket Services project] */
+ /* 94 O ZBC OUT */
+ { 0x94, D, "ZBC OUT" },
+	/* 95  O               ZBC IN */
+	{ 0x95, D, "ZBC IN" },
+ /* 96 */
+ /* 97 */
/* 98 */
/* 99 */
- /* 9A */
- /* 9B */
- /* 9C */
- /* 9D */
+ /* 9A O WRITE STREAM(16) */
+ { 0x9A, D, "WRITE STREAM(16)" },
+ /* 9B OOOOOOOOOO OOO READ BUFFER(16) */
+	{ 0x9B, ALL & ~(B), "READ BUFFER(16)" },
+ /* 9C O WRITE ATOMIC(16) */
+ { 0x9C, D, "WRITE ATOMIC(16)" },
+ /* 9D SERVICE ACTION BIDIRECTIONAL */
+ { 0x9D, ALL, "SERVICE ACTION BIDIRECTIONAL" },
/* XXX KDM ALL for this? op-num.txt defines it for none.. */
/* 9E SERVICE ACTION IN(16) */
{ 0x9E, ALL, "SERVICE ACTION IN(16)" },
@@ -968,7 +974,7 @@ static struct asc_table_entry asc_table[] = {
*
* SCSI ASC/ASCQ Assignments
* Numeric Sorted Listing
- * as of 5/20/12
+ * as of 8/12/15
*
* D - DIRECT ACCESS DEVICE (SBC-2) device column key
* .T - SEQUENTIAL ACCESS DEVICE (SSC) -------------------
@@ -1060,6 +1066,9 @@ static struct asc_table_entry asc_table[] = {
/* DT P B */
{ SST(0x00, 0x20, SS_RDEF, /* XXX TBD */
"Extended copy information available") },
+ /* D */
+ { SST(0x00, 0x21, SS_RDEF, /* XXX TBD */
+ "Atomic command aborted due to ACA") },
/* D W O BK */
{ SST(0x01, 0x00, SS_RDEF,
"No index/sector signal") },
@@ -1079,7 +1088,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x00, SS_RDEF,
"Logical unit not ready, cause not reportable") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x01, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EBUSY,
+ { SST(0x04, 0x01, SS_WAIT | EBUSY,
"Logical unit is in process of becoming ready") },
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x02, SS_START | SSQ_DECREMENT_COUNT | ENXIO,
@@ -1106,7 +1115,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x09, SS_RDEF, /* XXX TBD */
"Logical unit not ready, self-test in progress") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x0A, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | ENXIO,
+ { SST(0x04, 0x0A, SS_WAIT | ENXIO,
"Logical unit not accessible, asymmetric access state transition")},
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x0B, SS_FATAL | ENXIO,
@@ -1117,11 +1126,14 @@ static struct asc_table_entry asc_table[] = {
/* F */
{ SST(0x04, 0x0D, SS_RDEF, /* XXX TBD */
"Logical unit not ready, structure check required") },
+ /* DTL WR MAEBKVF */
+ { SST(0x04, 0x0E, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, security session in progress") },
/* DT WROM B */
{ SST(0x04, 0x10, SS_RDEF, /* XXX TBD */
"Logical unit not ready, auxiliary memory not accessible") },
/* DT WRO AEB VF */
- { SST(0x04, 0x11, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EBUSY,
+ { SST(0x04, 0x11, SS_WAIT | EBUSY,
"Logical unit not ready, notify (enable spinup) required") },
/* M V */
{ SST(0x04, 0x12, SS_RDEF, /* XXX TBD */
@@ -1156,6 +1168,24 @@ static struct asc_table_entry asc_table[] = {
/* DT MAEB */
{ SST(0x04, 0x1C, SS_RDEF, /* XXX TBD */
"Logical unit not ready, additional power use not yet granted") },
+ /* D */
+ { SST(0x04, 0x1D, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, configuration in progress") },
+ /* D */
+ { SST(0x04, 0x1E, SS_FATAL | ENXIO,
+ "Logical unit not ready, microcode activation required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x1F, SS_FATAL | ENXIO,
+ "Logical unit not ready, microcode download required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x20, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, logical unit reset required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x21, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, hard reset required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x22, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, power cycle required") },
/* DTL WROMAEBKVF */
{ SST(0x05, 0x00, SS_RDEF,
"Logical unit does not respond to selection") },
@@ -1195,6 +1225,9 @@ static struct asc_table_entry asc_table[] = {
/* DT WRO B */
{ SST(0x09, 0x04, SS_RDEF,
"Head select fault") },
+ /* DT RO B */
+ { SST(0x09, 0x05, SS_RDEF,
+ "Vibration induced tracking error") },
/* DTLPWROMAEBKVF */
{ SST(0x0A, 0x00, SS_FATAL | ENOSPC,
"Error log overflow") },
@@ -1228,6 +1261,30 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x0B, 0x09, SS_RDEF, /* XXX TBD */
"Warning - device statistics notification available") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0A, SS_RDEF, /* XXX TBD */
+ "Warning - High critical temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0B, SS_RDEF, /* XXX TBD */
+ "Warning - Low critical temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0C, SS_RDEF, /* XXX TBD */
+ "Warning - High operating temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0D, SS_RDEF, /* XXX TBD */
+ "Warning - Low operating temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0E, SS_RDEF, /* XXX TBD */
+	    "Warning - High critical humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0F, SS_RDEF, /* XXX TBD */
+	    "Warning - Low critical humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x10, SS_RDEF, /* XXX TBD */
+ "Warning - High operating humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x11, SS_RDEF, /* XXX TBD */
+ "Warning - Low operating humidity limit exceeded") },
/* T R */
{ SST(0x0C, 0x00, SS_RDEF,
"Write error") },
@@ -1276,6 +1333,15 @@ static struct asc_table_entry asc_table[] = {
/* R */
{ SST(0x0C, 0x0F, SS_RDEF, /* XXX TBD */
"Defects in error window") },
+ /* D */
+ { SST(0x0C, 0x10, SS_RDEF, /* XXX TBD */
+ "Incomplete multiple atomic write operations") },
+ /* D */
+ { SST(0x0C, 0x11, SS_RDEF, /* XXX TBD */
+ "Write error - recovery scan needed") },
+ /* D */
+ { SST(0x0C, 0x12, SS_RDEF, /* XXX TBD */
+ "Write error - insufficient zone resources") },
/* DTLPWRO A K */
{ SST(0x0D, 0x00, SS_RDEF, /* XXX TBD */
"Error detected by third party temporary initiator") },
@@ -1387,6 +1453,9 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x11, 0x14, SS_RDEF, /* XXX TBD */
"Read error - LBA marked bad by application client") },
+ /* D */
+ { SST(0x11, 0x15, SS_RDEF, /* XXX TBD */
+ "Write after sanitize required") },
/* D W O BK */
{ SST(0x12, 0x00, SS_RDEF,
"Address mark not found for ID field") },
@@ -1589,6 +1658,18 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x21, 0x03, SS_RDEF, /* XXX TBD */
"Invalid write crossing layer jump") },
/* D */
+ { SST(0x21, 0x04, SS_RDEF, /* XXX TBD */
+ "Unaligned write command") },
+ /* D */
+ { SST(0x21, 0x05, SS_RDEF, /* XXX TBD */
+ "Write boundary violation") },
+ /* D */
+ { SST(0x21, 0x06, SS_RDEF, /* XXX TBD */
+ "Attempt to read invalid data") },
+ /* D */
+ { SST(0x21, 0x07, SS_RDEF, /* XXX TBD */
+ "Read boundary violation") },
+ /* D */
{ SST(0x22, 0x00, SS_FATAL | EINVAL,
"Illegal function (use 20 00, 24 00, or 26 00)") },
/* DT P B */
@@ -1711,6 +1792,9 @@ static struct asc_table_entry asc_table[] = {
/* T */
{ SST(0x26, 0x12, SS_RDEF, /* XXX TBD */
"Vendor specific key reference not found") },
+ /* D */
+ { SST(0x26, 0x13, SS_RDEF, /* XXX TBD */
+ "Application tag mode page is invalid") },
/* DT WRO BK */
{ SST(0x27, 0x00, SS_FATAL | EACCES,
"Write protected") },
@@ -1735,6 +1819,9 @@ static struct asc_table_entry asc_table[] = {
/* D B */
{ SST(0x27, 0x07, SS_FATAL | ENOSPC,
"Space allocation failed write protect") },
+ /* D */
+ { SST(0x27, 0x08, SS_FATAL | EACCES,
+ "Zone is read only") },
/* DTLPWROMAEBKVF */
{ SST(0x28, 0x00, SS_FATAL | ENXIO,
"Not ready to ready change, medium may have changed") },
@@ -1878,12 +1965,33 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x2C, 0x0C, SS_RDEF, /* XXX TBD */
"ORWRITE generation does not match") },
+ /* D */
+ { SST(0x2C, 0x0D, SS_RDEF, /* XXX TBD */
+ "Reset write pointer not allowed") },
+ /* D */
+ { SST(0x2C, 0x0E, SS_RDEF, /* XXX TBD */
+ "Zone is offline") },
+ /* D */
+ { SST(0x2C, 0x0F, SS_RDEF, /* XXX TBD */
+ "Stream not open") },
+ /* D */
+ { SST(0x2C, 0x10, SS_RDEF, /* XXX TBD */
+ "Unwritten data in zone") },
/* T */
{ SST(0x2D, 0x00, SS_RDEF,
"Overwrite error on update in place") },
/* R */
{ SST(0x2E, 0x00, SS_RDEF, /* XXX TBD */
"Insufficient time for operation") },
+ /* D */
+ { SST(0x2E, 0x01, SS_RDEF, /* XXX TBD */
+ "Command timeout before processing") },
+ /* D */
+ { SST(0x2E, 0x02, SS_RDEF, /* XXX TBD */
+ "Command timeout during processing") },
+ /* D */
+ { SST(0x2E, 0x03, SS_RDEF, /* XXX TBD */
+ "Command timeout during processing due to error recovery") },
/* DTLPWROMAEBKVF */
{ SST(0x2F, 0x00, SS_RDEF,
"Commands cleared by another initiator") },
@@ -1893,6 +2001,9 @@ static struct asc_table_entry asc_table[] = {
/* DTLPWROMAEBKVF */
{ SST(0x2F, 0x02, SS_RDEF, /* XXX TBD */
"Commands cleared by device server") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x2F, 0x03, SS_RDEF, /* XXX TBD */
+ "Some commands cleared by queuing layer event") },
/* DT WROM BK */
{ SST(0x30, 0x00, SS_RDEF,
"Incompatible medium installed") },
@@ -2190,6 +2301,15 @@ static struct asc_table_entry asc_table[] = {
/* DTLPWR MAEBK F */
{ SST(0x3F, 0x14, SS_RDEF, /* XXX TBD */
"iSCSI IP address changed") },
+ /* DTLPWR MAEBK */
+ { SST(0x3F, 0x15, SS_RDEF, /* XXX TBD */
+ "Inspect referrals sense descriptors") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x3F, 0x16, SS_RDEF, /* XXX TBD */
+ "Microcode has been changed without reset") },
+ /* D */
+ { SST(0x3F, 0x17, SS_RDEF, /* XXX TBD */
+ "Zone transition to full") },
/* D */
{ SST(0x40, 0x00, SS_RDEF,
"RAM failure") }, /* deprecated - use 40 NN instead */
@@ -2299,6 +2419,30 @@ static struct asc_table_entry asc_table[] = {
/* DT PWROMAEBK F */
{ SST(0x4B, 0x0D, SS_RDEF, /* XXX TBD */
"Data-out buffer error") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x0E, SS_RDEF, /* XXX TBD */
+ "PCIe fabric error") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x0F, SS_RDEF, /* XXX TBD */
+ "PCIe completion timeout") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x10, SS_RDEF, /* XXX TBD */
+ "PCIe completer abort") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x11, SS_RDEF, /* XXX TBD */
+ "PCIe poisoned TLP received") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x12, SS_RDEF, /* XXX TBD */
+ "PCIe ECRC check failed") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x13, SS_RDEF, /* XXX TBD */
+ "PCIe unsupported request") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x14, SS_RDEF, /* XXX TBD */
+ "PCIe ACS violation") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x15, SS_RDEF, /* XXX TBD */
+	    "PCIe TLP prefix blocked") },
/* DTLPWROMAEBKVF */
{ SST(0x4C, 0x00, SS_RDEF,
"Logical unit failed self-configuration") },
@@ -2356,6 +2500,21 @@ static struct asc_table_entry asc_table[] = {
/* M */
{ SST(0x53, 0x08, SS_RDEF, /* XXX TBD */
"Element status unknown") },
+ /* M */
+ { SST(0x53, 0x09, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - load failed") },
+ /* M */
+ { SST(0x53, 0x0A, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - unload failed") },
+ /* M */
+ { SST(0x53, 0x0B, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - unload missing") },
+ /* M */
+ { SST(0x53, 0x0C, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - eject failed") },
+ /* M */
+ { SST(0x53, 0x0D, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - library communication failed") },
/* P */
{ SST(0x54, 0x00, SS_RDEF,
"SCSI to host system interface failure") },
@@ -2401,6 +2560,15 @@ static struct asc_table_entry asc_table[] = {
/* DT P B */
{ SST(0x55, 0x0D, SS_RDEF, /* XXX TBD */
"Insufficient resources to create ROD token") },
+ /* D */
+ { SST(0x55, 0x0E, SS_RDEF, /* XXX TBD */
+ "Insufficient zone resources") },
+ /* D */
+ { SST(0x55, 0x0F, SS_RDEF, /* XXX TBD */
+ "Insufficient zone resources to complete write") },
+ /* D */
+ { SST(0x55, 0x10, SS_RDEF, /* XXX TBD */
+ "Maximum number of streams open") },
/* R */
{ SST(0x57, 0x00, SS_RDEF,
"Unable to recover table-of-contents") },
@@ -2821,6 +2989,9 @@ static struct asc_table_entry asc_table[] = {
/* A */
{ SST(0x68, 0x00, SS_RDEF,
"Logical unit not configured") },
+ /* D */
+ { SST(0x68, 0x01, SS_RDEF,
+ "Subsidiary logical unit not configured") },
/* A */
{ SST(0x69, 0x00, SS_RDEF,
"Data loss on logical unit") },
@@ -3803,8 +3974,6 @@ scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
*/
sense->extra_len = 10;
sense_len = (int)va_arg(ap, int);
- len_to_copy = MIN(sense_len, SSD_EXTRA_MAX -
- sense->extra_len);
data = (uint8_t *)va_arg(ap, uint8_t *);
switch (elem_type) {
@@ -3822,10 +3991,14 @@ scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
uint8_t *data_dest;
int i;
- if (elem_type == SSD_ELEM_COMMAND)
+ if (elem_type == SSD_ELEM_COMMAND) {
data_dest = &sense->cmd_spec_info[0];
- else {
+ len_to_copy = MIN(sense_len,
+ sizeof(sense->cmd_spec_info));
+ } else {
data_dest = &sense->info[0];
+ len_to_copy = MIN(sense_len,
+ sizeof(sense->info));
/*
* We're setting the info field, so
* set the valid bit.
@@ -6069,13 +6242,12 @@ scsi_parse_transportid_iscsi(char *id_str, struct scsi_transportid_header **hdr,
char *error_str, int error_str_len)
{
size_t id_len, sep_len, id_size, name_len;
- int is_full_id, retval;
+ int retval;
unsigned int i, sep_pos, sep_found;
const char *sep_template = ",i,0x";
const char *iqn_prefix = "iqn.";
struct scsi_transportid_iscsi_device *iscsi;
- is_full_id = 0;
retval = 0;
sep_found = 0;
diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h
index 253e28e..f2b4b21 100644
--- a/sys/cam/scsi/scsi_all.h
+++ b/sys/cam/scsi/scsi_all.h
@@ -103,6 +103,9 @@ typedef enum {
/* The retryable error action, with table specified error code */
#define SS_RET SS_RETRY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE
+/* Wait for transient error status to change */
+#define SS_WAIT SS_TUR|SSQ_MANY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE
+
/* Fatal error action, with table specified error code */
#define SS_FATAL SS_FAIL|SSQ_PRINT_SENSE
@@ -697,6 +700,19 @@ struct scsi_control_page {
u_int8_t extended_selftest_completion_time[2];
};
+struct scsi_control_ext_page {
+ uint8_t page_code;
+ uint8_t subpage_code;
+ uint8_t page_length[2];
+ uint8_t flags;
+#define SCEP_TCMOS 0x04 /* Timestamp Changeable by Methods Outside Standard */
+#define SCEP_SCSIP 0x02 /* SCSI Precedence (clock) */
+#define SCEP_IALUAE 0x01 /* Implicit ALUA Enabled */
+ uint8_t prio;
+ uint8_t max_sense;
+ uint8_t reserve[25];
+};
+
struct scsi_cache_page {
u_int8_t page_code;
#define SCHP_PAGE_SAVABLE 0x80 /* Page is savable */
@@ -986,6 +1002,16 @@ struct scsi_read_buffer
u_int8_t control;
};
+struct scsi_read_buffer_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t offset[8];
+ uint8_t length[4];
+ uint8_t buffer_id;
+ uint8_t control;
+};
+
struct scsi_write_buffer
{
u_int8_t opcode;
@@ -1257,6 +1283,17 @@ struct scsi_rw_16
u_int8_t control;
};
+struct scsi_write_atomic_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t addr[8];
+ uint8_t boundary[2];
+ uint8_t length[2];
+ uint8_t group;
+ uint8_t control;
+};
+
struct scsi_write_same_10
{
uint8_t opcode;
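The new scsi_write_atomic_16 mirrors the SBC-4 WRITE ATOMIC(16) CDB: opcode 0x9C, an 8-byte LBA at bytes 2-9, and a 2-byte transfer length at bytes 12-13. A standalone sketch filling those fields big-endian, with local equivalents of the scsi_u64to8b()/scsi_ulto2b() helpers:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void
u64to8b(uint64_t v, uint8_t *b)
{
	int i;

	for (i = 7; i >= 0; i--, v >>= 8)
		b[i] = v & 0xff;
}

static void
ulto2b(uint32_t v, uint8_t *b)
{
	b[0] = (v >> 8) & 0xff;
	b[1] = v & 0xff;
}

int
main(void)
{
	uint8_t cdb[16];

	memset(cdb, 0, sizeof(cdb));
	cdb[0] = 0x9C;			/* WRITE_ATOMIC_16 */
	u64to8b(0x1000, &cdb[2]);	/* starting LBA, bytes 2-9 */
	ulto2b(8, &cdb[12]);		/* transfer length, bytes 12-13 */
	assert(cdb[8] == 0x10 && cdb[9] == 0x00);
	return (0);
}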
@@ -1666,6 +1703,7 @@ struct scsi_ec_cscd
uint8_t type_code;
#define EC_CSCD_EXT 0xff
uint8_t luidt_pdt;
+#define EC_NUL 0x20
#define EC_LUIDT_MASK 0xc0
#define EC_LUIDT_LUN 0x00
#define EC_LUIDT_PROXY_TOKEN 0x40
@@ -1971,6 +2009,7 @@ struct ata_pass_16 {
#define VERIFY_16 0x8F
#define SYNCHRONIZE_CACHE_16 0x91
#define WRITE_SAME_16 0x93
+#define READ_BUFFER_16 0x9B
#define WRITE_ATOMIC_16 0x9C
#define SERVICE_ACTION_IN 0x9E
#define REPORT_LUNS 0xA0
@@ -2729,7 +2768,8 @@ struct scsi_vpd_block_limits
u_int8_t max_atomic_transfer_length[4];
u_int8_t atomic_alignment[4];
u_int8_t atomic_transfer_length_granularity[4];
- u_int8_t reserved2[8];
+ u_int8_t max_atomic_transfer_length_with_atomic_boundary[4];
+ u_int8_t max_atomic_boundary_size[4];
};
struct scsi_read_capacity
@@ -2824,6 +2864,9 @@ struct scsi_report_luns
#define RPL_REPORT_DEFAULT 0x00
#define RPL_REPORT_WELLKNOWN 0x01
#define RPL_REPORT_ALL 0x02
+#define RPL_REPORT_ADMIN 0x10
+#define RPL_REPORT_NONSUBSID 0x11
+#define RPL_REPORT_CONGLOM 0x12
uint8_t select_report;
uint8_t reserved2[3];
uint8_t length[4];
diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c
index 47f2728..16acd6d 100644
--- a/sys/cam/scsi/scsi_cd.c
+++ b/sys/cam/scsi/scsi_cd.c
@@ -392,7 +392,8 @@ cdasync(void *callback_arg, u_int32_t code,
if (cgd->protocol != PROTO_SCSI)
break;
-
+ if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
+ break;
if (SID_TYPE(&cgd->inq_data) != T_CDROM
&& SID_TYPE(&cgd->inq_data) != T_WORM)
break;
diff --git a/sys/cam/scsi/scsi_cd.h b/sys/cam/scsi/scsi_cd.h
index f502d66..cf8baaf 100644
--- a/sys/cam/scsi/scsi_cd.h
+++ b/sys/cam/scsi/scsi_cd.h
@@ -56,6 +56,83 @@
* SCSI command format
*/
+struct scsi_get_config
+{
+ uint8_t opcode;
+ uint8_t rt;
+#define SGC_RT_ALL 0x00
+#define SGC_RT_CURRENT 0x01
+#define SGC_RT_SPECIFIC 0x02
+#define SGC_RT_MASK 0x03
+ uint8_t starting_feature[2];
+ uint8_t reserved[3];
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_get_config_header
+{
+ uint8_t data_length[4];
+ uint8_t reserved[2];
+ uint8_t current_profile[2];
+};
+
+struct scsi_get_config_feature
+{
+ uint8_t feature_code[2];
+ uint8_t flags;
+#define SGC_F_CURRENT 0x01
+#define SGC_F_PERSISTENT 0x02
+#define SGC_F_VERSION_MASK 0x3C
+#define SGC_F_VERSION_SHIFT 2
+ uint8_t add_length;
+ uint8_t feature_data[];
+};
+
+struct scsi_get_event_status
+{
+ uint8_t opcode;
+ uint8_t byte2;
+#define SGESN_POLLED 1
+ uint8_t reserved[2];
+ uint8_t notif_class;
+ uint8_t reserved2[2];
+ uint8_t length[2];
+ uint8_t control;
+};
+
+struct scsi_get_event_status_header
+{
+ uint8_t descr_length[4];
+ uint8_t nea_class;
+#define SGESN_NEA 0x80
+ uint8_t supported_class;
+};
+
+struct scsi_get_event_status_descr
+{
+ uint8_t event_code;
+ uint8_t event_info[];
+};
+
+struct scsi_mechanism_status
+{
+ uint8_t opcode;
+ uint8_t reserved[7];
+ uint8_t length[2];
+ uint8_t reserved2;
+ uint8_t control;
+};
+
+struct scsi_mechanism_status_header
+{
+ uint8_t state1;
+ uint8_t state2;
+ uint8_t lba[3];
+ uint8_t slots_num;
+ uint8_t slots_length[2];
+};
+
struct scsi_pause
{
u_int8_t op_code;
@@ -151,12 +228,29 @@ struct scsi_read_toc
{
u_int8_t op_code;
u_int8_t byte2;
- u_int8_t unused[4];
+ u_int8_t format;
+ u_int8_t unused[3];
u_int8_t from_track;
u_int8_t data_len[2];
u_int8_t control;
};
+struct scsi_read_toc_hdr
+{
+ uint8_t data_length[2];
+ uint8_t first;
+ uint8_t last;
+};
+
+struct scsi_read_toc_type01_descr
+{
+ uint8_t reserved;
+ uint8_t addr_ctl;
+ uint8_t track_number;
+ uint8_t reserved2;
+ uint8_t track_start[4];
+};
+
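/*
 * Editorial sketch, not part of the patch: decoding a format 0/1 READ
 * TOC response with the two structs above.  buf is a hypothetical
 * response buffer; scsi_4btoul() comes from scsi_all.h.
 */
struct scsi_read_toc_hdr *th = (struct scsi_read_toc_hdr *)buf;
struct scsi_read_toc_type01_descr *td =
    (struct scsi_read_toc_type01_descr *)(buf + sizeof(*th));
int i, ntracks = th->last - th->first + 1;

for (i = 0; i < ntracks; i++)
	printf("track %u starts at LBA %u\n", td[i].track_number,
	    scsi_4btoul(td[i].track_start));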
struct scsi_read_cd_capacity
{
u_int8_t op_code;
@@ -252,9 +346,11 @@ struct scsi_read_dvd_structure
#define READ_TOC 0x43 /* cdrom read TOC */
#define READ_HEADER 0x44 /* cdrom read header */
#define PLAY_10 0x45 /* cdrom play 'play audio' mode */
+#define GET_CONFIGURATION 0x46 /* Get device configuration */
#define PLAY_MSF 0x47 /* cdrom play Min,Sec,Frames mode */
#define PLAY_TRACK 0x48 /* cdrom play track/index mode */
#define PLAY_TRACK_REL 0x49 /* cdrom play track/index mode */
+#define GET_EVENT_STATUS 0x4a /* Get event status notification */
#define PAUSE 0x4b /* cdrom pause in 'play audio' mode */
#define SEND_KEY 0xa3 /* dvd send key command */
#define REPORT_KEY 0xa4 /* dvd report key command */
@@ -262,6 +358,7 @@ struct scsi_read_dvd_structure
#define PLAY_TRACK_REL_BIG 0xa9 /* cdrom play track/index mode */
#define READ_DVD_STRUCTURE 0xad /* read dvd structure */
#define SET_CD_SPEED 0xbb /* set c/dvd speed */
+#define MECHANISM_STATUS 0xbd /* get status of c/dvd mechanics */
struct scsi_report_key_data_header
{
@@ -686,6 +783,37 @@ struct cd_audio_page
#define RIGHT_PORT 1
};
+struct scsi_cddvd_capabilities_page_sd {
+ uint8_t reserved;
+ uint8_t rotation_control;
+ uint8_t write_speed_supported[2];
+};
+
+struct scsi_cddvd_capabilities_page {
+ uint8_t page_code;
+#define SMS_CDDVD_CAPS_PAGE 0x2a
+ uint8_t page_length;
+ uint8_t caps1;
+ uint8_t caps2;
+ uint8_t caps3;
+ uint8_t caps4;
+ uint8_t caps5;
+ uint8_t caps6;
+ uint8_t obsolete[2];
+ uint8_t nvol_levels[2];
+ uint8_t buffer_size[2];
+ uint8_t obsolete2[2];
+ uint8_t reserved;
+ uint8_t digital;
+ uint8_t obsolete3;
+ uint8_t copy_management;
+ uint8_t reserved2;
+ uint8_t rotation_control;
+ uint8_t cur_write_speed;
+ uint8_t num_speed_descr;
+ struct scsi_cddvd_capabilities_page_sd speed_descr[];
+};
+
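/*
 * Editorial sketch, not part of the patch: the trailing flexible array
 * holds num_speed_descr entries, e.g. to list a drive's supported write
 * speeds (scsi_2btoul() from scsi_all.h; page is a hypothetical pointer
 * to a fetched copy of this mode page):
 */
int i;

for (i = 0; i < page->num_speed_descr; i++)
	printf("write speed %u kB/s\n",
	    scsi_2btoul(page->speed_descr[i].write_speed_supported));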
union cd_pages
{
struct cd_audio_page audio;
diff --git a/sys/cam/scsi/scsi_ch.c b/sys/cam/scsi/scsi_ch.c
index b83bc53..bc398e7 100644
--- a/sys/cam/scsi/scsi_ch.c
+++ b/sys/cam/scsi/scsi_ch.c
@@ -337,7 +337,8 @@ chasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg)
if (cgd->protocol != PROTO_SCSI)
break;
-
+ if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
+ break;
if (SID_TYPE(&cgd->inq_data)!= T_CHANGER)
break;
@@ -654,11 +655,13 @@ chdone(struct cam_periph *periph, union ccb *done_ccb)
*/
return;
} else if (error != 0) {
- int retry_scheduled;
struct scsi_mode_sense_6 *sms;
+ int frozen, retry_scheduled;
sms = (struct scsi_mode_sense_6 *)
done_ccb->csio.cdb_io.cdb_bytes;
+ frozen = (done_ccb->ccb_h.status &
+ CAM_DEV_QFRZN) != 0;
/*
* Check to see if block descriptors were
@@ -669,7 +672,8 @@ chdone(struct cam_periph *periph, union ccb *done_ccb)
* block descriptors were disabled, enable
* them and re-send the command.
*/
- if (sms->byte2 & SMS_DBD) {
+ if ((sms->byte2 & SMS_DBD) != 0 &&
+ (periph->flags & CAM_PERIPH_INVALID) == 0) {
sms->byte2 &= ~SMS_DBD;
xpt_action(done_ccb);
softc->quirks |= CH_Q_NO_DBD;
@@ -678,7 +682,7 @@ chdone(struct cam_periph *periph, union ccb *done_ccb)
retry_scheduled = 0;
/* Don't wedge this device's queue */
- if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
+ if (frozen)
cam_release_devq(done_ccb->ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index fe629e0..0fa5a11 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -1667,7 +1667,8 @@ daasync(void *callback_arg, u_int32_t code,
if (cgd->protocol != PROTO_SCSI)
break;
-
+ if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
+ break;
if (SID_TYPE(&cgd->inq_data) != T_DIRECT
&& SID_TYPE(&cgd->inq_data) != T_RBC
&& SID_TYPE(&cgd->inq_data) != T_OPTICAL)
@@ -3047,6 +3048,16 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
TAILQ_INIT(&queue);
TAILQ_CONCAT(&queue, &softc->delete_run_queue.queue, bio_queue);
softc->delete_run_queue.insert_point = NULL;
+ /*
+ * Normally, the xpt_release_ccb() above would make sure
+ * that when we have more work to do, that work would
+ * get kicked off. However, we specifically keep
+ * delete_running set to 0 before the call above to
+ * allow other I/O to progress when many BIO_DELETE
+ * requests are pushed down. We set delete_running to 0
+ * and call daschedule again so that we don't stall if
+ * there are no other I/Os pending apart from BIO_DELETEs.
+ */
softc->delete_running = 0;
daschedule(periph);
cam_periph_unlock(periph);
@@ -3447,7 +3458,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
* Disable queue sorting for non-rotational media
* by default.
*/
- u_int old_rate = softc->disk->d_rotation_rate;
+ u_int16_t old_rate = softc->disk->d_rotation_rate;
softc->disk->d_rotation_rate =
scsi_2btoul(bdc->medium_rotation_rate);
diff --git a/sys/cam/scsi/scsi_low.c b/sys/cam/scsi/scsi_low.c
index 6e6a2e9..8ca669e 100644
--- a/sys/cam/scsi/scsi_low.c
+++ b/sys/cam/scsi/scsi_low.c
@@ -326,15 +326,13 @@ scsi_low_find_ccb(slp, target, lun, osdep)
if ((cb = slp->sl_Qnexus) != NULL && cb->osdep == osdep)
return cb;
- for (cb = TAILQ_FIRST(&slp->sl_start); cb != NULL;
- cb = TAILQ_NEXT(cb, ccb_chain))
+ TAILQ_FOREACH(cb, &slp->sl_start, ccb_chain)
{
if (cb->osdep == osdep)
return cb;
}
- for (cb = TAILQ_FIRST(&li->li_discq); cb != NULL;
- cb = TAILQ_NEXT(cb, ccb_chain))
+ TAILQ_FOREACH(cb, &li->li_discq, ccb_chain)
{
if (cb->osdep == osdep)
return cb;
@@ -4184,8 +4182,7 @@ scsi_low_info(slp, ti, s)
printf(">>>>> SCSI_LOW_INFO(0x%lx): %s\n", (u_long) slp->sl_Tnexus, s);
if (ti == NULL)
{
- for (ti = TAILQ_FIRST(&slp->sl_titab); ti != NULL;
- ti = TAILQ_NEXT(ti, ti_chain))
+ TAILQ_FOREACH(ti, &slp->sl_titab, ti_chain)
{
scsi_low_print(slp, ti);
}
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
index 158e90f..174151e 100644
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
@@ -77,6 +77,7 @@ struct pass_softc {
u_int8_t pd_type;
union ccb saved_ccb;
int open_count;
+ u_int maxio;
struct devstat *device_stats;
struct cdev *dev;
struct cdev *alias_dev;
@@ -366,6 +367,13 @@ passregister(struct cam_periph *periph, void *arg)
cpi.ccb_h.func_code = XPT_PATH_INQ;
xpt_action((union ccb *)&cpi);
+ if (cpi.maxio == 0)
+ softc->maxio = DFLTPHYS; /* traditional default */
+ else if (cpi.maxio > MAXPHYS)
+ softc->maxio = MAXPHYS; /* for safety */
+ else
+ softc->maxio = cpi.maxio; /* real value */
+
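/*
 * Editorial note, not part of the patch: the clamp above is repeated
 * verbatim in sg(4) and targ(4) below; it is equivalent to this
 * hypothetical helper:
 */
static u_int
periph_maxio(u_int cpi_maxio)
{
	if (cpi_maxio == 0)
		return (DFLTPHYS);	/* traditional default */
	if (cpi_maxio > MAXPHYS)
		return (MAXPHYS);	/* for safety */
	return (cpi_maxio);		/* real value */
}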
/*
* We pass in 0 for a blocksize, since we don't
* know what the blocksize of this device is, if
@@ -659,7 +667,7 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
* Dropping it here is reasonably safe.
*/
cam_periph_unlock(periph);
- error = cam_periph_mapmem(ccb, &mapinfo);
+ error = cam_periph_mapmem(ccb, &mapinfo, softc->maxio);
cam_periph_lock(periph);
/*
diff --git a/sys/cam/scsi/scsi_pt.c b/sys/cam/scsi/scsi_pt.c
index f34748c..15240da 100644
--- a/sys/cam/scsi/scsi_pt.c
+++ b/sys/cam/scsi/scsi_pt.c
@@ -366,7 +366,8 @@ ptasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg)
if (cgd->protocol != PROTO_SCSI)
break;
-
+ if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
+ break;
if (SID_TYPE(&cgd->inq_data) != T_PROCESSOR)
break;
diff --git a/sys/cam/scsi/scsi_sa.c b/sys/cam/scsi/scsi_sa.c
index 0480854..d0a2811 100644
--- a/sys/cam/scsi/scsi_sa.c
+++ b/sys/cam/scsi/scsi_sa.c
@@ -2254,7 +2254,8 @@ saasync(void *callback_arg, u_int32_t code,
if (cgd->protocol != PROTO_SCSI)
break;
-
+ if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED)
+ break;
if (SID_TYPE(&cgd->inq_data) != T_SEQUENTIAL)
break;
diff --git a/sys/cam/scsi/scsi_sg.c b/sys/cam/scsi/scsi_sg.c
index 3e80ac3..3e13003 100644
--- a/sys/cam/scsi/scsi_sg.c
+++ b/sys/cam/scsi/scsi_sg.c
@@ -99,6 +99,7 @@ struct sg_softc {
sg_state state;
sg_flags flags;
int open_count;
+ u_int maxio;
struct devstat *device_stats;
TAILQ_HEAD(, sg_rdwr) rdwr_done;
struct cdev *dev;
@@ -325,6 +326,13 @@ sgregister(struct cam_periph *periph, void *arg)
cpi.ccb_h.func_code = XPT_PATH_INQ;
xpt_action((union ccb *)&cpi);
+ if (cpi.maxio == 0)
+ softc->maxio = DFLTPHYS; /* traditional default */
+ else if (cpi.maxio > MAXPHYS)
+ softc->maxio = MAXPHYS; /* for safety */
+ else
+ softc->maxio = cpi.maxio; /* real value */
+
/*
 * We pass in 0 for the blocksize, since we don't know what the
* blocksize of the device is, if it even has a blocksize.
@@ -894,7 +902,7 @@ sgsendccb(struct cam_periph *periph, union ccb *ccb)
* need for additional checks.
*/
cam_periph_unlock(periph);
- error = cam_periph_mapmem(ccb, &mapinfo);
+ error = cam_periph_mapmem(ccb, &mapinfo, softc->maxio);
cam_periph_lock(periph);
if (error)
return (error);
diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c
index 42dc152..f2504ab 100644
--- a/sys/cam/scsi/scsi_target.c
+++ b/sys/cam/scsi/scsi_target.c
@@ -94,6 +94,7 @@ struct targ_softc {
struct cam_periph *periph;
struct cam_path *path;
targ_state state;
+ u_int maxio;
struct selinfo read_select;
struct devstat device_stats;
};
@@ -403,6 +404,12 @@ targenable(struct targ_softc *softc, struct cam_path *path, int grp6_len,
status = CAM_FUNC_NOTAVAIL;
goto enable_fail;
}
+ if (cpi.maxio == 0)
+ softc->maxio = DFLTPHYS; /* traditional default */
+ else if (cpi.maxio > MAXPHYS)
+ softc->maxio = MAXPHYS; /* for safety */
+ else
+ softc->maxio = cpi.maxio; /* real value */
/* Destroy any periph on our path if it is disabled */
periph = cam_periph_find(path, "targ");
@@ -725,7 +732,7 @@ targsendccb(struct targ_softc *softc, union ccb *ccb,
if ((ccb_h->func_code == XPT_CONT_TARGET_IO) ||
(ccb_h->func_code == XPT_DEV_MATCH)) {
- error = cam_periph_mapmem(ccb, mapinfo);
+ error = cam_periph_mapmem(ccb, mapinfo, softc->maxio);
/*
* cam_periph_mapmem returned an error, we can't continue.
diff --git a/sys/cam/scsi/scsi_xpt.c b/sys/cam/scsi/scsi_xpt.c
index 57bcfb2..767aa0d 100644
--- a/sys/cam/scsi/scsi_xpt.c
+++ b/sys/cam/scsi/scsi_xpt.c
@@ -1126,6 +1126,7 @@ probedone(struct cam_periph *periph, union ccb *done_ccb)
{
probe_softc *softc;
struct cam_path *path;
+ struct scsi_inquiry_data *inq_buf;
u_int32_t priority;
CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("probedone\n"));
@@ -1165,7 +1166,6 @@ out:
case PROBE_FULL_INQUIRY:
{
if (cam_ccb_status(done_ccb) == CAM_REQ_CMP) {
- struct scsi_inquiry_data *inq_buf;
u_int8_t periph_qual;
path->device->flags |= CAM_DEV_INQUIRY_DATA_VALID;
@@ -1174,7 +1174,8 @@ out:
periph_qual = SID_QUAL(inq_buf);
- if (periph_qual == SID_QUAL_LU_CONNECTED) {
+ if (periph_qual == SID_QUAL_LU_CONNECTED ||
+ periph_qual == SID_QUAL_LU_OFFLINE) {
u_int8_t len;
/*
@@ -1350,10 +1351,10 @@ out:
probe_purge_old(path, lp, softc->flags);
lp = NULL;
}
+ inq_buf = &path->device->inq_data;
if (path->device->flags & CAM_DEV_INQUIRY_DATA_VALID &&
- SID_QUAL(&path->device->inq_data) == SID_QUAL_LU_CONNECTED) {
- struct scsi_inquiry_data *inq_buf;
- inq_buf = &path->device->inq_data;
+ (SID_QUAL(inq_buf) == SID_QUAL_LU_CONNECTED ||
+ SID_QUAL(inq_buf) == SID_QUAL_LU_OFFLINE)) {
if (INQ_DATA_TQ_ENABLED(inq_buf))
PROBE_SET_ACTION(softc, PROBE_MODE_SENSE);
else
diff --git a/sys/cddl/compat/opensolaris/sys/kstat.h b/sys/cddl/compat/opensolaris/sys/kstat.h
index acf6626..fdd3aa7 100644
--- a/sys/cddl/compat/opensolaris/sys/kstat.h
+++ b/sys/cddl/compat/opensolaris/sys/kstat.h
@@ -35,6 +35,9 @@
#define KSTAT_FLAG_VIRTUAL 0x01
+#define KSTAT_READ 0
+#define KSTAT_WRITE 1
+
typedef struct kstat {
void *ks_data;
u_int ks_ndata;
@@ -42,6 +45,8 @@ typedef struct kstat {
struct sysctl_ctx_list ks_sysctl_ctx;
struct sysctl_oid *ks_sysctl_root;
#endif
+ int (*ks_update)(struct kstat *, int); /* dynamic update */
+ void *ks_private; /* arbitrary provider-private data */
} kstat_t;
typedef struct kstat_named {
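/*
 * Editorial sketch, not part of the patch: a provider-side ks_update
 * callback of the kind the new fields enable.  struct mymod_stats and
 * the EACCES-on-write convention are assumptions for illustration.
 */
static int
mymod_kstat_update(struct kstat *ksp, int rw)
{
	struct mymod_stats *st = ksp->ks_private;	/* hypothetical */

	if (rw == KSTAT_WRITE)
		return (EACCES);		/* statistics are read-only */
	bcopy(st, ksp->ks_data, sizeof(*st));	/* refresh before read */
	return (0);
}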
diff --git a/sys/cddl/contrib/opensolaris/common/avl/avl.c b/sys/cddl/contrib/opensolaris/common/avl/avl.c
index 9d86242..2349aba 100644
--- a/sys/cddl/contrib/opensolaris/common/avl/avl.c
+++ b/sys/cddl/contrib/opensolaris/common/avl/avl.c
@@ -25,6 +25,7 @@
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -635,14 +636,17 @@ avl_add(avl_tree_t *tree, void *new_node)
/*
* This is unfortunate. We want to call panic() here, even for
* non-DEBUG kernels. In userland, however, we can't depend on anything
- * in libc or else the rtld build process gets confused. So, all we can
- * do in userland is resort to a normal ASSERT().
+ * in libc or else the rtld build process gets confused.
+ * Thankfully, rtld provides us with its own assfail() so we can use
+ * that here. We use assfail() directly to get a nice error message
+ * in the core - much like what panic() does for crashdumps.
*/
if (avl_find(tree, new_node, &where) != NULL)
#ifdef _KERNEL
panic("avl_find() succeeded inside avl_add()");
#else
- ASSERT(0);
+ (void) assfail("avl_find() succeeded inside avl_add()",
+ __FILE__, __LINE__);
#endif
avl_insert(tree, new_node, where);
}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
index 52a355d..d59fbf0 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
@@ -129,15 +129,15 @@ zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
static void
zfeature_register(spa_feature_t fid, const char *guid, const char *name,
- const char *desc, boolean_t readonly, boolean_t mos,
- boolean_t activate_on_enable, const spa_feature_t *deps)
+ const char *desc, zfeature_flags_t flags, const spa_feature_t *deps)
{
zfeature_info_t *feature = &spa_feature_table[fid];
static spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
ASSERT(name != NULL);
ASSERT(desc != NULL);
- ASSERT(!readonly || !mos);
+ ASSERT((flags & ZFEATURE_FLAG_READONLY_COMPAT) == 0 ||
+ (flags & ZFEATURE_FLAG_MOS) == 0);
ASSERT3U(fid, <, SPA_FEATURES);
ASSERT(zfeature_is_valid_guid(guid));
@@ -148,9 +148,7 @@ zfeature_register(spa_feature_t fid, const char *guid, const char *name,
feature->fi_guid = guid;
feature->fi_uname = name;
feature->fi_desc = desc;
- feature->fi_can_readonly = readonly;
- feature->fi_mos = mos;
- feature->fi_activate_on_enable = activate_on_enable;
+ feature->fi_flags = flags;
feature->fi_depends = deps;
}
@@ -159,45 +157,46 @@ zpool_feature_init(void)
{
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
"com.delphix:async_destroy", "async_destroy",
- "Destroy filesystems asynchronously.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Destroy filesystems asynchronously.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
"com.delphix:empty_bpobj", "empty_bpobj",
- "Snapshots use less space.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Snapshots use less space.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
"org.illumos:lz4_compress", "lz4_compress",
- "LZ4 compression algorithm support.", B_FALSE, B_FALSE,
- B_TRUE, NULL);
+ "LZ4 compression algorithm support.",
+ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, NULL);
zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
"com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump",
- "Crash dumps to multiple vdev pools.", B_FALSE, B_FALSE,
- B_FALSE, NULL);
+ "Crash dumps to multiple vdev pools.",
+ 0, NULL);
zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM,
"com.delphix:spacemap_histogram", "spacemap_histogram",
- "Spacemaps maintain space histograms.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Spacemaps maintain space histograms.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_ENABLED_TXG,
"com.delphix:enabled_txg", "enabled_txg",
- "Record txg at which a feature is enabled", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Record txg at which a feature is enabled",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG,
SPA_FEATURE_NONE };
zfeature_register(SPA_FEATURE_HOLE_BIRTH,
"com.delphix:hole_birth", "hole_birth",
"Retain hole birth txg for more precise zfs send",
- B_FALSE, B_TRUE, B_TRUE, hole_birth_deps);
+ ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
+ hole_birth_deps);
zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET,
"com.delphix:extensible_dataset", "extensible_dataset",
"Enhanced dataset functionality, used by other features.",
- B_FALSE, B_FALSE, B_FALSE, NULL);
+ 0, NULL);
static const spa_feature_t bookmarks_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
@@ -206,7 +205,7 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_BOOKMARKS,
"com.delphix:bookmarks", "bookmarks",
"\"zfs bookmark\" command",
- B_TRUE, B_FALSE, B_FALSE, bookmarks_deps);
+ ZFEATURE_FLAG_READONLY_COMPAT, bookmarks_deps);
static const spa_feature_t filesystem_limits_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
@@ -214,13 +213,14 @@ zpool_feature_init(void)
};
zfeature_register(SPA_FEATURE_FS_SS_LIMIT,
"com.joyent:filesystem_limits", "filesystem_limits",
- "Filesystem and snapshot limits.", B_TRUE, B_FALSE, B_FALSE,
- filesystem_limits_deps);
+ "Filesystem and snapshot limits.",
+ ZFEATURE_FLAG_READONLY_COMPAT, filesystem_limits_deps);
zfeature_register(SPA_FEATURE_EMBEDDED_DATA,
"com.delphix:embedded_data", "embedded_data",
"Blocks which compress very well use even less space.",
- B_FALSE, B_TRUE, B_TRUE, NULL);
+ ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
+ NULL);
static const spa_feature_t large_blocks_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
@@ -228,6 +228,6 @@ zpool_feature_init(void)
};
zfeature_register(SPA_FEATURE_LARGE_BLOCKS,
"org.open-zfs:large_blocks", "large_blocks",
- "Support for blocks larger than 128KB.", B_FALSE, B_FALSE, B_FALSE,
- large_blocks_deps);
+ "Support for blocks larger than 128KB.",
+ ZFEATURE_FLAG_PER_DATASET, large_blocks_deps);
}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
index 4ffe435..0e88a9a 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
@@ -56,15 +56,23 @@ typedef enum spa_feature {
#define SPA_FEATURE_DISABLED (-1ULL)
+typedef enum zfeature_flags {
+ /* Can open pool readonly even if this feature is not supported. */
+ ZFEATURE_FLAG_READONLY_COMPAT = (1 << 0),
+ /* Is this feature necessary to read the MOS? */
+ ZFEATURE_FLAG_MOS = (1 << 1),
+ /* Activate this feature at the same time it is enabled. */
+ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2),
+ /* Each dataset has a field set if it has ever used this feature. */
+ ZFEATURE_FLAG_PER_DATASET = (1 << 3)
+} zfeature_flags_t;
+
typedef struct zfeature_info {
spa_feature_t fi_feature;
const char *fi_uname; /* User-facing feature name */
const char *fi_guid; /* On-disk feature identifier */
const char *fi_desc; /* Feature description */
- boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
- boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
- /* Activate this feature at the same time it is enabled */
- boolean_t fi_activate_on_enable;
+ zfeature_flags_t fi_flags;
/* array of dependencies, terminated by SPA_FEATURE_NONE */
const spa_feature_t *fi_depends;
} zfeature_info_t;
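/*
 * Editorial sketch, not part of the patch: call sites that tested the
 * removed booleans now test bits in fi_flags, e.g. what used to read
 * "if (feature->fi_can_readonly)" becomes:
 */
if (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) {
	/* pool may be imported read-only without this feature */
}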
diff --git a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
index 4c7e225..77c7b1d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
+++ b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
@@ -22,7 +22,9 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
+# Copyright (c) 2011, 2014 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
#
#
@@ -36,6 +38,7 @@ ZFS_COMMON_OBJS += \
blkptr.o \
bpobj.o \
bptree.o \
+ bqueue.o \
dbuf.o \
ddt.o \
ddt_zap.o \
@@ -65,6 +68,7 @@ ZFS_COMMON_OBJS += \
lz4.o \
lzjb.o \
metaslab.o \
+ multilist.o \
range_tree.o \
refcount.o \
rrwlock.o \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 30aac16..b053993 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -20,9 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -81,9 +82,9 @@
* types of locks: 1) the hash table lock array, and 2) the
* arc list locks.
*
- * Buffers do not have their own mutexs, rather they rely on the
- * hash table mutexs for the bulk of their protection (i.e. most
- * fields in the arc_buf_hdr_t are protected by these mutexs).
+ * Buffers do not have their own mutexes, rather they rely on the
+ * hash table mutexes for the bulk of their protection (i.e. most
+ * fields in the arc_buf_hdr_t are protected by these mutexes).
*
* buf_hash_find() returns the appropriate mutex (held) when it
* locates the requested buffer in the hash table. It returns
@@ -110,7 +111,7 @@
* Note that the majority of the performance stats are manipulated
* with atomic operations.
*
- * The L2ARC uses the l2arc_buflist_mtx global mutex for the following:
+ * The L2ARC uses the l2ad_mtx on each vdev for the following:
*
* - L2ARC buflist creation
* - L2ARC buflist eviction
@@ -128,6 +129,7 @@
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
+#include <sys/multilist.h>
#ifdef _KERNEL
#include <sys/dnlc.h>
#endif
@@ -148,32 +150,56 @@ int arc_procfd;
#endif
#endif /* illumos */
-static kmutex_t arc_reclaim_thr_lock;
-static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
-static uint8_t arc_thread_exit;
+static kmutex_t arc_reclaim_lock;
+static kcondvar_t arc_reclaim_thread_cv;
+static boolean_t arc_reclaim_thread_exit;
+static kcondvar_t arc_reclaim_waiters_cv;
-#define ARC_REDUCE_DNLC_PERCENT 3
-uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT;
+static kmutex_t arc_user_evicts_lock;
+static kcondvar_t arc_user_evicts_cv;
+static boolean_t arc_user_evicts_thread_exit;
-typedef enum arc_reclaim_strategy {
- ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */
- ARC_RECLAIM_CONS /* Conservative reclaim strategy */
-} arc_reclaim_strategy_t;
+uint_t arc_reduce_dnlc_percent = 3;
/*
- * The number of iterations through arc_evict_*() before we
- * drop & reacquire the lock.
+ * The number of headers to evict in arc_evict_state_impl() before
+ * dropping the sublist lock and evicting from another sublist. A lower
+ * value means we're more likely to evict the "correct" header (i.e. the
+ * oldest header in the arc state), but comes with higher overhead
+ * (i.e. more invocations of arc_evict_state_impl()).
*/
-int arc_evict_iterations = 100;
+int zfs_arc_evict_batch_limit = 10;
+
+/*
+ * The number of sublists used for each of the arc state lists. If this
+ * is not set to a suitable value by the user, it will be configured to
+ * the number of CPUs on the system in arc_init().
+ */
+int zfs_arc_num_sublists_per_state = 0;
/* number of seconds before growing cache again */
static int arc_grow_retry = 60;
+/* shift of arc_c for calculating overflow limit in arc_get_data_buf */
+int zfs_arc_overflow_shift = 8;
+
/* shift of arc_c for calculating both min and max arc_p */
static int arc_p_min_shift = 4;
/* log2(fraction of arc to reclaim) */
-static int arc_shrink_shift = 5;
+static int arc_shrink_shift = 7;
+
+/*
+ * log2(fraction of ARC which must be free to allow growing).
+ * I.e., if there is less than arc_c >> arc_no_grow_shift free memory,
+ * when reading a new block into the ARC, we will evict an equal-sized block
+ * from the ARC.
+ *
+ * This must be less than arc_shrink_shift, so that when we shrink the ARC,
+ * we will still not allow it to grow.
+ */
+int arc_no_grow_shift = 5;
+
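/*
 * Editorial worked example, not from the patch: with arc_c = 4 GiB,
 * arc_no_grow_shift = 5 allows growth only while at least
 * 4 GiB >> 5 = 128 MiB is free, and arc_shrink_shift = 7 makes each
 * shrink release 4 GiB >> 7 = 32 MiB.
 */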
/*
* minimum lifespan of a prefetch block in clock ticks
@@ -187,16 +213,20 @@ static int arc_min_prefetch_lifespan;
int arc_lotsfree_percent = 10;
static int arc_dead;
-extern int zfs_prefetch_disable;
+extern boolean_t zfs_prefetch_disable;
/*
* The arc has filled available memory and has now warmed up.
*/
static boolean_t arc_warm;
+/*
+ * These tunables are for performance analysis.
+ */
uint64_t zfs_arc_max;
uint64_t zfs_arc_min;
uint64_t zfs_arc_meta_limit = 0;
+uint64_t zfs_arc_meta_min = 0;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
@@ -220,6 +250,7 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
+TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min);
TUNABLE_QUAD("vfs.zfs.arc_average_blocksize", &zfs_arc_average_blocksize);
TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift);
SYSCTL_DECL(_vfs_zfs);
@@ -306,31 +337,22 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta_limit,
* second level ARC benefit from these fast lookups.
*/
-#define ARCS_LOCK_PAD CACHE_LINE_SIZE
-struct arcs_lock {
- kmutex_t arcs_lock;
-#ifdef _KERNEL
- unsigned char pad[(ARCS_LOCK_PAD - sizeof (kmutex_t))];
-#endif
-};
-
-/*
- * must be power of two for mask use to work
- *
- */
-#define ARC_BUFC_NUMDATALISTS 16
-#define ARC_BUFC_NUMMETADATALISTS 16
-#define ARC_BUFC_NUMLISTS (ARC_BUFC_NUMMETADATALISTS + ARC_BUFC_NUMDATALISTS)
-
typedef struct arc_state {
- uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
- uint64_t arcs_size; /* total amount of data in this state */
- list_t arcs_lists[ARC_BUFC_NUMLISTS]; /* list of evictable buffers */
- struct arcs_lock arcs_locks[ARC_BUFC_NUMLISTS] __aligned(CACHE_LINE_SIZE);
+ /*
+ * list of evictable buffers
+ */
+ multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+ /*
+ * total amount of evictable data in this state
+ */
+ uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
+ /*
+ * total amount of data in this state; this includes: evictable,
+ * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
+ */
+ refcount_t arcs_size;
} arc_state_t;
-#define ARCS_LOCK(s, i) (&((s)->arcs_locks[(i)].arcs_lock))
-
/* The 6 states: */
static arc_state_t ARC_anon;
static arc_state_t ARC_mru;
@@ -356,8 +378,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_mfu_ghost_hits;
kstat_named_t arcstat_allocated;
kstat_named_t arcstat_deleted;
- kstat_named_t arcstat_stolen;
- kstat_named_t arcstat_recycle_miss;
/*
* Number of buffers that could not be evicted because the hash lock
* was held by another thread. The lock may not necessarily be held
@@ -371,9 +391,15 @@ typedef struct arc_stats {
* not from the spa we're trying to evict from.
*/
kstat_named_t arcstat_evict_skip;
+ /*
+ * Number of times arc_evict_state() was unable to evict enough
+ * buffers to reach its target amount.
+ */
+ kstat_named_t arcstat_evict_not_enough;
kstat_named_t arcstat_evict_l2_cached;
kstat_named_t arcstat_evict_l2_eligible;
kstat_named_t arcstat_evict_l2_ineligible;
+ kstat_named_t arcstat_evict_l2_skip;
kstat_named_t arcstat_hash_elements;
kstat_named_t arcstat_hash_elements_max;
kstat_named_t arcstat_hash_collisions;
@@ -384,9 +410,137 @@ typedef struct arc_stats {
kstat_named_t arcstat_c_min;
kstat_named_t arcstat_c_max;
kstat_named_t arcstat_size;
+ /*
+ * Number of bytes consumed by internal ARC structures necessary
+ * for tracking purposes; these structures are not actually
+ * backed by ARC buffers. This includes arc_buf_hdr_t structures
+ * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
+ * caches), and arc_buf_t structures (allocated via arc_buf_t
+ * cache).
+ */
kstat_named_t arcstat_hdr_size;
+ /*
+ * Number of bytes consumed by ARC buffers of type equal to
+ * ARC_BUFC_DATA. This is generally consumed by buffers backing
+ * on disk user data (e.g. plain file contents).
+ */
kstat_named_t arcstat_data_size;
+ /*
+ * Number of bytes consumed by ARC buffers of type equal to
+ * ARC_BUFC_METADATA. This is generally consumed by buffers
+ * backing on disk data that is used for internal ZFS
+ * structures (e.g. ZAP, dnode, indirect blocks, etc).
+ */
+ kstat_named_t arcstat_metadata_size;
+ /*
+ * Number of bytes consumed by various buffers and structures
+ * not actually backed with ARC buffers. This includes bonus
+ * buffers (allocated directly via zio_buf_* functions),
+ * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t
+ * cache), and dnode_t structures (allocated via dnode_t cache).
+ */
kstat_named_t arcstat_other_size;
+ /*
+ * Total number of bytes consumed by ARC buffers residing in the
+ * arc_anon state. This includes *all* buffers in the arc_anon
+ * state; e.g. data, metadata, evictable, and unevictable buffers
+ * are all included in this value.
+ */
+ kstat_named_t arcstat_anon_size;
+ /*
+ * Number of bytes consumed by ARC buffers that meet the
+ * following criteria: backing buffers of type ARC_BUFC_DATA,
+ * residing in the arc_anon state, and are eligible for eviction
+ * (e.g. have no outstanding holds on the buffer).
+ */
+ kstat_named_t arcstat_anon_evictable_data;
+ /*
+ * Number of bytes consumed by ARC buffers that meet the
+ * following criteria: backing buffers of type ARC_BUFC_METADATA,
+ * residing in the arc_anon state, and are eligible for eviction
+ * (e.g. have no outstanding holds on the buffer).
+ */
+ kstat_named_t arcstat_anon_evictable_metadata;
+ /*
+ * Total number of bytes consumed by ARC buffers residing in the
+ * arc_mru state. This includes *all* buffers in the arc_mru
+ * state; e.g. data, metadata, evictable, and unevictable buffers
+ * are all included in this value.
+ */
+ kstat_named_t arcstat_mru_size;
+ /*
+ * Number of bytes consumed by ARC buffers that meet the
+ * following criteria: backing buffers of type ARC_BUFC_DATA,
+ * residing in the arc_mru state, and are eligible for eviction
+ * (e.g. have no outstanding holds on the buffer).
+ */
+ kstat_named_t arcstat_mru_evictable_data;
+ /*
+ * Number of bytes consumed by ARC buffers that meet the
+ * following criteria: backing buffers of type ARC_BUFC_METADATA,
+ * residing in the arc_mru state, and are eligible for eviction
+ * (e.g. have no outstanding holds on the buffer).
+ */
+ kstat_named_t arcstat_mru_evictable_metadata;
+ /*
+ * Total number of bytes that *would have been* consumed by ARC
+ * buffers in the arc_mru_ghost state. The key thing to note
+ * here is that this size doesn't actually indicate
+ * RAM consumption. The ghost lists only consist of headers and
+ * don't actually have ARC buffers linked off of these headers.
+ * Thus, *if* the headers had associated ARC buffers, these
+ * buffers *would have* consumed this number of bytes.
+ */
+ kstat_named_t arcstat_mru_ghost_size;
+ /*
+ * Number of bytes that *would have been* consumed by ARC
+ * buffers that are eligible for eviction, of type
+ * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
+ */
+ kstat_named_t arcstat_mru_ghost_evictable_data;
+ /*
+ * Number of bytes that *would have been* consumed by ARC
+ * buffers that are eligible for eviction, of type
+ * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
+ */
+ kstat_named_t arcstat_mru_ghost_evictable_metadata;
+ /*
+ * Total number of bytes consumed by ARC buffers residing in the
+ * arc_mfu state. This includes *all* buffers in the arc_mfu
+ * state; e.g. data, metadata, evictable, and unevictable buffers
+ * are all included in this value.
+ */
+ kstat_named_t arcstat_mfu_size;
+ /*
+ * Number of bytes consumed by ARC buffers that are eligible for
+ * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
+ * state.
+ */
+ kstat_named_t arcstat_mfu_evictable_data;
+ /*
+ * Number of bytes consumed by ARC buffers that are eligible for
+ * eviction, of type ARC_BUFC_METADATA, and reside in the
+ * arc_mfu state.
+ */
+ kstat_named_t arcstat_mfu_evictable_metadata;
+ /*
+ * Total number of bytes that *would have been* consumed by ARC
+ * buffers in the arc_mfu_ghost state. See the comment above
+ * arcstat_mru_ghost_size for more details.
+ */
+ kstat_named_t arcstat_mfu_ghost_size;
+ /*
+ * Number of bytes that *would have been* consumed by ARC
+ * buffers that are eligible for eviction, of type
+ * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
+ */
+ kstat_named_t arcstat_mfu_ghost_evictable_data;
+ /*
+ * Number of bytes that *would have been* consumed by ARC
+ * buffers that are eligible for eviction, of type
+ * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
+ */
+ kstat_named_t arcstat_mfu_ghost_evictable_metadata;
kstat_named_t arcstat_l2_hits;
kstat_named_t arcstat_l2_misses;
kstat_named_t arcstat_l2_feeds;
@@ -396,9 +550,10 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_writes_sent;
kstat_named_t arcstat_l2_writes_done;
kstat_named_t arcstat_l2_writes_error;
- kstat_named_t arcstat_l2_writes_hdr_miss;
+ kstat_named_t arcstat_l2_writes_lock_retry;
kstat_named_t arcstat_l2_evict_lock_retry;
kstat_named_t arcstat_l2_evict_reading;
+ kstat_named_t arcstat_l2_evict_l1cached;
kstat_named_t arcstat_l2_free_on_write;
kstat_named_t arcstat_l2_cdata_free_on_write;
kstat_named_t arcstat_l2_abort_lowmem;
@@ -429,6 +584,9 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_meta_max;
+ kstat_named_t arcstat_meta_min;
+ kstat_named_t arcstat_sync_wait_for_async;
+ kstat_named_t arcstat_demand_hit_predictive_prefetch;
} arc_stats_t;
static arc_stats_t arc_stats = {
@@ -448,13 +606,13 @@ static arc_stats_t arc_stats = {
{ "mfu_ghost_hits", KSTAT_DATA_UINT64 },
{ "allocated", KSTAT_DATA_UINT64 },
{ "deleted", KSTAT_DATA_UINT64 },
- { "stolen", KSTAT_DATA_UINT64 },
- { "recycle_miss", KSTAT_DATA_UINT64 },
{ "mutex_miss", KSTAT_DATA_UINT64 },
{ "evict_skip", KSTAT_DATA_UINT64 },
+ { "evict_not_enough", KSTAT_DATA_UINT64 },
{ "evict_l2_cached", KSTAT_DATA_UINT64 },
{ "evict_l2_eligible", KSTAT_DATA_UINT64 },
{ "evict_l2_ineligible", KSTAT_DATA_UINT64 },
+ { "evict_l2_skip", KSTAT_DATA_UINT64 },
{ "hash_elements", KSTAT_DATA_UINT64 },
{ "hash_elements_max", KSTAT_DATA_UINT64 },
{ "hash_collisions", KSTAT_DATA_UINT64 },
@@ -467,7 +625,23 @@ static arc_stats_t arc_stats = {
{ "size", KSTAT_DATA_UINT64 },
{ "hdr_size", KSTAT_DATA_UINT64 },
{ "data_size", KSTAT_DATA_UINT64 },
+ { "metadata_size", KSTAT_DATA_UINT64 },
{ "other_size", KSTAT_DATA_UINT64 },
+ { "anon_size", KSTAT_DATA_UINT64 },
+ { "anon_evictable_data", KSTAT_DATA_UINT64 },
+ { "anon_evictable_metadata", KSTAT_DATA_UINT64 },
+ { "mru_size", KSTAT_DATA_UINT64 },
+ { "mru_evictable_data", KSTAT_DATA_UINT64 },
+ { "mru_evictable_metadata", KSTAT_DATA_UINT64 },
+ { "mru_ghost_size", KSTAT_DATA_UINT64 },
+ { "mru_ghost_evictable_data", KSTAT_DATA_UINT64 },
+ { "mru_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
+ { "mfu_size", KSTAT_DATA_UINT64 },
+ { "mfu_evictable_data", KSTAT_DATA_UINT64 },
+ { "mfu_evictable_metadata", KSTAT_DATA_UINT64 },
+ { "mfu_ghost_size", KSTAT_DATA_UINT64 },
+ { "mfu_ghost_evictable_data", KSTAT_DATA_UINT64 },
+ { "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
{ "l2_hits", KSTAT_DATA_UINT64 },
{ "l2_misses", KSTAT_DATA_UINT64 },
{ "l2_feeds", KSTAT_DATA_UINT64 },
@@ -477,9 +651,10 @@ static arc_stats_t arc_stats = {
{ "l2_writes_sent", KSTAT_DATA_UINT64 },
{ "l2_writes_done", KSTAT_DATA_UINT64 },
{ "l2_writes_error", KSTAT_DATA_UINT64 },
- { "l2_writes_hdr_miss", KSTAT_DATA_UINT64 },
+ { "l2_writes_lock_retry", KSTAT_DATA_UINT64 },
{ "l2_evict_lock_retry", KSTAT_DATA_UINT64 },
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
+ { "l2_evict_l1cached", KSTAT_DATA_UINT64 },
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
{ "l2_cdata_free_on_write", KSTAT_DATA_UINT64 },
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
@@ -509,7 +684,10 @@ static arc_stats_t arc_stats = {
{ "duplicate_reads", KSTAT_DATA_UINT64 },
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
- { "arc_meta_max", KSTAT_DATA_UINT64 }
+ { "arc_meta_max", KSTAT_DATA_UINT64 },
+ { "arc_meta_min", KSTAT_DATA_UINT64 },
+ { "sync_wait_for_async", KSTAT_DATA_UINT64 },
+ { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -572,6 +750,7 @@ static arc_state_t *arc_l2c_only;
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
+#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
@@ -582,8 +761,6 @@ static int arc_no_grow; /* Don't try to grow cache size */
static uint64_t arc_tempreserve;
static uint64_t arc_loaned_bytes;
-typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
-
typedef struct arc_callback arc_callback_t;
struct arc_callback {
@@ -604,32 +781,56 @@ struct arc_write_callback {
arc_buf_t *awcb_buf;
};
-struct arc_buf_hdr {
- /* protected by hash lock */
- dva_t b_dva;
- uint64_t b_birth;
- uint64_t b_cksum0;
-
+/*
+ * ARC buffers are separated into multiple structs as a memory saving measure:
+ * - Common fields struct, always defined, and embedded within it:
+ * - L2-only fields, always allocated but undefined when not in L2ARC
+ * - L1-only fields, only allocated when in L1ARC
+ *
+ * Buffer in L1 Buffer only in L2
+ * +------------------------+ +------------------------+
+ * | arc_buf_hdr_t | | arc_buf_hdr_t |
+ * | | | |
+ * | | | |
+ * | | | |
+ * +------------------------+ +------------------------+
+ * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
+ * | (undefined if L1-only) | | |
+ * +------------------------+ +------------------------+
+ * | l1arc_buf_hdr_t |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------+
+ *
+ * Because it's possible for the L2ARC to become extremely large, we can wind
+ * up eating a lot of memory in L2ARC buffer headers, so the size of a header
+ * is minimized by only allocating the fields necessary for an L1-cached buffer
+ * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
+ * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
+ * words in pointers. arc_hdr_realloc() is used to switch a header between
+ * these two allocation states.
+ */
+typedef struct l1arc_buf_hdr {
kmutex_t b_freeze_lock;
- zio_cksum_t *b_freeze_cksum;
+#ifdef ZFS_DEBUG
+ /*
+ * used for debugging with kmem_flags - by allocating and freeing
+ * b_thawed when the buffer is thawed, we get a record of the stack
+ * trace that thawed it.
+ */
void *b_thawed;
+#endif
- arc_buf_hdr_t *b_hash_next;
arc_buf_t *b_buf;
- arc_flags_t b_flags;
uint32_t b_datacnt;
-
- arc_callback_t *b_acb;
+ /* for waiting on writes to complete */
kcondvar_t b_cv;
- /* immutable */
- arc_buf_contents_t b_type;
- uint64_t b_size;
- uint64_t b_spa;
-
/* protected by arc state mutex */
arc_state_t *b_state;
- list_node_t b_arc_node;
+ multilist_node_t b_arc_node;
/* updated atomically */
clock_t b_arc_access;
@@ -637,8 +838,47 @@ struct arc_buf_hdr {
/* self protecting */
refcount_t b_refcnt;
- l2arc_buf_hdr_t *b_l2hdr;
+ arc_callback_t *b_acb;
+ /* temporary buffer holder for in-flight compressed data */
+ void *b_tmp_cdata;
+} l1arc_buf_hdr_t;
+
+typedef struct l2arc_dev l2arc_dev_t;
+
+typedef struct l2arc_buf_hdr {
+ /* protected by arc_buf_hdr mutex */
+ l2arc_dev_t *b_dev; /* L2ARC device */
+ uint64_t b_daddr; /* disk address, offset byte */
+ /* real alloc'd buffer size depending on b_compress applied */
+ int32_t b_asize;
+ uint8_t b_compress;
+
list_node_t b_l2node;
+} l2arc_buf_hdr_t;
+
+struct arc_buf_hdr {
+ /* protected by hash lock */
+ dva_t b_dva;
+ uint64_t b_birth;
+ /*
+ * Even though this checksum is only set/verified when a buffer is in
+ * the L1 cache, it needs to be in the set of common fields because it
+ * must be preserved from the time before a buffer is written out to
+ * L2ARC until after it is read back in.
+ */
+ zio_cksum_t *b_freeze_cksum;
+
+ arc_buf_hdr_t *b_hash_next;
+ arc_flags_t b_flags;
+
+ /* immutable */
+ int32_t b_size;
+ uint64_t b_spa;
+
+ /* L2ARC fields. Undefined when not in L2ARC. */
+ l2arc_buf_hdr_t b_l2hdr;
+ /* L1ARC fields. Undefined when in l2arc_only state */
+ l1arc_buf_hdr_t b_l1hdr;
};
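/*
 * Editorial sketch, not part of the patch: with the split header,
 * b_l1hdr is only valid when the header came from the full cache, so
 * accesses are guarded by HDR_HAS_L1HDR() (defined below), e.g.:
 */
uint32_t datacnt = 0;

if (HDR_HAS_L1HDR(hdr))
	datacnt = hdr->b_l1hdr.b_datacnt;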
#ifdef _KERNEL
@@ -662,7 +902,6 @@ sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS)
#endif
static arc_buf_t *arc_eviction_list;
-static kmutex_t arc_eviction_mtx;
static arc_buf_hdr_t arc_eviction_hdr;
#define GHOST_STATE(state) \
@@ -675,22 +914,29 @@ static arc_buf_hdr_t arc_eviction_hdr;
#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
-#define HDR_FREE_IN_PROGRESS(hdr) \
- ((hdr)->b_flags & ARC_FLAG_FREE_IN_PROGRESS)
+
#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
+#define HDR_L2COMPRESS(hdr) ((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
#define HDR_L2_READING(hdr) \
- ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS && \
- (hdr)->b_l2hdr != NULL)
+ (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
+ ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
+#define HDR_ISTYPE_METADATA(hdr) \
+ ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
+#define HDR_ISTYPE_DATA(hdr) (!HDR_ISTYPE_METADATA(hdr))
+
+#define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
+#define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+
/*
* Other sizes
*/
-#define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
-#define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
+#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
/*
* Hash table routines
@@ -737,6 +983,16 @@ uint64_t zfs_crc64_table[256];
#define L2ARC_FEED_SECS 1 /* caching interval secs */
#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
+/*
+ * Used to distinguish headers that are being processed by
+ * l2arc_write_buffers(), but have yet to be assigned to an l2arc disk
+ * address. This can happen when the header is added to the l2arc's list
+ * of buffers to write in the first stage of l2arc_write_buffers(), but
+ * has not yet been written out, which happens in the second stage of
+ * l2arc_write_buffers().
+ */
+#define L2ARC_ADDR_UNSET ((uint64_t)(-1))
+
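/*
 * Editorial sketch, not part of the patch: a consumer scanning the
 * buflist would skip headers still in stage one, e.g.:
 */
if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET)
	continue;	/* not yet assigned a device address */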
#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
@@ -770,21 +1026,21 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
&l2arc_norw, 0, "no reads during writes");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
- &ARC_anon.arcs_size, 0, "size of anonymous state");
+ &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_lsize, CTLFLAG_RD,
&ARC_anon.arcs_lsize[ARC_BUFC_METADATA], 0, "size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_lsize, CTLFLAG_RD,
&ARC_anon.arcs_lsize[ARC_BUFC_DATA], 0, "size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
- &ARC_mru.arcs_size, 0, "size of mru state");
+ &ARC_mru.arcs_size.rc_count, 0, "size of mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_lsize, CTLFLAG_RD,
&ARC_mru.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_lsize, CTLFLAG_RD,
&ARC_mru.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mru state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
- &ARC_mru_ghost.arcs_size, 0, "size of mru ghost state");
+ &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_lsize, CTLFLAG_RD,
&ARC_mru_ghost.arcs_lsize[ARC_BUFC_METADATA], 0,
"size of metadata in mru ghost state");
@@ -793,14 +1049,14 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_lsize, CTLFLAG_RD,
"size of data in mru ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
- &ARC_mfu.arcs_size, 0, "size of mfu state");
+ &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_lsize, CTLFLAG_RD,
&ARC_mfu.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_lsize, CTLFLAG_RD,
&ARC_mfu.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mfu state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
- &ARC_mfu_ghost.arcs_size, 0, "size of mfu ghost state");
+ &ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_lsize, CTLFLAG_RD,
&ARC_mfu_ghost.arcs_lsize[ARC_BUFC_METADATA], 0,
"size of metadata in mfu ghost state");
@@ -809,29 +1065,29 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_lsize, CTLFLAG_RD,
"size of data in mfu ghost state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
- &ARC_l2c_only.arcs_size, 0, "size of mru state");
+ &ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
/*
* L2ARC Internals
*/
-typedef struct l2arc_dev {
+struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
uint64_t l2ad_hand; /* next write location */
uint64_t l2ad_start; /* first addr on device */
uint64_t l2ad_end; /* last addr on device */
- uint64_t l2ad_evict; /* last addr eviction reached */
boolean_t l2ad_first; /* first sweep through */
boolean_t l2ad_writing; /* currently writing */
- list_t *l2ad_buflist; /* buffer list */
+ kmutex_t l2ad_mtx; /* lock for buffer list */
+ list_t l2ad_buflist; /* buffer list */
list_node_t l2ad_node; /* device list node */
-} l2arc_dev_t;
+ refcount_t l2ad_alloc; /* allocated bytes */
+};
static list_t L2ARC_dev_list; /* device list */
static list_t *l2arc_dev_list; /* device list pointer */
static kmutex_t l2arc_dev_mtx; /* device list mutex */
static l2arc_dev_t *l2arc_dev_last; /* last device used */
-static kmutex_t l2arc_buflist_mtx; /* mutex for all buflists */
static list_t L2ARC_free_on_write; /* free after write buf list */
static list_t *l2arc_free_on_write; /* free after write list ptr */
static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */
@@ -851,18 +1107,6 @@ typedef struct l2arc_write_callback {
arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
} l2arc_write_callback_t;
-struct l2arc_buf_hdr {
- /* protected by arc_buf_hdr mutex */
- l2arc_dev_t *b_dev; /* L2ARC device */
- uint64_t b_daddr; /* disk address, offset byte */
- /* compression applied to buffer data */
- enum zio_compress b_compress;
- /* real alloc'd buffer size depending on b_compress applied */
- int b_asize;
- /* temporary buffer holder for in-flight compressed data */
- void *b_tmp_cdata;
-};
-
typedef struct l2arc_data_free {
/* protected by l2arc_free_on_write_mtx */
void *l2df_data;
@@ -877,16 +1121,16 @@ static uint8_t l2arc_thread_exit;
static void arc_get_data_buf(arc_buf_t *);
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
-static int arc_evict_needed(arc_buf_contents_t);
-static void arc_evict_ghost(arc_state_t *, uint64_t, int64_t);
+static boolean_t arc_is_overflowing();
static void arc_buf_watch(arc_buf_t *);
+static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
+static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
+
static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
static void l2arc_read_done(zio_t *);
-static void l2arc_hdr_stat_add(void);
-static void l2arc_hdr_stat_remove(void);
-static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *);
+static boolean_t l2arc_compress_buf(arc_buf_hdr_t *);
static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
@@ -909,8 +1153,7 @@ buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
#define BUF_EMPTY(buf) \
((buf)->b_dva.dva_word[0] == 0 && \
- (buf)->b_dva.dva_word[1] == 0 && \
- (buf)->b_cksum0 == 0)
+ (buf)->b_dva.dva_word[1] == 0)
#define BUF_EQUAL(spa, dva, birth, buf) \
((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
@@ -923,7 +1166,6 @@ buf_discard_identity(arc_buf_hdr_t *hdr)
hdr->b_dva.dva_word[0] = 0;
hdr->b_dva.dva_word[1] = 0;
hdr->b_birth = 0;
- hdr->b_cksum0 = 0;
}
static arc_buf_hdr_t *
@@ -953,6 +1195,7 @@ buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
* equal to elem in the hash table, then the already existing element
* will be returned and the new element will not be inserted.
* Otherwise returns NULL.
+ * If lockp == NULL, the caller is assumed to already hold the hash lock.
*/
static arc_buf_hdr_t *
buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
@@ -965,8 +1208,14 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
ASSERT(!DVA_IS_EMPTY(&hdr->b_dva));
ASSERT(hdr->b_birth != 0);
ASSERT(!HDR_IN_HASH_TABLE(hdr));
- *lockp = hash_lock;
- mutex_enter(hash_lock);
+
+ if (lockp != NULL) {
+ *lockp = hash_lock;
+ mutex_enter(hash_lock);
+ } else {
+ ASSERT(MUTEX_HELD(hash_lock));
+ }
+
for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL;
fhdr = fhdr->b_hash_next, i++) {
if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr))
@@ -1021,7 +1270,8 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
/*
* Global data structures and functions for the buf kmem cache.
*/
-static kmem_cache_t *hdr_cache;
+static kmem_cache_t *hdr_full_cache;
+static kmem_cache_t *hdr_l2only_cache;
static kmem_cache_t *buf_cache;
static void
@@ -1033,7 +1283,8 @@ buf_fini(void)
(buf_hash_table.ht_mask + 1) * sizeof (void *));
for (i = 0; i < BUF_LOCKS; i++)
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
- kmem_cache_destroy(hdr_cache);
+ kmem_cache_destroy(hdr_full_cache);
+ kmem_cache_destroy(hdr_l2only_cache);
kmem_cache_destroy(buf_cache);
}
@@ -1043,15 +1294,28 @@ buf_fini(void)
*/
/* ARGSUSED */
static int
-hdr_cons(void *vbuf, void *unused, int kmflag)
+hdr_full_cons(void *vbuf, void *unused, int kmflag)
+{
+ arc_buf_hdr_t *hdr = vbuf;
+
+ bzero(hdr, HDR_FULL_SIZE);
+ cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
+ refcount_create(&hdr->b_l1hdr.b_refcnt);
+ mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
+ multilist_link_init(&hdr->b_l1hdr.b_arc_node);
+ arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
{
arc_buf_hdr_t *hdr = vbuf;
- bzero(hdr, sizeof (arc_buf_hdr_t));
- refcount_create(&hdr->b_refcnt);
- cv_init(&hdr->b_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&hdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
- arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+ bzero(hdr, HDR_L2ONLY_SIZE);
+ arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
return (0);
}
@@ -1075,15 +1339,26 @@ buf_cons(void *vbuf, void *unused, int kmflag)
*/
/* ARGSUSED */
static void
-hdr_dest(void *vbuf, void *unused)
+hdr_full_dest(void *vbuf, void *unused)
+{
+ arc_buf_hdr_t *hdr = vbuf;
+
+ ASSERT(BUF_EMPTY(hdr));
+ cv_destroy(&hdr->b_l1hdr.b_cv);
+ refcount_destroy(&hdr->b_l1hdr.b_refcnt);
+ mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+ arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+}
+
+/* ARGSUSED */
+static void
+hdr_l2only_dest(void *vbuf, void *unused)
{
arc_buf_hdr_t *hdr = vbuf;
ASSERT(BUF_EMPTY(hdr));
- refcount_destroy(&hdr->b_refcnt);
- cv_destroy(&hdr->b_cv);
- mutex_destroy(&hdr->b_freeze_lock);
- arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+ arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
}
/* ARGSUSED */
@@ -1109,7 +1384,7 @@ hdr_recl(void *unused)
* which is after we do arc_fini().
*/
if (!arc_dead)
- cv_signal(&arc_reclaim_thr_cv);
+ cv_signal(&arc_reclaim_thread_cv);
}
static void
@@ -1137,8 +1412,11 @@ retry:
goto retry;
}
- hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
- 0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
+ hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
+ 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+ hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
+ HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
+ NULL, NULL, 0);
buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
@@ -1152,6 +1430,116 @@ retry:
}
}
+/*
+ * Transition between the two allocation states for the arc_buf_hdr struct.
+ * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without
+ * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller
+ * version is used when a cache buffer is only in the L2ARC in order to reduce
+ * memory usage.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
+{
+ ASSERT(HDR_HAS_L2HDR(hdr));
+
+ arc_buf_hdr_t *nhdr;
+ l2arc_dev_t *dev = hdr->b_l2hdr.b_dev;
+
+ ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
+ (old == hdr_l2only_cache && new == hdr_full_cache));
+
+ nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
+
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+ buf_hash_remove(hdr);
+
+ bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+
+ if (new == hdr_full_cache) {
+ nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+ /*
+ * arc_access and arc_change_state need to be aware that a
+ * header has just come out of L2ARC, so we set its state to
+ * l2c_only even though it's about to change.
+ */
+ nhdr->b_l1hdr.b_state = arc_l2c_only;
+
+ /* Verify previous threads set b_tmp_cdata to NULL before freeing */
+ ASSERT3P(nhdr->b_l1hdr.b_tmp_cdata, ==, NULL);
+ } else {
+ ASSERT(hdr->b_l1hdr.b_buf == NULL);
+ ASSERT0(hdr->b_l1hdr.b_datacnt);
+
+ /*
+ * If we've reached here, we must have been called from
+ * arc_evict_hdr(), as such we should have already been
+ * removed from any ghost list we were previously on
+ * (which protects us from racing with arc_evict_state),
+ * thus no locking is needed during this check.
+ */
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+
+ /*
+ * A buffer must not be moved into the arc_l2c_only
+ * state if it's not finished being written out to the
+ * l2arc device. Otherwise, the b_l1hdr.b_tmp_cdata field
+ * might still be accessed, even though it has already been freed.
+ */
+ VERIFY(!HDR_L2_WRITING(hdr));
+ VERIFY3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
+
+#ifdef ZFS_DEBUG
+ if (hdr->b_l1hdr.b_thawed != NULL) {
+ kmem_free(hdr->b_l1hdr.b_thawed, 1);
+ hdr->b_l1hdr.b_thawed = NULL;
+ }
+#endif
+
+ nhdr->b_flags &= ~ARC_FLAG_HAS_L1HDR;
+ }
+ /*
+ * The header has been reallocated so we need to re-insert it into any
+ * lists it was on.
+ */
+ (void) buf_hash_insert(nhdr, NULL);
+
+ ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node));
+
+ mutex_enter(&dev->l2ad_mtx);
+
+ /*
+ * We must place the realloc'ed header back into the list at
+ * the same spot. Otherwise, if it's placed earlier in the list,
+ * l2arc_write_buffers() could find it during the function's
+ * write phase, and try to write it out to the l2arc.
+ */
+ list_insert_after(&dev->l2ad_buflist, hdr, nhdr);
+ list_remove(&dev->l2ad_buflist, hdr);
+
+ mutex_exit(&dev->l2ad_mtx);
+
+ /*
+ * Since we're using the pointer address as the tag when
+ * incrementing and decrementing the l2ad_alloc refcount, we
+ * must remove the old pointer (that we're about to destroy) and
+ * add the new pointer to the refcount. Otherwise we'd remove
+ * the wrong pointer address when calling arc_hdr_destroy() later.
+ */
+
+ (void) refcount_remove_many(&dev->l2ad_alloc,
+ hdr->b_l2hdr.b_asize, hdr);
+
+ (void) refcount_add_many(&dev->l2ad_alloc,
+ nhdr->b_l2hdr.b_asize, nhdr);
+
+ buf_discard_identity(hdr);
+ hdr->b_freeze_cksum = NULL;
+ kmem_cache_free(old, hdr);
+
+ return (nhdr);
+}
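A minimal usage sketch of the helper above, assuming the caller already holds HDR_LOCK(hdr) as the ASSERT requires; only these two cache pairings pass the ASSERT:

	/* demote: the buffer now lives only in the L2ARC */
	hdr = arc_hdr_realloc(hdr, hdr_full_cache, hdr_l2only_cache);

	/* promote: the buffer is being read back into the L1 cache */
	hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache);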
+
#define ARC_MINTIME (hz>>4) /* 62 ms */
static void
@@ -1162,16 +1550,15 @@ arc_cksum_verify(arc_buf_t *buf)
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;
- mutex_enter(&buf->b_hdr->b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum == NULL ||
- (buf->b_hdr->b_flags & ARC_FLAG_IO_ERROR)) {
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ if (buf->b_hdr->b_freeze_cksum == NULL || HDR_IO_ERROR(buf->b_hdr)) {
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
return;
}
fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
panic("buffer modified while frozen!");
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
}
static int
@@ -1180,10 +1567,10 @@ arc_cksum_equal(arc_buf_t *buf)
zio_cksum_t zc;
int equal;
- mutex_enter(&buf->b_hdr->b_freeze_lock);
+ mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
return (equal);
}
@@ -1194,15 +1581,15 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
return;
- mutex_enter(&buf->b_hdr->b_freeze_lock);
+ mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
if (buf->b_hdr->b_freeze_cksum != NULL) {
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
return;
}
buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
buf->b_hdr->b_freeze_cksum);
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
#ifdef illumos
arc_buf_watch(buf);
#endif /* illumos */
@@ -1253,30 +1640,58 @@ arc_buf_watch(arc_buf_t *buf)
}
#endif /* illumos */
+static arc_buf_contents_t
+arc_buf_type(arc_buf_hdr_t *hdr)
+{
+ if (HDR_ISTYPE_METADATA(hdr)) {
+ return (ARC_BUFC_METADATA);
+ } else {
+ return (ARC_BUFC_DATA);
+ }
+}
+
+static uint32_t
+arc_bufc_to_flags(arc_buf_contents_t type)
+{
+ switch (type) {
+ case ARC_BUFC_DATA:
+ /* metadata field is 0 if buffer contains normal data */
+ return (0);
+ case ARC_BUFC_METADATA:
+ return (ARC_FLAG_BUFC_METADATA);
+ default:
+ break;
+ }
+ panic("undefined ARC buffer type!");
+ return ((uint32_t)-1);
+}
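A hedged sketch of the invariant these two helpers maintain, assuming HDR_ISTYPE_METADATA() simply tests the ARC_FLAG_BUFC_METADATA bit:

	/* the content type round-trips through the flag word */
	ASSERT3U(arc_bufc_to_flags(arc_buf_type(hdr)), ==,
	    hdr->b_flags & ARC_FLAG_BUFC_METADATA);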
+
void
arc_buf_thaw(arc_buf_t *buf)
{
if (zfs_flags & ZFS_DEBUG_MODIFY) {
- if (buf->b_hdr->b_state != arc_anon)
+ if (buf->b_hdr->b_l1hdr.b_state != arc_anon)
panic("modifying non-anon buffer!");
- if (buf->b_hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS)
+ if (HDR_IO_IN_PROGRESS(buf->b_hdr))
panic("modifying buffer while i/o in progress!");
arc_cksum_verify(buf);
}
- mutex_enter(&buf->b_hdr->b_freeze_lock);
+ mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
if (buf->b_hdr->b_freeze_cksum != NULL) {
kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
buf->b_hdr->b_freeze_cksum = NULL;
}
+#ifdef ZFS_DEBUG
if (zfs_flags & ZFS_DEBUG_MODIFY) {
- if (buf->b_hdr->b_thawed)
- kmem_free(buf->b_hdr->b_thawed, 1);
- buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP);
+ if (buf->b_hdr->b_l1hdr.b_thawed != NULL)
+ kmem_free(buf->b_hdr->b_l1hdr.b_thawed, 1);
+ buf->b_hdr->b_l1hdr.b_thawed = kmem_alloc(1, KM_SLEEP);
}
+#endif
- mutex_exit(&buf->b_hdr->b_freeze_lock);
+ mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
#ifdef illumos
arc_buf_unwatch(buf);
@@ -1295,58 +1710,41 @@ arc_buf_freeze(arc_buf_t *buf)
mutex_enter(hash_lock);
ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
- buf->b_hdr->b_state == arc_anon);
+ buf->b_hdr->b_l1hdr.b_state == arc_anon);
arc_cksum_compute(buf, B_FALSE);
mutex_exit(hash_lock);
}
static void
-get_buf_info(arc_buf_hdr_t *hdr, arc_state_t *state, list_t **list, kmutex_t **lock)
-{
- uint64_t buf_hashid = buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth);
-
- if (hdr->b_type == ARC_BUFC_METADATA)
- buf_hashid &= (ARC_BUFC_NUMMETADATALISTS - 1);
- else {
- buf_hashid &= (ARC_BUFC_NUMDATALISTS - 1);
- buf_hashid += ARC_BUFC_NUMMETADATALISTS;
- }
-
- *list = &state->arcs_lists[buf_hashid];
- *lock = ARCS_LOCK(state, buf_hashid);
-}
-
-
-static void
add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
{
+ ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(MUTEX_HELD(hash_lock));
+ arc_state_t *state = hdr->b_l1hdr.b_state;
- if ((refcount_add(&hdr->b_refcnt, tag) == 1) &&
- (hdr->b_state != arc_anon)) {
- uint64_t delta = hdr->b_size * hdr->b_datacnt;
- uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type];
- list_t *list;
- kmutex_t *lock;
-
- get_buf_info(hdr, hdr->b_state, &list, &lock);
- ASSERT(!MUTEX_HELD(lock));
- mutex_enter(lock);
- ASSERT(list_link_active(&hdr->b_arc_node));
- list_remove(list, hdr);
- if (GHOST_STATE(hdr->b_state)) {
- ASSERT0(hdr->b_datacnt);
- ASSERT3P(hdr->b_buf, ==, NULL);
- delta = hdr->b_size;
+ if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
+ (state != arc_anon)) {
+ /* We don't use the L2-only state list. */
+ if (state != arc_l2c_only) {
+ arc_buf_contents_t type = arc_buf_type(hdr);
+ uint64_t delta = hdr->b_size * hdr->b_l1hdr.b_datacnt;
+ multilist_t *list = &state->arcs_list[type];
+ uint64_t *size = &state->arcs_lsize[type];
+
+ multilist_remove(list, hdr);
+
+ if (GHOST_STATE(state)) {
+ ASSERT0(hdr->b_l1hdr.b_datacnt);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ delta = hdr->b_size;
+ }
+ ASSERT(delta > 0);
+ ASSERT3U(*size, >=, delta);
+ atomic_add_64(size, -delta);
}
- ASSERT(delta > 0);
- ASSERT3U(*size, >=, delta);
- atomic_add_64(size, -delta);
- mutex_exit(lock);
/* remove the prefetch flag if we get a reference */
- if (hdr->b_flags & ARC_FLAG_PREFETCH)
- hdr->b_flags &= ~ARC_FLAG_PREFETCH;
+ hdr->b_flags &= ~ARC_FLAG_PREFETCH;
}
}
@@ -1354,104 +1752,112 @@ static int
remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
{
int cnt;
- arc_state_t *state = hdr->b_state;
+ arc_state_t *state = hdr->b_l1hdr.b_state;
+ ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
ASSERT(!GHOST_STATE(state));
- if (((cnt = refcount_remove(&hdr->b_refcnt, tag)) == 0) &&
+ /*
+ * arc_l2c_only counts as a ghost state so we don't need to explicitly
+ * check to prevent usage of the arc_l2c_only list.
+ */
+ if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
(state != arc_anon)) {
- uint64_t *size = &state->arcs_lsize[hdr->b_type];
- list_t *list;
- kmutex_t *lock;
-
- get_buf_info(hdr, state, &list, &lock);
- ASSERT(!MUTEX_HELD(lock));
- mutex_enter(lock);
- ASSERT(!list_link_active(&hdr->b_arc_node));
- list_insert_head(list, hdr);
- ASSERT(hdr->b_datacnt > 0);
- atomic_add_64(size, hdr->b_size * hdr->b_datacnt);
- mutex_exit(lock);
+ arc_buf_contents_t type = arc_buf_type(hdr);
+ multilist_t *list = &state->arcs_list[type];
+ uint64_t *size = &state->arcs_lsize[type];
+
+ multilist_insert(list, hdr);
+
+ ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+ atomic_add_64(size, hdr->b_size *
+ hdr->b_l1hdr.b_datacnt);
}
return (cnt);
}
/*
- * Move the supplied buffer to the indicated state. The mutex
+ * Move the supplied buffer to the indicated state. The hash lock
* for the buffer must be held by the caller.
*/
static void
arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
kmutex_t *hash_lock)
{
- arc_state_t *old_state = hdr->b_state;
- int64_t refcnt = refcount_count(&hdr->b_refcnt);
+ arc_state_t *old_state;
+ int64_t refcnt;
+ uint32_t datacnt;
uint64_t from_delta, to_delta;
- list_t *list;
- kmutex_t *lock;
+ arc_buf_contents_t buftype = arc_buf_type(hdr);
+
+ /*
+ * We almost always have an L1 hdr here, since we call arc_hdr_realloc()
+ * in arc_read() when bringing a buffer out of the L2ARC. However, the
+ * L1 hdr doesn't always exist when we change state to arc_anon before
+ * destroying a header, in which case reallocating to add the L1 hdr is
+ * pointless.
+ */
+ if (HDR_HAS_L1HDR(hdr)) {
+ old_state = hdr->b_l1hdr.b_state;
+ refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt);
+ datacnt = hdr->b_l1hdr.b_datacnt;
+ } else {
+ old_state = arc_l2c_only;
+ refcnt = 0;
+ datacnt = 0;
+ }
ASSERT(MUTEX_HELD(hash_lock));
ASSERT3P(new_state, !=, old_state);
- ASSERT(refcnt == 0 || hdr->b_datacnt > 0);
- ASSERT(hdr->b_datacnt == 0 || !GHOST_STATE(new_state));
- ASSERT(hdr->b_datacnt <= 1 || old_state != arc_anon);
+ ASSERT(refcnt == 0 || datacnt > 0);
+ ASSERT(!GHOST_STATE(new_state) || datacnt == 0);
+ ASSERT(old_state != arc_anon || datacnt <= 1);
- from_delta = to_delta = hdr->b_datacnt * hdr->b_size;
+ from_delta = to_delta = datacnt * hdr->b_size;
/*
* If this buffer is evictable, transfer it from the
* old state list to the new state list.
*/
if (refcnt == 0) {
- if (old_state != arc_anon) {
- int use_mutex;
- uint64_t *size = &old_state->arcs_lsize[hdr->b_type];
+ if (old_state != arc_anon && old_state != arc_l2c_only) {
+ uint64_t *size = &old_state->arcs_lsize[buftype];
- get_buf_info(hdr, old_state, &list, &lock);
- use_mutex = !MUTEX_HELD(lock);
- if (use_mutex)
- mutex_enter(lock);
-
- ASSERT(list_link_active(&hdr->b_arc_node));
- list_remove(list, hdr);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ multilist_remove(&old_state->arcs_list[buftype], hdr);
/*
* If prefetching out of the ghost cache,
* we will have a non-zero datacnt.
*/
- if (GHOST_STATE(old_state) && hdr->b_datacnt == 0) {
+ if (GHOST_STATE(old_state) && datacnt == 0) {
/* ghost elements have a ghost size */
- ASSERT(hdr->b_buf == NULL);
+ ASSERT(hdr->b_l1hdr.b_buf == NULL);
from_delta = hdr->b_size;
}
ASSERT3U(*size, >=, from_delta);
atomic_add_64(size, -from_delta);
-
- if (use_mutex)
- mutex_exit(lock);
}
- if (new_state != arc_anon) {
- int use_mutex;
- uint64_t *size = &new_state->arcs_lsize[hdr->b_type];
+ if (new_state != arc_anon && new_state != arc_l2c_only) {
+ uint64_t *size = &new_state->arcs_lsize[buftype];
- get_buf_info(hdr, new_state, &list, &lock);
- use_mutex = !MUTEX_HELD(lock);
- if (use_mutex)
- mutex_enter(lock);
-
- list_insert_head(list, hdr);
+ /*
+ * An L1 header always exists here, since if we're
+ * moving to some L1-cached state (i.e. not l2c_only or
+ * anonymous), we realloc the header to add an L1hdr
+ * beforehand.
+ */
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ multilist_insert(&new_state->arcs_list[buftype], hdr);
/* ghost elements have a ghost size */
if (GHOST_STATE(new_state)) {
- ASSERT(hdr->b_datacnt == 0);
- ASSERT(hdr->b_buf == NULL);
+ ASSERT0(datacnt);
+ ASSERT(hdr->b_l1hdr.b_buf == NULL);
to_delta = hdr->b_size;
}
atomic_add_64(size, to_delta);
-
- if (use_mutex)
- mutex_exit(lock);
}
}
@@ -1459,20 +1865,83 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr))
buf_hash_remove(hdr);
- /* adjust state sizes */
- if (to_delta)
- atomic_add_64(&new_state->arcs_size, to_delta);
- if (from_delta) {
- ASSERT3U(old_state->arcs_size, >=, from_delta);
- atomic_add_64(&old_state->arcs_size, -from_delta);
+ /* adjust state sizes (ignore arc_l2c_only) */
+
+ if (to_delta && new_state != arc_l2c_only) {
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ if (GHOST_STATE(new_state)) {
+ ASSERT0(datacnt);
+
+ /*
+			 * When moving a header to a ghost state, we first
+ * remove all arc buffers. Thus, we'll have a
+ * datacnt of zero, and no arc buffer to use for
+ * the reference. As a result, we use the arc
+ * header pointer for the reference.
+ */
+ (void) refcount_add_many(&new_state->arcs_size,
+ hdr->b_size, hdr);
+ } else {
+ ASSERT3U(datacnt, !=, 0);
+
+ /*
+ * Each individual buffer holds a unique reference,
+ * thus we must remove each of these references one
+ * at a time.
+ */
+ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
+ buf = buf->b_next) {
+ (void) refcount_add_many(&new_state->arcs_size,
+ hdr->b_size, buf);
+ }
+ }
+ }
+
+ if (from_delta && old_state != arc_l2c_only) {
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ if (GHOST_STATE(old_state)) {
+ /*
+ * When moving a header off of a ghost state,
+ * there's the possibility for datacnt to be
+ * non-zero. This is because we first add the
+ * arc buffer to the header prior to changing
+ * the header's state. Since we used the header
+ * for the reference when putting the header on
+ * the ghost state, we must balance that and use
+			 * the header when removing it from the ghost state
+			 * (even though datacnt is non-zero).
+ */
+
+ IMPLY(datacnt == 0, new_state == arc_anon ||
+ new_state == arc_l2c_only);
+
+ (void) refcount_remove_many(&old_state->arcs_size,
+ hdr->b_size, hdr);
+ } else {
+ ASSERT3P(datacnt, !=, 0);
+
+ /*
+ * Each individual buffer holds a unique reference,
+ * thus we must remove each of these references one
+ * at a time.
+ */
+ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
+ buf = buf->b_next) {
+ (void) refcount_remove_many(
+ &old_state->arcs_size, hdr->b_size, buf);
+ }
+ }
}
- hdr->b_state = new_state;
- /* adjust l2arc hdr stats */
- if (new_state == arc_l2c_only)
- l2arc_hdr_stat_add();
- else if (old_state == arc_l2c_only)
- l2arc_hdr_stat_remove();
+ if (HDR_HAS_L1HDR(hdr))
+ hdr->b_l1hdr.b_state = new_state;
+
+ /*
+ * L2 headers should never be on the L2 state list since they don't
+ * have L1 headers allocated.
+ */
+ ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+ multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
}
void
@@ -1484,6 +1953,9 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
case ARC_SPACE_DATA:
ARCSTAT_INCR(arcstat_data_size, space);
break;
+ case ARC_SPACE_META:
+ ARCSTAT_INCR(arcstat_metadata_size, space);
+ break;
case ARC_SPACE_OTHER:
ARCSTAT_INCR(arcstat_other_size, space);
break;
@@ -1495,7 +1967,9 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
break;
}
- ARCSTAT_INCR(arcstat_meta_used, space);
+ if (type != ARC_SPACE_DATA)
+ ARCSTAT_INCR(arcstat_meta_used, space);
+
atomic_add_64(&arc_size, space);
}
@@ -1508,6 +1982,9 @@ arc_space_return(uint64_t space, arc_space_type_t type)
case ARC_SPACE_DATA:
ARCSTAT_INCR(arcstat_data_size, -space);
break;
+ case ARC_SPACE_META:
+ ARCSTAT_INCR(arcstat_metadata_size, -space);
+ break;
case ARC_SPACE_OTHER:
ARCSTAT_INCR(arcstat_other_size, -space);
break;
@@ -1519,40 +1996,49 @@ arc_space_return(uint64_t space, arc_space_type_t type)
break;
}
- ASSERT(arc_meta_used >= space);
- if (arc_meta_max < arc_meta_used)
- arc_meta_max = arc_meta_used;
- ARCSTAT_INCR(arcstat_meta_used, -space);
+ if (type != ARC_SPACE_DATA) {
+ ASSERT(arc_meta_used >= space);
+ if (arc_meta_max < arc_meta_used)
+ arc_meta_max = arc_meta_used;
+ ARCSTAT_INCR(arcstat_meta_used, -space);
+ }
+
ASSERT(arc_size >= space);
atomic_add_64(&arc_size, -space);
}
arc_buf_t *
-arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
+arc_buf_alloc(spa_t *spa, int32_t size, void *tag, arc_buf_contents_t type)
{
arc_buf_hdr_t *hdr;
arc_buf_t *buf;
ASSERT3U(size, >, 0);
- hdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+ hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
ASSERT(BUF_EMPTY(hdr));
+ ASSERT3P(hdr->b_freeze_cksum, ==, NULL);
hdr->b_size = size;
- hdr->b_type = type;
hdr->b_spa = spa_load_guid(spa);
- hdr->b_state = arc_anon;
- hdr->b_arc_access = 0;
+
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
buf->b_efunc = NULL;
buf->b_private = NULL;
buf->b_next = NULL;
- hdr->b_buf = buf;
+
+ hdr->b_flags = arc_bufc_to_flags(type);
+ hdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+
+ hdr->b_l1hdr.b_buf = buf;
+ hdr->b_l1hdr.b_state = arc_anon;
+ hdr->b_l1hdr.b_arc_access = 0;
+ hdr->b_l1hdr.b_datacnt = 1;
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
+
arc_get_data_buf(buf);
- hdr->b_datacnt = 1;
- hdr->b_flags = 0;
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- (void) refcount_add(&hdr->b_refcnt, tag);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
return (buf);
}
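A hedged usage sketch for the allocator above; spa, src, and size are stand-ins, and FTAG is the usual ZFS tag for the current function. A fresh buffer is anonymous and carries one reference for the caller:

	arc_buf_t *buf = arc_buf_alloc(spa, size, FTAG, ARC_BUFC_DATA);
	bcopy(src, buf->b_data, size);	/* fill the anonymous buffer */
	arc_buf_free(buf, FTAG);	/* drops the tag's reference */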
@@ -1585,8 +2071,9 @@ arc_return_buf(arc_buf_t *buf, void *tag)
arc_buf_hdr_t *hdr = buf->b_hdr;
ASSERT(buf->b_data != NULL);
- (void) refcount_add(&hdr->b_refcnt, tag);
- (void) refcount_remove(&hdr->b_refcnt, arc_onloan_tag);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
+ (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
}
@@ -1595,12 +2082,12 @@ arc_return_buf(arc_buf_t *buf, void *tag)
void
arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
{
- arc_buf_hdr_t *hdr;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
ASSERT(buf->b_data != NULL);
- hdr = buf->b_hdr;
- (void) refcount_add(&hdr->b_refcnt, arc_onloan_tag);
- (void) refcount_remove(&hdr->b_refcnt, tag);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ (void) refcount_add(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
+ (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag);
buf->b_efunc = NULL;
buf->b_private = NULL;
@@ -1614,15 +2101,16 @@ arc_buf_clone(arc_buf_t *from)
arc_buf_hdr_t *hdr = from->b_hdr;
uint64_t size = hdr->b_size;
- ASSERT(hdr->b_state != arc_anon);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT(hdr->b_l1hdr.b_state != arc_anon);
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
buf->b_efunc = NULL;
buf->b_private = NULL;
- buf->b_next = hdr->b_buf;
- hdr->b_buf = buf;
+ buf->b_next = hdr->b_l1hdr.b_buf;
+ hdr->b_l1hdr.b_buf = buf;
arc_get_data_buf(buf);
bcopy(from->b_data, buf->b_data, size);
@@ -1632,11 +2120,11 @@ arc_buf_clone(arc_buf_t *from)
* then track the size and number of duplicates. These stats will be
* updated as duplicate buffers are created and destroyed.
*/
- if (hdr->b_type == ARC_BUFC_DATA) {
+ if (HDR_ISTYPE_DATA(hdr)) {
ARCSTAT_BUMP(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
}
- hdr->b_datacnt += 1;
+ hdr->b_l1hdr.b_datacnt += 1;
return (buf);
}
@@ -1659,17 +2147,20 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
hash_lock = HDR_LOCK(buf->b_hdr);
mutex_enter(hash_lock);
hdr = buf->b_hdr;
+ ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
mutex_exit(&buf->b_evict_lock);
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+ ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+ hdr->b_l1hdr.b_state == arc_mfu);
+
add_reference(hdr, hash_lock, tag);
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, hash_lock);
mutex_exit(hash_lock);
ARCSTAT_BUMP(arcstat_hits);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+ ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+ demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
data, metadata, hits);
}
@@ -1679,7 +2170,7 @@ arc_buf_free_on_write(void *data, size_t size,
{
l2arc_data_free_t *df;
- df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+ df = kmem_alloc(sizeof (*df), KM_SLEEP);
df->l2df_data = data;
df->l2df_size = size;
df->l2df_func = free_func;
@@ -1705,78 +2196,116 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
}
}
-/*
- * Free up buf->b_data and if 'remove' is set, then pull the
- * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
- */
static void
arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
{
- l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ ASSERT(MUTEX_HELD(&hdr->b_l2hdr.b_dev->l2ad_mtx));
+
+ /*
+ * The b_tmp_cdata field is linked off of the b_l1hdr, so if
+ * that doesn't exist, the header is in the arc_l2c_only state,
+ * and there isn't anything to free (it's already been freed).
+ */
+ if (!HDR_HAS_L1HDR(hdr))
+ return;
+
+ /*
+ * The header isn't being written to the l2arc device, thus it
+ * shouldn't have a b_tmp_cdata to free.
+ */
+ if (!HDR_L2_WRITING(hdr)) {
+ ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
+ return;
+ }
- ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
+ /*
+ * The header does not have compression enabled. This can be due
+ * to the buffer not being compressible, or because we're
+ * freeing the buffer before the second phase of
+	 * l2arc_write_buffers() has started (which does the compression
+ * step). In either case, b_tmp_cdata does not point to a
+ * separately compressed buffer, so there's nothing to free (it
+ * points to the same buffer as the arc_buf_t's b_data field).
+ */
+ if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
+ return;
+ }
- if (l2hdr->b_tmp_cdata == NULL)
+ /*
+	 * There's nothing to free since the buffer was all zeros and
+ * compressed to a zero length buffer.
+ */
+ if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
+ ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
return;
+ }
+
+ ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
+
+ arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
+ hdr->b_size, zio_data_buf_free);
- ASSERT(HDR_L2_WRITING(hdr));
- arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
- zio_data_buf_free);
ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
- l2hdr->b_tmp_cdata = NULL;
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
}
+/*
+ * Free up buf->b_data, and if 'remove' is set, pull the
+ * arc_buf_t off of the arc_buf_hdr_t's list and free it.
+ */
static void
-arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
+arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
{
arc_buf_t **bufp;
/* free up data associated with the buf */
- if (buf->b_data) {
- arc_state_t *state = buf->b_hdr->b_state;
+ if (buf->b_data != NULL) {
+ arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = buf->b_hdr->b_type;
+ arc_buf_contents_t type = arc_buf_type(buf->b_hdr);
arc_cksum_verify(buf);
#ifdef illumos
arc_buf_unwatch(buf);
#endif /* illumos */
- if (!recycle) {
- if (type == ARC_BUFC_METADATA) {
- arc_buf_data_free(buf, zio_buf_free);
- arc_space_return(size, ARC_SPACE_DATA);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- arc_buf_data_free(buf, zio_data_buf_free);
- ARCSTAT_INCR(arcstat_data_size, -size);
- atomic_add_64(&arc_size, -size);
- }
+ if (type == ARC_BUFC_METADATA) {
+ arc_buf_data_free(buf, zio_buf_free);
+ arc_space_return(size, ARC_SPACE_META);
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ arc_buf_data_free(buf, zio_data_buf_free);
+ arc_space_return(size, ARC_SPACE_DATA);
}
- if (list_link_active(&buf->b_hdr->b_arc_node)) {
+
+ /* protected by hash lock, if in the hash table */
+ if (multilist_link_active(&buf->b_hdr->b_l1hdr.b_arc_node)) {
uint64_t *cnt = &state->arcs_lsize[type];
- ASSERT(refcount_is_zero(&buf->b_hdr->b_refcnt));
- ASSERT(state != arc_anon);
+ ASSERT(refcount_is_zero(
+ &buf->b_hdr->b_l1hdr.b_refcnt));
+ ASSERT(state != arc_anon && state != arc_l2c_only);
ASSERT3U(*cnt, >=, size);
atomic_add_64(cnt, -size);
}
- ASSERT3U(state->arcs_size, >=, size);
- atomic_add_64(&state->arcs_size, -size);
+
+ (void) refcount_remove_many(&state->arcs_size, size, buf);
buf->b_data = NULL;
/*
* If we're destroying a duplicate buffer make sure
* that the appropriate statistics are updated.
*/
- if (buf->b_hdr->b_datacnt > 1 &&
- buf->b_hdr->b_type == ARC_BUFC_DATA) {
+ if (buf->b_hdr->b_l1hdr.b_datacnt > 1 &&
+ HDR_ISTYPE_DATA(buf->b_hdr)) {
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
}
- ASSERT(buf->b_hdr->b_datacnt > 0);
- buf->b_hdr->b_datacnt -= 1;
+ ASSERT(buf->b_hdr->b_l1hdr.b_datacnt > 0);
+ buf->b_hdr->b_l1hdr.b_datacnt -= 1;
}
/* only remove the buf if requested */
@@ -1784,7 +2313,8 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
return;
/* remove the buf from the hdr list */
- for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next)
+ for (bufp = &buf->b_hdr->b_l1hdr.b_buf; *bufp != buf;
+ bufp = &(*bufp)->b_next)
continue;
*bufp = buf->b_next;
buf->b_next = NULL;
@@ -1797,91 +2327,147 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
}
static void
+arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
+{
+ l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
+ l2arc_dev_t *dev = l2hdr->b_dev;
+
+ ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
+ ASSERT(HDR_HAS_L2HDR(hdr));
+
+ list_remove(&dev->l2ad_buflist, hdr);
+
+ /*
+ * We don't want to leak the b_tmp_cdata buffer that was
+ * allocated in l2arc_write_buffers()
+ */
+ arc_buf_l2_cdata_free(hdr);
+
+ /*
+ * If the l2hdr's b_daddr is equal to L2ARC_ADDR_UNSET, then
+ * this header is being processed by l2arc_write_buffers() (i.e.
+ * it's in the first stage of l2arc_write_buffers()).
+ * Re-affirming that truth here, just to serve as a reminder. If
+ * b_daddr does not equal L2ARC_ADDR_UNSET, then the header may or
+	 * may not have its HDR_L2_WRITING flag set (the write may have
+ * completed, in which case HDR_L2_WRITING will be false and the
+ * b_daddr field will point to the address of the buffer on disk).
+ */
+ IMPLY(l2hdr->b_daddr == L2ARC_ADDR_UNSET, HDR_L2_WRITING(hdr));
+
+ /*
+ * If b_daddr is equal to L2ARC_ADDR_UNSET, we're racing with
+ * l2arc_write_buffers(). Since we've just removed this header
+ * from the l2arc buffer list, this header will never reach the
+ * second stage of l2arc_write_buffers(), which increments the
+ * accounting stats for this header. Thus, we must be careful
+ * not to decrement them for this header either.
+ */
+ if (l2hdr->b_daddr != L2ARC_ADDR_UNSET) {
+ ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
+ ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+
+ vdev_space_update(dev->l2ad_vdev,
+ -l2hdr->b_asize, 0, 0);
+
+ (void) refcount_remove_many(&dev->l2ad_alloc,
+ l2hdr->b_asize, hdr);
+ }
+
+ hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+}
+
+static void
arc_hdr_destroy(arc_buf_hdr_t *hdr)
{
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- ASSERT3P(hdr->b_state, ==, arc_anon);
+ if (HDR_HAS_L1HDR(hdr)) {
+ ASSERT(hdr->b_l1hdr.b_buf == NULL ||
+ hdr->b_l1hdr.b_datacnt > 0);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+ }
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ ASSERT(!HDR_IN_HASH_TABLE(hdr));
+
+ if (HDR_HAS_L2HDR(hdr)) {
+ l2arc_dev_t *dev = hdr->b_l2hdr.b_dev;
+ boolean_t buflist_held = MUTEX_HELD(&dev->l2ad_mtx);
+
+ if (!buflist_held)
+ mutex_enter(&dev->l2ad_mtx);
- if (l2hdr != NULL) {
- boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
/*
- * To prevent arc_free() and l2arc_evict() from
- * attempting to free the same buffer at the same time,
- * a FREE_IN_PROGRESS flag is given to arc_free() to
- * give it priority. l2arc_evict() can't destroy this
- * header while we are waiting on l2arc_buflist_mtx.
- *
- * The hdr may be removed from l2ad_buflist before we
- * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
+ * Even though we checked this conditional above, we
+ * need to check this again now that we have the
+ * l2ad_mtx. This is because we could be racing with
+ * another thread calling l2arc_evict() which might have
+ * destroyed this header's L2 portion as we were waiting
+ * to acquire the l2ad_mtx. If that happens, we don't
+ * want to re-destroy the header's L2 portion.
*/
- if (!buflist_held) {
- mutex_enter(&l2arc_buflist_mtx);
- l2hdr = hdr->b_l2hdr;
- }
-
- if (l2hdr != NULL) {
- trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
- l2hdr->b_asize, 0);
- list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
- arc_buf_l2_cdata_free(hdr);
- ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
- ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
- vdev_space_update(l2hdr->b_dev->l2ad_vdev,
- -l2hdr->b_asize, 0, 0);
- kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
- if (hdr->b_state == arc_l2c_only)
- l2arc_hdr_stat_remove();
- hdr->b_l2hdr = NULL;
+ if (HDR_HAS_L2HDR(hdr)) {
+ if (hdr->b_l2hdr.b_daddr != L2ARC_ADDR_UNSET)
+ trim_map_free(dev->l2ad_vdev,
+ hdr->b_l2hdr.b_daddr,
+ hdr->b_l2hdr.b_asize, 0);
+ arc_hdr_l2hdr_destroy(hdr);
}
if (!buflist_held)
- mutex_exit(&l2arc_buflist_mtx);
+ mutex_exit(&dev->l2ad_mtx);
}
- if (!BUF_EMPTY(hdr)) {
- ASSERT(!HDR_IN_HASH_TABLE(hdr));
+ if (!BUF_EMPTY(hdr))
buf_discard_identity(hdr);
- }
- while (hdr->b_buf) {
- arc_buf_t *buf = hdr->b_buf;
- if (buf->b_efunc) {
- mutex_enter(&arc_eviction_mtx);
- mutex_enter(&buf->b_evict_lock);
- ASSERT(buf->b_hdr != NULL);
- arc_buf_destroy(hdr->b_buf, FALSE, FALSE);
- hdr->b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- mutex_exit(&buf->b_evict_lock);
- mutex_exit(&arc_eviction_mtx);
- } else {
- arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
- }
- }
if (hdr->b_freeze_cksum != NULL) {
kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
hdr->b_freeze_cksum = NULL;
}
- if (hdr->b_thawed) {
- kmem_free(hdr->b_thawed, 1);
- hdr->b_thawed = NULL;
+
+ if (HDR_HAS_L1HDR(hdr)) {
+ while (hdr->b_l1hdr.b_buf) {
+ arc_buf_t *buf = hdr->b_l1hdr.b_buf;
+
+ if (buf->b_efunc != NULL) {
+ mutex_enter(&arc_user_evicts_lock);
+ mutex_enter(&buf->b_evict_lock);
+ ASSERT(buf->b_hdr != NULL);
+ arc_buf_destroy(hdr->b_l1hdr.b_buf, FALSE);
+ hdr->b_l1hdr.b_buf = buf->b_next;
+ buf->b_hdr = &arc_eviction_hdr;
+ buf->b_next = arc_eviction_list;
+ arc_eviction_list = buf;
+ mutex_exit(&buf->b_evict_lock);
+ cv_signal(&arc_user_evicts_cv);
+ mutex_exit(&arc_user_evicts_lock);
+ } else {
+ arc_buf_destroy(hdr->b_l1hdr.b_buf, TRUE);
+ }
+ }
+#ifdef ZFS_DEBUG
+ if (hdr->b_l1hdr.b_thawed != NULL) {
+ kmem_free(hdr->b_l1hdr.b_thawed, 1);
+ hdr->b_l1hdr.b_thawed = NULL;
+ }
+#endif
}
- ASSERT(!list_link_active(&hdr->b_arc_node));
ASSERT3P(hdr->b_hash_next, ==, NULL);
- ASSERT3P(hdr->b_acb, ==, NULL);
- kmem_cache_free(hdr_cache, hdr);
+ if (HDR_HAS_L1HDR(hdr)) {
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+ ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ kmem_cache_free(hdr_full_cache, hdr);
+ } else {
+ kmem_cache_free(hdr_l2only_cache, hdr);
+ }
}
void
arc_buf_free(arc_buf_t *buf, void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
- int hashed = hdr->b_state != arc_anon;
+ int hashed = hdr->b_l1hdr.b_state != arc_anon;
ASSERT(buf->b_efunc == NULL);
ASSERT(buf->b_data != NULL);
@@ -1894,10 +2480,10 @@ arc_buf_free(arc_buf_t *buf, void *tag)
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
(void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_datacnt > 1) {
- arc_buf_destroy(buf, FALSE, TRUE);
+ if (hdr->b_l1hdr.b_datacnt > 1) {
+ arc_buf_destroy(buf, TRUE);
} else {
- ASSERT(buf == hdr->b_buf);
+ ASSERT(buf == hdr->b_l1hdr.b_buf);
ASSERT(buf->b_efunc == NULL);
hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
}
@@ -1909,16 +2495,16 @@ arc_buf_free(arc_buf_t *buf, void *tag)
* this buffer unless the write completes before we finish
* decrementing the reference count.
*/
- mutex_enter(&arc_eviction_mtx);
+ mutex_enter(&arc_user_evicts_lock);
(void) remove_reference(hdr, NULL, tag);
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
destroy_hdr = !HDR_IO_IN_PROGRESS(hdr);
- mutex_exit(&arc_eviction_mtx);
+ mutex_exit(&arc_user_evicts_lock);
if (destroy_hdr)
arc_hdr_destroy(hdr);
} else {
if (remove_reference(hdr, NULL, tag) > 0)
- arc_buf_destroy(buf, FALSE, TRUE);
+ arc_buf_destroy(buf, TRUE);
else
arc_hdr_destroy(hdr);
}
@@ -1931,34 +2517,35 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag)
kmutex_t *hash_lock = HDR_LOCK(hdr);
boolean_t no_callback = (buf->b_efunc == NULL);
- if (hdr->b_state == arc_anon) {
- ASSERT(hdr->b_datacnt == 1);
+ if (hdr->b_l1hdr.b_state == arc_anon) {
+ ASSERT(hdr->b_l1hdr.b_datacnt == 1);
arc_buf_free(buf, tag);
return (no_callback);
}
mutex_enter(hash_lock);
hdr = buf->b_hdr;
+ ASSERT(hdr->b_l1hdr.b_datacnt > 0);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
- ASSERT(hdr->b_state != arc_anon);
+ ASSERT(hdr->b_l1hdr.b_state != arc_anon);
ASSERT(buf->b_data != NULL);
(void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_datacnt > 1) {
+ if (hdr->b_l1hdr.b_datacnt > 1) {
if (no_callback)
- arc_buf_destroy(buf, FALSE, TRUE);
+ arc_buf_destroy(buf, TRUE);
} else if (no_callback) {
- ASSERT(hdr->b_buf == buf && buf->b_next == NULL);
+ ASSERT(hdr->b_l1hdr.b_buf == buf && buf->b_next == NULL);
ASSERT(buf->b_efunc == NULL);
hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
}
- ASSERT(no_callback || hdr->b_datacnt > 1 ||
- refcount_is_zero(&hdr->b_refcnt));
+ ASSERT(no_callback || hdr->b_l1hdr.b_datacnt > 1 ||
+ refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
mutex_exit(hash_lock);
return (no_callback);
}
-int
+int32_t
arc_buf_size(arc_buf_t *buf)
{
return (buf->b_hdr->b_size);
@@ -1999,7 +2586,7 @@ arc_buf_eviction_needed(arc_buf_t *buf)
return (B_TRUE);
}
- if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
+ if (hdr->b_l1hdr.b_datacnt > 1 && HDR_ISTYPE_DATA(hdr))
evict_needed = B_TRUE;
mutex_exit(&buf->b_evict_lock);
@@ -2007,432 +2594,678 @@ arc_buf_eviction_needed(arc_buf_t *buf)
}
/*
- * Evict buffers from list until we've removed the specified number of
- * bytes. Move the removed buffers to the appropriate evict state.
- * If the recycle flag is set, then attempt to "recycle" a buffer:
- * - look for a buffer to evict that is `bytes' long.
- * - return the data block from this buffer rather than freeing it.
- * This flag is used by callers that are trying to make space for a
- * new buffer in a full arc cache.
+ * Evict the arc_buf_hdr that is provided as a parameter. The resultant
+ * state of the header is dependent on its state prior to entering this
+ * function. The following transitions are possible:
*
- * This function makes a "best effort". It skips over any buffers
- * it can't get a hash_lock on, and so may not catch all candidates.
- * It may also return without evicting as much space as requested.
+ * - arc_mru -> arc_mru_ghost
+ * - arc_mfu -> arc_mfu_ghost
+ * - arc_mru_ghost -> arc_l2c_only
+ * - arc_mru_ghost -> deleted
+ * - arc_mfu_ghost -> arc_l2c_only
+ * - arc_mfu_ghost -> deleted
*/
-static void *
-arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
- arc_buf_contents_t type)
+static int64_t
+arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
{
- arc_state_t *evicted_state;
- uint64_t bytes_evicted = 0, skipped = 0, missed = 0;
- int64_t bytes_remaining;
- arc_buf_hdr_t *hdr, *hdr_prev = NULL;
- list_t *evicted_list, *list, *evicted_list_start, *list_start;
- kmutex_t *lock, *evicted_lock;
- kmutex_t *hash_lock;
- boolean_t have_lock;
- void *stolen = NULL;
- arc_buf_hdr_t marker = { 0 };
- int count = 0;
- static int evict_metadata_offset, evict_data_offset;
- int i, idx, offset, list_count, lists;
-
- ASSERT(state == arc_mru || state == arc_mfu);
-
- evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
+ arc_state_t *evicted_state, *state;
+ int64_t bytes_evicted = 0;
- if (type == ARC_BUFC_METADATA) {
- offset = 0;
- list_count = ARC_BUFC_NUMMETADATALISTS;
- list_start = &state->arcs_lists[0];
- evicted_list_start = &evicted_state->arcs_lists[0];
- idx = evict_metadata_offset;
- } else {
- offset = ARC_BUFC_NUMMETADATALISTS;
- list_start = &state->arcs_lists[offset];
- evicted_list_start = &evicted_state->arcs_lists[offset];
- list_count = ARC_BUFC_NUMDATALISTS;
- idx = evict_data_offset;
- }
- bytes_remaining = evicted_state->arcs_lsize[type];
- lists = 0;
-
-evict_start:
- list = &list_start[idx];
- evicted_list = &evicted_list_start[idx];
- lock = ARCS_LOCK(state, (offset + idx));
- evicted_lock = ARCS_LOCK(evicted_state, (offset + idx));
-
- mutex_enter(lock);
- mutex_enter(evicted_lock);
-
- for (hdr = list_tail(list); hdr; hdr = hdr_prev) {
- hdr_prev = list_prev(list, hdr);
- bytes_remaining -= (hdr->b_size * hdr->b_datacnt);
- /* prefetch buffers have a minimum lifespan */
- if (HDR_IO_IN_PROGRESS(hdr) ||
- (spa && hdr->b_spa != spa) ||
- (hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT) &&
- ddi_get_lbolt() - hdr->b_arc_access <
- arc_min_prefetch_lifespan)) {
- skipped++;
- continue;
- }
- /* "lookahead" for better eviction candidate */
- if (recycle && hdr->b_size != bytes &&
- hdr_prev && hdr_prev->b_size == bytes)
- continue;
+ ASSERT(MUTEX_HELD(hash_lock));
+ ASSERT(HDR_HAS_L1HDR(hdr));
- /* ignore markers */
- if (hdr->b_spa == 0)
- continue;
+ state = hdr->b_l1hdr.b_state;
+ if (GHOST_STATE(state)) {
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+ ASSERT(hdr->b_l1hdr.b_buf == NULL);
/*
- * It may take a long time to evict all the bufs requested.
- * To avoid blocking all arc activity, periodically drop
- * the arcs_mtx and give other threads a chance to run
- * before reacquiring the lock.
- *
- * If we are looking for a buffer to recycle, we are in
- * the hot code path, so don't sleep.
+ * l2arc_write_buffers() relies on a header's L1 portion
+		 * (i.e. its b_tmp_cdata field) during its write phase.
+		 * Thus, we cannot push a header onto the arc_l2c_only
+		 * state (removing its L1 piece) until the header is
+ * done being written to the l2arc.
*/
- if (!recycle && count++ > arc_evict_iterations) {
- list_insert_after(list, hdr, &marker);
- mutex_exit(evicted_lock);
- mutex_exit(lock);
- kpreempt(KPREEMPT_SYNC);
- mutex_enter(lock);
- mutex_enter(evicted_lock);
- hdr_prev = list_prev(list, &marker);
- list_remove(list, &marker);
- count = 0;
- continue;
+ if (HDR_HAS_L2HDR(hdr) && HDR_L2_WRITING(hdr)) {
+ ARCSTAT_BUMP(arcstat_evict_l2_skip);
+ return (bytes_evicted);
}
- hash_lock = HDR_LOCK(hdr);
- have_lock = MUTEX_HELD(hash_lock);
- if (have_lock || mutex_tryenter(hash_lock)) {
- ASSERT0(refcount_count(&hdr->b_refcnt));
- ASSERT(hdr->b_datacnt > 0);
- while (hdr->b_buf) {
- arc_buf_t *buf = hdr->b_buf;
- if (!mutex_tryenter(&buf->b_evict_lock)) {
- missed += 1;
- break;
- }
- if (buf->b_data) {
- bytes_evicted += hdr->b_size;
- if (recycle && hdr->b_type == type &&
- hdr->b_size == bytes &&
- !HDR_L2_WRITING(hdr)) {
- stolen = buf->b_data;
- recycle = FALSE;
- }
- }
- if (buf->b_efunc) {
- mutex_enter(&arc_eviction_mtx);
- arc_buf_destroy(buf,
- buf->b_data == stolen, FALSE);
- hdr->b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- mutex_exit(&arc_eviction_mtx);
- mutex_exit(&buf->b_evict_lock);
- } else {
- mutex_exit(&buf->b_evict_lock);
- arc_buf_destroy(buf,
- buf->b_data == stolen, TRUE);
- }
- }
+ ARCSTAT_BUMP(arcstat_deleted);
+ bytes_evicted += hdr->b_size;
- if (hdr->b_l2hdr) {
- ARCSTAT_INCR(arcstat_evict_l2_cached,
- hdr->b_size);
- } else {
- if (l2arc_write_eligible(hdr->b_spa, hdr)) {
- ARCSTAT_INCR(arcstat_evict_l2_eligible,
- hdr->b_size);
- } else {
- ARCSTAT_INCR(
- arcstat_evict_l2_ineligible,
- hdr->b_size);
- }
- }
+ DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
- if (hdr->b_datacnt == 0) {
- arc_change_state(evicted_state, hdr, hash_lock);
- ASSERT(HDR_IN_HASH_TABLE(hdr));
- hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE;
- hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
- DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr);
- }
- if (!have_lock)
- mutex_exit(hash_lock);
- if (bytes >= 0 && bytes_evicted >= bytes)
- break;
- if (bytes_remaining > 0) {
- mutex_exit(evicted_lock);
- mutex_exit(lock);
- idx = ((idx + 1) & (list_count - 1));
- lists++;
- goto evict_start;
- }
+ if (HDR_HAS_L2HDR(hdr)) {
+ /*
+ * This buffer is cached on the 2nd Level ARC;
+ * don't destroy the header.
+ */
+ arc_change_state(arc_l2c_only, hdr, hash_lock);
+ /*
+ * dropping from L1+L2 cached to L2-only,
+ * realloc to remove the L1 header.
+ */
+ hdr = arc_hdr_realloc(hdr, hdr_full_cache,
+ hdr_l2only_cache);
} else {
- missed += 1;
+ arc_change_state(arc_anon, hdr, hash_lock);
+ arc_hdr_destroy(hdr);
}
+ return (bytes_evicted);
}
- mutex_exit(evicted_lock);
- mutex_exit(lock);
+ ASSERT(state == arc_mru || state == arc_mfu);
+ evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
- idx = ((idx + 1) & (list_count - 1));
- lists++;
+ /* prefetch buffers have a minimum lifespan */
+ if (HDR_IO_IN_PROGRESS(hdr) ||
+ ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
+ ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <
+ arc_min_prefetch_lifespan)) {
+ ARCSTAT_BUMP(arcstat_evict_skip);
+ return (bytes_evicted);
+ }
+
+ ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
+ ASSERT3U(hdr->b_l1hdr.b_datacnt, >, 0);
+ while (hdr->b_l1hdr.b_buf) {
+ arc_buf_t *buf = hdr->b_l1hdr.b_buf;
+ if (!mutex_tryenter(&buf->b_evict_lock)) {
+ ARCSTAT_BUMP(arcstat_mutex_miss);
+ break;
+ }
+ if (buf->b_data != NULL)
+ bytes_evicted += hdr->b_size;
+ if (buf->b_efunc != NULL) {
+ mutex_enter(&arc_user_evicts_lock);
+ arc_buf_destroy(buf, FALSE);
+ hdr->b_l1hdr.b_buf = buf->b_next;
+ buf->b_hdr = &arc_eviction_hdr;
+ buf->b_next = arc_eviction_list;
+ arc_eviction_list = buf;
+ cv_signal(&arc_user_evicts_cv);
+ mutex_exit(&arc_user_evicts_lock);
+ mutex_exit(&buf->b_evict_lock);
+ } else {
+ mutex_exit(&buf->b_evict_lock);
+ arc_buf_destroy(buf, TRUE);
+ }
+ }
- if (bytes_evicted < bytes) {
- if (lists < list_count)
- goto evict_start;
+ if (HDR_HAS_L2HDR(hdr)) {
+ ARCSTAT_INCR(arcstat_evict_l2_cached, hdr->b_size);
+ } else {
+ if (l2arc_write_eligible(hdr->b_spa, hdr))
+ ARCSTAT_INCR(arcstat_evict_l2_eligible, hdr->b_size);
else
- dprintf("only evicted %lld bytes from %x",
- (longlong_t)bytes_evicted, state);
+ ARCSTAT_INCR(arcstat_evict_l2_ineligible, hdr->b_size);
}
- if (type == ARC_BUFC_METADATA)
- evict_metadata_offset = idx;
- else
- evict_data_offset = idx;
- if (skipped)
- ARCSTAT_INCR(arcstat_evict_skip, skipped);
+ if (hdr->b_l1hdr.b_datacnt == 0) {
+ arc_change_state(evicted_state, hdr, hash_lock);
+ ASSERT(HDR_IN_HASH_TABLE(hdr));
+ hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE;
+ hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
+ DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr);
+ }
- if (missed)
- ARCSTAT_INCR(arcstat_mutex_miss, missed);
+ return (bytes_evicted);
+}
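Condensing the paths through arc_evict_hdr() above into one table (a sketch of the code, not additional behavior):

	/*
	 * ghost state, L2 write in progress -> skip (arcstat_evict_l2_skip)
	 * ghost state, has L2 header        -> arc_l2c_only, L1 hdr freed
	 * ghost state, no L2 header         -> arc_anon, header destroyed
	 * prefetch within minimum lifespan  -> skip (arcstat_evict_skip)
	 * arc_mru / arc_mfu, datacnt == 0   -> arc_mru_ghost / arc_mfu_ghost
	 */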
- /*
- * Note: we have just evicted some data into the ghost state,
- * potentially putting the ghost size over the desired size. Rather
- * that evicting from the ghost list in this hot code path, leave
- * this chore to the arc_reclaim_thread().
- */
+static uint64_t
+arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
+ uint64_t spa, int64_t bytes)
+{
+ multilist_sublist_t *mls;
+ uint64_t bytes_evicted = 0;
+ arc_buf_hdr_t *hdr;
+ kmutex_t *hash_lock;
+ int evict_count = 0;
- if (stolen)
- ARCSTAT_BUMP(arcstat_stolen);
- return (stolen);
+ ASSERT3P(marker, !=, NULL);
+ IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
+
+ mls = multilist_sublist_lock(ml, idx);
+
+ for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
+ hdr = multilist_sublist_prev(mls, marker)) {
+ if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) ||
+ (evict_count >= zfs_arc_evict_batch_limit))
+ break;
+
+ /*
+ * To keep our iteration location, move the marker
+ * forward. Since we're not holding hdr's hash lock, we
+ * must be very careful and not remove 'hdr' from the
+ * sublist. Otherwise, other consumers might mistake the
+ * 'hdr' as not being on a sublist when they call the
+ * multilist_link_active() function (they all rely on
+ * the hash lock protecting concurrent insertions and
+ * removals). multilist_sublist_move_forward() was
+ * specifically implemented to ensure this is the case
+ * (only 'marker' will be removed and re-inserted).
+ */
+ multilist_sublist_move_forward(mls, marker);
+
+ /*
+ * The only case where the b_spa field should ever be
+ * zero, is the marker headers inserted by
+ * arc_evict_state(). It's possible for multiple threads
+ * to be calling arc_evict_state() concurrently (e.g.
+ * dsl_pool_close() and zio_inject_fault()), so we must
+ * skip any markers we see from these other threads.
+ */
+ if (hdr->b_spa == 0)
+ continue;
+
+ /* we're only interested in evicting buffers of a certain spa */
+ if (spa != 0 && hdr->b_spa != spa) {
+ ARCSTAT_BUMP(arcstat_evict_skip);
+ continue;
+ }
+
+ hash_lock = HDR_LOCK(hdr);
+
+ /*
+ * We aren't calling this function from any code path
+ * that would already be holding a hash lock, so we're
+ * asserting on this assumption to be defensive in case
+ * this ever changes. Without this check, it would be
+ * possible to incorrectly increment arcstat_mutex_miss
+ * below (e.g. if the code changed such that we called
+ * this function with a hash lock held).
+ */
+ ASSERT(!MUTEX_HELD(hash_lock));
+
+ if (mutex_tryenter(hash_lock)) {
+ uint64_t evicted = arc_evict_hdr(hdr, hash_lock);
+ mutex_exit(hash_lock);
+
+ bytes_evicted += evicted;
+
+ /*
+ * If evicted is zero, arc_evict_hdr() must have
+ * decided to skip this header, don't increment
+ * evict_count in this case.
+ */
+ if (evicted != 0)
+ evict_count++;
+
+ /*
+ * If arc_size isn't overflowing, signal any
+ * threads that might happen to be waiting.
+ *
+ * For each header evicted, we wake up a single
+ * thread. If we used cv_broadcast, we could
+			 * wake up "too many" threads, causing arc_size
+			 * to significantly overflow arc_c, since
+			 * arc_get_data_buf() doesn't check for overflow
+			 * when it's woken up (it doesn't check because it's
+			 * possible for the ARC to be overflowing while
+			 * full of un-evictable buffers, in which case the
+			 * function should proceed anyway).
+ *
+ * If threads are left sleeping, due to not
+ * using cv_broadcast, they will be woken up
+ * just before arc_reclaim_thread() sleeps.
+ */
+ mutex_enter(&arc_reclaim_lock);
+ if (!arc_is_overflowing())
+ cv_signal(&arc_reclaim_waiters_cv);
+ mutex_exit(&arc_reclaim_lock);
+ } else {
+ ARCSTAT_BUMP(arcstat_mutex_miss);
+ }
+ }
+
+ multilist_sublist_unlock(mls);
+
+ return (bytes_evicted);
}
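The traversal idiom above, stripped to its core (a sketch; the real loop evicts each header under its hash lock before continuing):

	for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL;
	    hdr = multilist_sublist_prev(mls, marker)) {
		/* only the marker is relinked without the hash lock */
		multilist_sublist_move_forward(mls, marker);
	}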
/*
- * Remove buffers from list until we've removed the specified number of
- * bytes. Destroy the buffers that are removed.
+ * Evict buffers from the given arc state, until we've removed the
+ * specified number of bytes. Move the removed buffers to the
+ * appropriate evict state.
+ *
+ * This function makes a "best effort". It skips over any buffers
+ * it can't get a hash_lock on, and so may not catch all candidates.
+ * It may also return without evicting as much space as requested.
+ *
+ * If bytes is specified using the special value ARC_EVICT_ALL, this
+ * will evict all available (i.e. unlocked and evictable) buffers from
+ * the given arc state; which is used by arc_flush().
*/
-static void
-arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes)
+static uint64_t
+arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
+ arc_buf_contents_t type)
{
- arc_buf_hdr_t *hdr, *hdr_prev;
- arc_buf_hdr_t marker = { 0 };
- list_t *list, *list_start;
- kmutex_t *hash_lock, *lock;
- uint64_t bytes_deleted = 0;
- uint64_t bufs_skipped = 0;
- int count = 0;
- static int evict_offset;
- int list_count, idx = evict_offset;
- int offset, lists = 0;
-
- ASSERT(GHOST_STATE(state));
+ uint64_t total_evicted = 0;
+ multilist_t *ml = &state->arcs_list[type];
+ int num_sublists;
+ arc_buf_hdr_t **markers;
+
+ IMPLY(bytes < 0, bytes == ARC_EVICT_ALL);
+
+ num_sublists = multilist_get_num_sublists(ml);
/*
- * data lists come after metadata lists
+ * If we've tried to evict from each sublist, made some
+ * progress, but still have not hit the target number of bytes
+ * to evict, we want to keep trying. The markers allow us to
+ * pick up where we left off for each individual sublist, rather
+ * than starting from the tail each time.
*/
- list_start = &state->arcs_lists[ARC_BUFC_NUMMETADATALISTS];
- list_count = ARC_BUFC_NUMDATALISTS;
- offset = ARC_BUFC_NUMMETADATALISTS;
-
-evict_start:
- list = &list_start[idx];
- lock = ARCS_LOCK(state, idx + offset);
-
- mutex_enter(lock);
- for (hdr = list_tail(list); hdr; hdr = hdr_prev) {
- hdr_prev = list_prev(list, hdr);
- if (hdr->b_type > ARC_BUFC_NUMTYPES)
- panic("invalid hdr=%p", (void *)hdr);
- if (spa && hdr->b_spa != spa)
- continue;
+ markers = kmem_zalloc(sizeof (*markers) * num_sublists, KM_SLEEP);
+ for (int i = 0; i < num_sublists; i++) {
+ markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP);
- /* ignore markers */
- if (hdr->b_spa == 0)
- continue;
+ /*
+ * A b_spa of 0 is used to indicate that this header is
+ * a marker. This fact is used in arc_adjust_type() and
+ * arc_evict_state_impl().
+ */
+ markers[i]->b_spa = 0;
- hash_lock = HDR_LOCK(hdr);
- /* caller may be trying to modify this buffer, skip it */
- if (MUTEX_HELD(hash_lock))
- continue;
+ multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
+ multilist_sublist_insert_tail(mls, markers[i]);
+ multilist_sublist_unlock(mls);
+ }
+ /*
+ * While we haven't hit our target number of bytes to evict, or
+ * we're evicting all available buffers.
+ */
+ while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
/*
- * It may take a long time to evict all the bufs requested.
- * To avoid blocking all arc activity, periodically drop
- * the arcs_mtx and give other threads a chance to run
- * before reacquiring the lock.
+ * Start eviction using a randomly selected sublist,
+ * this is to try and evenly balance eviction across all
+ * sublists. Always starting at the same sublist
+ * (e.g. index 0) would cause evictions to favor certain
+ * sublists over others.
*/
- if (count++ > arc_evict_iterations) {
- list_insert_after(list, hdr, &marker);
- mutex_exit(lock);
- kpreempt(KPREEMPT_SYNC);
- mutex_enter(lock);
- hdr_prev = list_prev(list, &marker);
- list_remove(list, &marker);
- count = 0;
- continue;
- }
- if (mutex_tryenter(hash_lock)) {
- ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT(hdr->b_buf == NULL);
- ARCSTAT_BUMP(arcstat_deleted);
- bytes_deleted += hdr->b_size;
+ int sublist_idx = multilist_get_random_index(ml);
+ uint64_t scan_evicted = 0;
- if (hdr->b_l2hdr != NULL) {
- /*
- * This buffer is cached on the 2nd Level ARC;
- * don't destroy the header.
- */
- arc_change_state(arc_l2c_only, hdr, hash_lock);
- mutex_exit(hash_lock);
- } else {
- arc_change_state(arc_anon, hdr, hash_lock);
- mutex_exit(hash_lock);
- arc_hdr_destroy(hdr);
- }
+ for (int i = 0; i < num_sublists; i++) {
+ uint64_t bytes_remaining;
+ uint64_t bytes_evicted;
- DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
- if (bytes >= 0 && bytes_deleted >= bytes)
+ if (bytes == ARC_EVICT_ALL)
+ bytes_remaining = ARC_EVICT_ALL;
+ else if (total_evicted < bytes)
+ bytes_remaining = bytes - total_evicted;
+ else
break;
- } else if (bytes < 0) {
+
+ bytes_evicted = arc_evict_state_impl(ml, sublist_idx,
+ markers[sublist_idx], spa, bytes_remaining);
+
+ scan_evicted += bytes_evicted;
+ total_evicted += bytes_evicted;
+
+ /* we've reached the end, wrap to the beginning */
+ if (++sublist_idx >= num_sublists)
+ sublist_idx = 0;
+ }
+
+ /*
+ * If we didn't evict anything during this scan, we have
+ * no reason to believe we'll evict more during another
+ * scan, so break the loop.
+ */
+ if (scan_evicted == 0) {
+ /* This isn't possible, let's make that obvious */
+ ASSERT3S(bytes, !=, 0);
+
/*
- * Insert a list marker and then wait for the
- * hash lock to become available. Once its
- * available, restart from where we left off.
+ * When bytes is ARC_EVICT_ALL, the only way to
+ * break the loop is when scan_evicted is zero.
+ * In that case, we actually have evicted enough,
+ * so we don't want to increment the kstat.
*/
- list_insert_after(list, hdr, &marker);
- mutex_exit(lock);
- mutex_enter(hash_lock);
- mutex_exit(hash_lock);
- mutex_enter(lock);
- hdr_prev = list_prev(list, &marker);
- list_remove(list, &marker);
- } else {
- bufs_skipped += 1;
+ if (bytes != ARC_EVICT_ALL) {
+ ASSERT3S(total_evicted, <, bytes);
+ ARCSTAT_BUMP(arcstat_evict_not_enough);
+ }
+
+ break;
}
+ }
+
+ for (int i = 0; i < num_sublists; i++) {
+ multilist_sublist_t *mls = multilist_sublist_lock(ml, i);
+ multilist_sublist_remove(mls, markers[i]);
+ multilist_sublist_unlock(mls);
+ kmem_cache_free(hdr_full_cache, markers[i]);
}
- mutex_exit(lock);
- idx = ((idx + 1) & (ARC_BUFC_NUMDATALISTS - 1));
- lists++;
+ kmem_free(markers, sizeof (*markers) * num_sublists);
+
+ return (total_evicted);
+}
+
+/*
+ * Flush all "evictable" data of the given type from the arc state
+ * specified. This will not evict any "active" buffers (i.e. referenced).
+ *
+ * When 'retry' is set to FALSE, the function will make a single pass
+ * over the state and evict any buffers that it can. Since it doesn't
+ * continually retry the eviction, it might end up leaving some buffers
+ * in the ARC due to lock misses.
+ *
+ * When 'retry' is set to TRUE, the function will continually retry the
+ * eviction until *all* evictable buffers have been removed from the
+ * state. As a result, if concurrent insertions into the state are
+ * allowed (e.g. if the ARC isn't shutting down), this function might
+ * wind up in an infinite loop, continually trying to evict buffers.
+ */
+static uint64_t
+arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type,
+ boolean_t retry)
+{
+ uint64_t evicted = 0;
- if (lists < list_count)
- goto evict_start;
+ while (state->arcs_lsize[type] != 0) {
+ evicted += arc_evict_state(state, spa, ARC_EVICT_ALL, type);
- evict_offset = idx;
- if ((uintptr_t)list > (uintptr_t)&state->arcs_lists[ARC_BUFC_NUMMETADATALISTS] &&
- (bytes < 0 || bytes_deleted < bytes)) {
- list_start = &state->arcs_lists[0];
- list_count = ARC_BUFC_NUMMETADATALISTS;
- offset = lists = 0;
- goto evict_start;
+ if (!retry)
+ break;
}
- if (bufs_skipped) {
- ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped);
- ASSERT(bytes >= 0);
+ return (evicted);
+}
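A hedged sketch of how a caller such as arc_flush() might drive this per state and type; guid and retry are stand-ins here, since the arc_flush() body is outside this hunk:

	(void) arc_flush_state(arc_mru, guid, ARC_BUFC_DATA, retry);
	(void) arc_flush_state(arc_mru, guid, ARC_BUFC_METADATA, retry);
	(void) arc_flush_state(arc_mfu, guid, ARC_BUFC_DATA, retry);
	(void) arc_flush_state(arc_mfu, guid, ARC_BUFC_METADATA, retry);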
+
+/*
+ * Evict the specified number of bytes from the state specified,
+ * restricting eviction to the spa and type given. This function
+ * prevents us from trying to evict more from a state's list than
+ * is "evictable", and skips eviction altogether when passed a
+ * negative value for "bytes". In contrast, arc_evict_state() will
+ * evict everything it can, when passed a negative value for "bytes".
+ */
+static uint64_t
+arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
+ arc_buf_contents_t type)
+{
+ int64_t delta;
+
+ if (bytes > 0 && state->arcs_lsize[type] > 0) {
+ delta = MIN(state->arcs_lsize[type], bytes);
+ return (arc_evict_state(state, spa, delta, type));
}
- if (bytes_deleted < bytes)
- dprintf("only deleted %lld bytes from %p",
- (longlong_t)bytes_deleted, state);
+ return (0);
}
-static void
+/*
+ * Evict metadata buffers from the cache, such that arc_meta_used is
+ * capped by the arc_meta_limit tunable.
+ */
+static uint64_t
+arc_adjust_meta(void)
+{
+ uint64_t total_evicted = 0;
+ int64_t target;
+
+ /*
+ * If we're over the meta limit, we want to evict enough
+ * metadata to get back under the meta limit. We don't want to
+ * evict so much that we drop the MRU below arc_p, though. If
+ * we're over the meta limit more than we're over arc_p, we
+ * evict some from the MRU here, and some from the MFU below.
+ */
+ target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
+ (int64_t)(refcount_count(&arc_anon->arcs_size) +
+ refcount_count(&arc_mru->arcs_size) - arc_p));
+
+ total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
+
+ /*
+ * Similar to the above, we want to evict enough bytes to get us
+ * below the meta limit, but not so much as to drop us below the
+ * space allotted to the MFU (which is defined as arc_c - arc_p).
+ */
+ target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
+ (int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p)));
+
+ total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+
+ return (total_evicted);
+}
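A worked example of the two MIN() clamps above (numbers illustrative): with arc_meta_used = 600M and arc_meta_limit = 512M we are 88M over the limit. If anon + MRU exceed arc_p by only 24M, the first pass evicts MIN(88M, 24M) = 24M of MRU metadata; the second pass then takes the remainder from the MFU, clamped so the MFU isn't driven below its arc_c - arc_p allotment.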
+
+/*
+ * Return the type of the oldest buffer in the given arc state
+ *
+ * This function will select a random sublist of type ARC_BUFC_DATA and
+ * a random sublist of type ARC_BUFC_METADATA. The tail of each sublist
+ * is compared, and the type which contains the "older" buffer will be
+ * returned.
+ */
+static arc_buf_contents_t
+arc_adjust_type(arc_state_t *state)
+{
+ multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
+ multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
+ int data_idx = multilist_get_random_index(data_ml);
+ int meta_idx = multilist_get_random_index(meta_ml);
+ multilist_sublist_t *data_mls;
+ multilist_sublist_t *meta_mls;
+ arc_buf_contents_t type;
+ arc_buf_hdr_t *data_hdr;
+ arc_buf_hdr_t *meta_hdr;
+
+ /*
+ * We keep the sublist lock until we're finished, to prevent
+ * the headers from being destroyed via arc_evict_state().
+ */
+ data_mls = multilist_sublist_lock(data_ml, data_idx);
+ meta_mls = multilist_sublist_lock(meta_ml, meta_idx);
+
+ /*
+ * These two loops are to ensure we skip any markers that
+ * might be at the tail of the lists due to arc_evict_state().
+ */
+
+ for (data_hdr = multilist_sublist_tail(data_mls); data_hdr != NULL;
+ data_hdr = multilist_sublist_prev(data_mls, data_hdr)) {
+ if (data_hdr->b_spa != 0)
+ break;
+ }
+
+ for (meta_hdr = multilist_sublist_tail(meta_mls); meta_hdr != NULL;
+ meta_hdr = multilist_sublist_prev(meta_mls, meta_hdr)) {
+ if (meta_hdr->b_spa != 0)
+ break;
+ }
+
+ if (data_hdr == NULL && meta_hdr == NULL) {
+ type = ARC_BUFC_DATA;
+ } else if (data_hdr == NULL) {
+ ASSERT3P(meta_hdr, !=, NULL);
+ type = ARC_BUFC_METADATA;
+ } else if (meta_hdr == NULL) {
+ ASSERT3P(data_hdr, !=, NULL);
+ type = ARC_BUFC_DATA;
+ } else {
+ ASSERT3P(data_hdr, !=, NULL);
+ ASSERT3P(meta_hdr, !=, NULL);
+
+ /* The headers can't be on the sublist without an L1 header */
+ ASSERT(HDR_HAS_L1HDR(data_hdr));
+ ASSERT(HDR_HAS_L1HDR(meta_hdr));
+
+ if (data_hdr->b_l1hdr.b_arc_access <
+ meta_hdr->b_l1hdr.b_arc_access) {
+ type = ARC_BUFC_DATA;
+ } else {
+ type = ARC_BUFC_METADATA;
+ }
+ }
+
+ multilist_sublist_unlock(meta_mls);
+ multilist_sublist_unlock(data_mls);
+
+ return (type);
+}
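
A toy version of the tail scan and comparison (hypothetical access times; here a 0 plays the role of a marker's b_spa == 0, and a smaller access time means an older buffer):

#include <stdio.h>
#include <stdint.h>

static int64_t
tail_access(const int64_t *list, int n)
{
	/* Walk from the tail, skipping markers (encoded as 0). */
	for (int i = n - 1; i >= 0; i--)
		if (list[i] != 0)
			return (list[i]);
	return (-1);	/* empty: no real buffers */
}

int
main(void)
{
	int64_t data[] = { 900, 140, 0 };	/* marker sits at the tail */
	int64_t meta[] = { 880, 150 };
	int64_t d = tail_access(data, 3);
	int64_t m = tail_access(meta, 2);

	/* data's surviving tail (140) is older than meta's (150) */
	printf("%s\n", (m == -1 || (d != -1 && d < m)) ? "DATA" : "METADATA");
	return (0);
}
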
+
+/*
+ * Evict buffers from the cache, such that arc_size is capped by arc_c.
+ */
+static uint64_t
arc_adjust(void)
{
- int64_t adjustment, delta;
+ uint64_t total_evicted = 0;
+ uint64_t bytes;
+ int64_t target;
+
+ /*
+ * If we're over arc_meta_limit, we want to correct that before
+ * potentially evicting data buffers below.
+ */
+ total_evicted += arc_adjust_meta();
/*
* Adjust MRU size
+ *
+ * If we're over the target cache size, we want to evict enough
+ * from the list to get back to our target size. We don't want
+ * to evict too much from the MRU, such that it drops below
+ * arc_p. So, if we're over our target cache size more than
+ * the MRU is over arc_p, we'll evict enough to get back to
+ * arc_p here, and then evict more from the MFU below.
*/
+ target = MIN((int64_t)(arc_size - arc_c),
+ (int64_t)(refcount_count(&arc_anon->arcs_size) +
+ refcount_count(&arc_mru->arcs_size) + arc_meta_used - arc_p));
- adjustment = MIN((int64_t)(arc_size - arc_c),
- (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used -
- arc_p));
+ /*
+ * If we're below arc_meta_min, always prefer to evict data.
+ * Otherwise, try to satisfy the requested number of bytes to
+ * evict from the type which contains older buffers, in an
+ * effort to keep newer buffers in the cache regardless of their
+ * type. If we cannot satisfy the number of bytes from this
+ * type, spill over into the next type.
+ */
+ if (arc_adjust_type(arc_mru) == ARC_BUFC_METADATA &&
+ arc_meta_used > arc_meta_min) {
+ bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
+ total_evicted += bytes;
- if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) {
- delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment);
- (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_DATA);
- adjustment -= delta;
- }
+ /*
+ * If we couldn't evict our target number of bytes from
+ * metadata, we try to get the rest from data.
+ */
+ target -= bytes;
+
+ total_evicted +=
+ arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA);
+ } else {
+ bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA);
+ total_evicted += bytes;
- if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
- delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
- (void) arc_evict(arc_mru, 0, delta, FALSE,
- ARC_BUFC_METADATA);
+ /*
+ * If we couldn't evict our target number of bytes from
+ * data, we try to get the rest from metadata.
+ */
+ target -= bytes;
+
+ total_evicted +=
+ arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
}
/*
* Adjust MFU size
+ *
+ * Now that we've tried to evict enough from the MRU to get its
+ * size back to arc_p, if we're still above the target cache
+ * size, we evict the rest from the MFU.
*/
+ target = arc_size - arc_c;
- adjustment = arc_size - arc_c;
+ if (arc_adjust_type(arc_mfu) == ARC_BUFC_METADATA &&
+ arc_meta_used > arc_meta_min) {
+ bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
+ total_evicted += bytes;
- if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) {
- delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]);
- (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_DATA);
- adjustment -= delta;
- }
+ /*
+ * If we couldn't evict our target number of bytes from
+ * metadata, we try to get the rest from data.
+ */
+ target -= bytes;
+
+ total_evicted +=
+ arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
+ } else {
+ bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA);
+ total_evicted += bytes;
+
+ /*
+ * If we couldn't evict our target number of bytes from
+ * data, we try to get the rest from metadata.
+ */
+ target -= bytes;
- if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
- int64_t delta = MIN(adjustment,
- arc_mfu->arcs_lsize[ARC_BUFC_METADATA]);
- (void) arc_evict(arc_mfu, 0, delta, FALSE,
- ARC_BUFC_METADATA);
+ total_evicted +=
+ arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
}
/*
* Adjust ghost lists
+ *
+ * In addition to the above, the ARC also defines target values
+ * for the ghost lists. The sum of the mru list and mru ghost
+ * list should never exceed the target size of the cache, and
+ * the sum of the mru list, mfu list, mru ghost list, and mfu
+ * ghost list should never exceed twice the target size of the
+ * cache. The following logic enforces these limits on the ghost
+ * caches, and evicts from them as needed.
*/
+ target = refcount_count(&arc_mru->arcs_size) +
+ refcount_count(&arc_mru_ghost->arcs_size) - arc_c;
- adjustment = arc_mru->arcs_size + arc_mru_ghost->arcs_size - arc_c;
+ bytes = arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA);
+ total_evicted += bytes;
- if (adjustment > 0 && arc_mru_ghost->arcs_size > 0) {
- delta = MIN(arc_mru_ghost->arcs_size, adjustment);
- arc_evict_ghost(arc_mru_ghost, 0, delta);
- }
+ target -= bytes;
- adjustment =
- arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c;
+ total_evicted +=
+ arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_METADATA);
- if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) {
- delta = MIN(arc_mfu_ghost->arcs_size, adjustment);
- arc_evict_ghost(arc_mfu_ghost, 0, delta);
- }
+ /*
+ * We assume the sum of the mru list and mfu list is less than
+ * or equal to arc_c (we enforced this above), which means we
+ * can use the simpler of the two equations below:
+ *
+ * mru + mfu + mru ghost + mfu ghost <= 2 * arc_c
+ * mru ghost + mfu ghost <= arc_c
+ */
+ target = refcount_count(&arc_mru_ghost->arcs_size) +
+ refcount_count(&arc_mfu_ghost->arcs_size) - arc_c;
+
+ bytes = arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA);
+ total_evicted += bytes;
+
+ target -= bytes;
+
+ total_evicted +=
+ arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA);
+
+ return (total_evicted);
}
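
To make the ghost-list invariants concrete with hypothetical numbers: with arc_c = 8 GB, an MRU of 5 GB and an MRU ghost of 4 GB, the first pass computes 5 + 4 - 8 and evicts 1 GB from the MRU ghost list; if the ghost lists then hold 3 GB and 6 GB, the second pass computes 3 + 6 - 8 and evicts 1 GB from the MFU ghost list, restoring mru ghost + mfu ghost <= arc_c.
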
static void
arc_do_user_evicts(void)
{
- static arc_buf_t *tmp_arc_eviction_list;
-
- /*
- * Move list over to avoid LOR
- */
-restart:
- mutex_enter(&arc_eviction_mtx);
- tmp_arc_eviction_list = arc_eviction_list;
- arc_eviction_list = NULL;
- mutex_exit(&arc_eviction_mtx);
-
- while (tmp_arc_eviction_list != NULL) {
- arc_buf_t *buf = tmp_arc_eviction_list;
- tmp_arc_eviction_list = buf->b_next;
+ mutex_enter(&arc_user_evicts_lock);
+ while (arc_eviction_list != NULL) {
+ arc_buf_t *buf = arc_eviction_list;
+ arc_eviction_list = buf->b_next;
mutex_enter(&buf->b_evict_lock);
buf->b_hdr = NULL;
mutex_exit(&buf->b_evict_lock);
+ mutex_exit(&arc_user_evicts_lock);
if (buf->b_efunc != NULL)
VERIFY0(buf->b_efunc(buf->b_private));
@@ -2440,68 +3273,48 @@ restart:
buf->b_efunc = NULL;
buf->b_private = NULL;
kmem_cache_free(buf_cache, buf);
+ mutex_enter(&arc_user_evicts_lock);
}
-
- if (arc_eviction_list != NULL)
- goto restart;
+ mutex_exit(&arc_user_evicts_lock);
}
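
The reworked loop above drops arc_user_evicts_lock around each callback so the callback may take other locks without a lock-order reversal; the same shape, as a user-space pthreads sketch (hypothetical types and names):

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	void (*cb)(void *);
	void *arg;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;

static void
drain(void)
{
	pthread_mutex_lock(&list_lock);
	while (pending != NULL) {
		struct node *n = pending;
		pending = n->next;
		/* Drop the list lock before running the callback... */
		pthread_mutex_unlock(&list_lock);
		n->cb(n->arg);
		/* ...and retake it before touching the list again. */
		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
}

static void
hello(void *arg)
{
	printf("%s\n", (const char *)arg);
}

int
main(void)
{
	struct node n = { NULL, hello, "evicted" };

	pending = &n;
	drain();
	return (0);
}
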
-/*
- * Flush all *evictable* data from the cache for the given spa.
- * NOTE: this will not touch "active" (i.e. referenced) data.
- */
void
-arc_flush(spa_t *spa)
+arc_flush(spa_t *spa, boolean_t retry)
{
uint64_t guid = 0;
- if (spa)
+ /*
+ * If retry is TRUE, a spa must not be specified since we have
+ * no good way to determine if all of a spa's buffers have been
+ * evicted from an arc state.
+ */
+ ASSERT(!retry || spa == 0);
+
+ if (spa != NULL)
guid = spa_load_guid(spa);
- while (arc_mru->arcs_lsize[ARC_BUFC_DATA]) {
- (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA);
- if (spa)
- break;
- }
- while (arc_mru->arcs_lsize[ARC_BUFC_METADATA]) {
- (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_METADATA);
- if (spa)
- break;
- }
- while (arc_mfu->arcs_lsize[ARC_BUFC_DATA]) {
- (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_DATA);
- if (spa)
- break;
- }
- while (arc_mfu->arcs_lsize[ARC_BUFC_METADATA]) {
- (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_METADATA);
- if (spa)
- break;
- }
+ (void) arc_flush_state(arc_mru, guid, ARC_BUFC_DATA, retry);
+ (void) arc_flush_state(arc_mru, guid, ARC_BUFC_METADATA, retry);
+
+ (void) arc_flush_state(arc_mfu, guid, ARC_BUFC_DATA, retry);
+ (void) arc_flush_state(arc_mfu, guid, ARC_BUFC_METADATA, retry);
- arc_evict_ghost(arc_mru_ghost, guid, -1);
- arc_evict_ghost(arc_mfu_ghost, guid, -1);
+ (void) arc_flush_state(arc_mru_ghost, guid, ARC_BUFC_DATA, retry);
+ (void) arc_flush_state(arc_mru_ghost, guid, ARC_BUFC_METADATA, retry);
+
+ (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry);
+ (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
- mutex_enter(&arc_reclaim_thr_lock);
arc_do_user_evicts();
- mutex_exit(&arc_reclaim_thr_lock);
ASSERT(spa || arc_eviction_list == NULL);
}
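
As a usage note (inferred from the assertion above, not stated in the patch): a full teardown would presumably call arc_flush(NULL, B_TRUE) so that each state is re-scanned until empty, while per-pool callers pass the spa and retry == B_FALSE, accepting that a single pass may miss buffers that change state mid-flush.
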
void
-arc_shrink(void)
+arc_shrink(int64_t to_free)
{
-
if (arc_c > arc_c_min) {
- uint64_t to_free;
-
DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t,
arc_c_min, uint64_t, arc_p, uint64_t, to_free);
-#ifdef _KERNEL
- to_free = arc_c >> arc_shrink_shift;
-#else
- to_free = arc_c >> arc_shrink_shift;
-#endif
if (arc_c > arc_c_min + to_free)
atomic_add_64(&arc_c, -to_free);
else
@@ -2523,48 +3336,80 @@ arc_shrink(void)
if (arc_size > arc_c) {
DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
uint64_t, arc_c);
- arc_adjust();
+ (void) arc_adjust();
}
}
-static int needfree = 0;
+static long needfree = 0;
-static int
-arc_reclaim_needed(void)
+typedef enum free_memory_reason_t {
+ FMR_UNKNOWN,
+ FMR_NEEDFREE,
+ FMR_LOTSFREE,
+ FMR_SWAPFS_MINFREE,
+ FMR_PAGES_PP_MAXIMUM,
+ FMR_HEAP_ARENA,
+ FMR_ZIO_ARENA,
+ FMR_ZIO_FRAG,
+} free_memory_reason_t;
+
+int64_t last_free_memory;
+free_memory_reason_t last_free_reason;
+
+/*
+ * Additional reserve of pages for pp_reserve.
+ */
+int64_t arc_pages_pp_reserve = 64;
+
+/*
+ * Additional reserve of pages for swapfs.
+ */
+int64_t arc_swapfs_reserve = 64;
+
+/*
+ * Return the amount of memory that can be consumed before reclaim will be
+ * needed. A positive value indicates sufficient free memory; a negative
+ * value indicates the amount of memory that needs to be freed up.
+ */
+static int64_t
+arc_available_memory(void)
{
+ int64_t lowest = INT64_MAX;
+ int64_t n;
+ free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
-
- if (needfree) {
- DTRACE_PROBE(arc__reclaim_needfree);
- return (1);
+ if (needfree > 0) {
+ n = PAGESIZE * (-needfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_NEEDFREE;
+ }
}
/*
* Cooperate with pagedaemon when it's time for it to scan
* and reclaim some pages.
*/
- if (freemem < zfs_arc_free_target) {
- DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
- freemem, uint64_t, zfs_arc_free_target);
- return (1);
+ n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_LOTSFREE;
}
#ifdef sun
/*
- * take 'desfree' extra pages, so we reclaim sooner, rather than later
- */
- extra = desfree;
-
- /*
* check that we're out of range of the pageout scanner. It starts to
* schedule paging if freemem is less than lotsfree and needfree.
* lotsfree is the high-water mark for pageout, and needfree is the
* number of needed free pages. We add extra pages here to make sure
* the scanner doesn't start up while we're freeing memory.
*/
- if (freemem < lotsfree + needfree + extra)
- return (1);
+ n = PAGESIZE * (freemem - lotsfree - needfree - desfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_LOTSFREE;
+ }
/*
* check to make sure that swapfs has enough space so that anon
@@ -2573,8 +3418,13 @@ arc_reclaim_needed(void)
* swap pages. We also add a bit of extra here just to prevent
* circumstances from getting really dire.
*/
- if (availrmem < swapfs_minfree + swapfs_reserve + extra)
- return (1);
+ n = PAGESIZE * (availrmem - swapfs_minfree - swapfs_reserve -
+ desfree - arc_swapfs_reserve);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_SWAPFS_MINFREE;
+ }
+
/*
* Check that we have enough availrmem that memory locking (e.g., via
@@ -2583,8 +3433,12 @@ arc_reclaim_needed(void)
* drops below pages_pp_maximum, page locking mechanisms such as
* page_pp_lock() will fail.)
*/
- if (availrmem <= pages_pp_maximum)
- return (1);
+ n = PAGESIZE * (availrmem - pages_pp_maximum -
+ arc_pages_pp_reserve);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_PAGES_PP_MAXIMUM;
+ }
#endif /* sun */
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
@@ -2599,12 +3453,11 @@ arc_reclaim_needed(void)
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
- if (vmem_size(heap_arena, VMEM_FREE) <
- (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
- DTRACE_PROBE2(arc__reclaim_used, uint64_t,
- vmem_size(heap_arena, VMEM_FREE), uint64_t,
- (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
- return (1);
+ n = (int64_t)vmem_size(heap_arena, VMEM_FREE) -
+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_HEAP_ARENA;
}
#define zio_arena NULL
#else
@@ -2620,25 +3473,51 @@ arc_reclaim_needed(void)
* to aggressively evict memory from the arc in order to avoid
* memory fragmentation issues.
*/
- if (zio_arena != NULL &&
- vmem_size(zio_arena, VMEM_FREE) <
- (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
- return (1);
+ if (zio_arena != NULL) {
+ n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
+ (vmem_size(zio_arena, VMEM_ALLOC) >> 4);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_ZIO_ARENA;
+ }
+ }
/*
* The above limits know nothing about the real level of KVA fragmentation.
* Start aggressive reclamation if too little sequential KVA is left.
*/
- if (vmem_size(heap_arena, VMEM_MAXFREE) < zfs_max_recordsize)
- return (1);
+ if (lowest > 0) {
+ n = (vmem_size(heap_arena, VMEM_MAXFREE) < zfs_max_recordsize) ?
+ -((int64_t)vmem_size(heap_arena, VMEM_ALLOC) >> 4) :
+ INT64_MAX;
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_ZIO_FRAG;
+ }
+ }
#else /* _KERNEL */
+ /* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
- return (1);
+ lowest = -1024;
#endif /* _KERNEL */
- DTRACE_PROBE(arc__reclaim_no);
- return (0);
+ last_free_memory = lowest;
+ last_free_reason = r;
+ DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r);
+ return (lowest);
+}
+
+/*
+ * Determine if the system is under memory pressure and is asking
+ * to reclaim memory. A return value of TRUE indicates that the system
+ * is under memory pressure and that the arc should adjust accordingly.
+ */
+static boolean_t
+arc_reclaim_needed(void)
+{
+ return (arc_available_memory() < 0);
}
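
The function above folds several independent margins and keeps the smallest, remembering why; condensed to its essentials (hypothetical margins, user-space C):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Hypothetical margins in bytes; negative means a deficit. */
	int64_t margins[] = { 4 << 20, -(1 << 20), 9 << 20 };
	const char *why[] = { "freemem", "swapfs", "heap_arena" };
	int64_t lowest = INT64_MAX;
	const char *r = "unknown";

	for (int i = 0; i < 3; i++) {
		if (margins[i] < lowest) {
			lowest = margins[i];
			r = why[i];
		}
	}
	/* A 1 MB deficit attributed to "swapfs"; reclaim is needed. */
	printf("lowest=%jd reason=%s\n", (intmax_t)lowest, r);
	return (0);
}
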
extern kmem_cache_t *zio_buf_cache[];
@@ -2646,7 +3525,7 @@ extern kmem_cache_t *zio_data_buf_cache[];
extern kmem_cache_t *range_seg_cache;
static __noinline void
-arc_kmem_reap_now(arc_reclaim_strategy_t strat)
+arc_kmem_reap_now(void)
{
size_t i;
kmem_cache_t *prev_cache = NULL;
@@ -2669,13 +3548,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
#endif
#endif
- /*
- * An aggressive reclamation will shrink the cache size as well as
- * reap free buffers from the arc kmem caches.
- */
- if (strat == ARC_RECLAIM_AGGR)
- arc_shrink();
-
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
if (zio_buf_cache[i] != prev_cache) {
prev_cache = zio_buf_cache[i];
@@ -2687,88 +3559,170 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
}
}
kmem_cache_reap_now(buf_cache);
- kmem_cache_reap_now(hdr_cache);
+ kmem_cache_reap_now(hdr_full_cache);
+ kmem_cache_reap_now(hdr_l2only_cache);
kmem_cache_reap_now(range_seg_cache);
#ifdef sun
- /*
- * Ask the vmem arena to reclaim unused memory from its
- * quantum caches.
- */
- if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+ if (zio_arena != NULL) {
+ /*
+ * Ask the vmem arena to reclaim unused memory from its
+ * quantum caches.
+ */
vmem_qcache_reap(zio_arena);
+ }
#endif
DTRACE_PROBE(arc__kmem_reap_end);
}
+/*
+ * Threads can block in arc_get_data_buf() waiting for this thread to evict
+ * enough data and signal them to proceed. When this happens, the threads in
+ * arc_get_data_buf() are sleeping while holding the hash lock for their
+ * particular arc header. Thus, we must be careful to never sleep on a
+ * hash lock in this thread. This is to prevent the following deadlock:
+ *
+ * - Thread A sleeps on CV in arc_get_data_buf() holding hash lock "L",
+ * waiting for the reclaim thread to signal it.
+ *
+ * - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter,
+ * fails, and goes to sleep forever.
+ *
+ * This possible deadlock is avoided by always acquiring a hash lock
+ * using mutex_tryenter() from arc_reclaim_thread().
+ */
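
A user-space analogue of the rule this comment states, where the reclaim side may only ever try-lock a hash lock and never block on one (pthreads sketch; the signature is hypothetical):

#include <pthread.h>

/*
 * Reclaim-side sketch: skip any buffer whose hash lock is busy rather
 * than sleeping on it, since the holder may itself be sleeping on a CV
 * that only this thread can signal.
 */
static int
try_evict_one(pthread_mutex_t *hash_lock)
{
	if (pthread_mutex_trylock(hash_lock) != 0)
		return (0);	/* busy: skip; never a blocking lock here */
	/* ... evict the buffer ... */
	pthread_mutex_unlock(hash_lock);
	return (1);
}
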
static void
arc_reclaim_thread(void *dummy __unused)
{
clock_t growtime = 0;
- arc_reclaim_strategy_t last_reclaim = ARC_RECLAIM_CONS;
callb_cpr_t cpr;
- CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
+ CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
- mutex_enter(&arc_reclaim_thr_lock);
- while (arc_thread_exit == 0) {
- if (arc_reclaim_needed()) {
+ mutex_enter(&arc_reclaim_lock);
+ while (!arc_reclaim_thread_exit) {
+ int64_t free_memory = arc_available_memory();
+ uint64_t evicted = 0;
- if (arc_no_grow) {
- if (last_reclaim == ARC_RECLAIM_CONS) {
- DTRACE_PROBE(arc__reclaim_aggr_no_grow);
- last_reclaim = ARC_RECLAIM_AGGR;
- } else {
- last_reclaim = ARC_RECLAIM_CONS;
- }
- } else {
- arc_no_grow = TRUE;
- last_reclaim = ARC_RECLAIM_AGGR;
- DTRACE_PROBE(arc__reclaim_aggr);
- membar_producer();
- }
+ mutex_exit(&arc_reclaim_lock);
- /* reset the growth delay for every reclaim */
- growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
+ if (free_memory < 0) {
- if (needfree && last_reclaim == ARC_RECLAIM_CONS) {
- /*
- * If needfree is TRUE our vm_lowmem hook
- * was called and in that case we must free some
- * memory, so switch to aggressive mode.
- */
- arc_no_grow = TRUE;
- last_reclaim = ARC_RECLAIM_AGGR;
- }
- arc_kmem_reap_now(last_reclaim);
+ arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
- } else if (arc_no_grow && ddi_get_lbolt() >= growtime) {
- arc_no_grow = FALSE;
+ /*
+ * Wait at least arc_grow_retry (default 60) seconds
+ * before considering growing.
+ */
+ growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
+
+ arc_kmem_reap_now();
+
+ /*
+ * If we are still low on memory, shrink the ARC
+ * so that we have arc_shrink_min free space.
+ */
+ free_memory = arc_available_memory();
+
+ int64_t to_free =
+ (arc_c >> arc_shrink_shift) - free_memory;
+ if (to_free > 0) {
+#ifdef _KERNEL
+ to_free = MAX(to_free, ptob(needfree));
+#endif
+ arc_shrink(to_free);
+ }
+ } else if (free_memory < arc_c >> arc_no_grow_shift) {
+ arc_no_grow = B_TRUE;
+ } else if (ddi_get_lbolt() >= growtime) {
+ arc_no_grow = B_FALSE;
}
- arc_adjust();
+ evicted = arc_adjust();
- if (arc_eviction_list != NULL)
- arc_do_user_evicts();
+ mutex_enter(&arc_reclaim_lock);
+ /*
+ * If evicted is zero, we couldn't evict anything via
+ * arc_adjust(). This could be due to hash lock
+ * collisions, but more likely due to the majority of
+ * arc buffers being unevictable. Therefore, even if
+ * arc_size is above arc_c, another pass is unlikely to
+ * be helpful and could potentially cause us to enter an
+ * infinite loop.
+ */
+ if (arc_size <= arc_c || evicted == 0) {
#ifdef _KERNEL
- if (needfree) {
needfree = 0;
- wakeup(&needfree);
- }
#endif
+ /*
+ * We're either no longer overflowing, or we
+ * can't evict anything more, so we should wake
+ * up any threads before we go to sleep.
+ */
+ cv_broadcast(&arc_reclaim_waiters_cv);
+
+ /*
+ * Block until signaled, or after one second (we
+ * might need to perform arc_kmem_reap_now()
+ * even if we aren't being signaled)
+ */
+ CALLB_CPR_SAFE_BEGIN(&cpr);
+ (void) cv_timedwait(&arc_reclaim_thread_cv,
+ &arc_reclaim_lock, hz);
+ CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_lock);
+ }
+ }
+
+ arc_reclaim_thread_exit = FALSE;
+ cv_broadcast(&arc_reclaim_thread_cv);
+ CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_lock */
+ thread_exit();
+}
- /* block until needed, or one second, whichever is shorter */
+static void
+arc_user_evicts_thread(void *dummy __unused)
+{
+ callb_cpr_t cpr;
+
+ CALLB_CPR_INIT(&cpr, &arc_user_evicts_lock, callb_generic_cpr, FTAG);
+
+ mutex_enter(&arc_user_evicts_lock);
+ while (!arc_user_evicts_thread_exit) {
+ mutex_exit(&arc_user_evicts_lock);
+
+ arc_do_user_evicts();
+
+ /*
+ * This is necessary in order for the mdb ::arc dcmd to
+ * show up-to-date information. Since the ::arc command
+ * does not call the kstat's update function, without
+ * this call, the command may show stale stats for the
+ * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
+ * with this change, the data might be up to 1 second
+ * out of date; but that should suffice. The arc_state_t
+ * structures can be queried directly if more accurate
+ * information is needed.
+ */
+ if (arc_ksp != NULL)
+ arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+
+ mutex_enter(&arc_user_evicts_lock);
+
+ /*
+ * Block until signaled, or after one second (we need to
+ * call the arc's kstat update function regularly).
+ */
CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait(&arc_reclaim_thr_cv,
- &arc_reclaim_thr_lock, hz);
- CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock);
+ (void) cv_timedwait(&arc_user_evicts_cv,
+ &arc_user_evicts_lock, hz);
+ CALLB_CPR_SAFE_END(&cpr, &arc_user_evicts_lock);
}
- arc_thread_exit = 0;
- cv_broadcast(&arc_reclaim_thr_cv);
- CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */
+ arc_user_evicts_thread_exit = FALSE;
+ cv_broadcast(&arc_user_evicts_cv);
+ CALLB_CPR_EXIT(&cpr); /* drops arc_user_evicts_lock */
thread_exit();
}
@@ -2782,6 +3736,8 @@ arc_adapt(int bytes, arc_state_t *state)
{
int mult;
uint64_t arc_p_min = (arc_c >> arc_p_min_shift);
+ int64_t mrug_size = refcount_count(&arc_mru_ghost->arcs_size);
+ int64_t mfug_size = refcount_count(&arc_mfu_ghost->arcs_size);
if (state == arc_l2c_only)
return;
@@ -2796,16 +3752,14 @@ arc_adapt(int bytes, arc_state_t *state)
* target size of the MRU list.
*/
if (state == arc_mru_ghost) {
- mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
- 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size));
+ mult = (mrug_size >= mfug_size) ? 1 : (mfug_size / mrug_size);
mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult);
} else if (state == arc_mfu_ghost) {
uint64_t delta;
- mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
- 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size));
+ mult = (mfug_size >= mrug_size) ? 1 : (mrug_size / mfug_size);
mult = MIN(mult, 10);
delta = MIN(bytes * mult, arc_p);
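
As a worked example with hypothetical sizes: a hit on the MFU ghost list when mrug_size = 4 GB and mfug_size = 1 GB yields mult = 4, so arc_p is reduced by four times the buffer size (clamped to arc_p), shifting target space toward the MFU; the MIN(mult, 10) cap keeps a badly skewed ghost ratio from swinging arc_p wildly.
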
@@ -2814,7 +3768,7 @@ arc_adapt(int bytes, arc_state_t *state)
ASSERT((int64_t)arc_p >= 0);
if (arc_reclaim_needed()) {
- cv_signal(&arc_reclaim_thr_cv);
+ cv_signal(&arc_reclaim_thread_cv);
return;
}
@@ -2842,122 +3796,111 @@ arc_adapt(int bytes, arc_state_t *state)
}
/*
- * Check if the cache has reached its limits and eviction is required
- * prior to insert.
+ * Check if arc_size has grown past our upper threshold, determined by
+ * zfs_arc_overflow_shift.
*/
-static int
-arc_evict_needed(arc_buf_contents_t type)
+static boolean_t
+arc_is_overflowing(void)
{
- if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
- return (1);
-
- if (arc_reclaim_needed())
- return (1);
+ /* Always allow at least one block of overflow */
+ uint64_t overflow = MAX(SPA_MAXBLOCKSIZE,
+ arc_c >> zfs_arc_overflow_shift);
- return (arc_size > arc_c);
+ return (arc_size >= arc_c + overflow);
}
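
With arc_c = 1 GB and a zfs_arc_overflow_shift of 8 (hypothetical but plausible values), arc_c >> 8 is 4 MB, so the threshold becomes MAX(SPA_MAXBLOCKSIZE, 4 MB); assuming the 16 MB SPA_MAXBLOCKSIZE that accompanies large-block support, writers are only made to wait once arc_size reaches arc_c + 16 MB.
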
/*
- * The buffer, supplied as the first argument, needs a data block.
- * So, if we are at cache max, determine which cache should be victimized.
- * We have the following cases:
- *
- * 1. Insert for MRU, p > sizeof(arc_anon + arc_mru) ->
- * In this situation if we're out of space, but the resident size of the MFU is
- * under the limit, victimize the MFU cache to satisfy this insertion request.
- *
- * 2. Insert for MRU, p <= sizeof(arc_anon + arc_mru) ->
- * Here, we've used up all of the available space for the MRU, so we need to
- * evict from our own cache instead. Evict from the set of resident MRU
- * entries.
- *
- * 3. Insert for MFU (c - p) > sizeof(arc_mfu) ->
- * c minus p represents the MFU space in the cache, since p is the size of the
- * cache that is dedicated to the MRU. In this situation there's still space on
- * the MFU side, so the MRU side needs to be victimized.
- *
- * 4. Insert for MFU (c - p) < sizeof(arc_mfu) ->
- * MFU's resident set is consuming more space than it has been allotted. In
- * this situation, we must victimize our own cache, the MFU, for this insertion.
+ * The buffer, supplied as the first argument, needs a data block. If we
+ * are hitting the hard limit for the cache size, we must sleep, waiting
+ * for the eviction thread to catch up. If we're past the target size
+ * but below the hard limit, we'll only signal the reclaim thread and
+ * continue on.
*/
static void
arc_get_data_buf(arc_buf_t *buf)
{
- arc_state_t *state = buf->b_hdr->b_state;
+ arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = buf->b_hdr->b_type;
+ arc_buf_contents_t type = arc_buf_type(buf->b_hdr);
arc_adapt(size, state);
/*
- * We have not yet reached cache maximum size,
- * just allocate a new buffer.
+ * If arc_size is currently overflowing, and has grown past our
+ * upper limit, we must be adding data faster than the evict
+ * thread can evict. Thus, to ensure we don't compound the
+ * problem by adding more data and forcing arc_size to grow even
+ * further past its target size, we halt and wait for the
+ * eviction thread to catch up.
+ *
+ * It's also possible that the reclaim thread is unable to evict
+ * enough buffers to get arc_size below the overflow limit (e.g.
+ * due to buffers being unevictable, or hash lock collisions).
+ * In this case, we want to proceed regardless of whether we're
+ * overflowing; thus we don't use a while loop here.
*/
- if (!arc_evict_needed(type)) {
- if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
- arc_space_consume(size, ARC_SPACE_DATA);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
- ARCSTAT_INCR(arcstat_data_size, size);
- atomic_add_64(&arc_size, size);
+ if (arc_is_overflowing()) {
+ mutex_enter(&arc_reclaim_lock);
+
+ /*
+ * Now that we've acquired the lock, we may no longer be
+ * over the overflow limit; let's check.
+ *
+ * We're ignoring the case of spurious wake ups. If that
+ * were to happen, it'd let this thread consume an ARC
+ * buffer before it should have (i.e. before we're under
+ * the overflow limit and were signalled by the reclaim
+ * thread). As long as that is a rare occurrence, it
+ * shouldn't cause any harm.
+ */
+ if (arc_is_overflowing()) {
+ cv_signal(&arc_reclaim_thread_cv);
+ cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
}
- goto out;
+
+ mutex_exit(&arc_reclaim_lock);
}
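
The handshake above is the usual recheck-under-the-lock-then-sleep pattern; a pthreads sketch of the same shape (hypothetical names, not the kernel primitives):

#include <pthread.h>

static pthread_mutex_t reclaim_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_thread_cv = PTHREAD_COND_INITIALIZER;
static pthread_cond_t waiters_cv = PTHREAD_COND_INITIALIZER;
static int overflowing;		/* stand-in for arc_is_overflowing() */

static void
wait_for_eviction(void)
{
	pthread_mutex_lock(&reclaim_lock);
	if (overflowing) {	/* recheck now that the lock is held */
		/* Kick the evictor, then sleep until it signals us. */
		pthread_cond_signal(&reclaim_thread_cv);
		pthread_cond_wait(&waiters_cv, &reclaim_lock);
		/* Deliberately "if", not "while": proceed after one wait. */
	}
	pthread_mutex_unlock(&reclaim_lock);
}
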
- /*
- * If we are prefetching from the mfu ghost list, this buffer
- * will end up on the mru list; so steal space from there.
- */
- if (state == arc_mfu_ghost)
- state = buf->b_hdr->b_flags & ARC_FLAG_PREFETCH ?
- arc_mru : arc_mfu;
- else if (state == arc_mru_ghost)
- state = arc_mru;
-
- if (state == arc_mru || state == arc_anon) {
- uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size;
- state = (arc_mfu->arcs_lsize[type] >= size &&
- arc_p > mru_used) ? arc_mfu : arc_mru;
+ if (type == ARC_BUFC_METADATA) {
+ buf->b_data = zio_buf_alloc(size);
+ arc_space_consume(size, ARC_SPACE_META);
} else {
- /* MFU cases */
- uint64_t mfu_space = arc_c - arc_p;
- state = (arc_mru->arcs_lsize[type] >= size &&
- mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
+ ASSERT(type == ARC_BUFC_DATA);
+ buf->b_data = zio_data_buf_alloc(size);
+ arc_space_consume(size, ARC_SPACE_DATA);
}
- if ((buf->b_data = arc_evict(state, 0, size, TRUE, type)) == NULL) {
- if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
- arc_space_consume(size, ARC_SPACE_DATA);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
- ARCSTAT_INCR(arcstat_data_size, size);
- atomic_add_64(&arc_size, size);
- }
- ARCSTAT_BUMP(arcstat_recycle_miss);
- }
- ASSERT(buf->b_data != NULL);
-out:
+
/*
* Update the state size. Note that ghost states have a
* "ghost size" and so don't need to be updated.
*/
- if (!GHOST_STATE(buf->b_hdr->b_state)) {
+ if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) {
arc_buf_hdr_t *hdr = buf->b_hdr;
+ arc_state_t *state = hdr->b_l1hdr.b_state;
- atomic_add_64(&hdr->b_state->arcs_size, size);
- if (list_link_active(&hdr->b_arc_node)) {
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
- atomic_add_64(&hdr->b_state->arcs_lsize[type], size);
+ (void) refcount_add_many(&state->arcs_size, size, buf);
+
+ /*
+ * If this is reached via arc_read, the link is
+ * protected by the hash lock. If reached via
+ * arc_buf_alloc, the header should not be accessed by
+ * any other thread. And, if reached via arc_read_done,
+ * the hash lock will protect it if it's found in the
+ * hash table; otherwise no other thread should be
+ * trying to [add|remove]_reference it.
+ */
+ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ atomic_add_64(&hdr->b_l1hdr.b_state->arcs_lsize[type],
+ size);
}
/*
* If we are growing the cache, and we are adding anonymous
* data, and we have outgrown arc_p, update arc_p
*/
- if (arc_size < arc_c && hdr->b_state == arc_anon &&
- arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
+ if (arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon &&
+ (refcount_count(&arc_anon->arcs_size) +
+ refcount_count(&arc_mru->arcs_size) > arc_p))
arc_p = MIN(arc_c, arc_p + size);
}
ARCSTAT_BUMP(arcstat_allocated);
@@ -2973,20 +3916,21 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
clock_t now;
ASSERT(MUTEX_HELD(hash_lock));
+ ASSERT(HDR_HAS_L1HDR(hdr));
- if (hdr->b_state == arc_anon) {
+ if (hdr->b_l1hdr.b_state == arc_anon) {
/*
* This buffer is not in the cache, and does not
* appear in our "ghost" list. Add the new buffer
* to the MRU state.
*/
- ASSERT(hdr->b_arc_access == 0);
- hdr->b_arc_access = ddi_get_lbolt();
+ ASSERT0(hdr->b_l1hdr.b_arc_access);
+ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mru, hdr, hash_lock);
- } else if (hdr->b_state == arc_mru) {
+ } else if (hdr->b_l1hdr.b_state == arc_mru) {
now = ddi_get_lbolt();
/*
@@ -2997,14 +3941,16 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* - move the buffer to the head of the list if this is
* another prefetch (to make it less likely to be evicted).
*/
- if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) {
- if (refcount_count(&hdr->b_refcnt) == 0) {
- ASSERT(list_link_active(&hdr->b_arc_node));
+ if (HDR_PREFETCH(hdr)) {
+ if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
+ /* link protected by hash lock */
+ ASSERT(multilist_link_active(
+ &hdr->b_l1hdr.b_arc_node));
} else {
hdr->b_flags &= ~ARC_FLAG_PREFETCH;
ARCSTAT_BUMP(arcstat_mru_hits);
}
- hdr->b_arc_access = now;
+ hdr->b_l1hdr.b_arc_access = now;
return;
}
@@ -3013,18 +3959,18 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* but it is still in the cache. Move it to the MFU
* state.
*/
- if (now > hdr->b_arc_access + ARC_MINTIME) {
+ if (now > hdr->b_l1hdr.b_arc_access + ARC_MINTIME) {
/*
* More than 125ms have passed since we
* instantiated this buffer. Move it to the
* most frequently used state.
*/
- hdr->b_arc_access = now;
+ hdr->b_l1hdr.b_arc_access = now;
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mfu, hdr, hash_lock);
}
ARCSTAT_BUMP(arcstat_mru_hits);
- } else if (hdr->b_state == arc_mru_ghost) {
+ } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
arc_state_t *new_state;
/*
* This buffer has been "accessed" recently, but
@@ -3032,9 +3978,9 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* MFU state.
*/
- if (hdr->b_flags & ARC_FLAG_PREFETCH) {
+ if (HDR_PREFETCH(hdr)) {
new_state = arc_mru;
- if (refcount_count(&hdr->b_refcnt) > 0)
+ if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0)
hdr->b_flags &= ~ARC_FLAG_PREFETCH;
DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
} else {
@@ -3042,11 +3988,11 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
}
- hdr->b_arc_access = ddi_get_lbolt();
+ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, hdr, hash_lock);
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
- } else if (hdr->b_state == arc_mfu) {
+ } else if (hdr->b_l1hdr.b_state == arc_mfu) {
/*
* This buffer has been accessed more than once and is
* still in the cache. Keep it in the MFU state.
@@ -3056,13 +4002,14 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* If it was a prefetch, we will explicitly move it to
* the head of the list now.
*/
- if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) {
- ASSERT(refcount_count(&hdr->b_refcnt) == 0);
- ASSERT(list_link_active(&hdr->b_arc_node));
+ if ((HDR_PREFETCH(hdr)) != 0) {
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ /* link protected by hash_lock */
+ ASSERT(multilist_link_active(&hdr->b_l1hdr.b_arc_node));
}
ARCSTAT_BUMP(arcstat_mfu_hits);
- hdr->b_arc_access = ddi_get_lbolt();
- } else if (hdr->b_state == arc_mfu_ghost) {
+ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
+ } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
arc_state_t *new_state = arc_mfu;
/*
* This buffer has been accessed more than once but has
@@ -3070,26 +4017,26 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* MFU state.
*/
- if (hdr->b_flags & ARC_FLAG_PREFETCH) {
+ if (HDR_PREFETCH(hdr)) {
/*
* This is a prefetch access...
* move this block back to the MRU state.
*/
- ASSERT0(refcount_count(&hdr->b_refcnt));
+ ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
new_state = arc_mru;
}
- hdr->b_arc_access = ddi_get_lbolt();
+ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(new_state, hdr, hash_lock);
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
- } else if (hdr->b_state == arc_l2c_only) {
+ } else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
/*
* This buffer is on the 2nd Level ARC.
*/
- hdr->b_arc_access = ddi_get_lbolt();
+ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mfu, hdr, hash_lock);
} else {
@@ -3160,11 +4107,11 @@ arc_read_done(zio_t *zio)
}
hdr->b_flags &= ~ARC_FLAG_L2_EVICTED;
- if (l2arc_noprefetch && (hdr->b_flags & ARC_FLAG_PREFETCH))
+ if (l2arc_noprefetch && HDR_PREFETCH(hdr))
hdr->b_flags &= ~ARC_FLAG_L2CACHE;
/* byteswap if necessary */
- callback_list = hdr->b_acb;
+ callback_list = hdr->b_l1hdr.b_acb;
ASSERT(callback_list != NULL);
if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
dmu_object_byteswap_t bswap =
@@ -3180,7 +4127,8 @@ arc_read_done(zio_t *zio)
arc_buf_watch(buf);
#endif /* illumos */
- if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
+ if (hash_lock && zio->io_error == 0 &&
+ hdr->b_l1hdr.b_state == arc_anon) {
/*
* Only call arc_access on anonymous buffers. This is because
* if we've issued an I/O for an evicted buffer, we've already
@@ -3202,24 +4150,25 @@ arc_read_done(zio_t *zio)
abuf = NULL;
}
}
- hdr->b_acb = NULL;
+ hdr->b_l1hdr.b_acb = NULL;
hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
ASSERT(!HDR_BUF_AVAILABLE(hdr));
if (abuf == buf) {
ASSERT(buf->b_efunc == NULL);
- ASSERT(hdr->b_datacnt == 1);
+ ASSERT(hdr->b_l1hdr.b_datacnt == 1);
hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
}
- ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
+ callback_list != NULL);
if (zio->io_error != 0) {
hdr->b_flags |= ARC_FLAG_IO_ERROR;
- if (hdr->b_state != arc_anon)
+ if (hdr->b_l1hdr.b_state != arc_anon)
arc_change_state(arc_anon, hdr, hash_lock);
if (HDR_IN_HASH_TABLE(hdr))
buf_hash_remove(hdr);
- freeable = refcount_is_zero(&hdr->b_refcnt);
+ freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
}
/*
@@ -3227,9 +4176,9 @@ arc_read_done(zio_t *zio)
* that the hdr (and hence the cv) might be freed before we get to
* the cv_broadcast().
*/
- cv_broadcast(&hdr->b_cv);
+ cv_broadcast(&hdr->b_l1hdr.b_cv);
- if (hash_lock) {
+ if (hash_lock != NULL) {
mutex_exit(hash_lock);
} else {
/*
@@ -3238,8 +4187,8 @@ arc_read_done(zio_t *zio)
* moved to the anonymous state (so that it won't show up
* in the cache).
*/
- ASSERT3P(hdr->b_state, ==, arc_anon);
- freeable = refcount_is_zero(&hdr->b_refcnt);
+ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+ freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
}
/* execute each callback and free its structure */
@@ -3261,7 +4210,7 @@ arc_read_done(zio_t *zio)
}
/*
- * "Read" the block block at the specified DVA (in bp) via the
+ * "Read" the block at the specified DVA (in bp) via the
* cache. If the block is found in the cache, invoke the provided
* callback immediately and return. Note that the `zio' parameter
* in the callback will be NULL in this case, since no IO was
@@ -3301,21 +4250,51 @@ top:
hdr = buf_hash_find(guid, bp, &hash_lock);
}
- if (hdr != NULL && hdr->b_datacnt > 0) {
+ if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_datacnt > 0) {
*arc_flags |= ARC_FLAG_CACHED;
if (HDR_IO_IN_PROGRESS(hdr)) {
+ if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
+ priority == ZIO_PRIORITY_SYNC_READ) {
+ /*
+ * This sync read must wait for an
+ * in-progress async read (e.g. a predictive
+ * prefetch). Async reads are queued
+ * separately at the vdev_queue layer, so
+ * this is a form of priority inversion.
+ * Ideally, we would "inherit" the demand
+ * i/o's priority by moving the i/o from
+ * the async queue to the synchronous queue,
+ * but there is currently no mechanism to do
+ * so. Track this so that we can evaluate
+ * the magnitude of this potential performance
+ * problem.
+ *
+ * Note that if the prefetch i/o is already
+ * active (has been issued to the device),
+ * the prefetch improved performance, because
+ * we issued it sooner than we would have
+ * without the prefetch.
+ */
+ DTRACE_PROBE1(arc__sync__wait__for__async,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+ }
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+ }
+
if (*arc_flags & ARC_FLAG_WAIT) {
- cv_wait(&hdr->b_cv, hash_lock);
+ cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
mutex_exit(hash_lock);
goto top;
}
ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
if (done) {
- arc_callback_t *acb = NULL;
+ arc_callback_t *acb = NULL;
acb = kmem_zalloc(sizeof (arc_callback_t),
KM_SLEEP);
@@ -3326,8 +4305,8 @@ top:
spa, NULL, NULL, NULL, zio_flags);
ASSERT(acb->acb_done != NULL);
- acb->acb_next = hdr->b_acb;
- hdr->b_acb = acb;
+ acb->acb_next = hdr->b_l1hdr.b_acb;
+ hdr->b_l1hdr.b_acb = acb;
add_reference(hdr, hash_lock, private);
mutex_exit(hash_lock);
return (0);
@@ -3336,16 +4315,30 @@ top:
return (0);
}
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+ ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+ hdr->b_l1hdr.b_state == arc_mfu);
if (done) {
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ /*
+ * This is a demand read which does not have to
+ * wait for i/o because we did a predictive
+ * prefetch i/o for it, which has completed.
+ */
+ DTRACE_PROBE1(
+ arc__demand__hit__predictive__prefetch,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(
+ arcstat_demand_hit_predictive_prefetch);
+ hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+ }
add_reference(hdr, hash_lock, private);
/*
* If this block is already in use, create a new
* copy of the data so that we will be guaranteed
* that arc_release() will always succeed.
*/
- buf = hdr->b_buf;
+ buf = hdr->b_l1hdr.b_buf;
ASSERT(buf);
ASSERT(buf->b_data);
if (HDR_BUF_AVAILABLE(hdr)) {
@@ -3356,7 +4349,7 @@ top:
}
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
- refcount_count(&hdr->b_refcnt) == 0) {
+ refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
hdr->b_flags |= ARC_FLAG_PREFETCH;
}
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
@@ -3367,8 +4360,8 @@ top:
hdr->b_flags |= ARC_FLAG_L2COMPRESS;
mutex_exit(hash_lock);
ARCSTAT_BUMP(arcstat_hits);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+ ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+ demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
data, metadata, hits);
if (done)
@@ -3380,7 +4373,7 @@ top:
uint64_t addr = 0;
boolean_t devw = B_FALSE;
enum zio_compress b_compress = ZIO_COMPRESS_OFF;
- uint64_t b_asize = 0;
+ int32_t b_asize = 0;
if (hdr == NULL) {
/* this block is not in the cache */
@@ -3391,7 +4384,6 @@ top:
if (!BP_IS_EMBEDDED(bp)) {
hdr->b_dva = *BP_IDENTITY(bp);
hdr->b_birth = BP_PHYSICAL_BIRTH(bp);
- hdr->b_cksum0 = bp->blk_cksum.zc_word[0];
exists = buf_hash_insert(hdr, &hash_lock);
}
if (exists != NULL) {
@@ -3402,12 +4394,16 @@ top:
goto top; /* restart the IO request */
}
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_FLAG_PREFETCH) {
+ /*
+ * If there is a callback, we pass our reference to
+ * it; otherwise we remove our reference.
+ */
+ if (done == NULL) {
(void) remove_reference(hdr, hash_lock,
private);
- hdr->b_flags |= ARC_FLAG_PREFETCH;
}
+ if (*arc_flags & ARC_FLAG_PREFETCH)
+ hdr->b_flags |= ARC_FLAG_PREFETCH;
if (*arc_flags & ARC_FLAG_L2CACHE)
hdr->b_flags |= ARC_FLAG_L2CACHE;
if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -3415,17 +4411,28 @@ top:
if (BP_GET_LEVEL(bp) > 0)
hdr->b_flags |= ARC_FLAG_INDIRECT;
} else {
- /* this block is in the ghost cache */
- ASSERT(GHOST_STATE(hdr->b_state));
+ /*
+ * This block is in the ghost cache. If it was L2-only
+ * (and thus didn't have an L1 hdr), we realloc the
+ * header to add an L1 hdr.
+ */
+ if (!HDR_HAS_L1HDR(hdr)) {
+ hdr = arc_hdr_realloc(hdr, hdr_l2only_cache,
+ hdr_full_cache);
+ }
+
+ ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT0(refcount_count(&hdr->b_refcnt));
- ASSERT(hdr->b_buf == NULL);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- /* if this is a prefetch, we don't have a reference */
+ /*
+ * If there is a callback, we pass a reference to it.
+ */
+ if (done != NULL)
+ add_reference(hdr, hash_lock, private);
if (*arc_flags & ARC_FLAG_PREFETCH)
hdr->b_flags |= ARC_FLAG_PREFETCH;
- else
- add_reference(hdr, hash_lock, private);
if (*arc_flags & ARC_FLAG_L2CACHE)
hdr->b_flags |= ARC_FLAG_L2CACHE;
if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -3436,29 +4443,31 @@ top:
buf->b_efunc = NULL;
buf->b_private = NULL;
buf->b_next = NULL;
- hdr->b_buf = buf;
- ASSERT(hdr->b_datacnt == 0);
- hdr->b_datacnt = 1;
+ hdr->b_l1hdr.b_buf = buf;
+ ASSERT0(hdr->b_l1hdr.b_datacnt);
+ hdr->b_l1hdr.b_datacnt = 1;
arc_get_data_buf(buf);
arc_access(hdr, hash_lock);
}
- ASSERT(!GHOST_STATE(hdr->b_state));
+ if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
+ hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH;
+ ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
acb->acb_done = done;
acb->acb_private = private;
- ASSERT(hdr->b_acb == NULL);
- hdr->b_acb = acb;
+ ASSERT(hdr->b_l1hdr.b_acb == NULL);
+ hdr->b_l1hdr.b_acb = acb;
hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
- if (hdr->b_l2hdr != NULL &&
- (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) {
- devw = hdr->b_l2hdr->b_dev->l2ad_writing;
- addr = hdr->b_l2hdr->b_daddr;
- b_compress = hdr->b_l2hdr->b_compress;
- b_asize = hdr->b_l2hdr->b_asize;
+ if (HDR_HAS_L2HDR(hdr) &&
+ (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
+ devw = hdr->b_l2hdr.b_dev->l2ad_writing;
+ addr = hdr->b_l2hdr.b_daddr;
+ b_compress = hdr->b_l2hdr.b_compress;
+ b_asize = hdr->b_l2hdr.b_asize;
/*
* Lock out device removal.
*/
@@ -3478,13 +4487,18 @@ top:
DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
uint64_t, size, zbookmark_phys_t *, zb);
ARCSTAT_BUMP(arcstat_misses);
- ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
- demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+ ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+ demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
data, metadata, misses);
#ifdef _KERNEL
curthread->td_ru.ru_inblock++;
#endif
+ if (priority == ZIO_PRIORITY_ASYNC_READ)
+ hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ;
+ else
+ hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ;
+
if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
/*
* Read from the L2ARC if the following are true:
@@ -3495,7 +4509,7 @@ top:
* also have invalidated the vdev.
* 5. This isn't prefetch and l2arc_noprefetch is set.
*/
- if (hdr->b_l2hdr != NULL &&
+ if (HDR_HAS_L2HDR(hdr) &&
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
@@ -3587,8 +4601,9 @@ void
arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
{
ASSERT(buf->b_hdr != NULL);
- ASSERT(buf->b_hdr->b_state != arc_anon);
- ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt) || func == NULL);
+ ASSERT(buf->b_hdr->b_l1hdr.b_state != arc_anon);
+ ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt) ||
+ func == NULL);
ASSERT(buf->b_efunc == NULL);
ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr));
@@ -3612,7 +4627,7 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
if (hdr == NULL)
return;
if (HDR_BUF_AVAILABLE(hdr)) {
- arc_buf_t *buf = hdr->b_buf;
+ arc_buf_t *buf = hdr->b_l1hdr.b_buf;
add_reference(hdr, hash_lock, FTAG);
hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
mutex_exit(hash_lock);
@@ -3645,8 +4660,6 @@ arc_clear_callback(arc_buf_t *buf)
kmutex_t *hash_lock;
arc_evict_func_t *efunc = buf->b_efunc;
void *private = buf->b_private;
- list_t *list, *evicted_list;
- kmutex_t *lock, *evicted_lock;
mutex_enter(&buf->b_evict_lock);
hdr = buf->b_hdr;
@@ -3672,17 +4685,19 @@ arc_clear_callback(arc_buf_t *buf)
hdr = buf->b_hdr;
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
- ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
- ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+ ASSERT3U(refcount_count(&hdr->b_l1hdr.b_refcnt), <,
+ hdr->b_l1hdr.b_datacnt);
+ ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+ hdr->b_l1hdr.b_state == arc_mfu);
buf->b_efunc = NULL;
buf->b_private = NULL;
- if (hdr->b_datacnt > 1) {
+ if (hdr->b_l1hdr.b_datacnt > 1) {
mutex_exit(&buf->b_evict_lock);
- arc_buf_destroy(buf, FALSE, TRUE);
+ arc_buf_destroy(buf, TRUE);
} else {
- ASSERT(buf == hdr->b_buf);
+ ASSERT(buf == hdr->b_l1hdr.b_buf);
hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
mutex_exit(&buf->b_evict_lock);
}
@@ -3701,10 +4716,7 @@ arc_clear_callback(arc_buf_t *buf)
void
arc_release(arc_buf_t *buf, void *tag)
{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_lock = NULL;
- l2arc_buf_hdr_t *l2hdr;
- uint64_t buf_size;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
/*
* It would be nice to assert that if it's DMU metadata (level >
@@ -3713,57 +4725,101 @@ arc_release(arc_buf_t *buf, void *tag)
*/
mutex_enter(&buf->b_evict_lock);
- hdr = buf->b_hdr;
- /* this buffer is not on any list */
- ASSERT(refcount_count(&hdr->b_refcnt) > 0);
+ ASSERT(HDR_HAS_L1HDR(hdr));
- if (hdr->b_state == arc_anon) {
- /* this buffer is already released */
- ASSERT(buf->b_efunc == NULL);
- } else {
- hash_lock = HDR_LOCK(hdr);
- mutex_enter(hash_lock);
- hdr = buf->b_hdr;
- ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+ /*
+ * We don't grab the hash lock prior to this check, because if
+ * the buffer's header is in the arc_anon state, it won't be
+ * linked into the hash table.
+ */
+ if (hdr->b_l1hdr.b_state == arc_anon) {
+ mutex_exit(&buf->b_evict_lock);
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+ ASSERT(!HDR_IN_HASH_TABLE(hdr));
+ ASSERT(!HDR_HAS_L2HDR(hdr));
+ ASSERT(BUF_EMPTY(hdr));
+ ASSERT3U(hdr->b_l1hdr.b_datacnt, ==, 1);
+ ASSERT3S(refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
+ ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+
+ ASSERT3P(buf->b_efunc, ==, NULL);
+ ASSERT3P(buf->b_private, ==, NULL);
+
+ hdr->b_l1hdr.b_arc_access = 0;
+ arc_buf_thaw(buf);
+
+ return;
}
- l2hdr = hdr->b_l2hdr;
- if (l2hdr) {
- mutex_enter(&l2arc_buflist_mtx);
- arc_buf_l2_cdata_free(hdr);
- hdr->b_l2hdr = NULL;
- list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
+ kmutex_t *hash_lock = HDR_LOCK(hdr);
+ mutex_enter(hash_lock);
+
+ /*
+ * This assignment is only valid as long as the hash_lock is
+ * held; we must be careful not to reference state or the
+ * b_state field after dropping the lock.
+ */
+ arc_state_t *state = hdr->b_l1hdr.b_state;
+ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+ ASSERT3P(state, !=, arc_anon);
+
+ /* this buffer is not on any list */
+ ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) > 0);
+
+ if (HDR_HAS_L2HDR(hdr)) {
+ mutex_enter(&hdr->b_l2hdr.b_dev->l2ad_mtx);
+
+ /*
+ * We have to recheck this conditional now that
+ * we're holding the l2ad_mtx to prevent a race with
+ * another thread which might be concurrently calling
+ * l2arc_evict(). In that case, l2arc_evict() might have
+ * destroyed the header's L2 portion as we were waiting
+ * to acquire the l2ad_mtx.
+ */
+ if (HDR_HAS_L2HDR(hdr)) {
+ if (hdr->b_l2hdr.b_daddr != L2ARC_ADDR_UNSET)
+ trim_map_free(hdr->b_l2hdr.b_dev->l2ad_vdev,
+ hdr->b_l2hdr.b_daddr,
+ hdr->b_l2hdr.b_asize, 0);
+ arc_hdr_l2hdr_destroy(hdr);
+ }
+
+ mutex_exit(&hdr->b_l2hdr.b_dev->l2ad_mtx);
}
- buf_size = hdr->b_size;
/*
* Do we have more than one buf?
*/
- if (hdr->b_datacnt > 1) {
+ if (hdr->b_l1hdr.b_datacnt > 1) {
arc_buf_hdr_t *nhdr;
arc_buf_t **bufp;
uint64_t blksz = hdr->b_size;
uint64_t spa = hdr->b_spa;
- arc_buf_contents_t type = hdr->b_type;
+ arc_buf_contents_t type = arc_buf_type(hdr);
uint32_t flags = hdr->b_flags;
- ASSERT(hdr->b_buf != buf || buf->b_next != NULL);
+ ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
/*
* Pull the data off of this hdr and attach it to
* a new anonymous hdr.
*/
(void) remove_reference(hdr, hash_lock, tag);
- bufp = &hdr->b_buf;
+ bufp = &hdr->b_l1hdr.b_buf;
while (*bufp != buf)
bufp = &(*bufp)->b_next;
*bufp = buf->b_next;
buf->b_next = NULL;
- ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size);
- atomic_add_64(&hdr->b_state->arcs_size, -hdr->b_size);
- if (refcount_is_zero(&hdr->b_refcnt)) {
- uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type];
+ ASSERT3P(state, !=, arc_l2c_only);
+
+ (void) refcount_remove_many(
+ &state->arcs_size, hdr->b_size, buf);
+
+ if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+ ASSERT3P(state, !=, arc_l2c_only);
+ uint64_t *size = &state->arcs_lsize[type];
ASSERT3U(*size, >=, hdr->b_size);
atomic_add_64(size, -hdr->b_size);
}
@@ -3772,12 +4828,12 @@ arc_release(arc_buf_t *buf, void *tag)
* We're releasing a duplicate user data buffer; update
* our statistics accordingly.
*/
- if (hdr->b_type == ARC_BUFC_DATA) {
+ if (HDR_ISTYPE_DATA(hdr)) {
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size,
-hdr->b_size);
}
- hdr->b_datacnt -= 1;
+ hdr->b_l1hdr.b_datacnt -= 1;
arc_cksum_verify(buf);
#ifdef illumos
arc_buf_unwatch(buf);
@@ -3785,48 +4841,40 @@ arc_release(arc_buf_t *buf, void *tag)
mutex_exit(hash_lock);
- nhdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+ nhdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
nhdr->b_size = blksz;
nhdr->b_spa = spa;
- nhdr->b_type = type;
- nhdr->b_buf = buf;
- nhdr->b_state = arc_anon;
- nhdr->b_arc_access = 0;
+
nhdr->b_flags = flags & ARC_FLAG_L2_WRITING;
- nhdr->b_l2hdr = NULL;
- nhdr->b_datacnt = 1;
+ nhdr->b_flags |= arc_bufc_to_flags(type);
+ nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+
+ nhdr->b_l1hdr.b_buf = buf;
+ nhdr->b_l1hdr.b_datacnt = 1;
+ nhdr->b_l1hdr.b_state = arc_anon;
+ nhdr->b_l1hdr.b_arc_access = 0;
+ nhdr->b_l1hdr.b_tmp_cdata = NULL;
nhdr->b_freeze_cksum = NULL;
- (void) refcount_add(&nhdr->b_refcnt, tag);
+
+ (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr;
mutex_exit(&buf->b_evict_lock);
- atomic_add_64(&arc_anon->arcs_size, blksz);
+ (void) refcount_add_many(&arc_anon->arcs_size, blksz, buf);
} else {
mutex_exit(&buf->b_evict_lock);
- ASSERT(refcount_count(&hdr->b_refcnt) == 1);
- ASSERT(!list_link_active(&hdr->b_arc_node));
+ ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1);
+ /* protected by hash lock, or hdr is on arc_anon */
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- if (hdr->b_state != arc_anon)
- arc_change_state(arc_anon, hdr, hash_lock);
- hdr->b_arc_access = 0;
- if (hash_lock)
- mutex_exit(hash_lock);
+ arc_change_state(arc_anon, hdr, hash_lock);
+ hdr->b_l1hdr.b_arc_access = 0;
+ mutex_exit(hash_lock);
buf_discard_identity(hdr);
arc_buf_thaw(buf);
}
buf->b_efunc = NULL;
buf->b_private = NULL;
-
- if (l2hdr) {
- ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
- vdev_space_update(l2hdr->b_dev->l2ad_vdev,
- -l2hdr->b_asize, 0, 0);
- trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
- l2hdr->b_asize, 0);
- kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
- ARCSTAT_INCR(arcstat_l2_size, -buf_size);
- mutex_exit(&l2arc_buflist_mtx);
- }
}
int
@@ -3835,7 +4883,8 @@ arc_released(arc_buf_t *buf)
int released;
mutex_enter(&buf->b_evict_lock);
- released = (buf->b_data != NULL && buf->b_hdr->b_state == arc_anon);
+ released = (buf->b_data != NULL &&
+ buf->b_hdr->b_l1hdr.b_state == arc_anon);
mutex_exit(&buf->b_evict_lock);
return (released);
}
@@ -3847,7 +4896,7 @@ arc_referenced(arc_buf_t *buf)
int referenced;
mutex_enter(&buf->b_evict_lock);
- referenced = (refcount_count(&buf->b_hdr->b_refcnt));
+ referenced = (refcount_count(&buf->b_hdr->b_l1hdr.b_refcnt));
mutex_exit(&buf->b_evict_lock);
return (referenced);
}
@@ -3860,7 +4909,9 @@ arc_write_ready(zio_t *zio)
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt));
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
+ ASSERT(hdr->b_l1hdr.b_datacnt > 0);
callback->awcb_ready(zio, buf, callback->awcb_private);
/*
@@ -3870,12 +4921,12 @@ arc_write_ready(zio_t *zio)
* accounting for any re-write attempt.
*/
if (HDR_IO_IN_PROGRESS(hdr)) {
- mutex_enter(&hdr->b_freeze_lock);
+ mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_freeze_cksum != NULL) {
kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
hdr->b_freeze_cksum = NULL;
}
- mutex_exit(&hdr->b_freeze_lock);
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
}
arc_cksum_compute(buf, B_FALSE);
hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
@@ -3900,7 +4951,7 @@ arc_write_done(zio_t *zio)
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT(hdr->b_acb == NULL);
+ ASSERT(hdr->b_l1hdr.b_acb == NULL);
if (zio->io_error == 0) {
if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
@@ -3908,7 +4959,6 @@ arc_write_done(zio_t *zio)
} else {
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
- hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
}
} else {
ASSERT(BUF_EMPTY(hdr));
@@ -3929,7 +4979,7 @@ arc_write_done(zio_t *zio)
arc_cksum_verify(buf);
exists = buf_hash_insert(hdr, &hash_lock);
- if (exists) {
+ if (exists != NULL) {
/*
* This can only happen if we overwrite for
* sync-to-convergence, because we remove
@@ -3939,7 +4989,8 @@ arc_write_done(zio_t *zio)
if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp))
panic("bad overwrite, hdr=%p exists=%p",
(void *)hdr, (void *)exists);
- ASSERT(refcount_is_zero(&exists->b_refcnt));
+ ASSERT(refcount_is_zero(
+ &exists->b_l1hdr.b_refcnt));
arc_change_state(arc_anon, exists, hash_lock);
mutex_exit(hash_lock);
arc_hdr_destroy(exists);
@@ -3953,22 +5004,22 @@ arc_write_done(zio_t *zio)
(void *)hdr, (void *)exists);
} else {
/* Dedup */
- ASSERT(hdr->b_datacnt == 1);
- ASSERT(hdr->b_state == arc_anon);
+ ASSERT(hdr->b_l1hdr.b_datacnt == 1);
+ ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp));
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
}
}
hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
/* if it's not anon, we are doing a scrub */
- if (!exists && hdr->b_state == arc_anon)
+ if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon)
arc_access(hdr, hash_lock);
mutex_exit(hash_lock);
} else {
hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
}
- ASSERT(!refcount_is_zero(&hdr->b_refcnt));
+ ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
callback->awcb_done(zio, buf, callback->awcb_private);
kmem_free(callback, sizeof (arc_write_callback_t));
@@ -3988,8 +5039,9 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT(ready != NULL);
ASSERT(done != NULL);
ASSERT(!HDR_IO_ERROR(hdr));
- ASSERT((hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS) == 0);
- ASSERT(hdr->b_acb == NULL);
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+ ASSERT(hdr->b_l1hdr.b_acb == NULL);
+ ASSERT(hdr->b_l1hdr.b_datacnt > 0);
if (l2arc)
hdr->b_flags |= ARC_FLAG_L2CACHE;
if (l2arc_compress)
@@ -4074,7 +5126,8 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* network delays from blocking transactions that are ready to be
* assigned to a txg.
*/
- anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
+ anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) -
+ arc_loaned_bytes), 0);
/*
* Writes will, almost always, require additional memory allocations
@@ -4107,7 +5160,83 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
return (0);
}
-static kmutex_t arc_lowmem_lock;
+static void
+arc_kstat_update_state(arc_state_t *state, kstat_named_t *size,
+ kstat_named_t *evict_data, kstat_named_t *evict_metadata)
+{
+ size->value.ui64 = refcount_count(&state->arcs_size);
+ evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA];
+ evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA];
+}
+
+static int
+arc_kstat_update(kstat_t *ksp, int rw)
+{
+ arc_stats_t *as = ksp->ks_data;
+
+ if (rw == KSTAT_WRITE) {
+ return (EACCES);
+ } else {
+ arc_kstat_update_state(arc_anon,
+ &as->arcstat_anon_size,
+ &as->arcstat_anon_evictable_data,
+ &as->arcstat_anon_evictable_metadata);
+ arc_kstat_update_state(arc_mru,
+ &as->arcstat_mru_size,
+ &as->arcstat_mru_evictable_data,
+ &as->arcstat_mru_evictable_metadata);
+ arc_kstat_update_state(arc_mru_ghost,
+ &as->arcstat_mru_ghost_size,
+ &as->arcstat_mru_ghost_evictable_data,
+ &as->arcstat_mru_ghost_evictable_metadata);
+ arc_kstat_update_state(arc_mfu,
+ &as->arcstat_mfu_size,
+ &as->arcstat_mfu_evictable_data,
+ &as->arcstat_mfu_evictable_metadata);
+ arc_kstat_update_state(arc_mfu_ghost,
+ &as->arcstat_mfu_ghost_size,
+ &as->arcstat_mfu_ghost_evictable_data,
+ &as->arcstat_mfu_ghost_evictable_metadata);
+ }
+
+ return (0);
+}
+
+/*
+ * This function *must* return indices evenly distributed between all
+ * sublists of the multilist. This is needed due to how the ARC eviction
+ * code is laid out; arc_evict_state() assumes ARC buffers are evenly
+ * distributed between all sublists and uses this assumption when
+ * deciding which sublist to evict from and how much to evict from it.
+ */
+unsigned int
+arc_state_multilist_index_func(multilist_t *ml, void *obj)
+{
+ arc_buf_hdr_t *hdr = obj;
+
+ /*
+ * We rely on b_dva to generate evenly distributed index
+ * numbers using buf_hash below. So, as an added precaution,
+ * let's make sure we never add empty buffers to the arc lists.
+ */
+ ASSERT(!BUF_EMPTY(hdr));
+
+ /*
+ * The assumption here is that the hash value for a given
+ * arc_buf_hdr_t will remain constant throughout its lifetime
+ * (i.e. its b_spa, b_dva, and b_birth fields don't change).
+ * Thus, we don't need to store the header's sublist index
+ * on insertion, as this index can be recalculated on removal.
+ *
+ * Also, the low order bits of the hash value are thought to be
+ * distributed evenly. Otherwise, in the case that the multilist
+ * has a power of two number of sublists, each sublist's usage
+ * would not be evenly distributed.
+ */
+ return (buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) %
+ multilist_get_num_sublists(ml));
+}
+
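The sublist-selection contract above boils down to: hash the header's immutable identity, then reduce it modulo the sublist count. A minimal userland sketch of the same idea follows; the FNV-1a hash and the identity struct are illustrative assumptions, not the kernel's buf_hash():

#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-ins for the header's b_spa, b_dva and b_birth. */
struct hdr_identity {
	uint64_t spa;
	uint64_t dva[2];
	uint64_t birth;
};

/* Toy FNV-1a hash; assumed here in place of the kernel's buf_hash(). */
static uint64_t
identity_hash(const struct hdr_identity *id)
{
	const uint8_t *p = (const uint8_t *)id;
	uint64_t h = 14695981039346656037ULL;

	for (size_t i = 0; i < sizeof (*id); i++)
		h = (h ^ p[i]) * 1099511628211ULL;
	return (h);
}

/* Same contract as arc_state_multilist_index_func(): a stable, even index. */
static unsigned int
sublist_index(const struct hdr_identity *id, unsigned int num_sublists)
{
	return ((unsigned int)(identity_hash(id) % num_sublists));
}

Because the hashed fields never change while the header is live, the index can be recomputed at removal time instead of being stored at insertion, which is exactly what the comment above relies on.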
#ifdef _KERNEL
static eventhandler_tag arc_event_lowmem = NULL;
@@ -4115,24 +5244,20 @@ static void
arc_lowmem(void *arg __unused, int howto __unused)
{
- /* Serialize access via arc_lowmem_lock. */
- mutex_enter(&arc_lowmem_lock);
- mutex_enter(&arc_reclaim_thr_lock);
- needfree = 1;
+ mutex_enter(&arc_reclaim_lock);
+ /* XXX: Memory deficit should be passed as argument. */
+ needfree = btoc(arc_c >> arc_shrink_shift);
DTRACE_PROBE(arc__needfree);
- cv_signal(&arc_reclaim_thr_cv);
+ cv_signal(&arc_reclaim_thread_cv);
/*
* It is unsafe to block here in arbitrary threads, because we can come
* here from ARC itself and may hold ARC locks and thus risk a deadlock
* with ARC reclaim thread.
*/
- if (curproc == pageproc) {
- while (needfree)
- msleep(&needfree, &arc_reclaim_thr_lock, 0, "zfs:lowmem", 0);
- }
- mutex_exit(&arc_reclaim_thr_lock);
- mutex_exit(&arc_lowmem_lock);
+ if (curproc == pageproc)
+ (void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
+ mutex_exit(&arc_reclaim_lock);
}
#endif
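For a sense of scale in the lowmem handler above: btoc() converts a byte count to a page count (rounding up), so with an assumed arc_c of 8GB and the default arc_shrink_shift of 7, the requested deficit works out to

	needfree = btoc(8GB >> 7) = btoc(64MB) = 16384 pages

on a 4K-page machine. The XXX comment stands: the handler guesses the deficit rather than receiving it from the VM system.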
@@ -4141,9 +5266,12 @@ arc_init(void)
{
int i, prefetch_tunable_set = 0;
- mutex_init(&arc_reclaim_thr_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&arc_lowmem_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL);
+
+ mutex_init(&arc_user_evicts_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&arc_user_evicts_cv, NULL, CV_DEFAULT, NULL);
/* Convert seconds to clock ticks */
arc_min_prefetch_lifespan = 1 * hz;
@@ -4194,15 +5322,30 @@ arc_init(void)
if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0)
arc_c_min = arc_meta_limit / 2;
+ if (zfs_arc_meta_min > 0) {
+ arc_meta_min = zfs_arc_meta_min;
+ } else {
+ arc_meta_min = arc_c_min / 2;
+ }
+
if (zfs_arc_grow_retry > 0)
arc_grow_retry = zfs_arc_grow_retry;
if (zfs_arc_shrink_shift > 0)
arc_shrink_shift = zfs_arc_shrink_shift;
+ /*
+ * Ensure that arc_no_grow_shift is less than arc_shrink_shift.
+ */
+ if (arc_no_grow_shift >= arc_shrink_shift)
+ arc_no_grow_shift = arc_shrink_shift - 1;
+
if (zfs_arc_p_min_shift > 0)
arc_p_min_shift = zfs_arc_p_min_shift;
+ if (zfs_arc_num_sublists_per_state < 1)
+ zfs_arc_num_sublists_per_state = MAX(max_ncpus, 1);
+
/* if kmem_flags are set, lets try to use less memory */
if (kmem_debugging())
arc_c = arc_c / 2;
@@ -4220,39 +5363,59 @@ arc_init(void)
arc_l2c_only = &ARC_l2c_only;
arc_size = 0;
- for (i = 0; i < ARC_BUFC_NUMLISTS; i++) {
- mutex_init(&arc_anon->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mru->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mru_ghost->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mfu->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_mfu_ghost->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&arc_l2c_only->arcs_locks[i].arcs_lock,
- NULL, MUTEX_DEFAULT, NULL);
-
- list_create(&arc_mru->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mru_ghost->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mfu->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mfu_ghost->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_mfu_ghost->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- list_create(&arc_l2c_only->arcs_lists[i],
- sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
- }
+ multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+ multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
+ sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
+
+ refcount_create(&arc_anon->arcs_size);
+ refcount_create(&arc_mru->arcs_size);
+ refcount_create(&arc_mru_ghost->arcs_size);
+ refcount_create(&arc_mfu->arcs_size);
+ refcount_create(&arc_mfu_ghost->arcs_size);
+ refcount_create(&arc_l2c_only->arcs_size);
buf_init();
- arc_thread_exit = 0;
+ arc_reclaim_thread_exit = FALSE;
+ arc_user_evicts_thread_exit = FALSE;
arc_eviction_list = NULL;
- mutex_init(&arc_eviction_mtx, NULL, MUTEX_DEFAULT, NULL);
bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
@@ -4260,6 +5423,7 @@ arc_init(void)
if (arc_ksp != NULL) {
arc_ksp->ks_data = &arc_stats;
+ arc_ksp->ks_update = arc_kstat_update;
kstat_install(arc_ksp);
}
@@ -4271,6 +5435,9 @@ arc_init(void)
EVENTHANDLER_PRI_FIRST);
#endif
+ (void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0,
+ TS_RUN, minclsyspri);
+
arc_dead = FALSE;
arc_warm = B_FALSE;
@@ -4329,16 +5496,32 @@ arc_init(void)
void
arc_fini(void)
{
- int i;
+ mutex_enter(&arc_reclaim_lock);
+ arc_reclaim_thread_exit = TRUE;
+ /*
+ * The reclaim thread will set arc_reclaim_thread_exit back to
+ * FALSE when it is finished exiting; we're waiting for that.
+ */
+ while (arc_reclaim_thread_exit) {
+ cv_signal(&arc_reclaim_thread_cv);
+ cv_wait(&arc_reclaim_thread_cv, &arc_reclaim_lock);
+ }
+ mutex_exit(&arc_reclaim_lock);
- mutex_enter(&arc_reclaim_thr_lock);
- arc_thread_exit = 1;
- cv_signal(&arc_reclaim_thr_cv);
- while (arc_thread_exit != 0)
- cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock);
- mutex_exit(&arc_reclaim_thr_lock);
+ mutex_enter(&arc_user_evicts_lock);
+ arc_user_evicts_thread_exit = TRUE;
+ /*
+ * The user evicts thread will set arc_user_evicts_thread_exit
+ * to FALSE when it is finished exiting; we're waiting for that.
+ */
+ while (arc_user_evicts_thread_exit) {
+ cv_signal(&arc_user_evicts_cv);
+ cv_wait(&arc_user_evicts_cv, &arc_user_evicts_lock);
+ }
+ mutex_exit(&arc_user_evicts_lock);
- arc_flush(NULL);
+ /* Use TRUE to ensure *all* buffers are evicted */
+ arc_flush(NULL, TRUE);
arc_dead = TRUE;
@@ -4347,30 +5530,33 @@ arc_fini(void)
arc_ksp = NULL;
}
- mutex_destroy(&arc_eviction_mtx);
- mutex_destroy(&arc_reclaim_thr_lock);
- cv_destroy(&arc_reclaim_thr_cv);
-
- for (i = 0; i < ARC_BUFC_NUMLISTS; i++) {
- list_destroy(&arc_mru->arcs_lists[i]);
- list_destroy(&arc_mru_ghost->arcs_lists[i]);
- list_destroy(&arc_mfu->arcs_lists[i]);
- list_destroy(&arc_mfu_ghost->arcs_lists[i]);
- list_destroy(&arc_l2c_only->arcs_lists[i]);
-
- mutex_destroy(&arc_anon->arcs_locks[i].arcs_lock);
- mutex_destroy(&arc_mru->arcs_locks[i].arcs_lock);
- mutex_destroy(&arc_mru_ghost->arcs_locks[i].arcs_lock);
- mutex_destroy(&arc_mfu->arcs_locks[i].arcs_lock);
- mutex_destroy(&arc_mfu_ghost->arcs_locks[i].arcs_lock);
- mutex_destroy(&arc_l2c_only->arcs_locks[i].arcs_lock);
- }
+ mutex_destroy(&arc_reclaim_lock);
+ cv_destroy(&arc_reclaim_thread_cv);
+ cv_destroy(&arc_reclaim_waiters_cv);
+
+ mutex_destroy(&arc_user_evicts_lock);
+ cv_destroy(&arc_user_evicts_cv);
+
+ refcount_destroy(&arc_anon->arcs_size);
+ refcount_destroy(&arc_mru->arcs_size);
+ refcount_destroy(&arc_mru_ghost->arcs_size);
+ refcount_destroy(&arc_mfu->arcs_size);
+ refcount_destroy(&arc_mfu_ghost->arcs_size);
+ refcount_destroy(&arc_l2c_only->arcs_size);
+
+ multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
buf_fini();
- ASSERT(arc_loaned_bytes == 0);
+ ASSERT0(arc_loaned_bytes);
- mutex_destroy(&arc_lowmem_lock);
#ifdef _KERNEL
if (arc_event_lowmem != NULL)
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
@@ -4536,7 +5722,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
ARCSTAT_BUMP(arcstat_l2_write_spa_mismatch);
return (B_FALSE);
}
- if (hdr->b_l2hdr != NULL) {
+ if (HDR_HAS_L2HDR(hdr)) {
ARCSTAT_BUMP(arcstat_l2_write_in_l2);
return (B_FALSE);
}
@@ -4598,20 +5784,6 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote)
return (next);
}
-static void
-l2arc_hdr_stat_add(void)
-{
- ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE);
- ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE);
-}
-
-static void
-l2arc_hdr_stat_remove(void)
-{
- ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE));
- ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE);
-}
-
/*
* Cycle through L2ARC devices. This is how L2ARC load balances.
* If a device is returned, this also returns holding the spa config lock.
@@ -4708,7 +5880,6 @@ l2arc_write_done(zio_t *zio)
l2arc_dev_t *dev;
list_t *buflist;
arc_buf_hdr_t *head, *hdr, *hdr_prev;
- l2arc_buf_hdr_t *abl2;
kmutex_t *hash_lock;
int64_t bytes_dropped = 0;
@@ -4718,7 +5889,7 @@ l2arc_write_done(zio_t *zio)
ASSERT(dev != NULL);
head = cb->l2wcb_head;
ASSERT(head != NULL);
- buflist = dev->l2ad_buflist;
+ buflist = &dev->l2ad_buflist;
ASSERT(buflist != NULL);
DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
l2arc_write_callback_t *, cb);
@@ -4726,48 +5897,83 @@ l2arc_write_done(zio_t *zio)
if (zio->io_error != 0)
ARCSTAT_BUMP(arcstat_l2_writes_error);
- mutex_enter(&l2arc_buflist_mtx);
-
/*
* All writes completed, or an error was hit.
*/
+top:
+ mutex_enter(&dev->l2ad_mtx);
for (hdr = list_prev(buflist, head); hdr; hdr = hdr_prev) {
hdr_prev = list_prev(buflist, hdr);
- abl2 = hdr->b_l2hdr;
+
+ hash_lock = HDR_LOCK(hdr);
/*
- * Release the temporary compressed buffer as soon as possible.
+ * We cannot use mutex_enter or else we can deadlock
+ * with l2arc_write_buffers (due to swapping the order
+ * in which the hash lock and l2ad_mtx are taken).
*/
- if (abl2->b_compress != ZIO_COMPRESS_OFF)
- l2arc_release_cdata_buf(hdr);
-
- hash_lock = HDR_LOCK(hdr);
if (!mutex_tryenter(hash_lock)) {
/*
- * This buffer misses out. It may be in a stage
- * of eviction. Its ARC_L2_WRITING flag will be
- * left set, denying reads to this buffer.
+ * Missed the hash lock. We must retry so we
+ * don't leave the ARC_FLAG_L2_WRITING bit set.
*/
- ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss);
- continue;
+ ARCSTAT_BUMP(arcstat_l2_writes_lock_retry);
+
+ /*
+ * We don't want to rescan the headers we've
+ * already marked as having been written out, so
+ * we reinsert the head node so we can pick up
+ * where we left off.
+ */
+ list_remove(buflist, head);
+ list_insert_after(buflist, hdr, head);
+
+ mutex_exit(&dev->l2ad_mtx);
+
+ /*
+ * We wait for the hash lock to become available
+ * to try and prevent busy waiting, and increase
+ * the chance we'll be able to acquire the lock
+ * the next time around.
+ */
+ mutex_enter(hash_lock);
+ mutex_exit(hash_lock);
+ goto top;
}
+ /*
+ * We could not have been moved into the arc_l2c_only
+ * state while in-flight due to our ARC_FLAG_L2_WRITING
+ * bit being set. Let's just ensure that's being enforced.
+ */
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ /*
+ * We may have allocated a buffer for L2ARC compression;
+ * if so, we must release it to avoid leaking this data.
+ */
+ l2arc_release_cdata_buf(hdr);
+
if (zio->io_error != 0) {
/*
* Error - drop L2ARC entry.
*/
list_remove(buflist, hdr);
- ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
- bytes_dropped += abl2->b_asize;
- hdr->b_l2hdr = NULL;
- trim_map_free(abl2->b_dev->l2ad_vdev, abl2->b_daddr,
- abl2->b_asize, 0);
- kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+ trim_map_free(hdr->b_l2hdr.b_dev->l2ad_vdev,
+ hdr->b_l2hdr.b_daddr, hdr->b_l2hdr.b_asize, 0);
+ hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+
+ ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize);
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+
+ bytes_dropped += hdr->b_l2hdr.b_asize;
+ (void) refcount_remove_many(&dev->l2ad_alloc,
+ hdr->b_l2hdr.b_asize, hdr);
}
/*
- * Allow ARC to begin reads to this L2ARC entry.
+ * Allow ARC to begin reads and ghost list evictions to
+ * this L2ARC entry.
*/
hdr->b_flags &= ~ARC_FLAG_L2_WRITING;
@@ -4776,8 +5982,9 @@ l2arc_write_done(zio_t *zio)
atomic_inc_64(&l2arc_writes_done);
list_remove(buflist, head);
- kmem_cache_free(hdr_cache, head);
- mutex_exit(&l2arc_buflist_mtx);
+ ASSERT(!HDR_HAS_L1HDR(head));
+ kmem_cache_free(hdr_l2only_cache, head);
+ mutex_exit(&dev->l2ad_mtx);
vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0);
@@ -4820,6 +6027,8 @@ l2arc_read_done(zio_t *zio)
if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
ASSERT(zio->io_data != NULL);
+ ASSERT3U(zio->io_size, ==, hdr->b_size);
+ ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
/*
* Check this survived the L2ARC journey.
@@ -4856,7 +6065,7 @@ l2arc_read_done(zio_t *zio)
ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
- buf->b_data, zio->io_size, arc_read_done, buf,
+ buf->b_data, hdr->b_size, arc_read_done, buf,
zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
}
}
@@ -4874,36 +6083,37 @@ l2arc_read_done(zio_t *zio)
* the data lists. This function returns a locked list, and also returns
* the lock pointer.
*/
-static list_t *
-l2arc_list_locked(int list_num, kmutex_t **lock)
+static multilist_sublist_t *
+l2arc_sublist_lock(int list_num)
{
- list_t *list = NULL;
- int idx;
-
- ASSERT(list_num >= 0 && list_num < 2 * ARC_BUFC_NUMLISTS);
-
- if (list_num < ARC_BUFC_NUMMETADATALISTS) {
- idx = list_num;
- list = &arc_mfu->arcs_lists[idx];
- *lock = ARCS_LOCK(arc_mfu, idx);
- } else if (list_num < ARC_BUFC_NUMMETADATALISTS * 2) {
- idx = list_num - ARC_BUFC_NUMMETADATALISTS;
- list = &arc_mru->arcs_lists[idx];
- *lock = ARCS_LOCK(arc_mru, idx);
- } else if (list_num < (ARC_BUFC_NUMMETADATALISTS * 2 +
- ARC_BUFC_NUMDATALISTS)) {
- idx = list_num - ARC_BUFC_NUMMETADATALISTS;
- list = &arc_mfu->arcs_lists[idx];
- *lock = ARCS_LOCK(arc_mfu, idx);
- } else {
- idx = list_num - ARC_BUFC_NUMLISTS;
- list = &arc_mru->arcs_lists[idx];
- *lock = ARCS_LOCK(arc_mru, idx);
+ multilist_t *ml = NULL;
+ unsigned int idx;
+
+ ASSERT(list_num >= 0 && list_num <= 3);
+
+ switch (list_num) {
+ case 0:
+ ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
+ break;
+ case 1:
+ ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
+ break;
+ case 2:
+ ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
+ break;
+ case 3:
+ ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
+ break;
}
- ASSERT(!(MUTEX_HELD(*lock)));
- mutex_enter(*lock);
- return (list);
+ /*
+ * Return a randomly-selected sublist. This is acceptable
+ * because the caller feeds only a little bit of data for each
+ * call (8MB). Subsequent calls will result in different
+ * sublists being selected.
+ */
+ idx = multilist_get_random_index(ml);
+ return (multilist_sublist_lock(ml, idx));
}
/*
@@ -4916,16 +6126,11 @@ static void
l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
{
list_t *buflist;
- l2arc_buf_hdr_t *abl2;
arc_buf_hdr_t *hdr, *hdr_prev;
kmutex_t *hash_lock;
uint64_t taddr;
- int64_t bytes_evicted = 0;
- buflist = dev->l2ad_buflist;
-
- if (buflist == NULL)
- return;
+ buflist = &dev->l2ad_buflist;
if (!all && dev->l2ad_first) {
/*
@@ -4948,17 +6153,23 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
uint64_t, taddr, boolean_t, all);
top:
- mutex_enter(&l2arc_buflist_mtx);
+ mutex_enter(&dev->l2ad_mtx);
for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
hdr_prev = list_prev(buflist, hdr);
hash_lock = HDR_LOCK(hdr);
+
+ /*
+ * We cannot use mutex_enter or else we can deadlock
+ * with l2arc_write_buffers (due to swapping the order
+ * in which the hash lock and l2ad_mtx are taken).
+ */
if (!mutex_tryenter(hash_lock)) {
/*
* Missed the hash lock. Retry.
*/
ARCSTAT_BUMP(arcstat_l2_evict_lock_retry);
- mutex_exit(&l2arc_buflist_mtx);
+ mutex_exit(&dev->l2ad_mtx);
mutex_enter(hash_lock);
mutex_exit(hash_lock);
goto top;
@@ -4974,9 +6185,9 @@ top:
continue;
}
- if (!all && hdr->b_l2hdr != NULL &&
- (hdr->b_l2hdr->b_daddr > taddr ||
- hdr->b_l2hdr->b_daddr < dev->l2ad_hand)) {
+ if (!all && HDR_HAS_L2HDR(hdr) &&
+ (hdr->b_l2hdr.b_daddr > taddr ||
+ hdr->b_l2hdr.b_daddr < dev->l2ad_hand)) {
/*
* We've evicted to the target address,
* or the end of the device.
@@ -4985,15 +6196,8 @@ top:
break;
}
- if (HDR_FREE_IN_PROGRESS(hdr)) {
- /*
- * Already on the path to destruction.
- */
- mutex_exit(hash_lock);
- continue;
- }
-
- if (hdr->b_state == arc_l2c_only) {
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ if (!HDR_HAS_L1HDR(hdr)) {
ASSERT(!HDR_L2_READING(hdr));
/*
* This doesn't exist in the ARC. Destroy.
@@ -5003,6 +6207,8 @@ top:
arc_change_state(arc_anon, hdr, hash_lock);
arc_hdr_destroy(hdr);
} else {
+ ASSERT(hdr->b_l1hdr.b_state != arc_l2c_only);
+ ARCSTAT_BUMP(arcstat_l2_evict_l1cached);
/*
* Invalidate issued or about to be issued
* reads, since we may be about to write
@@ -5013,36 +6219,15 @@ top:
hdr->b_flags |= ARC_FLAG_L2_EVICTED;
}
- /*
- * Tell ARC this no longer exists in L2ARC.
- */
- if (hdr->b_l2hdr != NULL) {
- abl2 = hdr->b_l2hdr;
- ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
- bytes_evicted += abl2->b_asize;
- hdr->b_l2hdr = NULL;
- /*
- * We are destroying l2hdr, so ensure that
- * its compressed buffer, if any, is not leaked.
- */
- ASSERT(abl2->b_tmp_cdata == NULL);
- kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
- ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
- }
- list_remove(buflist, hdr);
+ /* Ensure this header has finished being written */
+ ASSERT(!HDR_L2_WRITING(hdr));
+ ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
- /*
- * This may have been leftover after a
- * failed write.
- */
- hdr->b_flags &= ~ARC_FLAG_L2_WRITING;
+ arc_hdr_l2hdr_destroy(hdr);
}
mutex_exit(hash_lock);
}
- mutex_exit(&l2arc_buflist_mtx);
-
- vdev_space_update(dev->l2ad_vdev, -bytes_evicted, 0, 0);
- dev->l2ad_evict = taddr;
+ mutex_exit(&dev->l2ad_mtx);
}
/*
@@ -5061,10 +6246,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
boolean_t *headroom_boost)
{
arc_buf_hdr_t *hdr, *hdr_prev, *head;
- list_t *list;
uint64_t write_asize, write_sz, headroom, buf_compress_minsz;
void *buf_data;
- kmutex_t *list_lock;
boolean_t full;
l2arc_write_callback_t *cb;
zio_t *pio, *wzio;
@@ -5080,8 +6263,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
pio = NULL;
write_sz = write_asize = 0;
full = B_FALSE;
- head = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+ head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
+ head->b_flags |= ARC_FLAG_HAS_L2HDR;
ARCSTAT_BUMP(arcstat_l2_write_buffer_iter);
/*
@@ -5093,11 +6277,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/*
* Copy buffers for L2ARC writing.
*/
- mutex_enter(&l2arc_buflist_mtx);
- for (try = 0; try < 2 * ARC_BUFC_NUMLISTS; try++) {
+ for (try = 0; try <= 3; try++) {
+ multilist_sublist_t *mls = l2arc_sublist_lock(try);
uint64_t passed_sz = 0;
- list = l2arc_list_locked(try, &list_lock);
ARCSTAT_BUMP(arcstat_l2_write_buffer_list_iter);
/*
@@ -5107,26 +6290,25 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
* head of the ARC lists rather than the tail.
*/
if (arc_warm == B_FALSE)
- hdr = list_head(list);
+ hdr = multilist_sublist_head(mls);
else
- hdr = list_tail(list);
+ hdr = multilist_sublist_tail(mls);
if (hdr == NULL)
ARCSTAT_BUMP(arcstat_l2_write_buffer_list_null_iter);
- headroom = target_sz * l2arc_headroom * 2 / ARC_BUFC_NUMLISTS;
+ headroom = target_sz * l2arc_headroom;
if (do_headroom_boost)
headroom = (headroom * l2arc_headroom_boost) / 100;
for (; hdr; hdr = hdr_prev) {
- l2arc_buf_hdr_t *l2hdr;
kmutex_t *hash_lock;
uint64_t buf_sz;
uint64_t buf_a_sz;
if (arc_warm == B_FALSE)
- hdr_prev = list_next(list, hdr);
+ hdr_prev = multilist_sublist_next(mls, hdr);
else
- hdr_prev = list_prev(list, hdr);
+ hdr_prev = multilist_sublist_prev(mls, hdr);
ARCSTAT_INCR(arcstat_l2_write_buffer_bytes_scanned, hdr->b_size);
hash_lock = HDR_LOCK(hdr);
@@ -5174,7 +6356,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
* l2arc_write_done() can find where the
* write buffers begin without searching.
*/
- list_insert_head(dev->l2ad_buflist, head);
+ mutex_enter(&dev->l2ad_mtx);
+ list_insert_head(&dev->l2ad_buflist, head);
+ mutex_exit(&dev->l2ad_mtx);
cb = kmem_alloc(
sizeof (l2arc_write_callback_t), KM_SLEEP);
@@ -5188,32 +6372,54 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/*
* Create and add a new L2ARC header.
*/
- l2hdr = kmem_zalloc(sizeof (l2arc_buf_hdr_t), KM_SLEEP);
- l2hdr->b_dev = dev;
+ hdr->b_l2hdr.b_dev = dev;
hdr->b_flags |= ARC_FLAG_L2_WRITING;
-
/*
* Temporarily stash the data buffer in b_tmp_cdata.
* The subsequent write step will pick it up from
- * there. This is because can't access hdr->b_buf
+ * there. This is because we can't access b_l1hdr.b_buf
* without holding the hash_lock, which we in turn
* can't access without holding the ARC list locks
* (which we want to avoid during compression/writing).
*/
- l2hdr->b_compress = ZIO_COMPRESS_OFF;
- l2hdr->b_asize = hdr->b_size;
- l2hdr->b_tmp_cdata = hdr->b_buf->b_data;
+ hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
+ hdr->b_l2hdr.b_asize = hdr->b_size;
+ hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
- hdr->b_l2hdr = l2hdr;
+ /*
+ * Explicitly set the b_daddr field to a known
+ * value which means "invalid address". This
+ * enables us to differentiate which stage of
+ * l2arc_write_buffers() the particular header
+ * is in (e.g. this loop, or the one below).
+ * ARC_FLAG_L2_WRITING is not enough to make
+ * this distinction, and we need to know in
+ * order to do proper l2arc vdev accounting in
+ * arc_release() and arc_hdr_destroy().
+ *
+ * Note, we can't use a new flag to distinguish
+ * the two stages because we don't hold the
+ * header's hash_lock below, in the second stage
+ * of this function. Thus, we can't simply
+ * change the b_flags field to denote that the
+ * IO has been sent. We can change the b_daddr
+ * field of the L2 portion, though, since we'll
+ * be holding the l2ad_mtx; which is why we're
+ * using it to denote the header's state change.
+ */
+ hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
+ hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
- list_insert_head(dev->l2ad_buflist, hdr);
+ mutex_enter(&dev->l2ad_mtx);
+ list_insert_head(&dev->l2ad_buflist, hdr);
+ mutex_exit(&dev->l2ad_mtx);
/*
* Compute and store the buffer cksum before
* writing. On debug the cksum is verified first.
*/
- arc_cksum_verify(hdr->b_buf);
- arc_cksum_compute(hdr->b_buf, B_TRUE);
+ arc_cksum_verify(hdr->b_l1hdr.b_buf);
+ arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
mutex_exit(hash_lock);
@@ -5221,7 +6427,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
write_asize += buf_a_sz;
}
- mutex_exit(list_lock);
+ multilist_sublist_unlock(mls);
if (full == B_TRUE)
break;
@@ -5230,11 +6436,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/* No buffers selected for writing? */
if (pio == NULL) {
ASSERT0(write_sz);
- mutex_exit(&l2arc_buflist_mtx);
- kmem_cache_free(hdr_cache, head);
+ ASSERT(!HDR_HAS_L1HDR(head));
+ kmem_cache_free(hdr_l2only_cache, head);
return (0);
}
+ mutex_enter(&dev->l2ad_mtx);
+
/*
* Note that elsewhere in this file arcstat_l2_asize
* and the used space on l2ad_vdev are updated using b_asize,
@@ -5253,24 +6461,30 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
* and work backwards, retracing the course of the buffer selector
* loop above.
*/
- for (hdr = list_prev(dev->l2ad_buflist, head); hdr;
- hdr = list_prev(dev->l2ad_buflist, hdr)) {
- l2arc_buf_hdr_t *l2hdr;
+ for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;
+ hdr = list_prev(&dev->l2ad_buflist, hdr)) {
uint64_t buf_sz;
/*
+ * We rely on the L1 portion of the header below, so
+ * it's invalid for this header to have been evicted out
+ * of the ghost cache prior to being written out. The
+ * ARC_FLAG_L2_WRITING bit ensures this won't happen.
+ */
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ /*
* We shouldn't need to lock the buffer here, since we flagged
* it as ARC_FLAG_L2_WRITING in the previous step, but we must
* take care to only access its L2 cache parameters. In
- * particular, hdr->b_buf may be invalid by now due to
+ * particular, hdr->b_l1hdr.b_buf may be invalid by now due to
* ARC eviction.
*/
- l2hdr = hdr->b_l2hdr;
- l2hdr->b_daddr = dev->l2ad_hand;
+ hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
- if ((hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
- l2hdr->b_asize >= buf_compress_minsz) {
- if (l2arc_compress_buf(l2hdr)) {
+ if ((HDR_L2COMPRESS(hdr)) &&
+ hdr->b_l2hdr.b_asize >= buf_compress_minsz) {
+ if (l2arc_compress_buf(hdr)) {
/*
* If compression succeeded, enable headroom
* boost on the next scan cycle.
@@ -5283,16 +6497,15 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
* Pick up the buffer data we had previously stashed away
* (and now potentially also compressed).
*/
- buf_data = l2hdr->b_tmp_cdata;
- buf_sz = l2hdr->b_asize;
+ buf_data = hdr->b_l1hdr.b_tmp_cdata;
+ buf_sz = hdr->b_l2hdr.b_asize;
/*
- * If the data has not been compressed, then clear b_tmp_cdata
- * to make sure that it points only to a temporary compression
- * buffer.
+ * We need to do this regardless of whether buf_sz is zero
+ * or not; otherwise, when this l2hdr is evicted we'll
+ * remove a reference that was never added.
*/
- if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
- l2hdr->b_tmp_cdata = NULL;
+ (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr);
/* Compression may have squashed the buffer to zero length. */
if (buf_sz != 0) {
@@ -5308,6 +6521,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
(void) zio_nowait(wzio);
stats_size += buf_sz;
+
/*
* Keep the clock hand suitably device-aligned.
*/
@@ -5317,7 +6531,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
}
}
- mutex_exit(&l2arc_buflist_mtx);
+ mutex_exit(&dev->l2ad_mtx);
ASSERT3U(write_asize, <=, target_sz);
ARCSTAT_BUMP(arcstat_l2_writes_sent);
@@ -5332,7 +6546,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
*/
if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
dev->l2ad_hand = dev->l2ad_start;
- dev->l2ad_evict = dev->l2ad_start;
dev->l2ad_first = B_FALSE;
}
@@ -5345,7 +6558,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/*
* Compresses an L2ARC buffer.
- * The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
+ * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its
* size in l2hdr->b_asize. This routine tries to compress the data and
* depending on the compression result there are three possible outcomes:
* *) The buffer was incompressible. The original l2hdr contents were left
@@ -5363,17 +6576,21 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
* buffer was incompressible).
*/
static boolean_t
-l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
+l2arc_compress_buf(arc_buf_hdr_t *hdr)
{
void *cdata;
size_t csize, len, rounded;
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
- ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
- ASSERT(l2hdr->b_tmp_cdata != NULL);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3S(l2hdr->b_compress, ==, ZIO_COMPRESS_OFF);
+ ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
len = l2hdr->b_asize;
cdata = zio_data_buf_alloc(len);
- csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
+ ASSERT3P(cdata, !=, NULL);
+ csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
cdata, l2hdr->b_asize);
if (csize == 0) {
@@ -5381,7 +6598,7 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
zio_data_buf_free(cdata, len);
l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
l2hdr->b_asize = 0;
- l2hdr->b_tmp_cdata = NULL;
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
ARCSTAT_BUMP(arcstat_l2_compress_zeros);
return (B_TRUE);
}
@@ -5399,7 +6616,7 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
}
l2hdr->b_compress = ZIO_COMPRESS_LZ4;
l2hdr->b_asize = csize;
- l2hdr->b_tmp_cdata = cdata;
+ hdr->b_l1hdr.b_tmp_cdata = cdata;
ARCSTAT_BUMP(arcstat_l2_compress_successes);
return (B_TRUE);
} else {
@@ -5444,9 +6661,9 @@ l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c)
* need to fill its io_data after we're done restoring the
* buffer's contents.
*/
- ASSERT(hdr->b_buf != NULL);
- bzero(hdr->b_buf->b_data, hdr->b_size);
- zio->io_data = zio->io_orig_data = hdr->b_buf->b_data;
+ ASSERT(hdr->b_l1hdr.b_buf != NULL);
+ bzero(hdr->b_l1hdr.b_buf->b_data, hdr->b_size);
+ zio->io_data = zio->io_orig_data = hdr->b_l1hdr.b_buf->b_data;
} else {
ASSERT(zio->io_data != NULL);
/*
@@ -5484,20 +6701,37 @@ l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c)
static void
l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
{
- l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ enum zio_compress comp = hdr->b_l2hdr.b_compress;
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
- ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
- if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
+ if (comp == ZIO_COMPRESS_OFF) {
+ /*
+ * In this case, b_tmp_cdata points to the same buffer
+ * as the arc_buf_t's b_data field. We don't want to
+ * free it, since the arc_buf_t will handle that.
+ */
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
+ } else if (comp == ZIO_COMPRESS_EMPTY) {
+ /*
+ * In this case, b_tmp_cdata was compressed to an empty
+ * buffer, thus there's nothing to free and b_tmp_cdata
+ * should have been set to NULL in l2arc_write_buffers().
+ */
+ ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
+ } else {
/*
* If the data was compressed, then we've allocated a
* temporary buffer for it, so now we need to release it.
*/
- ASSERT(l2hdr->b_tmp_cdata != NULL);
- zio_data_buf_free(l2hdr->b_tmp_cdata, hdr->b_size);
- l2hdr->b_tmp_cdata = NULL;
- } else {
- ASSERT(l2hdr->b_tmp_cdata == NULL);
+ ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
+ zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata,
+ hdr->b_size);
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
}
+
}
/*
@@ -5636,19 +6870,19 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
adddev->l2ad_start = VDEV_LABEL_START_SIZE;
adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd);
adddev->l2ad_hand = adddev->l2ad_start;
- adddev->l2ad_evict = adddev->l2ad_start;
adddev->l2ad_first = B_TRUE;
adddev->l2ad_writing = B_FALSE;
+ mutex_init(&adddev->l2ad_mtx, NULL, MUTEX_DEFAULT, NULL);
/*
* This is a list of all ARC buffers that are still valid on the
* device.
*/
- adddev->l2ad_buflist = kmem_zalloc(sizeof (list_t), KM_SLEEP);
- list_create(adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l2node));
+ list_create(&adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l2hdr.b_l2node));
vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
+ refcount_create(&adddev->l2ad_alloc);
/*
* Add device to global list
@@ -5692,8 +6926,9 @@ l2arc_remove_vdev(vdev_t *vd)
* Clear all buflists and ARC references. L2ARC device flush.
*/
l2arc_evict(remdev, 0, B_TRUE);
- list_destroy(remdev->l2ad_buflist);
- kmem_free(remdev->l2ad_buflist, sizeof (list_t));
+ list_destroy(&remdev->l2ad_buflist);
+ mutex_destroy(&remdev->l2ad_mtx);
+ refcount_destroy(&remdev->l2ad_alloc);
kmem_free(remdev, sizeof (l2arc_dev_t));
}
@@ -5708,7 +6943,6 @@ l2arc_init(void)
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
l2arc_dev_list = &L2ARC_dev_list;
@@ -5733,7 +6967,6 @@ l2arc_fini(void)
mutex_destroy(&l2arc_feed_thr_lock);
cv_destroy(&l2arc_feed_thr_cv);
mutex_destroy(&l2arc_dev_mtx);
- mutex_destroy(&l2arc_buflist_mtx);
mutex_destroy(&l2arc_free_on_write_mtx);
list_destroy(l2arc_dev_list);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
index da4d38a..7d20096 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
*/
#include <sys/bpobj.h>
@@ -256,9 +256,8 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
dbuf = NULL;
}
if (free) {
- i++;
VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
- i * sizeof (blkptr_t), -1ULL, tx));
+ (i + 1) * sizeof (blkptr_t), -1ULL, tx));
}
if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
goto out;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
index 5f7d76f..b2b9887 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
@@ -154,7 +154,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int err;
struct bptree_args *ba = arg;
- if (BP_IS_HOLE(bp))
+ if (bp == NULL || BP_IS_HOLE(bp))
return (0);
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c
new file mode 100644
index 0000000..1ddc697
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c
@@ -0,0 +1,111 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/bqueue.h>
+#include <sys/zfs_context.h>
+
+static inline bqueue_node_t *
+obj2node(bqueue_t *q, void *data)
+{
+ return ((bqueue_node_t *)((char *)data + q->bq_node_offset));
+}
+
+/*
+ * Initialize a blocking queue. The maximum capacity of the queue is set to
+ * size. Types that want to be stored in a bqueue must contain a bqueue_node_t,
+ * and offset should give its offset from the start of the struct. Return 0 on
+ * success, or -1 on failure.
+ */
+int
+bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
+{
+ list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t),
+ node_offset + offsetof(bqueue_node_t, bqn_node));
+ cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&q->bq_pop_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&q->bq_lock, NULL, MUTEX_DEFAULT, NULL);
+ q->bq_node_offset = node_offset;
+ q->bq_size = 0;
+ q->bq_maxsize = size;
+ return (0);
+}
+
+/*
+ * Destroy a blocking queue. This function asserts that there are no
+ * elements in the queue, and no one is blocked on the condition
+ * variables.
+ */
+void
+bqueue_destroy(bqueue_t *q)
+{
+ ASSERT0(q->bq_size);
+ cv_destroy(&q->bq_add_cv);
+ cv_destroy(&q->bq_pop_cv);
+ mutex_destroy(&q->bq_lock);
+ list_destroy(&q->bq_list);
+}
+
+/*
+ * Add data to q, consuming size units of capacity. If there is insufficient
+ * capacity to consume size units, block until capacity exists. Asserts size is
+ * > 0.
+ */
+void
+bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+{
+ ASSERT3U(item_size, >, 0);
+ ASSERT3U(item_size, <, q->bq_maxsize);
+ mutex_enter(&q->bq_lock);
+ obj2node(q, data)->bqn_size = item_size;
+ while (q->bq_size + item_size > q->bq_maxsize) {
+ cv_wait(&q->bq_add_cv, &q->bq_lock);
+ }
+ q->bq_size += item_size;
+ list_insert_tail(&q->bq_list, data);
+ cv_signal(&q->bq_pop_cv);
+ mutex_exit(&q->bq_lock);
+}
+
+/*
+ * Take the first element off of q. If there are no elements on the queue, wait
+ * until one is put there. Return the removed element.
+ */
+void *
+bqueue_dequeue(bqueue_t *q)
+{
+ void *ret;
+ uint64_t item_size;
+ mutex_enter(&q->bq_lock);
+ while (q->bq_size == 0) {
+ cv_wait(&q->bq_pop_cv, &q->bq_lock);
+ }
+ ret = list_remove_head(&q->bq_list);
+ item_size = obj2node(q, ret)->bqn_size;
+ q->bq_size -= item_size;
+ mutex_exit(&q->bq_lock);
+ cv_signal(&q->bq_add_cv);
+ return (ret);
+}
+
+/*
+ * Returns true if the space used is 0.
+ */
+boolean_t
+bqueue_empty(bqueue_t *q)
+{
+ return (q->bq_size == 0);
+}
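Taken together, bqueue_init(), bqueue_enqueue() and bqueue_dequeue() form a size-bounded blocking queue. A minimal producer/consumer usage sketch follows; it assumes the zfs_context environment used above (kmem_zalloc, VERIFY0, ASSERT3U), and work_item_t and the 1MB capacity are illustrative:

typedef struct work_item {
	uint64_t payload;
	bqueue_node_t node;	/* embedded node required by bqueue */
} work_item_t;

static void
bqueue_usage_sketch(void)
{
	bqueue_t q;
	work_item_t *in, *out;

	/* Capacity is expressed in item-size units; 1MB here. */
	VERIFY0(bqueue_init(&q, 1024 * 1024, offsetof(work_item_t, node)));

	in = kmem_zalloc(sizeof (*in), KM_SLEEP);
	in->payload = 42;
	/* Blocks if the queue already holds ~1MB worth of items. */
	bqueue_enqueue(&q, in, sizeof (*in));

	/* Blocks until an element is available. */
	out = bqueue_dequeue(&q);
	ASSERT3U(out->payload, ==, 42);
	kmem_free(out, sizeof (*out));

	ASSERT(bqueue_empty(&q));
	bqueue_destroy(&q);
}

Note how bqn_size lets heterogeneous records each charge their own size against bq_maxsize, rather than counting elements.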
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 2b55290..a28d866 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -24,6 +24,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -58,6 +59,7 @@ static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
* Global data structures and functions for the dbuf cache.
*/
static kmem_cache_t *dbuf_cache;
+static taskq_t *dbu_evict_taskq;
/* ARGSUSED */
static int
@@ -118,11 +120,9 @@ dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
(dbuf)->db_blkid == (blkid))
dmu_buf_impl_t *
-dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
+dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
{
dbuf_hash_table_t *h = &dbuf_hash_table;
- objset_t *os = dn->dn_objset;
- uint64_t obj = dn->dn_object;
uint64_t hv = DBUF_HASH(os, obj, level, blkid);
uint64_t idx = hv & h->hash_table_mask;
dmu_buf_impl_t *db;
@@ -142,6 +142,24 @@ dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
return (NULL);
}
+static dmu_buf_impl_t *
+dbuf_find_bonus(objset_t *os, uint64_t object)
+{
+ dnode_t *dn;
+ dmu_buf_impl_t *db = NULL;
+
+ if (dnode_hold(os, object, FTAG, &dn) == 0) {
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ if (dn->dn_bonus != NULL) {
+ db = dn->dn_bonus;
+ mutex_enter(&db->db_mtx);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ }
+ return (db);
+}
+
/*
* Insert an entry into the hash table. If there is already an element
* equal to elem in the hash table, then the already existing element
@@ -215,17 +233,72 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
static arc_evict_func_t dbuf_do_evict;
+typedef enum {
+ DBVU_EVICTING,
+ DBVU_NOT_EVICTING
+} dbvu_verify_type_t;
+
+static void
+dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
+{
+#ifdef ZFS_DEBUG
+ int64_t holds;
+
+ if (db->db_user == NULL)
+ return;
+
+ /* Only data blocks support the attachment of user data. */
+ ASSERT(db->db_level == 0);
+
+ /* Clients must resolve a dbuf before attaching user data. */
+ ASSERT(db->db.db_data != NULL);
+ ASSERT3U(db->db_state, ==, DB_CACHED);
+
+ holds = refcount_count(&db->db_holds);
+ if (verify_type == DBVU_EVICTING) {
+ /*
+ * Immediate eviction occurs when holds == dirtycnt.
+ * For normal eviction buffers, holds is zero on
+ * eviction, except when dbuf_fix_old_data() calls
+ * dbuf_clear_data(). However, the hold count can grow
+ * during eviction even though db_mtx is held (see
+ * dmu_bonus_hold() for an example), so we can only
+ * test the generic invariant that holds >= dirtycnt.
+ */
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ } else {
+ if (db->db_immediate_evict == TRUE)
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ else
+ ASSERT3U(holds, >, 0);
+ }
+#endif
+}
+
static void
dbuf_evict_user(dmu_buf_impl_t *db)
{
+ dmu_buf_user_t *dbu = db->db_user;
+
ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_level != 0 || db->db_evict_func == NULL)
+ if (dbu == NULL)
return;
- db->db_evict_func(&db->db, db->db_user_ptr);
- db->db_user_ptr = NULL;
- db->db_evict_func = NULL;
+ dbuf_verify_user(db, DBVU_EVICTING);
+ db->db_user = NULL;
+
+#ifdef ZFS_DEBUG
+ if (dbu->dbu_clear_on_evict_dbufp != NULL)
+ *dbu->dbu_clear_on_evict_dbufp = NULL;
+#endif
+
+ /*
+ * Invoke the callback from a taskq to avoid lock order reversals
+ * and limit stack depth.
+ */
+ taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
+ &dbu->dbu_tqent);
}
boolean_t
@@ -286,6 +359,12 @@ retry:
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
+ * configuration is not required.
+ */
+ dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
}
void
@@ -298,6 +377,7 @@ dbuf_fini(void)
mutex_destroy(&h->hash_mutexes[i]);
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
kmem_cache_destroy(dbuf_cache);
+ taskq_destroy(dbu_evict_taskq);
}
/*
@@ -415,21 +495,27 @@ dbuf_verify(dmu_buf_impl_t *db)
#endif
static void
+dbuf_clear_data(dmu_buf_impl_t *db)
+{
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+ dbuf_evict_user(db);
+ db->db_buf = NULL;
+ db->db.db_data = NULL;
+ if (db->db_state != DB_NOFILL)
+ db->db_state = DB_UNCACHED;
+}
+
+static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
+ ASSERT(buf != NULL);
+
db->db_buf = buf;
- if (buf != NULL) {
- ASSERT(buf->b_data != NULL);
- db->db.db_data = buf->b_data;
- if (!arc_released(buf))
- arc_set_callback(buf, dbuf_do_evict, db);
- } else {
- dbuf_evict_user(db);
- db->db.db_data = NULL;
- if (db->db_state != DB_NOFILL)
- db->db_state = DB_UNCACHED;
- }
+ ASSERT(buf->b_data != NULL);
+ db->db.db_data = buf->b_data;
+ if (!arc_released(buf))
+ arc_set_callback(buf, dbuf_do_evict, db);
}
/*
@@ -451,17 +537,41 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
} else {
abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db);
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
mutex_exit(&db->db_mtx);
}
return (abuf);
}
+/*
+ * Calculate which level n block references the data at the level 0 offset
+ * provided.
+ */
uint64_t
-dbuf_whichblock(dnode_t *dn, uint64_t offset)
+dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset)
{
- if (dn->dn_datablkshift) {
- return (offset >> dn->dn_datablkshift);
+ if (dn->dn_datablkshift != 0 && dn->dn_indblkshift != 0) {
+ /*
+ * The level n blkid is equal to the level 0 blkid divided by
+ * the number of level 0s in a level n block.
+ *
+ * The level 0 blkid is offset >> datablkshift =
+ * offset / 2^datablkshift.
+ *
+ * The number of level 0s in a level n is the number of block
+ * pointers in an indirect block, raised to the power of level.
+ * This is 2^(indblkshift - SPA_BLKPTRSHIFT)^level =
+ * 2^(level*(indblkshift - SPA_BLKPTRSHIFT)).
+ *
+ * Thus, the level n blkid is: offset /
+ * ((2^datablkshift)*(2^(level*(indblkshift - SPA_BLKPTRSHIFT)))
+ * = offset / 2^(datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ * = offset >> (datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ */
+ return (offset >> (dn->dn_datablkshift + level *
+ (dn->dn_indblkshift - SPA_BLKPTRSHIFT)));
} else {
ASSERT3U(offset, <, dn->dn_datablksz);
return (0);
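To make the shift arithmetic above concrete, a worked example under assumed (but typical) geometry: datablkshift = 17 (128K data blocks) and indblkshift = 14 (16K indirect blocks), so each indirect block holds 2^(14 - SPA_BLKPTRSHIFT) = 2^(14 - 7) = 128 block pointers. For byte offset 2^40:

	level 0 blkid = 2^40 >> 17             = 2^23
	level 2 blkid = 2^40 >> (17 + 2 * 7)   = 2^9 = 512

That is, the level-2 indirect block with blkid 512 is the one that (transitively) references that offset.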
@@ -503,7 +613,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
}
static void
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
dnode_t *dn;
zbookmark_phys_t zb;
@@ -549,7 +659,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
db->db_state = DB_CACHED;
- *flags |= DB_RF_CACHED;
mutex_exit(&db->db_mtx);
return;
}
@@ -572,10 +681,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
- (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+ (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
- if (aflags & ARC_FLAG_CACHED)
- *flags |= DB_RF_CACHED;
}
int
@@ -608,8 +715,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if (db->db_state == DB_CACHED) {
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
@@ -618,13 +724,12 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
if (zio == NULL)
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- dbuf_read_impl(db, zio, &flags);
+ dbuf_read_impl(db, zio, flags);
/* dbuf_read_impl has dropped db_mtx for us */
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, flags & DB_RF_CACHED);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
@@ -643,8 +748,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
@@ -687,7 +791,7 @@ dbuf_noread(dmu_buf_impl_t *db)
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
} else {
ASSERT3U(db->db_state, ==, DB_CACHED);
}
@@ -743,7 +847,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
}
}
@@ -794,7 +898,8 @@ void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx)
{
- dmu_buf_impl_t *db, *db_next, db_search;
+ dmu_buf_impl_t db_search;
+ dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
avl_index_t where;
@@ -1372,7 +1477,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
arc_buf_t *buf = db->db_buf;
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (B_TRUE);
@@ -1457,6 +1562,11 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
struct dirty_leaf *dl;
dmu_object_type_t type;
+ if (etype == BP_EMBEDDED_TYPE_DATA) {
+ ASSERT(spa_feature_is_active(dmu_objset_spa(db->db_objset),
+ SPA_FEATURE_EMBEDDED_DATA));
+ }
+
DB_DNODE_ENTER(db);
type = DB_DNODE(db)->dn_type;
DB_DNODE_EXIT(db);
@@ -1623,6 +1733,12 @@ dbuf_clear(dmu_buf_impl_t *db)
dbuf_rele(parent, db);
}
+/*
+ * Note: While bpp will always be updated if the function returns success,
+ * parentp will not be updated if the dnode does not have dn_dbuf filled in;
+ * this happens when the dnode is the meta-dnode, or a userused or groupused
+ * object.
+ */
static int
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
dmu_buf_impl_t **parentp, blkptr_t **bpp)
@@ -1663,7 +1779,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
} else if (level < nlevels-1) {
/* this block is referenced from an indirect block */
int err = dbuf_hold_impl(dn, level+1,
- blkid >> epbs, fail_sparse, NULL, parentp);
+ blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
if (err)
return (err);
err = dbuf_read(*parentp, NULL,
@@ -1712,8 +1828,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_parent = parent;
db->db_blkptr = blkptr;
- db->db_user_ptr = NULL;
- db->db_evict_func = NULL;
+ db->db_user = NULL;
db->db_immediate_evict = 0;
db->db_freed_in_flight = 0;
@@ -1839,47 +1954,204 @@ dbuf_destroy(dmu_buf_impl_t *db)
arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
}
+typedef struct dbuf_prefetch_arg {
+ spa_t *dpa_spa; /* The spa to issue the prefetch in. */
+ zbookmark_phys_t dpa_zb; /* The target block to prefetch. */
+ int dpa_epbs; /* Entries (blkptr_t's) Per Block Shift. */
+ int dpa_curlevel; /* The current level that we're reading */
+ zio_priority_t dpa_prio; /* The priority I/Os should be issued at. */
+ zio_t *dpa_zio; /* The parent zio_t for all prefetches. */
+ arc_flags_t dpa_aflags; /* Flags to pass to the final prefetch. */
+} dbuf_prefetch_arg_t;
+
+/*
+ * Actually issue the prefetch read for the block given.
+ */
+static void
+dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
+{
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ return;
+
+ arc_flags_t aflags =
+ dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+ ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
+ ASSERT(dpa->dpa_zio != NULL);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL,
+ dpa->dpa_prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &aflags, &dpa->dpa_zb);
+}
+
+/*
+ * Called when an indirect block above our prefetch target is read in. This
+ * will either read in the next indirect block down the tree or issue the actual
+ * prefetch if the next block down is our target.
+ */
+static void
+dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
+{
+ dbuf_prefetch_arg_t *dpa = private;
+
+ ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
+ ASSERT3S(dpa->dpa_curlevel, >, 0);
+ if (zio != NULL) {
+ ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
+ ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
+ ASSERT3P(zio->io_spa, ==, dpa->dpa_spa);
+ }
+
+ dpa->dpa_curlevel--;
+
+ uint64_t nextblkid = dpa->dpa_zb.zb_blkid >>
+ (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
+ blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
+ P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
+ if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) {
+ kmem_free(dpa, sizeof (*dpa));
+ } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
+ ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
+ dbuf_issue_final_prefetch(dpa, bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
+
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+
+ SET_BOOKMARK(&zb, dpa->dpa_zb.zb_objset,
+ dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
+
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
+ }
+ (void) arc_buf_remove_ref(abuf, private);
+}
+
+/*
+ * Issue prefetch reads for the given block on the given level. If the indirect
+ * blocks above that block are not in memory, we will read them in
+ * asynchronously. As a result, this call never blocks waiting for a read to
+ * complete.
+ */
void
-dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
+dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
+ arc_flags_t aflags)
{
- dmu_buf_impl_t *db = NULL;
- blkptr_t *bp = NULL;
+ blkptr_t bp;
+ int epbs, nlevels, curlevel;
+ uint64_t curblkid;
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ if (blkid > dn->dn_maxblkid)
+ return;
+
if (dnode_block_freed(dn, blkid))
return;
- /* dbuf_find() returns with db_mtx held */
- if (db = dbuf_find(dn, 0, blkid)) {
+ /*
+ * This dnode hasn't been written to disk yet, so there's nothing to
+ * prefetch.
+ */
+ nlevels = dn->dn_phys->dn_nlevels;
+ if (level >= nlevels || dn->dn_phys->dn_nblkptr == 0)
+ return;
+
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ if (dn->dn_phys->dn_maxblkid < blkid << (epbs * level))
+ return;
+
+ dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object,
+ level, blkid);
+ if (db != NULL) {
+ mutex_exit(&db->db_mtx);
/*
- * This dbuf is already in the cache. We assume that
- * it is already CACHED, or else about to be either
- * read or filled.
+ * This dbuf already exists. It is either CACHED, or
+ * (we assume) about to be read or filled.
*/
- mutex_exit(&db->db_mtx);
return;
}
- if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
- if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- arc_flags_t aflags =
- ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- zbookmark_phys_t zb;
+ /*
+ * Find the closest ancestor (indirect block) of the target block
+ * that is present in the cache. In this indirect block, we will
+ * find the bp that is at curlevel, curblkid.
+ */
+ curlevel = level;
+ curblkid = blkid;
+ while (curlevel < nlevels - 1) {
+ int parent_level = curlevel + 1;
+ uint64_t parent_blkid = curblkid >> epbs;
+ dmu_buf_impl_t *db;
+
+ if (dbuf_hold_impl(dn, parent_level, parent_blkid,
+ FALSE, TRUE, FTAG, &db) == 0) {
+ blkptr_t *bpp = db->db_buf->b_data;
+ bp = bpp[P2PHASE(curblkid, 1 << epbs)];
+ dbuf_rele(db, FTAG);
+ break;
+ }
- SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
- dn->dn_object, 0, blkid);
+ curlevel = parent_level;
+ curblkid = parent_blkid;
+ }
- (void) arc_read(NULL, dn->dn_objset->os_spa,
- bp, NULL, NULL, prio,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
- &aflags, &zb);
- }
- if (db)
- dbuf_rele(db, NULL);
+ if (curlevel == nlevels - 1) {
+ /* No cached indirect blocks found. */
+ ASSERT3U(curblkid, <, dn->dn_phys->dn_nblkptr);
+ bp = dn->dn_phys->dn_blkptr[curblkid];
}
+ if (BP_IS_HOLE(&bp))
+ return;
+
+ ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));
+
+ zio_t *pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
+ ZIO_FLAG_CANFAIL);
+
+ dbuf_prefetch_arg_t *dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ SET_BOOKMARK(&dpa->dpa_zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, level, blkid);
+ dpa->dpa_curlevel = curlevel;
+ dpa->dpa_prio = prio;
+ dpa->dpa_aflags = aflags;
+ dpa->dpa_spa = dn->dn_objset->os_spa;
+ dpa->dpa_epbs = epbs;
+ dpa->dpa_zio = pio;
+
+ /*
+ * If we have the indirect just above us, no need to do the asynchronous
+ * prefetch chain; we'll just run the last step ourselves. If we're at
+ * a higher level, though, we want to issue the prefetches for all the
+ * indirect blocks asynchronously, so we can go on with whatever we were
+ * doing.
+ */
+ if (curlevel == level) {
+ ASSERT3U(curblkid, ==, blkid);
+ dbuf_issue_final_prefetch(dpa, &bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
+
+ SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, curlevel, curblkid);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ &bp, dbuf_prefetch_indirect_done, dpa, prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
+ }
+ /*
+ * We use pio here instead of dpa_zio since it's possible that
+ * dpa may have already been freed.
+ */
+ zio_nowait(pio);
}
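
To make the level arithmetic above concrete: with epbs bits per indirect block, the level-(n+1) ancestor of a block id is blkid >> epbs, and descending from a cached ancestor shifts the target's blkid back down by epbs per level. A minimal userland sketch, not part of this patch; epbs = 7 is an assumed value (128 block pointers per indirect block):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int epbs = 7;            /* assumed: indblkshift - SPA_BLKPTRSHIFT */
	uint64_t blkid = 1000;   /* level-0 block we want to prefetch */

	/* Walking up: each level-(n+1) block covers 2^epbs level-n blocks. */
	for (int level = 0; level <= 2; level++)
		printf("level %d ancestor blkid: %llu\n", level,
		    (unsigned long long)(blkid >> (epbs * level)));

	/*
	 * Walking down: after reading the level-2 ancestor, the next block
	 * to read is the level-1 ancestor; the P2PHASE mask picks its slot
	 * in the parent's blkptr array.
	 */
	int curlevel = 1;        /* level we are descending to */
	uint64_t nextblkid = blkid >> (epbs * curlevel);
	uint64_t slot = nextblkid & ((1ULL << epbs) - 1);  /* P2PHASE */
	printf("next blkid %llu sits in slot %llu of its parent\n",
	    (unsigned long long)nextblkid, (unsigned long long)slot);
	return (0);
}

The same masking is how dbuf_prefetch_indirect_done locates bp inside each indirect block it reads on the way down.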
/*
@@ -1887,7 +2159,8 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
* Note: dn_struct_rwlock must be held.
*/
int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp)
{
dmu_buf_impl_t *db, *parent = NULL;
@@ -1899,12 +2172,15 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
*dbp = NULL;
top:
/* dbuf_find() returns with db_mtx held */
- db = dbuf_find(dn, level, blkid);
+ db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid);
if (db == NULL) {
blkptr_t *bp = NULL;
int err;
+ if (fail_uncached)
+ return (SET_ERROR(ENOENT));
+
ASSERT3P(parent, ==, NULL);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
@@ -1921,6 +2197,11 @@ top:
db = dbuf_create(dn, level, blkid, parent, bp);
}
+ if (fail_uncached && db->db_state != DB_CACHED) {
+ mutex_exit(&db->db_mtx);
+ return (SET_ERROR(ENOENT));
+ }
+
if (db->db_buf && refcount_is_zero(&db->db_holds)) {
arc_buf_add_ref(db->db_buf, db);
if (db->db_buf->b_data == NULL) {
@@ -1976,16 +2257,14 @@ top:
dmu_buf_impl_t *
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
{
- dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db);
- return (err ? NULL : db);
+ return (dbuf_hold_level(dn, 0, blkid, tag));
}
dmu_buf_impl_t *
dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
{
dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
+ int err = dbuf_hold_impl(dn, level, blkid, FALSE, FALSE, tag, &db);
return (err ? NULL : db);
}
@@ -2035,6 +2314,30 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
ASSERT(holds > 1);
}
+#pragma weak dmu_buf_try_add_ref = dbuf_try_add_ref
+boolean_t
+dbuf_try_add_ref(dmu_buf_t *db_fake, objset_t *os, uint64_t obj, uint64_t blkid,
+ void *tag)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+ dmu_buf_impl_t *found_db;
+ boolean_t result = B_FALSE;
+
+ if (db->db_blkid == DMU_BONUS_BLKID)
+ found_db = dbuf_find_bonus(os, obj);
+ else
+ found_db = dbuf_find(os, obj, 0, blkid);
+
+ if (found_db != NULL) {
+ if (db == found_db && dbuf_refcount(db) > db->db_dirtycnt) {
+ (void) refcount_add(&db->db_holds, tag);
+ result = B_TRUE;
+ }
+ mutex_exit(&db->db_mtx);
+ }
+ return (result);
+}
+
/*
* If you call dbuf_rele() you had better not be referencing the dnode handle
* unless you have some other direct or indirect hold on the dnode. (An indirect
@@ -2088,21 +2391,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
- mutex_exit(&db->db_mtx);
+ dnode_t *dn;
/*
- * If the dnode moves here, we cannot cross this barrier
- * until the move completes.
+ * If the dnode moves here, we cannot cross this
+ * barrier until the move completes.
*/
DB_DNODE_ENTER(db);
- atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count);
+
+ dn = DB_DNODE(db);
+ atomic_dec_32(&dn->dn_dbufs_count);
+
+ /*
+ * Decrementing the dbuf count means that the bonus
+ * buffer's dnode hold is no longer discounted in
+ * dnode_move(). The dnode cannot move until after
+ * the dnode_rele_and_unlock() below.
+ */
DB_DNODE_EXIT(db);
+
+ /*
+ * Do not reference db after its lock is dropped.
+ * Another thread may evict it.
+ */
+ mutex_exit(&db->db_mtx);
+
/*
- * The bonus buffer's dnode hold is no longer discounted
- * in dnode_move(). The dnode cannot move until after
- * the dnode_rele().
+ * If the dnode has been freed, evict the bonus
+ * buffer immediately. The data in the bonus
+ * buffer is no longer relevant and this prevents
+ * a stale bonus buffer from being associated
+ * with this dnode_t should the dnode_t be reused
+ * prior to being destroyed.
*/
- dnode_rele(DB_DNODE(db), db);
+ mutex_enter(&dn->dn_mtx);
+ if (dn->dn_type == DMU_OT_NONE ||
+ dn->dn_free_txg != 0) {
+ /*
+ * Drop dn_mtx. It is a leaf lock and
+ * cannot be held when dnode_evict_bonus()
+ * acquires other locks in order to
+ * perform the eviction.
+ *
+ * Freed dnodes cannot be reused until the
+ * last hold is released. Since this bonus
+ * buffer has a hold, the dnode will remain
+ * in the free state, even without dn_mtx
+ * held, until the dnode_rele_and_unlock()
+ * below.
+ */
+ mutex_exit(&dn->dn_mtx);
+ dnode_evict_bonus(dn);
+ mutex_enter(&dn->dn_mtx);
+ }
+ dnode_rele_and_unlock(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
@@ -2116,7 +2458,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
/*
* This dbuf has anonymous data associated with it.
*/
- dbuf_set_data(db, NULL);
+ dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
@@ -2149,7 +2491,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
} else {
dbuf_clear(db);
}
- } else if (arc_buf_eviction_needed(db->db_buf)) {
+ } else if (db->db_objset->os_evicting ||
+ arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
@@ -2168,51 +2511,57 @@ dbuf_refcount(dmu_buf_impl_t *db)
}
void *
-dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
+ dmu_buf_user_t *new_user)
{
- return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+ mutex_enter(&db->db_mtx);
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ if (db->db_user == old_user)
+ db->db_user = new_user;
+ else
+ old_user = db->db_user;
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ mutex_exit(&db->db_mtx);
+
+ return (old_user);
}
void *
-dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-
- db->db_immediate_evict = TRUE;
- return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
+ return (dmu_buf_replace_user(db_fake, NULL, user));
}
void *
-dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
- dmu_buf_evict_func_t *evict_func)
+dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(db->db_level == 0);
-
- ASSERT((user_ptr == NULL) == (evict_func == NULL));
- mutex_enter(&db->db_mtx);
-
- if (db->db_user_ptr == old_user_ptr) {
- db->db_user_ptr = user_ptr;
- db->db_evict_func = evict_func;
- } else {
- old_user_ptr = db->db_user_ptr;
- }
+ db->db_immediate_evict = TRUE;
+ return (dmu_buf_set_user(db_fake, user));
+}
- mutex_exit(&db->db_mtx);
- return (old_user_ptr);
+void *
+dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
+{
+ return (dmu_buf_replace_user(db_fake, user, NULL));
}
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(!refcount_is_zero(&db->db_holds));
- return (db->db_user_ptr);
+ dbuf_verify_user(db, DBVU_NOT_EVICTING);
+ return (db->db_user);
+}
+
+void
+dmu_buf_user_evict_wait()
+{
+ taskq_wait(dbu_evict_taskq);
}
boolean_t
@@ -2268,8 +2617,8 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
if (parent == NULL) {
mutex_exit(&db->db_mtx);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- (void) dbuf_hold_impl(dn, db->db_level+1,
- db->db_blkid >> epbs, FALSE, db, &parent);
+ parent = dbuf_hold_level(dn, db->db_level + 1,
+ db->db_blkid >> epbs, db);
rw_exit(&dn->dn_struct_rwlock);
mutex_enter(&db->db_mtx);
db->db_parent = parent;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index edd6d81..fe6f60d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -142,7 +142,7 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
@@ -390,7 +390,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
- int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+ boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
@@ -400,15 +400,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
ASSERT(length <= DMU_MAX_ACCESS);
- dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
- if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
- dbuf_flags |= DB_RF_NOPREFETCH;
+ /*
+ * Note: We directly notify the prefetch code of this read, so that
+ * we can tell it about the multi-block read. dbuf_read() only knows
+ * about the one block it is accessing.
+ */
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
+ DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
- P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+ nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
+ P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;
} else {
if (offset + length > dn->dn_datablksz) {
zfs_panic_recover("zfs: accessing past end of object "
@@ -425,15 +429,16 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
for (i = 0; i < nblks; i++) {
- dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
+ dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
zio_nowait(zio);
return (SET_ERROR(EIO));
}
+
/* initiate async i/o */
if (read)
(void) dbuf_read(db, zio, dbuf_flags);
@@ -443,6 +448,11 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
#endif
dbp[i] = &db->db;
}
+
+ if ((flags & DMU_READ_NO_PREFETCH) == 0 && read &&
+ length <= zfetch_array_rd_sz) {
+ dmu_zfetch(&dn->dn_zfetch, blkid, nblks);
+ }
rw_exit(&dn->dn_struct_rwlock);
/* wait for async i/o */
@@ -496,7 +506,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+ uint64_t length, boolean_t read, void *tag, int *numbufsp,
+ dmu_buf_t ***dbpp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
@@ -529,25 +540,21 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
}
/*
- * Issue prefetch i/os for the given blocks.
+ * Issue prefetch i/os for the given blocks. If level is greater than 0, the
+ * indirect blocks prefetched will be those that point to the blocks containing
+ * the data starting at offset, and continuing to offset + len.
*
- * Note: The assumption is that we *know* these blocks will be needed
- * almost immediately. Therefore, the prefetch i/os will be issued at
- * ZIO_PRIORITY_SYNC_READ
- *
- * Note: indirect blocks and other metadata will be read synchronously,
- * causing this function to block if they are not already cached.
+ * Note that if the indirect blocks above the blocks being prefetched are not in
+ * cache, they will be read in asynchronously.
*/
void
-dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
+dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
+ uint64_t len, zio_priority_t pri)
{
dnode_t *dn;
uint64_t blkid;
int nblks, err;
- if (zfs_prefetch_disable)
- return;
-
if (len == 0) { /* they're interested in the bonus buffer */
dn = DMU_META_DNODE(os);
@@ -555,8 +562,9 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
- dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ);
+ blkid = dbuf_whichblock(dn, level,
+ object * sizeof (dnode_phys_t));
+ dbuf_prefetch(dn, level, blkid, pri, 0);
rw_exit(&dn->dn_struct_rwlock);
return;
}
@@ -571,18 +579,24 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_datablkshift) {
- int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset + len, 1 << blkshift) -
- P2ALIGN(offset, 1 << blkshift)) >> blkshift;
+ /*
+ * offset + len - 1 is the last byte we want to prefetch for, and offset
+ * is the first. Then dbuf_whichblock(dn, level, offset + len - 1) is the
+ * last block we want to prefetch, and dbuf_whichblock(dn, level,
+ * offset) is the first. Then the number we need to prefetch is the
+ * last - first + 1.
+ */
+ if (level > 0 || dn->dn_datablkshift != 0) {
+ nblks = dbuf_whichblock(dn, level, offset + len - 1) -
+ dbuf_whichblock(dn, level, offset) + 1;
} else {
nblks = (offset < dn->dn_datablksz);
}
if (nblks != 0) {
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, level, offset);
for (int i = 0; i < nblks; i++)
- dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ);
+ dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
rw_exit(&dn->dn_struct_rwlock);
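
The nblks computation described in the comment above reduces to a pair of shifts when the data block size is a power of two. A small standalone sketch; the shifts are assumed example values (datablkshift = 17 for 128K data blocks, epbs = 7), and whichblock() models dbuf_whichblock() rather than calling it:

#include <stdio.h>
#include <stdint.h>

/* Model of dbuf_whichblock() for a power-of-two data block size. */
static uint64_t
whichblock(int level, uint64_t offset)
{
	int datablkshift = 17, epbs = 7;

	return (offset >> (datablkshift + level * epbs));
}

int
main(void)
{
	uint64_t offset = 100ULL << 17;	/* starts in data block 100 */
	uint64_t len = 5ULL << 17;	/* five 128K blocks */

	for (int level = 0; level <= 1; level++) {
		uint64_t first = whichblock(level, offset);
		uint64_t last = whichblock(level, offset + len - 1);

		printf("level %d: blocks %llu..%llu (%llu to prefetch)\n",
		    level, (unsigned long long)first,
		    (unsigned long long)last,
		    (unsigned long long)(last - first + 1));
	}
	return (0);
}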
@@ -1394,7 +1408,7 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
DB_DNODE_ENTER(dbuf);
dn = DB_DNODE(dbuf);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(dbuf);
@@ -1485,7 +1499,19 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
dr->dt.dl.dr_overridden_by = *zio->io_bp;
dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
dr->dt.dl.dr_copies = zio->io_prop.zp_copies;
- if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by))
+
+ /*
+ * Old style holes are filled with all zeros, whereas
+ * new-style holes maintain their lsize, type, level,
+ * and birth time (see zio_write_compress). While we
+ * need to reset the BP_SET_LSIZE() call that happened
+ * in dmu_sync_ready for old style holes, we do *not*
+ * want to wipe out the information contained in new
+ * style holes. Thus, only zero out the block pointer if
+ * it's an old style hole.
+ */
+ if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by) &&
+ dr->dt.dl.dr_overridden_by.blk_birth == 0)
BP_ZERO(&dr->dt.dl.dr_overridden_by);
} else {
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -1653,19 +1679,32 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg);
/*
- * Assume the on-disk data is X, the current syncing data is Y,
- * and the current in-memory data is Z (currently in dmu_sync).
- * X and Z are identical but Y is has been modified. Normally,
- * when X and Z are the same we will perform a nopwrite but if Y
- * is different we must disable nopwrite since the resulting write
- * of Y to disk can free the block containing X. If we allowed a
- * nopwrite to occur the block pointing to Z would reference a freed
- * block. Since this is a rare case we simplify this by disabling
- * nopwrite if the current dmu_sync-ing dbuf has been modified in
- * a previous transaction.
+ * Assume the on-disk data is X, the current syncing data (in
+ * txg - 1) is Y, and the current in-memory data is Z (currently
+ * in dmu_sync).
+ *
+ * We usually want to perform a nopwrite if X and Z are the
+ * same. However, if Y is different (i.e. the BP is going to
+ * change before this write takes effect), then a nopwrite will
+ * be incorrect - we would override with X, which could have
+ * been freed when Y was written.
+ *
+ * (Note that this is not a concern when we are nop-writing from
+ * syncing context, because X and Y must be identical, because
+ * all previous txgs have been synced.)
+ *
+ * Therefore, we disable nopwrite if the current BP could change
+ * before this TXG. There are two ways it could change: by
+ * being dirty (dr_next is non-NULL), or by being freed
+ * (dnode_block_freed()). This behavior is verified by
+ * zio_done(), which VERIFYs that the override BP is identical
+ * to the on-disk BP.
*/
- if (dr->dr_next)
+ DB_DNODE_ENTER(db);
+ dn = DB_DNODE(db);
+ if (dr->dr_next != NULL || dnode_block_freed(dn, db->db_blkid))
zp.zp_nopwrite = B_FALSE;
+ DB_DNODE_EXIT(db);
ASSERT(dr->dr_txg == txg);
if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC ||
@@ -1785,19 +1824,15 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
* 3. all other level 0 blocks
*/
if (ismd) {
- /*
- * XXX -- we should design a compression algorithm
- * that specializes in arrays of bps.
- */
- boolean_t lz4_ac = spa_feature_is_active(os->os_spa,
- SPA_FEATURE_LZ4_COMPRESS);
-
if (zfs_mdcomp_disable) {
compress = ZIO_COMPRESS_EMPTY;
- } else if (lz4_ac) {
- compress = ZIO_COMPRESS_LZ4;
} else {
- compress = ZIO_COMPRESS_LZJB;
+ /*
+ * XXX -- we should design a compression algorithm
+ * that specializes in arrays of bps.
+ */
+ compress = zio_compress_select(os->os_spa,
+ ZIO_COMPRESS_ON, ZIO_COMPRESS_ON);
}
/*
@@ -1830,7 +1865,8 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
compress = ZIO_COMPRESS_OFF;
checksum = ZIO_CHECKSUM_NOPARITY;
} else {
- compress = zio_compress_select(dn->dn_compress, compress);
+ compress = zio_compress_select(os->os_spa, dn->dn_compress,
+ compress);
checksum = (dedup_checksum == ZIO_CHECKSUM_OFF) ?
zio_checksum_select(dn->dn_checksum, checksum) :
@@ -1874,25 +1910,20 @@ int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{
dnode_t *dn;
- int i, err;
+ int err;
- err = dnode_hold(os, object, FTAG, &dn);
- if (err)
- return (err);
/*
* Sync any current changes before
* we go trundling through the block pointers.
*/
- for (i = 0; i < TXG_SIZE; i++) {
- if (list_link_active(&dn->dn_dirty_link[i]))
- break;
+ err = dmu_object_wait_synced(os, object);
+ if (err) {
+ return (err);
}
- if (i != TXG_SIZE) {
- dnode_rele(dn, FTAG);
- txg_wait_synced(dmu_objset_pool(os), 0);
- err = dnode_hold(os, object, FTAG, &dn);
- if (err)
- return (err);
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err) {
+ return (err);
}
err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
@@ -1901,6 +1932,36 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
return (err);
}
+/*
+ * Given the ZFS object, if it contains any dirty dnodes
+ * this function flushes all dirty blocks to disk. This
+ * ensures the DMU object info is updated. A more efficient
+ * future version might just find the TXG with the maximum
+ * ID and wait for that to be synced.
+ */
+int
+dmu_object_wait_synced(objset_t *os, uint64_t object)
+{
+ dnode_t *dn;
+ int error, i;
+
+ error = dnode_hold(os, object, FTAG, &dn);
+ if (error) {
+ return (error);
+ }
+
+ for (i = 0; i < TXG_SIZE; i++) {
+ if (list_link_active(&dn->dn_dirty_link[i])) {
+ break;
+ }
+ }
+ dnode_rele(dn, FTAG);
+ if (i != TXG_SIZE) {
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ }
+
+ return (0);
+}
+
void
dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
index bd9e894..e88968b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
@@ -138,7 +138,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
- if (zb->zb_object != DMU_META_DNODE_OBJECT)
+ if (bp == NULL || zb->zb_object != DMU_META_DNODE_OBJECT)
return (0);
if (BP_IS_HOLE(bp)) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
index 808864a..6ca021e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
@@ -148,6 +148,11 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
return (0);
}
+/*
+ * Return (in *objectp) the next object which is allocated (or a hole)
+ * after *object, taking into account only objects that may have been modified
+ * after the specified txg.
+ */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
index fb9c8a1..9ad18f0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
@@ -23,7 +23,9 @@
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -48,6 +50,7 @@
#include <sys/sa.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
+#include <sys/vdev.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -55,6 +58,16 @@
*/
krwlock_t os_lock;
+/*
+ * Tunable to override the maximum number of threads for the parallelization
+ * of dmu_objset_find_dp, needed to speed up the import of pools with many
+ * datasets.
+ * Default is 4 times the number of leaf vdevs.
+ */
+int dmu_find_threads = 0;
+
+static void dmu_objset_find_dp_cb(void *arg);
+
void
dmu_objset_init(void)
{
@@ -151,7 +164,8 @@ compression_changed_cb(void *arg, uint64_t newval)
*/
ASSERT(newval != ZIO_COMPRESS_INHERIT);
- os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
+ os->os_compress = zio_compress_select(os->os_spa, newval,
+ ZIO_COMPRESS_ON);
}
static void
@@ -356,7 +370,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
secondary_cache_changed_cb, os);
}
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
if (err == 0) {
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
@@ -408,7 +422,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
} else {
/* It's the meta-objset. */
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
- os->os_compress = ZIO_COMPRESS_LZJB;
+ os->os_compress = ZIO_COMPRESS_ON;
os->os_copies = spa_max_replication(spa);
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
os->os_dedup_verify = B_FALSE;
@@ -418,7 +432,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_secondary_cache = ZFS_CACHE_ALL;
}
- if (ds == NULL || !dsl_dataset_is_snapshot(ds))
+ if (ds == NULL || !ds->ds_is_snapshot)
os->os_zil_header = os->os_phys->os_zil_header;
os->os_zil = zil_alloc(os, &os->os_zil_header);
@@ -437,16 +451,13 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
- DMU_META_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
- &os->os_meta_dnode);
+ dnode_special_open(os, &os->os_phys->os_meta_dnode,
+ DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
- DMU_USERUSED_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
- &os->os_userused_dnode);
- DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
- &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
- &os->os_groupused_dnode);
+ dnode_special_open(os, &os->os_phys->os_userused_dnode,
+ DMU_USERUSED_OBJECT, &os->os_userused_dnode);
+ dnode_special_open(os, &os->os_phys->os_groupused_dnode,
+ DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
}
*osp = os;
@@ -505,6 +516,25 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
return (err);
}
+static int
+dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
+ boolean_t readonly, void *tag, objset_t **osp)
+{
+ int err;
+
+ err = dmu_objset_from_ds(ds, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, tag);
+ } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
+ dsl_dataset_disown(ds, tag);
+ return (SET_ERROR(EINVAL));
+ } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_disown(ds, tag);
+ return (SET_ERROR(EROFS));
+ }
+ return (err);
+}
+
/*
* dsl_pool must not be held when this is called.
* Upon successful return, there will be a longhold on the dataset,
@@ -526,21 +556,26 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
dsl_pool_rele(dp, FTAG);
return (err);
}
-
- err = dmu_objset_from_ds(ds, osp);
+ err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
dsl_pool_rele(dp, FTAG);
- if (err != 0) {
- dsl_dataset_disown(ds, tag);
- } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
- dsl_dataset_disown(ds, tag);
- return (SET_ERROR(EINVAL));
- } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_disown(ds, tag);
- return (SET_ERROR(EROFS));
- }
+
return (err);
}
+int
+dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
+ boolean_t readonly, void *tag, objset_t **osp)
+{
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+ if (err != 0)
+ return (err);
+
+ return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+}
+
void
dmu_objset_rele(objset_t *os, void *tag)
{
@@ -590,41 +625,53 @@ dmu_objset_disown(objset_t *os, void *tag)
void
dmu_objset_evict_dbufs(objset_t *os)
{
+ dnode_t dn_marker;
dnode_t *dn;
mutex_enter(&os->os_lock);
+ dn = list_head(&os->os_dnodes);
+ while (dn != NULL) {
+ /*
+ * Skip dnodes without holds. We have to do this dance
+ * because dnode_add_ref() only works if there is already a
+ * hold. If the dnode has no holds, then it has no dbufs.
+ */
+ if (dnode_add_ref(dn, FTAG)) {
+ list_insert_after(&os->os_dnodes, dn, &dn_marker);
+ mutex_exit(&os->os_lock);
- /* process the mdn last, since the other dnodes have holds on it */
- list_remove(&os->os_dnodes, DMU_META_DNODE(os));
- list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
-
- /*
- * Find the first dnode with holds. We have to do this dance
- * because dnode_add_ref() only works if you already have a
- * hold. If there are no holds then it has no dbufs so OK to
- * skip.
- */
- for (dn = list_head(&os->os_dnodes);
- dn && !dnode_add_ref(dn, FTAG);
- dn = list_next(&os->os_dnodes, dn))
- continue;
-
- while (dn) {
- dnode_t *next_dn = dn;
-
- do {
- next_dn = list_next(&os->os_dnodes, next_dn);
- } while (next_dn && !dnode_add_ref(next_dn, FTAG));
+ dnode_evict_dbufs(dn);
+ dnode_rele(dn, FTAG);
- mutex_exit(&os->os_lock);
- dnode_evict_dbufs(dn);
- dnode_rele(dn, FTAG);
- mutex_enter(&os->os_lock);
- dn = next_dn;
+ mutex_enter(&os->os_lock);
+ dn = list_next(&os->os_dnodes, &dn_marker);
+ list_remove(&os->os_dnodes, &dn_marker);
+ } else {
+ dn = list_next(&os->os_dnodes, dn);
+ }
}
mutex_exit(&os->os_lock);
+
+ if (DMU_USERUSED_DNODE(os) != NULL) {
+ dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
+ dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
+ }
+ dnode_evict_dbufs(DMU_META_DNODE(os));
}
+/*
+ * Objset eviction processing is split into two pieces.
+ * The first marks the objset as evicting, evicts any dbufs that
+ * have a refcount of zero, and then queues up the objset for the
+ * second phase of eviction. Once os->os_dnodes has been cleared by
+ * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
+ * The second phase closes the special dnodes, dequeues the objset from
+ * the list of those undergoing eviction, and finally frees the objset.
+ *
+ * NOTE: Due to asynchronous eviction processing (invocation of
+ * dnode_buf_pageout()), it is possible for the meta dnode for the
+ * objset to have no holds even though os->os_dnodes is not empty.
+ */
void
dmu_objset_evict(objset_t *os)
{
@@ -634,7 +681,7 @@ dmu_objset_evict(objset_t *os)
ASSERT(!dmu_objset_is_dirty(os, t));
if (ds) {
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
VERIFY0(dsl_prop_unregister(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
checksum_changed_cb, os));
@@ -671,8 +718,24 @@ dmu_objset_evict(objset_t *os)
if (os->os_sa)
sa_tear_down(os);
+ os->os_evicting = B_TRUE;
dmu_objset_evict_dbufs(os);
+ mutex_enter(&os->os_lock);
+ spa_evicting_os_register(os->os_spa, os);
+ if (list_is_empty(&os->os_dnodes)) {
+ mutex_exit(&os->os_lock);
+ dmu_objset_evict_done(os);
+ } else {
+ mutex_exit(&os->os_lock);
+ }
+}
+
+void
+dmu_objset_evict_done(objset_t *os)
+{
+ ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
+
dnode_special_close(&os->os_meta_dnode);
if (DMU_USERUSED_DNODE(os)) {
dnode_special_close(&os->os_userused_dnode);
@@ -680,8 +743,6 @@ dmu_objset_evict(objset_t *os)
}
zil_free(os->os_zil);
- ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
-
VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
/*
@@ -696,6 +757,7 @@ dmu_objset_evict(objset_t *os)
mutex_destroy(&os->os_lock);
mutex_destroy(&os->os_obj_lock);
mutex_destroy(&os->os_user_ptr_lock);
+ spa_evicting_os_deregister(os->os_spa, os);
kmem_free(os, sizeof (objset_t));
}
@@ -894,7 +956,7 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
return (error);
/* You can only clone snapshots, not the head datasets. */
- if (!dsl_dataset_is_snapshot(origin)) {
+ if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1458,7 +1520,7 @@ int
dmu_objset_is_snapshot(objset_t *os)
{
if (os->os_dsl_dataset != NULL)
- return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
+ return (os->os_dsl_dataset->ds_is_snapshot);
else
return (B_FALSE);
}
@@ -1554,30 +1616,41 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name,
return (0);
}
-/*
- * Find objsets under and including ddobj, call func(ds) on each.
- */
-int
-dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
- int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+typedef struct dmu_objset_find_ctx {
+ taskq_t *dc_tq;
+ dsl_pool_t *dc_dp;
+ uint64_t dc_ddobj;
+ int (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
+ void *dc_arg;
+ int dc_flags;
+ kmutex_t *dc_error_lock;
+ int *dc_error;
+} dmu_objset_find_ctx_t;
+
+static void
+dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
{
+ dsl_pool_t *dp = dcp->dc_dp;
+ dmu_objset_find_ctx_t *child_dcp;
dsl_dir_t *dd;
dsl_dataset_t *ds;
zap_cursor_t zc;
zap_attribute_t *attr;
uint64_t thisobj;
- int err;
+ int err = 0;
- ASSERT(dsl_pool_config_held(dp));
+ /* don't process if there already was an error */
+ if (*dcp->dc_error != 0)
+ goto out;
- err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+ err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
if (err != 0)
- return (err);
+ goto out;
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
if (dd->dd_myname[0] == '$') {
dsl_dir_rele(dd, FTAG);
- return (0);
+ goto out;
}
thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
@@ -1586,7 +1659,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
/*
* Iterate over all children.
*/
- if (flags & DS_FIND_CHILDREN) {
+ if (dcp->dc_flags & DS_FIND_CHILDREN) {
for (zap_cursor_init(&zc, dp->dp_meta_objset,
dsl_dir_phys(dd)->dd_child_dir_zapobj);
zap_cursor_retrieve(&zc, attr) == 0;
@@ -1595,24 +1668,22 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
sizeof (uint64_t));
ASSERT3U(attr->za_num_integers, ==, 1);
- err = dmu_objset_find_dp(dp, attr->za_first_integer,
- func, arg, flags);
- if (err != 0)
- break;
+ child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
+ *child_dcp = *dcp;
+ child_dcp->dc_ddobj = attr->za_first_integer;
+ if (dcp->dc_tq != NULL)
+ (void) taskq_dispatch(dcp->dc_tq,
+ dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
+ else
+ dmu_objset_find_dp_impl(child_dcp);
}
zap_cursor_fini(&zc);
-
- if (err != 0) {
- dsl_dir_rele(dd, FTAG);
- kmem_free(attr, sizeof (zap_attribute_t));
- return (err);
- }
}
/*
* Iterate over all snapshots.
*/
- if (flags & DS_FIND_SNAPSHOTS) {
+ if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
dsl_dataset_t *ds;
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
@@ -1633,7 +1704,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
attr->za_first_integer, FTAG, &ds);
if (err != 0)
break;
- err = func(dp, ds, arg);
+ err = dcp->dc_func(dp, ds, dcp->dc_arg);
dsl_dataset_rele(ds, FTAG);
if (err != 0)
break;
@@ -1646,17 +1717,123 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
kmem_free(attr, sizeof (zap_attribute_t));
if (err != 0)
- return (err);
+ goto out;
/*
* Apply to self.
*/
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
if (err != 0)
- return (err);
- err = func(dp, ds, arg);
+ goto out;
+ err = dcp->dc_func(dp, ds, dcp->dc_arg);
dsl_dataset_rele(ds, FTAG);
- return (err);
+
+out:
+ if (err != 0) {
+ mutex_enter(dcp->dc_error_lock);
+ /* only keep first error */
+ if (*dcp->dc_error == 0)
+ *dcp->dc_error = err;
+ mutex_exit(dcp->dc_error_lock);
+ }
+
+ kmem_free(dcp, sizeof (*dcp));
+}
+
+static void
+dmu_objset_find_dp_cb(void *arg)
+{
+ dmu_objset_find_ctx_t *dcp = arg;
+ dsl_pool_t *dp = dcp->dc_dp;
+
+ /*
+ * We need to get a pool_config_lock here, as there are several
+ * assert(pool_config_held) calls down the stack. Getting a lock via
+ * dsl_pool_config_enter is risky, as it might be stalled by a
+ * pending writer. This would deadlock, as the write lock can
+ * only be granted when our parent thread gives up the lock.
+ * The _prio interface gives us priority over a pending writer.
+ */
+ dsl_pool_config_enter_prio(dp, FTAG);
+
+ dmu_objset_find_dp_impl(dcp);
+
+ dsl_pool_config_exit(dp, FTAG);
+}
+
+/*
+ * Find objsets under and including ddobj, call func(ds) on each.
+ * The order for the enumeration is completely undefined.
+ * func is called with dsl_pool_config held.
+ */
+int
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+ int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+{
+ int error = 0;
+ taskq_t *tq = NULL;
+ int ntasks;
+ dmu_objset_find_ctx_t *dcp;
+ kmutex_t err_lock;
+
+ mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
+ dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
+ dcp->dc_tq = NULL;
+ dcp->dc_dp = dp;
+ dcp->dc_ddobj = ddobj;
+ dcp->dc_func = func;
+ dcp->dc_arg = arg;
+ dcp->dc_flags = flags;
+ dcp->dc_error_lock = &err_lock;
+ dcp->dc_error = &error;
+
+ if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
+ /*
+ * If a write lock is held, we can't make use of parallelism,
+ * because dsl_pool_config_held is asserted down the stack of
+ * the worker threads. With a read lock this is solved by taking
+ * a read lock in each worker thread, which isn't possible with
+ * a writer lock. So we fall back to the synchronous path here.
+ * In the future it might be possible to get some magic into
+ * dsl_pool_config_held in a way that it returns true for
+ * the worker threads so that a single lock held from this
+ * thread suffices. For now, stay single threaded.
+ */
+ dmu_objset_find_dp_impl(dcp);
+
+ return (error);
+ }
+
+ ntasks = dmu_find_threads;
+ if (ntasks == 0)
+ ntasks = vdev_count_leaves(dp->dp_spa) * 4;
+ tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
+ INT_MAX, 0);
+ if (tq == NULL) {
+ kmem_free(dcp, sizeof (*dcp));
+ return (SET_ERROR(ENOMEM));
+ }
+ dcp->dc_tq = tq;
+
+ /* dcp will be freed by task */
+ (void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
+
+ /*
+ * PORTING: this code relies on the property of taskq_wait to wait
+ * until no more tasks are queued and no more tasks are active. As
+ * we always queue new tasks from within other tasks, taskq_wait
+ * reliably waits for the full recursion to finish, even though we
+ * enqueue new tasks after taskq_wait has been called.
+ * On platforms other than illumos, taskq_wait may not have this
+ * property.
+ */
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ mutex_destroy(&err_lock);
+
+ return (error);
}
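
The recursive dispatch scheme above relies on taskq_wait returning only once no task is queued or running, even as tasks enqueue further tasks. A self-contained pthread model of that property (illustrative only; the toy linked-list queue and counter stand in for the real taskq implementation):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct task {
	struct task *next;
	int depth;
} task_t;

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static task_t *head;
static int outstanding;		/* tasks queued or running */
static int done;

static void
dispatch(int depth)
{
	task_t *t = malloc(sizeof (*t));

	t->depth = depth;
	pthread_mutex_lock(&lock);
	t->next = head;
	head = t;
	outstanding++;
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lock);
}

static void *
worker(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (head == NULL && !done)
			pthread_cond_wait(&cv, &lock);
		if (head == NULL) {
			pthread_mutex_unlock(&lock);
			return (NULL);
		}
		task_t *t = head;
		head = t->next;
		pthread_mutex_unlock(&lock);

		/* Like dmu_objset_find_dp_impl, a task may queue children. */
		if (t->depth < 3) {
			dispatch(t->depth + 1);
			dispatch(t->depth + 1);
		}

		pthread_mutex_lock(&lock);
		if (--outstanding == 0)
			pthread_cond_broadcast(&cv);
		pthread_mutex_unlock(&lock);
		free(t);
	}
}

/* Returns only when no task is queued and none is running. */
static void
taskq_wait_model(void)
{
	pthread_mutex_lock(&lock);
	while (outstanding != 0)
		pthread_cond_wait(&cv, &lock);
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	pthread_t tid[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	dispatch(0);			/* the root ddobj task */
	taskq_wait_model();
	printf("all recursively queued tasks finished\n");

	pthread_mutex_lock(&lock);
	done = 1;
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lock);
	for (int i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	return (0);
}

The key ordering is that a task queues its children before decrementing outstanding, so the counter can only reach zero once the whole recursion has drained.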
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
index fd6e800..ef13961 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
@@ -53,6 +53,7 @@
#include <sys/blkptr.h>
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
+#include <sys/bqueue.h>
#ifdef __FreeBSD__
#undef dump_write
@@ -61,10 +62,34 @@
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
+int zfs_send_queue_length = 16 * 1024 * 1024;
+int zfs_recv_queue_length = 16 * 1024 * 1024;
static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";
+#define BP_SPAN(datablkszsec, indblkshift, level) \
+ (((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
+ (level) * (indblkshift - SPA_BLKPTRSHIFT)))
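+
+BP_SPAN gives the number of bytes of the object covered by one block pointer at a given level. Plugging in an assumed example dnode (128K data blocks, 16K indirect blocks; SPA_MINBLOCKSHIFT = 9 and SPA_BLKPTRSHIFT = 7 as in ZFS):
+
+#include <stdio.h>
+#include <stdint.h>
+
+#define SPA_MINBLOCKSHIFT 9
+#define SPA_BLKPTRSHIFT   7
+
+/* Same formula as the BP_SPAN macro above. */
+#define BP_SPAN(datablkszsec, indblkshift, level) \
+	(((uint64_t)(datablkszsec)) << (SPA_MINBLOCKSHIFT + \
+	(level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
+
+int
+main(void)
+{
+	uint16_t datablkszsec = 256;	/* 256 * 512 = 128K data blocks */
+	uint8_t indblkshift = 14;	/* 16K indirect blocks */
+
+	/* Prints 128K, 16M, and 2G: each level fans out by 128 blkptrs. */
+	for (int level = 0; level <= 2; level++)
+		printf("level %d span: %llu bytes\n", level,
+		    (unsigned long long)BP_SPAN(datablkszsec,
+		    indblkshift, level));
+	return (0);
+}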
+
+struct send_thread_arg {
+ bqueue_t q;
+ dsl_dataset_t *ds; /* Dataset to traverse */
+ uint64_t fromtxg; /* Traverse from this txg */
+ int flags; /* flags to pass to traverse_dataset */
+ int error_code;
+ boolean_t cancel;
+};
+
+struct send_block_record {
+ boolean_t eos_marker; /* Marks the end of the stream */
+ blkptr_t bp;
+ zbookmark_phys_t zb;
+ uint8_t indblkshift;
+ uint16_t datablkszsec;
+ bqueue_node_t ln;
+};
+
static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
{
@@ -73,7 +98,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
struct iovec aiov;
ASSERT0(len % 8);
- fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
aiov.iov_base = buf;
aiov.iov_len = len;
auio.uio_iov = &aiov;
@@ -99,6 +123,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
return (dsp->dsa_err);
}
+/*
+ * For all record types except BEGIN, fill in the checksum (overlaid in
+ * drr_u.drr_checksum.drr_checksum). The checksum verifies everything
+ * up to the start of the checksum itself.
+ */
+static int
+dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+{
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ fletcher_4_incremental_native(dsp->dsa_drr,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ &dsp->dsa_zc);
+ if (dsp->dsa_drr->drr_type != DRR_BEGIN) {
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+ drr_checksum.drr_checksum));
+ dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+ }
+ fletcher_4_incremental_native(&dsp->dsa_drr->
+ drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), &dsp->dsa_zc);
+ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+ return (SET_ERROR(EINTR));
+ if (payload_len != 0) {
+ fletcher_4_incremental_native(payload, payload_len,
+ &dsp->dsa_zc);
+ if (dump_bytes(dsp, payload, payload_len) != 0)
+ return (SET_ERROR(EINTR));
+ }
+ return (0);
+}
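
The framing above can be modeled in isolation: checksum the record up to the checksum field, store the running value there, then fold the stored field and any payload back into the running stream checksum. A toy sketch, where a plain byte sum stands in for fletcher_4_incremental_native and the struct is a simplified stand-in for dmu_replay_record_t:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Toy stand-in for fletcher_4_incremental_native(): a running byte sum. */
static void
toy_cksum_incremental(const void *buf, size_t len, uint64_t *cksum)
{
	const uint8_t *p = buf;

	for (size_t i = 0; i < len; i++)
		*cksum += p[i];
}

/* Simplified record: the checksum overlays the tail, as in ZFS. */
typedef struct record {
	uint32_t type;
	uint8_t body[20];
	uint64_t cksum;		/* drr_u.drr_checksum.drr_checksum */
} record_t;

int
main(void)
{
	record_t rec;
	uint64_t stream = 0;	/* running checksum, like dsa_zc */
	uint8_t payload[16] = "example payload";

	memset(&rec, 0, sizeof (rec));
	rec.type = 1;

	/* Everything up to the start of the checksum itself... */
	toy_cksum_incremental(&rec, offsetof(record_t, cksum), &stream);
	/* ...is what the record's own checksum covers. */
	rec.cksum = stream;
	/* The stored field and the payload then join the stream. */
	toy_cksum_incremental(&rec.cksum, sizeof (rec.cksum), &stream);
	toy_cksum_incremental(payload, sizeof (payload), &stream);

	printf("record cksum %llu, stream cksum %llu\n",
	    (unsigned long long)rec.cksum, (unsigned long long)stream);
	return (0);
}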
+
static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
uint64_t length)
@@ -143,8 +199,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
*/
if (dsp->dsa_pending_op != PENDING_NONE &&
dsp->dsa_pending_op != PENDING_FREE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -167,8 +222,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
return (0);
} else {
/* not a continuation. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -181,8 +235,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
drrf->drr_length = length;
drrf->drr_toguid = dsp->dsa_toguid;
if (length == -1ULL) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
} else {
dsp->dsa_pending_op = PENDING_FREE;
@@ -214,12 +267,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
* of different types.
*/
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
- /* write a DATA record */
+ /* write a WRITE record */
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
dsp->dsa_drr->drr_type = DRR_WRITE;
drrw->drr_object = object;
@@ -245,9 +297,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (SET_ERROR(EINTR));
- if (dump_bytes(dsp, data, blksz) != 0)
+ if (dump_record(dsp, data, blksz) != 0)
return (SET_ERROR(EINTR));
return (0);
}
@@ -261,8 +311,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
&(dsp->dsa_drr->drr_u.drr_write_embedded);
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (EINTR);
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -282,9 +331,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
decode_embedded_bp_compressed(bp, buf);
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (EINTR);
- if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+ if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
return (EINTR);
return (0);
}
@@ -295,8 +342,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -308,9 +354,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
- return (SET_ERROR(EINTR));
- if (dump_bytes(dsp, data, blksz))
+ if (dump_record(dsp, data, blksz) != 0)
return (SET_ERROR(EINTR));
return (0);
}
@@ -333,8 +377,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
*/
if (dsp->dsa_pending_op != PENDING_NONE &&
dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -348,8 +391,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
return (0);
} else {
/* can't be aggregated. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -376,8 +418,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
return (dump_freeobjects(dsp, object, 1));
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -398,11 +439,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (SET_ERROR(EINTR));
-
- if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+ if (dump_record(dsp, DN_BONUS(dnp),
+ P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
return (SET_ERROR(EINTR));
+ }
/* Free anything past the end of the file. */
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
@@ -440,58 +480,116 @@ backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
return (B_FALSE);
}
-#define BP_SPAN(dnp, level) \
- (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
- (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
+/*
+ * This is the callback function for traverse_dataset; it runs in the
+ * worker thread for dmu_send_impl.
+ */
+/*ARGSUSED*/
+static int
+send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ const zbookmark_phys_t *zb, const struct dnode_phys *dnp, void *arg)
+{
+ struct send_thread_arg *sta = arg;
+ struct send_block_record *record;
+ uint64_t record_size;
+ int err = 0;
-/* ARGSUSED */
+ if (sta->cancel)
+ return (SET_ERROR(EINTR));
+
+ if (bp == NULL) {
+ ASSERT3U(zb->zb_level, ==, ZB_DNODE_LEVEL);
+ return (0);
+ } else if (zb->zb_level < 0) {
+ return (0);
+ }
+
+ record = kmem_zalloc(sizeof (struct send_block_record), KM_SLEEP);
+ record->eos_marker = B_FALSE;
+ record->bp = *bp;
+ record->zb = *zb;
+ record->indblkshift = dnp->dn_indblkshift;
+ record->datablkszsec = dnp->dn_datablkszsec;
+ record_size = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ bqueue_enqueue(&sta->q, record, record_size);
+
+ return (err);
+}
+
+/*
+ * This function kicks off the traverse_dataset. It also handles setting the
+ * error code of the thread in case something goes wrong, and pushes the End of
+ * Stream record when the traverse_dataset call has finished. If there is no
+ * dataset to traverse, the thread immediately pushes End of Stream marker.
+ */
+static void
+send_traverse_thread(void *arg)
+{
+ struct send_thread_arg *st_arg = arg;
+ int err;
+ struct send_block_record *data;
+
+ if (st_arg->ds != NULL) {
+ err = traverse_dataset(st_arg->ds, st_arg->fromtxg,
+ st_arg->flags, send_cb, arg);
+ if (err != EINTR)
+ st_arg->error_code = err;
+ }
+ data = kmem_zalloc(sizeof (*data), KM_SLEEP);
+ data->eos_marker = B_TRUE;
+ bqueue_enqueue(&st_arg->q, data, 1);
+ thread_exit();
+}
+
+/*
+ * This function actually handles figuring out what kind of record needs to be
+ * dumped, reading the data (which has hopefully been prefetched), and calling
+ * the appropriate helper function.
+ */
static int
-backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
- const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
+do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
{
- dmu_sendarg_t *dsp = arg;
+ dsl_dataset_t *ds = dmu_objset_ds(dsa->dsa_os);
+ const blkptr_t *bp = &data->bp;
+ const zbookmark_phys_t *zb = &data->zb;
+ uint8_t indblkshift = data->indblkshift;
+ uint16_t dblkszsec = data->datablkszsec;
+ spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
int err = 0;
- if (issig(JUSTLOOKING) && issig(FORREAL))
- return (SET_ERROR(EINTR));
+ ASSERT3U(zb->zb_level, >=, 0);
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
- } else if (zb->zb_level == ZB_ZIL_LEVEL) {
- /*
- * If we are sending a non-snapshot (which is allowed on
- * read-only pools), it may have a ZIL, which must be ignored.
- */
- return (0);
} else if (BP_IS_HOLE(bp) &&
zb->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t span = BP_SPAN(dnp, zb->zb_level);
+ uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
- err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
+ err = dump_freeobjects(dsa, dnobj, span >> DNODE_SHIFT);
} else if (BP_IS_HOLE(bp)) {
- uint64_t span = BP_SPAN(dnp, zb->zb_level);
- err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
+ uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
+ uint64_t offset = zb->zb_blkid * span;
+ err = dump_free(dsa, zb->zb_object, offset, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
return (0);
} else if (type == DMU_OT_DNODE) {
- dnode_phys_t *blk;
- int i;
int blksz = BP_GET_LSIZE(bp);
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
+ ASSERT0(zb->zb_level);
+
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
&aflags, zb) != 0)
return (SET_ERROR(EIO));
- blk = abuf->b_data;
- for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
- uint64_t dnobj = (zb->zb_blkid <<
- (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
- err = dump_dnode(dsp, dnobj, blk+i);
+ dnode_phys_t *blk = abuf->b_data;
+ uint64_t dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT);
+ for (int i = 0; i < blksz >> DNODE_SHIFT; i++) {
+ err = dump_dnode(dsa, dnobj + i, blk + i);
if (err != 0)
break;
}
@@ -506,20 +604,21 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
&aflags, zb) != 0)
return (SET_ERROR(EIO));
- err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
+ err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
- } else if (backup_do_embed(dsp, bp)) {
+ } else if (backup_do_embed(dsa, bp)) {
/* it's an embedded level-0 block of a regular object */
- int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- err = dump_write_embedded(dsp, zb->zb_object,
+ int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
+ ASSERT0(zb->zb_level);
+ err = dump_write_embedded(dsa, zb->zb_object,
zb->zb_blkid * blksz, blksz, bp);
- } else { /* it's a level-0 block of a regular object */
+ } else {
+ /* it's a level-0 block of a regular object */
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
- int blksz = BP_GET_LSIZE(bp);
+ int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
uint64_t offset;
- ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
ASSERT0(zb->zb_level);
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
@@ -532,7 +631,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
for (ptr = abuf->b_data;
(char *)ptr < (char *)abuf->b_data + blksz;
ptr++)
- *ptr = 0x2f5baddb10c;
+ *ptr = 0x2f5baddb10cULL;
} else {
return (SET_ERROR(EIO));
}
@@ -540,20 +639,20 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
offset = zb->zb_blkid * blksz;
- if (!(dsp->dsa_featureflags &
+ if (!(dsa->dsa_featureflags &
DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
blksz > SPA_OLD_MAXBLOCKSIZE) {
char *buf = abuf->b_data;
while (blksz > 0 && err == 0) {
int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
- err = dump_write(dsp, type, zb->zb_object,
+ err = dump_write(dsa, type, zb->zb_object,
offset, n, NULL, buf);
offset += n;
buf += n;
blksz -= n;
}
} else {
- err = dump_write(dsp, type, zb->zb_object,
+ err = dump_write(dsa, type, zb->zb_object,
offset, blksz, bp, abuf->b_data);
}
(void) arc_buf_remove_ref(abuf, &abuf);
@@ -564,11 +663,24 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
/*
- * Releases dp using the specified tag.
+ * Pop the new data off the queue, and free the old data.
+ */
+static struct send_block_record *
+get_next_record(bqueue_t *bq, struct send_block_record *data)
+{
+ struct send_block_record *tmp = bqueue_dequeue(bq);
+ kmem_free(data, sizeof (*data));
+ return (tmp);
+}
+
+/*
+ * Actually do the bulk of the work in a zfs send.
+ *
+ * Note: Releases dp using the specified tag.
*/
static int
-dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
- zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
+dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
+ zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok,
#ifdef illumos
boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
#else
@@ -581,8 +693,9 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
int err;
uint64_t fromtxg = 0;
uint64_t featureflags = 0;
+ struct send_thread_arg to_arg;
- err = dmu_objset_from_ds(ds, &os);
+ err = dmu_objset_from_ds(to_ds, &os);
if (err != 0) {
dsl_pool_rele(dp, tag);
return (err);
@@ -608,35 +721,34 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
}
#endif
- if (large_block_ok && ds->ds_large_blocks)
+ if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
if (embedok &&
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
- } else {
- embedok = B_FALSE;
}
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
featureflags);
drr->drr_u.drr_begin.drr_creation_time =
- dsl_dataset_phys(ds)->ds_creation_time;
+ dsl_dataset_phys(to_ds)->ds_creation_time;
drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
if (is_clone)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
- drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(ds)->ds_guid;
- if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
+ drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+ if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
- if (fromzb != NULL) {
- drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid;
- fromtxg = fromzb->zbm_creation_txg;
+ if (ancestor_zb != NULL) {
+ drr->drr_u.drr_begin.drr_fromguid =
+ ancestor_zb->zbm_guid;
+ fromtxg = ancestor_zb->zbm_creation_txg;
}
- dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
- if (!dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_name(to_ds, drr->drr_u.drr_begin.drr_toname);
+ if (!to_ds->ds_is_snapshot) {
(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
sizeof (drr->drr_u.drr_begin.drr_toname));
}
@@ -650,29 +762,61 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
dsp->dsa_fp = fp;
dsp->dsa_os = os;
dsp->dsa_off = off;
- dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
- ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
+ dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_pending_op = PENDING_NONE;
- dsp->dsa_incremental = (fromzb != NULL);
+ dsp->dsa_incremental = (ancestor_zb != NULL);
dsp->dsa_featureflags = featureflags;
- mutex_enter(&ds->ds_sendstream_lock);
- list_insert_head(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
+ mutex_enter(&to_ds->ds_sendstream_lock);
+ list_insert_head(&to_ds->ds_sendstreams, dsp);
+ mutex_exit(&to_ds->ds_sendstream_lock);
- dsl_dataset_long_hold(ds, FTAG);
+ dsl_dataset_long_hold(to_ds, FTAG);
dsl_pool_rele(dp, tag);
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+ if (dump_record(dsp, NULL, 0) != 0) {
err = dsp->dsa_err;
goto out;
}
- err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
- backup_cb, dsp);
+ err = bqueue_init(&to_arg.q, zfs_send_queue_length,
+ offsetof(struct send_block_record, ln));
+ to_arg.error_code = 0;
+ to_arg.cancel = B_FALSE;
+ to_arg.ds = to_ds;
+ to_arg.fromtxg = fromtxg;
+ to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
+ (void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, &p0,
+ TS_RUN, minclsyspri);
+
+ struct send_block_record *to_data;
+ to_data = bqueue_dequeue(&to_arg.q);
+
+ while (!to_data->eos_marker && err == 0) {
+ err = do_dump(dsp, to_data);
+ to_data = get_next_record(&to_arg.q, to_data);
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ err = EINTR;
+ }
+
+ if (err != 0) {
+ to_arg.cancel = B_TRUE;
+ while (!to_data->eos_marker) {
+ to_data = get_next_record(&to_arg.q, to_data);
+ }
+ }
+ kmem_free(to_data, sizeof (*to_data));
+
+ bqueue_destroy(&to_arg.q);
+
+ if (err == 0 && to_arg.error_code != 0)
+ err = to_arg.error_code;
+
+ if (err != 0)
+ goto out;
if (dsp->dsa_pending_op != PENDING_NONE)
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
err = SET_ERROR(EINTR);
if (err != 0) {
@@ -686,20 +830,18 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+ if (dump_record(dsp, NULL, 0) != 0)
err = dsp->dsa_err;
- goto out;
- }
out:
- mutex_enter(&ds->ds_sendstream_lock);
- list_remove(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
+ mutex_enter(&to_ds->ds_sendstream_lock);
+ list_remove(&to_ds->ds_sendstreams, dsp);
+ mutex_exit(&to_ds->ds_sendstream_lock);
kmem_free(drr, sizeof (dmu_replay_record_t));
kmem_free(dsp, sizeof (dmu_sendarg_t));
- dsl_dataset_long_rele(ds, FTAG);
+ dsl_dataset_long_rele(to_ds, FTAG);
return (err);
}
@@ -842,6 +984,40 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err);
}
+static int
+dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
+ uint64_t *sizep)
+{
+ int err;
+ /*
+ * Assume that space (both on-disk and in-stream) is dominated by
+ * data. We will adjust for indirect blocks and the copies property,
+ * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
+ */
+
+ /*
+ * Subtract out approximate space used by indirect blocks.
+ * Assume most space is used by data blocks (non-indirect, non-dnode).
+ * Assume all blocks are recordsize. Assume ditto blocks and
+ * internal fragmentation counter out compression.
+ *
+ * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
+ * block, which we observe in practice.
+ */
+ uint64_t recordsize;
+ err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
+ if (err != 0)
+ return (err);
+ size -= size / recordsize * sizeof (blkptr_t);
+
+ /* Add in the space for the record associated with each block. */
+ size += size / recordsize * sizeof (dmu_replay_record_t);
+
+ *sizep = size;
+
+ return (0);
+}
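As a rough worked example of the adjustment above (a sketch only: sizeof (blkptr_t) is 128 bytes on disk, and sizeof (dmu_replay_record_t) is taken as roughly 300 bytes purely for illustration):

    uint64_t size = 1ULL << 30;           /* 1 GiB of data blocks */
    uint64_t recordsize = 128 * 1024;     /* default 128K recordsize */
    uint64_t nblocks = size / recordsize; /* 8192 blocks */
    size -= nblocks * 128;                /* subtract ~1 MiB of indirect blkptrs */
    size += nblocks * 300;                /* add back ~2.3 MiB of DRR headers */

The estimate thus lands slightly above the raw data size, since the per-block stream record headers outweigh the subtracted indirect-block space.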
+
int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
@@ -852,11 +1028,11 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
ASSERT(dsl_pool_config_held(dp));
/* tosnap must be a snapshot */
- if (!dsl_dataset_is_snapshot(ds))
+ if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/* fromsnap, if provided, must be a snapshot */
- if (fromds != NULL && !dsl_dataset_is_snapshot(fromds))
+ if (fromds != NULL && !fromds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/*
@@ -877,33 +1053,61 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
return (err);
}
- /*
- * Assume that space (both on-disk and in-stream) is dominated by
- * data. We will adjust for indirect blocks and the copies property,
- * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
- */
+ err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ return (err);
+}
+
+/*
+ * Simple callback used to traverse the blocks of a snapshot and sum their
+ * uncompressed size
+ */
+/* ARGSUSED */
+static int
+dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ uint64_t *spaceptr = arg;
+ if (bp != NULL && !BP_IS_HOLE(bp)) {
+ *spaceptr += BP_GET_UCSIZE(bp);
+ }
+ return (0);
+}
+
+/*
+ * Given a destination snapshot and a TXG, calculate the approximate size of a
+ * send stream sent from that TXG. from_txg may be zero, indicating that the
+ * whole snapshot will be sent.
+ */
+int
+dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
+ uint64_t *sizep)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ int err;
+ uint64_t size = 0;
+
+ ASSERT(dsl_pool_config_held(dp));
+
+ /* tosnap must be a snapshot */
+ if (!dsl_dataset_is_snapshot(ds))
+ return (SET_ERROR(EINVAL));
+
+ /* verify that from_txg is before the provided snapshot was taken */
+ if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
+ return (SET_ERROR(EXDEV));
+ }
/*
- * Subtract out approximate space used by indirect blocks.
- * Assume most space is used by data blocks (non-indirect, non-dnode).
- * Assume all blocks are recordsize. Assume ditto blocks and
- * internal fragmentation counter out compression.
- *
- * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
- * block, which we observe in practice.
+ * traverse the blocks of the snapshot with birth times after
+ * from_txg, summing their uncompressed size
*/
- uint64_t recordsize;
- err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
- if (err != 0)
+ err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+ dmu_calculate_send_traversal, &size);
+ if (err)
return (err);
- size -= size / recordsize * sizeof (blkptr_t);
- /* Add in the space for the record associated with each block. */
- size += size / recordsize * sizeof (dmu_replay_record_t);
-
- *sizep = size;
-
- return (0);
+ err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ return (err);
}
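A hypothetical caller sketch for the new function (the pool-config calls are from the surrounding DSL API; the call site itself is illustrative, not part of this patch):

    uint64_t size;

    dsl_pool_config_enter(dp, FTAG);    /* satisfies the ASSERT above */
    err = dmu_send_estimate_from_txg(ds, from_txg, &size);
    dsl_pool_config_exit(dp, FTAG);
    /* passing from_txg == 0 estimates a full send of the snapshot */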
typedef struct dmu_recv_begin_arg {
@@ -1068,7 +1272,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
* If it's a non-clone incremental, we are missing the
* target fs, so fail the recv.
*/
- if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
+ if (fromguid != 0 && !(flags & DRR_FLAG_CLONE ||
+ drba->drba_origin))
return (SET_ERROR(ENOENT));
/* Open the parent of tofs */
@@ -1105,7 +1310,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
return (error);
}
- if (!dsl_dataset_is_snapshot(origin)) {
+ if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
@@ -1174,13 +1379,6 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
}
VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
- if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
- DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
- !newds->ds_large_blocks) {
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
- newds->ds_large_blocks = B_TRUE;
- }
-
dmu_buf_will_dirty(newds->ds_dbuf, tx);
dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
@@ -1250,16 +1448,58 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
&drba, 5, ZFS_SPACE_CHECK_NORMAL));
}
-struct restorearg {
- int err;
+struct receive_record_arg {
+ dmu_replay_record_t header;
+ void *payload; /* Pointer to a buffer containing the payload */
+ /*
+ * If the record is a write, pointer to the arc_buf_t containing the
+ * payload.
+ */
+ arc_buf_t *write_buf;
+ int payload_size;
+ boolean_t eos_marker; /* Marks the end of the stream */
+ bqueue_node_t node;
+};
+
+struct receive_writer_arg {
+ objset_t *os;
boolean_t byteswap;
+ bqueue_t q;
+ /*
+ * These three args are used to signal to the main thread that we're
+ * done.
+ */
+ kmutex_t mutex;
+ kcondvar_t cv;
+ boolean_t done;
+ int err;
+ /* A map from guid to dataset to help handle dedup'd streams. */
+ avl_tree_t *guid_to_ds_map;
+};
+
+struct receive_arg {
+ objset_t *os;
kthread_t *td;
struct file *fp;
- char *buf;
- uint64_t voff;
- int bufsize; /* amount of memory allocated for buf */
+ uint64_t voff; /* The current offset in the stream */
+ /*
+ * A record that has had its payload read in, but hasn't yet been handed
+ * off to the worker thread.
+ */
+ struct receive_record_arg *rrd;
+ /* A record that has had its header read in, but not its payload. */
+ struct receive_record_arg *next_rrd;
zio_cksum_t cksum;
- avl_tree_t *guid_to_ds_map;
+ zio_cksum_t prev_cksum;
+ int err;
+ boolean_t byteswap;
+ /* Sorted list of objects not to issue prefetches for. */
+ list_t ignore_obj_list;
+};
+
+struct receive_ign_obj_node {
+ list_node_t node;
+ uint64_t object;
};
typedef struct guid_map_entry {
@@ -1298,7 +1538,7 @@ free_guid_map_onexit(void *arg)
}
static int
-restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid)
+restore_bytes(struct receive_arg *ra, void *buf, int len, off_t off, ssize_t *resid)
{
struct uio auio;
struct iovec aiov;
@@ -1323,17 +1563,13 @@ restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *res
return (error);
}
-static void *
-restore_read(struct restorearg *ra, int len, char *buf)
+static int
+receive_read(struct receive_arg *ra, int len, void *buf)
{
int done = 0;
- if (buf == NULL)
- buf = ra->buf;
-
/* some things will require 8-byte alignment, so everything must */
ASSERT0(len % 8);
- ASSERT3U(len, <=, ra->bufsize);
while (done < len) {
ssize_t resid;
@@ -1346,24 +1582,21 @@ restore_read(struct restorearg *ra, int len, char *buf)
ra->voff += len - done - resid;
done = len - resid;
if (ra->err != 0)
- return (NULL);
+ return (ra->err);
}
ASSERT3U(done, ==, len);
- if (ra->byteswap)
- fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
- else
- fletcher_4_incremental_native(buf, len, &ra->cksum);
- return (buf);
+ return (0);
}
static void
-backup_byteswap(dmu_replay_record_t *drr)
+byteswap_record(dmu_replay_record_t *drr)
{
#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
drr->drr_type = BSWAP_32(drr->drr_type);
drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
switch (drr->drr_type) {
case DRR_BEGIN:
DO64(drr_begin.drr_magic);
@@ -1393,10 +1626,7 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_write.drr_offset);
DO64(drr_write.drr_length);
DO64(drr_write.drr_toguid);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
DO64(drr_write.drr_key.ddk_prop);
break;
case DRR_WRITE_BYREF:
@@ -1407,10 +1637,8 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_write_byref.drr_refguid);
DO64(drr_write_byref.drr_refobject);
DO64(drr_write_byref.drr_refoffset);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
+ drr_key.ddk_cksum);
DO64(drr_write_byref.drr_key.ddk_prop);
break;
case DRR_WRITE_EMBEDDED:
@@ -1433,13 +1661,15 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_spill.drr_toguid);
break;
case DRR_END:
- DO64(drr_end.drr_checksum.zc_word[0]);
- DO64(drr_end.drr_checksum.zc_word[1]);
- DO64(drr_end.drr_checksum.zc_word[2]);
- DO64(drr_end.drr_checksum.zc_word[3]);
DO64(drr_end.drr_toguid);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
break;
}
+
+ if (drr->drr_type != DRR_BEGIN) {
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+ }
+
#undef DO64
#undef DO32
}
@@ -1456,11 +1686,11 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
}
static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
+receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ void *data)
{
dmu_object_info_t doi;
dmu_tx_t *tx;
- void *data = NULL;
uint64_t object;
int err;
@@ -1471,23 +1701,17 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
drro->drr_blksz < SPA_MINBLOCKSIZE ||
- drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
+ drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
drro->drr_bonuslen > DN_MAX_BONUSLEN) {
return (SET_ERROR(EINVAL));
}
- err = dmu_object_info(os, drro->drr_object, &doi);
+ err = dmu_object_info(rwa->os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT)
return (SET_ERROR(EINVAL));
object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
- if (drro->drr_bonuslen) {
- data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
- if (ra->err != 0)
- return (ra->err);
- }
-
/*
* If we are losing blkptrs or changing the block size this must
* be a new file instance. We must clear out the previous file
@@ -1501,14 +1725,14 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
if (drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr) {
- err = dmu_free_long_range(os, drro->drr_object,
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
}
}
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_bonus(tx, object);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
@@ -1518,7 +1742,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
if (object == DMU_NEW_OBJECT) {
/* currently free, want to be allocated */
- err = dmu_object_claim(os, drro->drr_object,
+ err = dmu_object_claim(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen, tx);
} else if (drro->drr_type != doi.doi_type ||
@@ -1526,7 +1750,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
/* currently allocated, but with different properties */
- err = dmu_object_reclaim(os, drro->drr_object,
+ err = dmu_object_reclaim(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen, tx);
}
@@ -1535,19 +1759,20 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
return (SET_ERROR(EINVAL));
}
- dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
- tx);
- dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
+ dmu_object_set_checksum(rwa->os, drro->drr_object,
+ drro->drr_checksumtype, tx);
+ dmu_object_set_compress(rwa->os, drro->drr_object,
+ drro->drr_compress, tx);
if (data != NULL) {
dmu_buf_t *db;
- VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
bcopy(data, db->db_data, drro->drr_bonuslen);
- if (ra->byteswap) {
+ if (rwa->byteswap) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drro->drr_bonustype);
dmu_ot_byteswap[byteswap].ob_func(db->db_data,
@@ -1561,7 +1786,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
/* ARGSUSED */
static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
+receive_freeobjects(struct receive_writer_arg *rwa,
struct drr_freeobjects *drrfo)
{
uint64_t obj;
@@ -1571,13 +1796,13 @@ restore_freeobjects(struct restorearg *ra, objset_t *os,
for (obj = drrfo->drr_firstobj;
obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
- (void) dmu_object_next(os, &obj, FALSE, 0)) {
+ (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
int err;
- if (dmu_object_info(os, obj, NULL) != 0)
+ if (dmu_object_info(rwa->os, obj, NULL) != 0)
continue;
- err = dmu_free_long_object(os, obj);
+ err = dmu_free_long_object(rwa->os, obj);
if (err != 0)
return (err);
}
@@ -1585,49 +1810,38 @@ restore_freeobjects(struct restorearg *ra, objset_t *os,
}
static int
-restore_write(struct restorearg *ra, objset_t *os,
- struct drr_write *drrw)
+receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
+ arc_buf_t *abuf)
{
dmu_tx_t *tx;
- void *data;
int err;
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
!DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL));
- if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
+ if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
- dmu_buf_t *bonus;
- if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
- return (SET_ERROR(EINVAL));
-
- arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
-
- data = restore_read(ra, drrw->drr_length, abuf->b_data);
- if (data == NULL) {
- dmu_return_arcbuf(abuf);
- dmu_buf_rele(bonus, FTAG);
- return (ra->err);
- }
-
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrw->drr_object,
drrw->drr_offset, drrw->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
- dmu_return_arcbuf(abuf);
- dmu_buf_rele(bonus, FTAG);
dmu_tx_abort(tx);
return (err);
}
- if (ra->byteswap) {
+ if (rwa->byteswap) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drrw->drr_type);
- dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+ dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+ drrw->drr_length);
}
+
+ dmu_buf_t *bonus;
+ if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
+ return (SET_ERROR(EINVAL));
dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
dmu_tx_commit(tx);
dmu_buf_rele(bonus, FTAG);
@@ -1642,7 +1856,7 @@ restore_write(struct restorearg *ra, objset_t *os,
* data from the stream to fulfill this write.
*/
static int
-restore_write_byref(struct restorearg *ra, objset_t *os,
+receive_write_byref(struct receive_writer_arg *rwa,
struct drr_write_byref *drrwbr)
{
dmu_tx_t *tx;
@@ -1662,14 +1876,14 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
*/
if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
gmesrch.guid = drrwbr->drr_refguid;
- if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
+ if ((gmep = avl_find(rwa->guid_to_ds_map, &gmesrch,
&where)) == NULL) {
return (SET_ERROR(EINVAL));
}
if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
return (SET_ERROR(EINVAL));
} else {
- ref_os = os;
+ ref_os = rwa->os;
}
err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
@@ -1677,7 +1891,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
if (err != 0)
return (err);
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length);
@@ -1686,7 +1900,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
dmu_tx_abort(tx);
return (err);
}
- dmu_write(os, drrwbr->drr_object,
+ dmu_write(rwa->os, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
dmu_buf_rele(dbp, FTAG);
dmu_tx_commit(tx);
@@ -1694,12 +1908,11 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
}
static int
-restore_write_embedded(struct restorearg *ra, objset_t *os,
- struct drr_write_embedded *drrwnp)
+receive_write_embedded(struct receive_writer_arg *rwa,
+ struct drr_write_embedded *drrwnp, void *data)
{
dmu_tx_t *tx;
int err;
- void *data;
if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
return (EINVAL);
@@ -1712,11 +1925,7 @@ restore_write_embedded(struct restorearg *ra, objset_t *os,
if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL);
- data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL);
- if (data == NULL)
- return (ra->err);
-
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrwnp->drr_object,
drrwnp->drr_offset, drrwnp->drr_length);
@@ -1726,41 +1935,37 @@ restore_write_embedded(struct restorearg *ra, objset_t *os,
return (err);
}
- dmu_write_embedded(os, drrwnp->drr_object,
+ dmu_write_embedded(rwa->os, drrwnp->drr_object,
drrwnp->drr_offset, data, drrwnp->drr_etype,
drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
- ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
+ rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx);
dmu_tx_commit(tx);
return (0);
}
static int
-restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
+receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
+ void *data)
{
dmu_tx_t *tx;
- void *data;
dmu_buf_t *db, *db_spill;
int err;
if (drrs->drr_length < SPA_MINBLOCKSIZE ||
- drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os)))
+ drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
- data = restore_read(ra, drrs->drr_length, NULL);
- if (data == NULL)
- return (ra->err);
-
- if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
+ if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
- VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db));
if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
dmu_buf_rele(db, FTAG);
return (err);
}
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_spill(tx, db->db_object);
@@ -1787,8 +1992,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
/* ARGSUSED */
static int
-restore_free(struct restorearg *ra, objset_t *os,
- struct drr_free *drrf)
+receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
{
int err;
@@ -1796,11 +2000,12 @@ restore_free(struct restorearg *ra, objset_t *os,
drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
return (SET_ERROR(EINVAL));
- if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
+ if (dmu_object_info(rwa->os, drrf->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
- err = dmu_free_long_range(os, drrf->drr_object,
+ err = dmu_free_long_range(rwa->os, drrf->drr_object,
drrf->drr_offset, drrf->drr_length);
+
return (err);
}
@@ -1814,17 +2019,363 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
(void) dsl_destroy_head(name);
}
+static void
+receive_cksum(struct receive_arg *ra, int len, void *buf)
+{
+ if (ra->byteswap) {
+ fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
+ } else {
+ fletcher_4_incremental_native(buf, len, &ra->cksum);
+ }
+}
+
/*
+ * Read the payload into a buffer of size len, and update the current record's
+ * payload field.
+ * Allocate ra->next_rrd and read the next record's header into
+ * ra->next_rrd->header.
+ * Verify checksum of payload and next record.
+ */
+static int
+receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
+{
+ int err;
+
+ if (len != 0) {
+ ASSERT3U(len, <=, SPA_MAXBLOCKSIZE);
+ ra->rrd->payload = buf;
+ ra->rrd->payload_size = len;
+ err = receive_read(ra, len, ra->rrd->payload);
+ if (err != 0)
+ return (err);
+ receive_cksum(ra, len, ra->rrd->payload);
+ }
+
+ ra->prev_cksum = ra->cksum;
+
+ ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
+ err = receive_read(ra, sizeof (ra->next_rrd->header),
+ &ra->next_rrd->header);
+ if (err != 0) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (err);
+ }
+ if (ra->next_rrd->header.drr_type == DRR_BEGIN) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Note: checksum is of everything up to but not including the
+ * checksum itself.
+ */
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ receive_cksum(ra,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ &ra->next_rrd->header);
+
+ zio_cksum_t cksum_orig =
+ ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
+ zio_cksum_t *cksump =
+ &ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
+
+ if (ra->byteswap)
+ byteswap_record(&ra->next_rrd->header);
+
+ if ((!ZIO_CHECKSUM_IS_ZERO(cksump)) &&
+ !ZIO_CHECKSUM_EQUAL(ra->cksum, *cksump)) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (SET_ERROR(ECKSUM));
+ }
+
+ receive_cksum(ra, sizeof (cksum_orig), &cksum_orig);
+
+ return (0);
+}
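To make the framing concrete, here is a minimal, hypothetical model of the check performed above; checksum_update() stands in for the fletcher_4_incremental_*() calls and the header is reduced to its essentials (none of these names appear in the patch):

    #include <stddef.h>
    #include <stdint.h>

    struct header {
            uint32_t type;
            uint32_t payload_len;
            uint64_t stored_cksum; /* running cksum of every prior stream byte */
    };

    /* Toy stand-in for fletcher_4_incremental_native(). */
    static void
    checksum_update(uint64_t *run, const void *buf, size_t len)
    {
            const uint8_t *p = buf;
            while (len-- > 0)
                    *run = *run * 31 + *p++;
    }

    /* Verify one payload plus the header that follows it. */
    static int
    verify_frame(uint64_t *run, const void *payload, size_t payload_len,
        const struct header *hdr)
    {
            checksum_update(run, payload, payload_len);
            /* The stored checksum covers everything before itself... */
            checksum_update(run, hdr, offsetof(struct header, stored_cksum));
            if (hdr->stored_cksum != 0 && *run != hdr->stored_cksum)
                    return (-1); /* ECKSUM in the real code */
            /* ...and is then folded in, keeping both ends in sync. */
            checksum_update(run, &hdr->stored_cksum,
                sizeof (hdr->stored_cksum));
            return (0);
    }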
+
+/*
+ * Issue the prefetch reads for any necessary indirect blocks.
+ *
+ * We use the object ignore list to tell us whether or not to issue prefetches
+ * for a given object. We do this for both correctness (in case the blocksize
+ * of an object has changed) and performance (if the object doesn't exist, don't
+ * needlessly try to issue prefetches). We also trim the list as we go through
+ * the stream to prevent it from growing to an unbounded size.
+ *
+ * The object numbers within will always be in sorted order, and any write
+ * records we see will also be in sorted order, but they're not sorted with
+ * respect to each other (i.e. we can get several object records before
+ * receiving each object's write records). As a result, once we've reached a
+ * given object number, we can safely remove any reference to lower object
+ * numbers in the ignore list. In practice, we receive up to 32 object records
+ * before receiving write records, so the list can have up to 32 nodes in it.
+ */
+/* ARGSUSED */
+static void
+receive_read_prefetch(struct receive_arg *ra,
+ uint64_t object, uint64_t offset, uint64_t length)
+{
+ struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list);
+ while (node != NULL && node->object < object) {
+ VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list));
+ kmem_free(node, sizeof (*node));
+ node = list_head(&ra->ignore_obj_list);
+ }
+ if (node == NULL || node->object > object) {
+ dmu_prefetch(ra->os, object, 1, offset, length,
+ ZIO_PRIORITY_SYNC_READ);
+ }
+}
+
+/*
+ * Read records off the stream, issuing any necessary prefetches.
+ */
+static int
+receive_read_record(struct receive_arg *ra)
+{
+ int err;
+
+ switch (ra->rrd->header.drr_type) {
+ case DRR_OBJECT:
+ {
+ struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
+ uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8);
+ void *buf = kmem_zalloc(size, KM_SLEEP);
+ dmu_object_info_t doi;
+ err = receive_read_payload_and_next_header(ra, size, buf);
+ if (err != 0) {
+ kmem_free(buf, size);
+ return (err);
+ }
+ err = dmu_object_info(ra->os, drro->drr_object, &doi);
+ /*
+ * See receive_read_prefetch for an explanation why we're
+ * storing this object in the ignore_obj_list.
+ */
+ if (err == ENOENT ||
+ (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
+ struct receive_ign_obj_node *node =
+ kmem_zalloc(sizeof (*node),
+ KM_SLEEP);
+ node->object = drro->drr_object;
+#ifdef ZFS_DEBUG
+ struct receive_ign_obj_node *last_object =
+ list_tail(&ra->ignore_obj_list);
+ uint64_t last_objnum = (last_object != NULL ?
+ last_object->object : 0);
+ ASSERT3U(node->object, >, last_objnum);
+#endif
+ list_insert_tail(&ra->ignore_obj_list, node);
+ err = 0;
+ }
+ return (err);
+ }
+ case DRR_FREEOBJECTS:
+ {
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ return (err);
+ }
+ case DRR_WRITE:
+ {
+ struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
+ arc_buf_t *abuf = arc_loan_buf(dmu_objset_spa(ra->os),
+ drrw->drr_length);
+
+ err = receive_read_payload_and_next_header(ra,
+ drrw->drr_length, abuf->b_data);
+ if (err != 0) {
+ dmu_return_arcbuf(abuf);
+ return (err);
+ }
+ ra->rrd->write_buf = abuf;
+ receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
+ drrw->drr_length);
+ return (err);
+ }
+ case DRR_WRITE_BYREF:
+ {
+ struct drr_write_byref *drrwb =
+ &ra->rrd->header.drr_u.drr_write_byref;
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ receive_read_prefetch(ra, drrwb->drr_object, drrwb->drr_offset,
+ drrwb->drr_length);
+ return (err);
+ }
+ case DRR_WRITE_EMBEDDED:
+ {
+ struct drr_write_embedded *drrwe =
+ &ra->rrd->header.drr_u.drr_write_embedded;
+ uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);
+ void *buf = kmem_zalloc(size, KM_SLEEP);
+
+ err = receive_read_payload_and_next_header(ra, size, buf);
+ if (err != 0) {
+ kmem_free(buf, size);
+ return (err);
+ }
+
+ receive_read_prefetch(ra, drrwe->drr_object, drrwe->drr_offset,
+ drrwe->drr_length);
+ return (err);
+ }
+ case DRR_FREE:
+ {
+ /*
+ * It might be beneficial to prefetch indirect blocks here, but
+ * we don't really have the data to decide for sure.
+ */
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ return (err);
+ }
+ case DRR_END:
+ {
+ struct drr_end *drre = &ra->rrd->header.drr_u.drr_end;
+ if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
+ return (SET_ERROR(EINVAL));
+ return (0);
+ }
+ case DRR_SPILL:
+ {
+ struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill;
+ void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP);
+ err = receive_read_payload_and_next_header(ra, drrs->drr_length,
+ buf);
+ if (err != 0)
+ kmem_free(buf, drrs->drr_length);
+ return (err);
+ }
+ default:
+ return (SET_ERROR(EINVAL));
+ }
+}
+
+/*
+ * Commit the records to the pool.
+ */
+static int
+receive_process_record(struct receive_writer_arg *rwa,
+ struct receive_record_arg *rrd)
+{
+ int err;
+
+ switch (rrd->header.drr_type) {
+ case DRR_OBJECT:
+ {
+ struct drr_object *drro = &rrd->header.drr_u.drr_object;
+ err = receive_object(rwa, drro, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ return (err);
+ }
+ case DRR_FREEOBJECTS:
+ {
+ struct drr_freeobjects *drrfo =
+ &rrd->header.drr_u.drr_freeobjects;
+ return (receive_freeobjects(rwa, drrfo));
+ }
+ case DRR_WRITE:
+ {
+ struct drr_write *drrw = &rrd->header.drr_u.drr_write;
+ err = receive_write(rwa, drrw, rrd->write_buf);
+ /* if receive_write() is successful, it consumes the arc_buf */
+ if (err != 0)
+ dmu_return_arcbuf(rrd->write_buf);
+ rrd->write_buf = NULL;
+ rrd->payload = NULL;
+ return (err);
+ }
+ case DRR_WRITE_BYREF:
+ {
+ struct drr_write_byref *drrwbr =
+ &rrd->header.drr_u.drr_write_byref;
+ return (receive_write_byref(rwa, drrwbr));
+ }
+ case DRR_WRITE_EMBEDDED:
+ {
+ struct drr_write_embedded *drrwe =
+ &rrd->header.drr_u.drr_write_embedded;
+ err = receive_write_embedded(rwa, drrwe, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ return (err);
+ }
+ case DRR_FREE:
+ {
+ struct drr_free *drrf = &rrd->header.drr_u.drr_free;
+ return (receive_free(rwa, drrf));
+ }
+ case DRR_SPILL:
+ {
+ struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
+ err = receive_spill(rwa, drrs, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ return (err);
+ }
+ default:
+ return (SET_ERROR(EINVAL));
+ }
+}
+
+/*
+ * dmu_recv_stream's worker thread; pull records off the queue, and then call
+ * receive_process_record. When we're done, signal the main thread and exit.
+ */
+static void
+receive_writer_thread(void *arg)
+{
+ struct receive_writer_arg *rwa = arg;
+ struct receive_record_arg *rrd;
+ for (rrd = bqueue_dequeue(&rwa->q); !rrd->eos_marker;
+ rrd = bqueue_dequeue(&rwa->q)) {
+ /*
+ * If there's an error, the main thread will stop putting things
+ * on the queue, but we need to clear everything in it before we
+ * can exit.
+ */
+ if (rwa->err == 0) {
+ rwa->err = receive_process_record(rwa, rrd);
+ } else if (rrd->write_buf != NULL) {
+ dmu_return_arcbuf(rrd->write_buf);
+ rrd->write_buf = NULL;
+ rrd->payload = NULL;
+ } else if (rrd->payload != NULL) {
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ }
+ kmem_free(rrd, sizeof (*rrd));
+ }
+ kmem_free(rrd, sizeof (*rrd));
+ mutex_enter(&rwa->mutex);
+ rwa->done = B_TRUE;
+ cv_signal(&rwa->cv);
+ mutex_exit(&rwa->mutex);
+ thread_exit();
+}
+
+/*
+ * Read in the stream's records, one by one, and apply them to the pool. There
+ * are two threads involved; the thread that calls this function will spin up a
+ * worker thread, read the records off the stream one by one, and issue
+ * prefetches for any necessary indirect blocks. It will then push the records
+ * onto an internal blocking queue. The worker thread will pull the records off
+ * the queue, and actually write the data into the DMU. This way, the worker
+ * thread doesn't have to wait for reads to complete, since everything it needs
+ * (the indirect blocks) will be prefetched.
+ *
* NB: callers *must* call dmu_recv_end() if this succeeds.
*/
int
dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep)
{
- struct restorearg ra = { 0 };
- dmu_replay_record_t *drr;
- objset_t *os;
- zio_cksum_t pcksum;
+ int err = 0;
+ struct receive_arg ra = { 0 };
+ struct receive_writer_arg rwa = { 0 };
int featureflags;
ra.byteswap = drc->drc_byteswap;
@@ -1832,8 +2383,8 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
ra.td = curthread;
ra.fp = fp;
ra.voff = *voffp;
- ra.bufsize = SPA_MAXBLOCKSIZE;
- ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
+ list_create(&ra.ignore_obj_list, sizeof (struct receive_ign_obj_node),
+ offsetof(struct receive_ign_obj_node, node));
/* these were verified in dmu_recv_begin */
ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -1843,7 +2394,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
/*
* Open the objset we are modifying.
*/
- VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
+ VERIFY0(dmu_objset_from_ds(drc->drc_ds, &ra.os));
ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
@@ -1864,116 +2415,98 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
}
if (*action_handlep == 0) {
- ra.guid_to_ds_map =
+ rwa.guid_to_ds_map =
kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
- avl_create(ra.guid_to_ds_map, guid_compare,
+ avl_create(rwa.guid_to_ds_map, guid_compare,
sizeof (guid_map_entry_t),
offsetof(guid_map_entry_t, avlnode));
- ra.err = zfs_onexit_add_cb(minor,
- free_guid_map_onexit, ra.guid_to_ds_map,
+ err = zfs_onexit_add_cb(minor,
+ free_guid_map_onexit, rwa.guid_to_ds_map,
action_handlep);
- if (ra.err != 0)
+ if (err != 0)
goto out;
} else {
- ra.err = zfs_onexit_cb_data(minor, *action_handlep,
- (void **)&ra.guid_to_ds_map);
+ err = zfs_onexit_cb_data(minor, *action_handlep,
+ (void **)&rwa.guid_to_ds_map);
- if (ra.err != 0)
+ if (err != 0)
goto out;
}
- drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
+ drc->drc_guid_to_ds_map = rwa.guid_to_ds_map;
}
+ err = receive_read_payload_and_next_header(&ra, 0, NULL);
+ if (err)
+ goto out;
+
+ (void) bqueue_init(&rwa.q, zfs_recv_queue_length,
+ offsetof(struct receive_record_arg, node));
+ cv_init(&rwa.cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&rwa.mutex, NULL, MUTEX_DEFAULT, NULL);
+ rwa.os = ra.os;
+ rwa.byteswap = drc->drc_byteswap;
+
+ (void) thread_create(NULL, 0, receive_writer_thread, &rwa, 0, &p0,
+ TS_RUN, minclsyspri);
/*
- * Read records and process them.
+ * We're reading rwa.err without locks, which is safe since we are the
+ * only reader, and the worker thread is the only writer. It's ok if we
+ * miss a write for an iteration or two of the loop, since the writer
+ * thread will keep freeing records we send it until we send it an eos
+ * marker.
+ *
+ * We can leave this loop in 3 ways: First, if rwa.err is
+ * non-zero. In that case, the writer thread will free the rrd we just
+ * pushed. Second, if we're interrupted; in that case, either it's the
+ * first loop and ra.rrd was never allocated, or it's later, and ra.rrd
+ * has been handed off to the writer thread who will free it. Finally,
+ * if receive_read_record fails or we're at the end of the stream, then
+ * we free ra.rrd and exit.
*/
- pcksum = ra.cksum;
- while (ra.err == 0 &&
- NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) {
+ while (rwa.err == 0) {
if (issig(JUSTLOOKING) && issig(FORREAL)) {
- ra.err = SET_ERROR(EINTR);
- goto out;
+ err = SET_ERROR(EINTR);
+ break;
}
- if (ra.byteswap)
- backup_byteswap(drr);
+ ASSERT3P(ra.rrd, ==, NULL);
+ ra.rrd = ra.next_rrd;
+ ra.next_rrd = NULL;
+ /* Allocates and loads header into ra.next_rrd */
+ err = receive_read_record(&ra);
- switch (drr->drr_type) {
- case DRR_OBJECT:
- {
- /*
- * We need to make a copy of the record header,
- * because restore_{object,write} may need to
- * restore_read(), which will invalidate drr.
- */
- struct drr_object drro = drr->drr_u.drr_object;
- ra.err = restore_object(&ra, os, &drro);
- break;
- }
- case DRR_FREEOBJECTS:
- {
- struct drr_freeobjects drrfo =
- drr->drr_u.drr_freeobjects;
- ra.err = restore_freeobjects(&ra, os, &drrfo);
- break;
- }
- case DRR_WRITE:
- {
- struct drr_write drrw = drr->drr_u.drr_write;
- ra.err = restore_write(&ra, os, &drrw);
- break;
- }
- case DRR_WRITE_BYREF:
- {
- struct drr_write_byref drrwbr =
- drr->drr_u.drr_write_byref;
- ra.err = restore_write_byref(&ra, os, &drrwbr);
+ if (ra.rrd->header.drr_type == DRR_END || err != 0) {
+ kmem_free(ra.rrd, sizeof (*ra.rrd));
+ ra.rrd = NULL;
break;
}
- case DRR_WRITE_EMBEDDED:
- {
- struct drr_write_embedded drrwe =
- drr->drr_u.drr_write_embedded;
- ra.err = restore_write_embedded(&ra, os, &drrwe);
- break;
- }
- case DRR_FREE:
- {
- struct drr_free drrf = drr->drr_u.drr_free;
- ra.err = restore_free(&ra, os, &drrf);
- break;
- }
- case DRR_END:
- {
- struct drr_end drre = drr->drr_u.drr_end;
- /*
- * We compare against the *previous* checksum
- * value, because the stored checksum is of
- * everything before the DRR_END record.
- */
- if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
- ra.err = SET_ERROR(ECKSUM);
- goto out;
- }
- case DRR_SPILL:
- {
- struct drr_spill drrs = drr->drr_u.drr_spill;
- ra.err = restore_spill(&ra, os, &drrs);
- break;
- }
- default:
- ra.err = SET_ERROR(EINVAL);
- goto out;
- }
- pcksum = ra.cksum;
+
+ bqueue_enqueue(&rwa.q, ra.rrd,
+ sizeof (struct receive_record_arg) + ra.rrd->payload_size);
+ ra.rrd = NULL;
+ }
+ if (ra.next_rrd == NULL)
+ ra.next_rrd = kmem_zalloc(sizeof (*ra.next_rrd), KM_SLEEP);
+ ra.next_rrd->eos_marker = B_TRUE;
+ bqueue_enqueue(&rwa.q, ra.next_rrd, 1);
+
+ mutex_enter(&rwa.mutex);
+ while (!rwa.done) {
+ cv_wait(&rwa.cv, &rwa.mutex);
}
- ASSERT(ra.err != 0);
+ mutex_exit(&rwa.mutex);
+
+ cv_destroy(&rwa.cv);
+ mutex_destroy(&rwa.mutex);
+ bqueue_destroy(&rwa.q);
+ if (err == 0)
+ err = rwa.err;
out:
if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
zfs_onexit_fd_rele(cleanup_fd);
- if (ra.err != 0) {
+ if (err != 0) {
/*
* destroy what we created, so we don't leave it in the
* inconsistent restoring state.
@@ -1981,9 +2514,14 @@ out:
dmu_recv_cleanup_ds(drc);
}
- kmem_free(ra.buf, ra.bufsize);
*voffp = ra.voff;
- return (ra.err);
+ for (struct receive_ign_obj_node *n =
+ list_remove_head(&ra.ignore_obj_list); n != NULL;
+ n = list_remove_head(&ra.ignore_obj_list)) {
+ kmem_free(n, sizeof (*n));
+ }
+ list_destroy(&ra.ignore_obj_list);
+ return (err);
}
static int
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 9891099..151d04c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -39,13 +39,12 @@
#include <sys/callb.h>
#include <sys/zfeature.h>
-int zfs_pd_blks_max = 100;
+int32_t zfs_pd_bytes_max = 50 * 1024 * 1024; /* 50MB */
typedef struct prefetch_data {
kmutex_t pd_mtx;
kcondvar_t pd_cv;
- int pd_blks_max;
- int pd_blks_fetched;
+ int32_t pd_bytes_fetched;
int pd_flags;
boolean_t pd_cancel;
boolean_t pd_exited;
@@ -159,7 +158,7 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
* If we already visited this bp & everything below,
* don't bother doing it again.
*/
- if (zbookmark_is_before(dnp, zb, td->td_resume))
+ if (zbookmark_subtree_completed(dnp, zb, td->td_resume))
return (RESUME_SKIP_ALL);
/*
@@ -250,11 +249,12 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
}
if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
+ uint64_t size = BP_GET_LSIZE(bp);
mutex_enter(&pd->pd_mtx);
- ASSERT(pd->pd_blks_fetched >= 0);
- while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
+ ASSERT(pd->pd_bytes_fetched >= 0);
+ while (pd->pd_bytes_fetched < size && !pd->pd_exited)
cv_wait(&pd->pd_cv, &pd->pd_mtx);
- pd->pd_blks_fetched--;
+ pd->pd_bytes_fetched -= size;
cv_broadcast(&pd->pd_cv);
mutex_exit(&pd->pd_mtx);
}
@@ -425,6 +425,17 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
int j, err = 0;
zbookmark_phys_t czb;
+ if (td->td_flags & TRAVERSE_PRE) {
+ SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
+ ZB_DNODE_BLKID);
+ err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+ td->td_arg);
+ if (err == TRAVERSE_VISIT_NO_CHILDREN)
+ return (0);
+ if (err != 0)
+ return (err);
+ }
+
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
@@ -432,10 +443,21 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
break;
}
- if (err == 0 && dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ if (err == 0 && (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
}
+
+ if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
+ SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
+ ZB_DNODE_BLKID);
+ err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+ td->td_arg);
+ if (err == TRAVERSE_VISIT_NO_CHILDREN)
+ return (0);
+ if (err != 0)
+ return (err);
+ }
return (err);
}
@@ -447,7 +469,9 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
prefetch_data_t *pfd = arg;
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- ASSERT(pfd->pd_blks_fetched >= 0);
+ ASSERT(pfd->pd_bytes_fetched >= 0);
+ if (bp == NULL)
+ return (0);
if (pfd->pd_cancel)
return (SET_ERROR(EINTR));
@@ -455,9 +479,9 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
return (0);
mutex_enter(&pfd->pd_mtx);
- while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max)
+ while (!pfd->pd_cancel && pfd->pd_bytes_fetched >= zfs_pd_bytes_max)
cv_wait(&pfd->pd_cv, &pfd->pd_mtx);
- pfd->pd_blks_fetched++;
+ pfd->pd_bytes_fetched += BP_GET_LSIZE(bp);
cv_broadcast(&pfd->pd_cv);
mutex_exit(&pfd->pd_mtx);
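These two hunks convert the prefetcher's flow control from a block count to a byte budget: the prefetch thread credits pd_bytes_fetched until it reaches zfs_pd_bytes_max, and the traversal thread waits until the block it needs has been credited before debiting it. A minimal user-space sketch of the same throttle, with pthreads standing in for the kernel mutex/condvar primitives (all names hypothetical):

    #include <pthread.h>
    #include <stdint.h>

    #define BUDGET_MAX (50 * 1024 * 1024) /* cf. zfs_pd_bytes_max */

    struct throttle {
            pthread_mutex_t mtx;
            pthread_cond_t cv;
            int64_t bytes_fetched;
            int exited;
    };

    /* Prefetch side: block while the budget is spent, then credit bytes. */
    void
    prefetch_credit(struct throttle *t, int64_t size)
    {
            pthread_mutex_lock(&t->mtx);
            while (t->bytes_fetched >= BUDGET_MAX)
                    pthread_cond_wait(&t->cv, &t->mtx);
            t->bytes_fetched += size;
            pthread_cond_broadcast(&t->cv);
            pthread_mutex_unlock(&t->mtx);
    }

    /* Consumer side: wait for the needed bytes, then debit them. */
    void
    consume_debit(struct throttle *t, int64_t size)
    {
            pthread_mutex_lock(&t->mtx);
            while (t->bytes_fetched < size && !t->exited)
                    pthread_cond_wait(&t->cv, &t->mtx);
            t->bytes_fetched -= size;
            pthread_cond_broadcast(&t->cv);
            pthread_mutex_unlock(&t->mtx);
    }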
@@ -529,13 +553,12 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
td.td_hole_birth_enabled_txg = 0;
}
- pd.pd_blks_max = zfs_pd_blks_max;
pd.pd_flags = flags;
mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
- if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
+ if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
index dff9fab..65a017f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
@@ -315,7 +315,8 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
dmu_buf_impl_t *db;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
+ err = dbuf_hold_impl(dn, 0, start,
+ FALSE, FALSE, FTAG, &db);
rw_exit(&dn->dn_struct_rwlock);
if (err) {
@@ -516,7 +517,8 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
blkoff = P2PHASE(blkid, epb);
tochk = MIN(epb - blkoff, nblks);
- err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
+ err = dbuf_hold_impl(dn, 1, blkid >> epbs,
+ FALSE, FALSE, FTAG, &dbuf);
if (err) {
txh->txh_tx->tx_err = err;
break;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
index d51a981..0d65896 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -36,19 +36,20 @@
#include <sys/kstat.h>
/*
- * I'm against tune-ables, but these should probably exist as tweakable globals
- * until we can get this working the way we want it to.
+ * This tunable disables predictive prefetch. Note that it leaves "prescient"
+ * prefetch (e.g. prefetch for zfs send) intact. Unlike predictive prefetch,
+ * prescient prefetch never issues i/os that end up not being needed,
+ * so it can't hurt performance.
*/
-
-int zfs_prefetch_disable = 0;
+boolean_t zfs_prefetch_disable = B_FALSE;
/* max # of streams per zfetch */
uint32_t zfetch_max_streams = 8;
/* min time before stream reclaim */
uint32_t zfetch_min_sec_reap = 2;
-/* max number of blocks to fetch at a time */
-uint32_t zfetch_block_cap = 256;
-/* number of bytes in a array_read at which we stop prefetching (1Mb) */
+/* max bytes to prefetch per stream (default 8MB) */
+uint32_t zfetch_max_distance = 8 * 1024 * 1024;
+/* number of bytes in an array_read at which we stop prefetching (1MB) */
uint64_t zfetch_array_rd_sz = 1024 * 1024;
SYSCTL_DECL(_vfs_zfs);
@@ -59,202 +60,36 @@ TUNABLE_INT("vfs.zfs.zfetch.max_streams", &zfetch_max_streams);
SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_streams, CTLFLAG_RW,
&zfetch_max_streams, 0, "Max # of streams per zfetch");
TUNABLE_INT("vfs.zfs.zfetch.min_sec_reap", &zfetch_min_sec_reap);
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, min_sec_reap, CTLFLAG_RDTUN,
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, min_sec_reap, CTLFLAG_RWTUN,
&zfetch_min_sec_reap, 0, "Min time before stream reclaim");
-TUNABLE_INT("vfs.zfs.zfetch.block_cap", &zfetch_block_cap);
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, block_cap, CTLFLAG_RDTUN,
- &zfetch_block_cap, 0, "Max number of blocks to fetch at a time");
+TUNABLE_INT("vfs.zfs.zfetch.max_distance", &zfetch_max_distance);
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
+ &zfetch_max_distance, 0, "Max bytes to prefetch per stream");
TUNABLE_QUAD("vfs.zfs.zfetch.array_rd_sz", &zfetch_array_rd_sz);
-SYSCTL_UQUAD(_vfs_zfs_zfetch, OID_AUTO, array_rd_sz, CTLFLAG_RDTUN,
+SYSCTL_UQUAD(_vfs_zfs_zfetch, OID_AUTO, array_rd_sz, CTLFLAG_RWTUN,
&zfetch_array_rd_sz, 0,
"Number of bytes in a array_read at which we stop prefetching");
-/* forward decls for static routines */
-static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *);
-static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
-static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
-static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
-static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int);
-static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
-static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
-static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
-static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
-
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
kstat_named_t zfetchstat_misses;
- kstat_named_t zfetchstat_colinear_hits;
- kstat_named_t zfetchstat_colinear_misses;
- kstat_named_t zfetchstat_stride_hits;
- kstat_named_t zfetchstat_stride_misses;
- kstat_named_t zfetchstat_reclaim_successes;
- kstat_named_t zfetchstat_reclaim_failures;
- kstat_named_t zfetchstat_stream_resets;
- kstat_named_t zfetchstat_stream_noresets;
- kstat_named_t zfetchstat_bogus_streams;
+ kstat_named_t zfetchstat_max_streams;
} zfetch_stats_t;
static zfetch_stats_t zfetch_stats = {
{ "hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 },
- { "colinear_hits", KSTAT_DATA_UINT64 },
- { "colinear_misses", KSTAT_DATA_UINT64 },
- { "stride_hits", KSTAT_DATA_UINT64 },
- { "stride_misses", KSTAT_DATA_UINT64 },
- { "reclaim_successes", KSTAT_DATA_UINT64 },
- { "reclaim_failures", KSTAT_DATA_UINT64 },
- { "streams_resets", KSTAT_DATA_UINT64 },
- { "streams_noresets", KSTAT_DATA_UINT64 },
- { "bogus_streams", KSTAT_DATA_UINT64 },
+ { "max_streams", KSTAT_DATA_UINT64 },
};
-#define ZFETCHSTAT_INCR(stat, val) \
- atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
-
-#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1);
+#define ZFETCHSTAT_BUMP(stat) \
+ atomic_inc_64(&zfetch_stats.stat.value.ui64);
kstat_t *zfetch_ksp;
-/*
- * Given a zfetch structure and a zstream structure, determine whether the
- * blocks to be read are part of a co-linear pair of existing prefetch
- * streams. If a set is found, coalesce the streams, removing one, and
- * configure the prefetch so it looks for a strided access pattern.
- *
- * In other words: if we find two sequential access streams that are
- * the same length and distance N appart, and this read is N from the
- * last stream, then we are probably in a strided access pattern. So
- * combine the two sequential streams into a single strided stream.
- *
- * Returns whether co-linear streams were found.
- */
-static boolean_t
-dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
-{
- zstream_t *z_walk;
- zstream_t *z_comp;
-
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- if (zh == NULL) {
- rw_exit(&zf->zf_rwlock);
- return (0);
- }
-
- for (z_walk = list_head(&zf->zf_stream); z_walk;
- z_walk = list_next(&zf->zf_stream, z_walk)) {
- for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
- z_comp = list_next(&zf->zf_stream, z_comp)) {
- int64_t diff;
-
- if (z_walk->zst_len != z_walk->zst_stride ||
- z_comp->zst_len != z_comp->zst_stride) {
- continue;
- }
-
- diff = z_comp->zst_offset - z_walk->zst_offset;
- if (z_comp->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ? -1 : 1;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
-
- diff = z_walk->zst_offset - z_comp->zst_offset;
- if (z_walk->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ? -1 : 1;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
- }
- }
-
- rw_exit(&zf->zf_rwlock);
- return (0);
-}
-
-/*
- * Given a zstream_t, determine the bounds of the prefetch. Then call the
- * routine that actually prefetches the individual blocks.
- */
-static void
-dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
-{
- uint64_t prefetch_tail;
- uint64_t prefetch_limit;
- uint64_t prefetch_ofst;
- uint64_t prefetch_len;
- uint64_t blocks_fetched;
-
- zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
- zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
-
- prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
- (int64_t)(zs->zst_offset + zs->zst_stride));
- /*
- * XXX: use a faster division method?
- */
- prefetch_limit = zs->zst_offset + zs->zst_len +
- (zs->zst_cap * zs->zst_stride) / zs->zst_len;
-
- while (prefetch_tail < prefetch_limit) {
- prefetch_ofst = zs->zst_offset + zs->zst_direction *
- (prefetch_tail - zs->zst_offset);
-
- prefetch_len = zs->zst_len;
-
- /*
- * Don't prefetch beyond the end of the file, if working
- * backwards.
- */
- if ((zs->zst_direction == ZFETCH_BACKWARD) &&
- (prefetch_ofst > prefetch_tail)) {
- prefetch_len += prefetch_ofst;
- prefetch_ofst = 0;
- }
-
- /* don't prefetch more than we're supposed to */
- if (prefetch_len > zs->zst_len)
- break;
-
- blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
- prefetch_ofst, zs->zst_len);
-
- prefetch_tail += zs->zst_stride;
- /* stop if we've run out of stuff to prefetch */
- if (blocks_fetched < zs->zst_len)
- break;
- }
- zs->zst_ph_offset = prefetch_tail;
- zs->zst_last = ddi_get_lbolt();
-}
-
void
zfetch_init(void)
{
-
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
@@ -282,284 +117,41 @@ zfetch_fini(void)
void
dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
{
- if (zf == NULL) {
+ if (zf == NULL)
return;
- }
zf->zf_dnode = dno;
- zf->zf_stream_cnt = 0;
- zf->zf_alloc_fail = 0;
list_create(&zf->zf_stream, sizeof (zstream_t),
- offsetof(zstream_t, zst_node));
+ offsetof(zstream_t, zs_node));
rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
}
-/*
- * This function computes the actual size, in blocks, that can be prefetched,
- * and fetches it.
- */
-static uint64_t
-dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
- uint64_t i;
-
- fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
-
- for (i = 0; i < fetchsz; i++) {
- dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_ASYNC_READ);
- }
-
- return (fetchsz);
-}
-
-/*
- * this function returns the number of blocks that would be prefetched, based
- * upon the supplied dnode, blockid, and nblks. This is used so that we can
- * update streams in place, and then prefetch with their old value after the
- * fact. This way, we can delay the prefetch, but subsequent accesses to the
- * stream won't result in the same data being prefetched multiple times.
- */
-static uint64_t
-dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
-
- if (blkid > dn->dn_maxblkid) {
- return (0);
- }
-
- /* compute fetch size */
- if (blkid + nblks + 1 > dn->dn_maxblkid) {
- fetchsz = (dn->dn_maxblkid - blkid) + 1;
- ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
- } else {
- fetchsz = nblks;
- }
-
-
- return (fetchsz);
-}
-
-/*
- * given a zfetch and a zstream structure, see if there is an associated zstream
- * for this block read. If so, it starts a prefetch for the stream it
- * located and returns true, otherwise it returns false
- */
-static boolean_t
-dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
+static void
+dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
- zstream_t *zs;
- int64_t diff;
- int reset = !prefetched;
- int rc = 0;
-
- if (zh == NULL)
- return (0);
-
- /*
- * XXX: This locking strategy is a bit coarse; however, it's impact has
- * yet to be tested. If this turns out to be an issue, it can be
- * modified in a number of different ways.
- */
-
- rw_enter(&zf->zf_rwlock, RW_READER);
-top:
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- /*
- * XXX - should this be an assert?
- */
- if (zs->zst_len == 0) {
- /* bogus stream */
- ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
- continue;
- }
-
- /*
- * We hit this case when we are in a strided prefetch stream:
- * we will read "len" blocks before "striding".
- */
- if (zh->zst_offset >= zs->zst_offset &&
- zh->zst_offset < zs->zst_offset + zs->zst_len) {
- if (prefetched) {
- /* already fetched */
- ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
- rc = 1;
- goto out;
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
- }
- }
-
- /*
- * This is the forward sequential read case: we increment
- * len by one each time we hit here, so we will enter this
- * case on every read.
- */
- if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
-
- reset = !prefetched && zs->zst_len > 1;
-
- if (mutex_tryenter(&zs->zst_lock) == 0) {
- rc = 1;
- goto out;
- }
-
- if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
- zs->zst_len += zh->zst_len;
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_offset += diff;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : 0;
- }
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- /*
- * Same as above, but reading backwards through the file.
- */
- } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
- /* backwards sequential access */
-
- reset = !prefetched && zs->zst_len > 1;
-
- if (mutex_tryenter(&zs->zst_lock) == 0) {
- rc = 1;
- goto out;
- }
-
- if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zh->zst_len ?
- zs->zst_offset - zh->zst_len : 0;
- zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
- zs->zst_ph_offset - zh->zst_len : 0;
- zs->zst_len += zh->zst_len;
-
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_ph_offset = zs->zst_ph_offset > diff ?
- zs->zst_ph_offset - diff : 0;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : zs->zst_len;
- }
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided forward access */
-
- if (mutex_tryenter(&zs->zst_lock) == 0) {
- rc = 1;
- goto out;
- }
-
- if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset += zs->zst_stride;
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided reverse access */
-
- if (mutex_tryenter(&zs->zst_lock) == 0) {
- rc = 1;
- goto out;
- }
-
- if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zs->zst_stride ?
- zs->zst_offset - zs->zst_stride : 0;
- zs->zst_ph_offset = (zs->zst_ph_offset >
- (2 * zs->zst_stride)) ?
- (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
- }
- }
-
- if (zs) {
- if (reset) {
- zstream_t *remove = zs;
-
- ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
- rc = 0;
- mutex_exit(&zs->zst_lock);
- rw_exit(&zf->zf_rwlock);
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- /*
- * Relocate the stream, in case someone removes
- * it while we were acquiring the WRITER lock.
- */
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
- if (zs == remove) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- break;
- }
- }
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
- rc = 1;
- dmu_zfetch_dofetch(zf, zs);
- mutex_exit(&zs->zst_lock);
- }
- }
-out:
- rw_exit(&zf->zf_rwlock);
- return (rc);
+ ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+ list_remove(&zf->zf_stream, zs);
+ mutex_destroy(&zs->zs_lock);
+ kmem_free(zs, sizeof (*zs));
}
/*
- * Clean-up state associated with a zfetch structure. This frees allocated
- * structure members, empties the zf_stream tree, and generally makes things
- * nice. This doesn't free the zfetch_t itself, that's left to the caller.
+ * Clean up state associated with a zfetch structure (e.g. destroy the
+ * streams). This doesn't free the zfetch_t itself; that's left to the caller.
*/
void
-dmu_zfetch_rele(zfetch_t *zf)
+dmu_zfetch_fini(zfetch_t *zf)
{
- zstream_t *zs;
- zstream_t *zs_next;
+ zstream_t *zs;
ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
- for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs);
-
- list_remove(&zf->zf_stream, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- }
+ rw_enter(&zf->zf_rwlock, RW_WRITER);
+ while ((zs = list_head(&zf->zf_stream)) != NULL)
+ dmu_zfetch_stream_remove(zf, zs);
+ rw_exit(&zf->zf_rwlock);
list_destroy(&zf->zf_stream);
rw_destroy(&zf->zf_rwlock);
@@ -567,103 +159,55 @@ dmu_zfetch_rele(zfetch_t *zf)
}
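
The teardown above follows a drain-then-destroy discipline: take the writer lock so no concurrent reader can observe a half-drained list, pop streams from the head until the list is empty, and only then destroy the list and the lock themselves. A minimal userland sketch of the same pattern (the struct, the pthread primitives, and the names are illustrative stand-ins for the ZFS list_t/krwlock_t machinery, not the actual API):

    #include <pthread.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    struct fetcher {
            pthread_rwlock_t lock;
            struct node *head;
    };

    static void
    fetcher_fini(struct fetcher *f)
    {
            struct node *n;

            /* Writer lock: no reader may see a half-drained list. */
            pthread_rwlock_wrlock(&f->lock);
            while ((n = f->head) != NULL) {
                    f->head = n->next;
                    free(n);
            }
            pthread_rwlock_unlock(&f->lock);
            pthread_rwlock_destroy(&f->lock);
    }
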
/*
- * Given a zfetch and zstream structure, insert the zstream structure into the
- * AVL tree contained within the zfetch structure. Perform the appropriate
- * book-keeping. It is possible that another thread has inserted a stream which
- * matches one that we are about to insert, so we must be sure to check for this
- * case. If one is found, return failure, and let the caller cleanup the
- * duplicates.
+ * If there aren't too many streams already, create a new stream.
+ * The "blkid" argument is the next block that we expect this stream to access.
+ * While we're here, clean up old streams (which haven't been
+ * accessed for at least zfetch_min_sec_reap seconds).
*/
-static int
-dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
+static void
+dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
- zstream_t *zs_walk;
- zstream_t *zs_next;
+ zstream_t *zs_next;
+ int numstreams = 0;
ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
- for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs_walk);
-
- if (dmu_zfetch_streams_equal(zs_walk, zs)) {
- return (0);
- }
- }
-
- list_insert_head(&zf->zf_stream, zs);
- zf->zf_stream_cnt++;
- return (1);
-}
-
-
-/*
- * Walk the list of zstreams in the given zfetch, find an old one (by time), and
- * reclaim it for use by the caller.
- */
-static zstream_t *
-dmu_zfetch_stream_reclaim(zfetch_t *zf)
-{
- zstream_t *zs;
- clock_t ticks;
-
- ticks = zfetch_min_sec_reap * hz;
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- if (ddi_get_lbolt() - zs->zst_last > ticks)
- break;
+ /*
+ * Clean up old streams.
+ */
+ for (zstream_t *zs = list_head(&zf->zf_stream);
+ zs != NULL; zs = zs_next) {
+ zs_next = list_next(&zf->zf_stream, zs);
+ if (((gethrtime() - zs->zs_atime) / NANOSEC) >
+ zfetch_min_sec_reap)
+ dmu_zfetch_stream_remove(zf, zs);
+ else
+ numstreams++;
}
- if (zs) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- bzero(zs, sizeof (zstream_t));
- } else {
- zf->zf_alloc_fail++;
+ /*
+ * The maximum number of streams is normally zfetch_max_streams,
+ * but for small files we lower it such that it's at least possible
+ * for all the streams to be non-overlapping.
+ *
+ * If we are already at the maximum number of streams for this file,
+ * even after removing old streams, then don't create this stream.
+ */
+ uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
+ zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
+ zfetch_max_distance));
+ if (numstreams >= max_streams) {
+ ZFETCHSTAT_BUMP(zfetchstat_max_streams);
+ return;
}
- rw_exit(&zf->zf_rwlock);
-
- return (zs);
-}
-
-/*
- * Given a zfetch and zstream structure, remove the zstream structure from its
- * container in the zfetch structure. Perform the appropriate book-keeping.
- */
-static void
-dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
-{
- ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
-
- list_remove(&zf->zf_stream, zs);
- zf->zf_stream_cnt--;
-}
-
-static int
-dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
-{
- if (zs1->zst_offset != zs2->zst_offset)
- return (0);
- if (zs1->zst_len != zs2->zst_len)
- return (0);
+ zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
+ zs->zs_blkid = blkid;
+ zs->zs_pf_blkid = blkid;
+ zs->zs_atime = gethrtime();
+ mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
- if (zs1->zst_stride != zs2->zst_stride)
- return (0);
-
- if (zs1->zst_ph_offset != zs2->zst_ph_offset)
- return (0);
-
- if (zs1->zst_cap != zs2->zst_cap)
- return (0);
-
- if (zs1->zst_direction != zs2->zst_direction)
- return (0);
-
- return (1);
+ list_insert_head(&zf->zf_stream, zs);
}
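
To make the cap concrete: assuming the stock tunable defaults this rework introduces (zfetch_max_streams = 8 and zfetch_max_distance = 8MB; both are assumptions about the surrounding file, not shown in this hunk), a file of 101 128K blocks (dn_maxblkid = 100, dn_datablksz = 131072) gets

    max_streams = MAX(1, MIN(8, 100 * 131072 / 8388608))
                = MAX(1, MIN(8, 1))
                = 1

so a ~13MB file admits only one stream, while a 10000-block file yields MIN(8, 156) = 8, i.e. the global cap.
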
/*
@@ -671,91 +215,86 @@ dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
* routines to create, delete, find, or operate upon prefetch streams.
*/
void
-dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
+dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks)
{
- zstream_t zst;
- zstream_t *newstream;
- boolean_t fetched;
- int inserted;
- unsigned int blkshft;
- uint64_t blksz;
+ zstream_t *zs;
if (zfs_prefetch_disable)
return;
- /* files that aren't ln2 blocksz are only one block -- nothing to do */
- if (!zf->zf_dnode->dn_datablkshift)
+ /*
+ * As a fast path for small (single-block) files, ignore access
+ * to the first block.
+ */
+ if (blkid == 0)
return;
- /* convert offset and size, into blockid and nblocks */
- blkshft = zf->zf_dnode->dn_datablkshift;
- blksz = (1 << blkshft);
-
- bzero(&zst, sizeof (zstream_t));
- zst.zst_offset = offset >> blkshft;
- zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
- P2ALIGN(offset, blksz)) >> blkshft;
+ rw_enter(&zf->zf_rwlock, RW_READER);
- fetched = dmu_zfetch_find(zf, &zst, prefetched);
- if (fetched) {
- ZFETCHSTAT_BUMP(zfetchstat_hits);
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_misses);
- fetched = dmu_zfetch_colinear(zf, &zst);
- if (fetched) {
- ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
+ for (zs = list_head(&zf->zf_stream); zs != NULL;
+ zs = list_next(&zf->zf_stream, zs)) {
+ if (blkid == zs->zs_blkid) {
+ mutex_enter(&zs->zs_lock);
+ /*
+ * zs_blkid could have changed before we
+			 * acquired zs_lock; re-check it here.
+ */
+ if (blkid != zs->zs_blkid) {
+ mutex_exit(&zs->zs_lock);
+ continue;
+ }
+ break;
}
}
- if (!fetched) {
- newstream = dmu_zfetch_stream_reclaim(zf);
-
+ if (zs == NULL) {
/*
- * we still couldn't find a stream, drop the lock, and allocate
- * one if possible. Otherwise, give up and go home.
+ * This access is not part of any existing stream. Create
+ * a new stream for it.
*/
- if (newstream) {
- ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
- } else {
- uint64_t maxblocks;
- uint32_t max_streams;
- uint32_t cur_streams;
-
- ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
- cur_streams = zf->zf_stream_cnt;
- maxblocks = zf->zf_dnode->dn_maxblkid;
-
- max_streams = MIN(zfetch_max_streams,
- (maxblocks / zfetch_block_cap));
- if (max_streams == 0) {
- max_streams++;
- }
-
- if (cur_streams >= max_streams) {
- return;
- }
- newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
- }
+ ZFETCHSTAT_BUMP(zfetchstat_misses);
+ if (rw_tryupgrade(&zf->zf_rwlock))
+ dmu_zfetch_stream_create(zf, blkid + nblks);
+ rw_exit(&zf->zf_rwlock);
+ return;
+ }
- newstream->zst_offset = zst.zst_offset;
- newstream->zst_len = zst.zst_len;
- newstream->zst_stride = zst.zst_len;
- newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
- newstream->zst_cap = zst.zst_len;
- newstream->zst_direction = ZFETCH_FORWARD;
- newstream->zst_last = ddi_get_lbolt();
+ /*
+ * This access was to a block that we issued a prefetch for on
+ * behalf of this stream. Issue further prefetches for this stream.
+ *
+ * Normally, we start prefetching where we stopped
+ * prefetching last (zs_pf_blkid). But when we get our first
+	 * hit on this stream, zs_pf_blkid == zs_blkid, and we don't
+	 * want to prefetch the block we just accessed. In this case,
+ * start just after the block we just accessed.
+ */
+ int64_t pf_start = MAX(zs->zs_pf_blkid, blkid + nblks);
- mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
+ /*
+ * Double our amount of prefetched data, but don't let the
+ * prefetch get further ahead than zfetch_max_distance.
+ */
+ int pf_nblks =
+ MIN((int64_t)zs->zs_pf_blkid - zs->zs_blkid + nblks,
+ zs->zs_blkid + nblks +
+ (zfetch_max_distance >> zf->zf_dnode->dn_datablkshift) - pf_start);
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- inserted = dmu_zfetch_stream_insert(zf, newstream);
- rw_exit(&zf->zf_rwlock);
+ zs->zs_pf_blkid = pf_start + pf_nblks;
+ zs->zs_atime = gethrtime();
+ zs->zs_blkid = blkid + nblks;
- if (!inserted) {
- mutex_destroy(&newstream->zst_lock);
- kmem_free(newstream, sizeof (zstream_t));
- }
+ /*
+ * dbuf_prefetch() issues the prefetch i/o
+ * asynchronously, but it may need to wait for an
+ * indirect block to be read from disk. Therefore
+ * we do not want to hold any locks while we call it.
+ */
+ mutex_exit(&zs->zs_lock);
+ rw_exit(&zf->zf_rwlock);
+ for (int i = 0; i < pf_nblks; i++) {
+ dbuf_prefetch(zf->zf_dnode, 0, pf_start + i,
+ ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
}
+ ZFETCHSTAT_BUMP(zfetchstat_hits);
}
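
One worked step of the hit path above, under assumed values (128K blocks, so dn_datablkshift = 17 and the 8MB zfetch_max_distance corresponds to 64 blocks; stream state zs_blkid = 10, zs_pf_blkid = 18; a hit arrives at blkid = 10 with nblks = 2):

    pf_start    = MAX(18, 10 + 2)                    = 18
    pf_nblks    = MIN(18 - 10 + 2, 10 + 2 + 64 - 18) = MIN(10, 58) = 10
    zs_pf_blkid = 18 + 10 = 28;  zs_blkid = 10 + 2 = 12

The prefetched span ahead of the stream grows from 18 - 10 = 8 blocks to 28 - 12 = 16 blocks: it doubles on each hit, until the 64-block (8MB) distance cap becomes the binding term of the MIN().
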
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
index 96deeb7..a295f90 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -405,8 +406,9 @@ static dnode_t *
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object, dnode_handle_t *dnh)
{
- dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+ dnode_t *dn;
+ dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
ASSERT(!POINTER_IS_VALID(dn->dn_objset));
dn->dn_moved = 0;
@@ -443,13 +445,31 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock);
- list_insert_head(&os->os_dnodes, dn);
+ if (dnh->dnh_dnode != NULL) {
+ /* Lost the allocation race. */
+ mutex_exit(&os->os_lock);
+ kmem_cache_free(dnode_cache, dn);
+ return (dnh->dnh_dnode);
+ }
+
+ /*
+ * Exclude special dnodes from os_dnodes so an empty os_dnodes
+ * signifies that the special dnodes have no references from
+ * their children (the entries in os_dnodes). This allows
+ * dnode_destroy() to easily determine if the last child has
+ * been removed and then complete eviction of the objset.
+ */
+ if (!DMU_OBJECT_IS_SPECIAL(object))
+ list_insert_head(&os->os_dnodes, dn);
membar_producer();
+
/*
- * Everything else must be valid before assigning dn_objset makes the
- * dnode eligible for dnode_move().
+ * Everything else must be valid before assigning dn_objset
+ * makes the dnode eligible for dnode_move().
*/
dn->dn_objset = os;
+
+ dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock);
arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
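
The dnode_create() change above is an instance of the usual allocate-outside-the-lock idiom: build the object without holding os_lock, then either publish it or, on losing the race, free the private copy and adopt the winner's. A hedged stand-alone sketch with generic names (not the ZFS API; error handling elided):

    #include <pthread.h>
    #include <stdlib.h>

    struct slot {
            pthread_mutex_t lock;
            void *obj;
    };

    static void *
    slot_attach(struct slot *s, size_t sz)
    {
            void *o = malloc(sz);   /* allocate outside the lock */

            pthread_mutex_lock(&s->lock);
            if (s->obj != NULL) {
                    /* Lost the allocation race: discard ours. */
                    pthread_mutex_unlock(&s->lock);
                    free(o);
                    return (s->obj);
            }
            s->obj = o;             /* publish our copy */
            pthread_mutex_unlock(&s->lock);
            return (o);
    }
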
@@ -463,12 +483,18 @@ static void
dnode_destroy(dnode_t *dn)
{
objset_t *os = dn->dn_objset;
+ boolean_t complete_os_eviction = B_FALSE;
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset);
- list_remove(&os->os_dnodes, dn);
+ if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ list_remove(&os->os_dnodes, dn);
+ complete_os_eviction =
+ list_is_empty(&os->os_dnodes) &&
+ list_link_active(&os->os_evicting_node);
+ }
mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */
@@ -500,9 +526,12 @@ dnode_destroy(dnode_t *dn)
dn->dn_id_flags = 0;
dn->dn_unlisted_l0_blkid = 0;
- dmu_zfetch_rele(&dn->dn_zfetch);
+ dmu_zfetch_fini(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
+
+ if (complete_os_eviction)
+ dmu_objset_evict_done(os);
}
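
Note the shape of this change: the "was I the last child of an evicting objset?" decision is latched into complete_os_eviction while os_lock is held, but dmu_objset_evict_done() runs only after the lock is dropped and the dnode freed, so the completion path never executes under os_lock. A minimal sketch of that latch-then-act pattern (hypothetical names; parent_evict_done() stands in for dmu_objset_evict_done()):

    #include <pthread.h>

    struct parent {
            pthread_mutex_t lock;
            int nchildren;
            int evicting;
    };

    extern void parent_evict_done(struct parent *);  /* assumed callback */

    static void
    child_destroy(struct parent *p)
    {
            int last;

            pthread_mutex_lock(&p->lock);
            last = (--p->nchildren == 0) && p->evicting;
            pthread_mutex_unlock(&p->lock);

            if (last)
                    parent_evict_done(p);   /* may block; runs unlocked */
    }
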
void
@@ -745,8 +774,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
dmu_zfetch_init(&ndn->dn_zfetch, NULL);
list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
- ndn->dn_zfetch.zf_stream_cnt = odn->dn_zfetch.zf_stream_cnt;
- ndn->dn_zfetch.zf_alloc_fail = odn->dn_zfetch.zf_alloc_fail;
/*
* Update back pointers. Updating the handle fixes the back pointer of
@@ -971,33 +998,32 @@ dnode_special_close(dnode_handle_t *dnh)
*/
while (refcount_count(&dn->dn_holds) > 0)
delay(1);
+ ASSERT(dn->dn_dbuf == NULL ||
+ dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
zrl_add(&dnh->dnh_zrlock);
dnode_destroy(dn); /* implicit zrl_remove() */
zrl_destroy(&dnh->dnh_zrlock);
dnh->dnh_dnode = NULL;
}
-dnode_t *
+void
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_handle_t *dnh)
{
- dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
- dnh->dnh_dnode = dn;
+ dnode_t *dn;
+
+ dn = dnode_create(os, dnp, NULL, object, dnh);
zrl_init(&dnh->dnh_zrlock);
DNODE_VERIFY(dn);
- return (dn);
}
static void
-dnode_buf_pageout(dmu_buf_t *db, void *arg)
+dnode_buf_pageout(void *dbu)
{
- dnode_children_t *children_dnodes = arg;
+ dnode_children_t *children_dnodes = dbu;
int i;
- int epb = db->db_size >> DNODE_SHIFT;
- ASSERT(epb == children_dnodes->dnc_count);
-
- for (i = 0; i < epb; i++) {
+ for (i = 0; i < children_dnodes->dnc_count; i++) {
dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
dnode_t *dn;
@@ -1027,7 +1053,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
dnh->dnh_dnode = NULL;
}
kmem_free(children_dnodes, sizeof (dnode_children_t) +
- epb * sizeof (dnode_handle_t));
+ children_dnodes->dnc_count * sizeof (dnode_handle_t));
}
/*
@@ -1088,7 +1114,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
drop_struct_lock = TRUE;
}
- blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t));
+ blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
db = dbuf_hold(mdn, blk, FTAG);
if (drop_struct_lock)
@@ -1117,10 +1143,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dnh = &children_dnodes->dnc_children[0];
for (i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock);
- dnh[i].dnh_dnode = NULL;
}
- if (winner = dmu_buf_set_user(&db->db, children_dnodes,
- dnode_buf_pageout)) {
+ dmu_buf_init_user(&children_dnodes->dnc_dbu,
+ dnode_buf_pageout, NULL);
+ winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
+ if (winner != NULL) {
for (i = 0; i < epb; i++) {
zrl_destroy(&dnh[i].dnh_zrlock);
@@ -1135,17 +1162,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dnh = &children_dnodes->dnc_children[idx];
zrl_add(&dnh->dnh_zrlock);
- if ((dn = dnh->dnh_dnode) == NULL) {
+ dn = dnh->dnh_dnode;
+ if (dn == NULL) {
dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
- dnode_t *winner;
dn = dnode_create(os, phys, db, object, dnh);
- winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
- if (winner != NULL) {
- zrl_add(&dnh->dnh_zrlock);
- dnode_destroy(dn); /* implicit zrl_remove() */
- dn = winner;
- }
}
mutex_enter(&dn->dn_mtx);
@@ -1159,10 +1180,10 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
}
- mutex_exit(&dn->dn_mtx);
-
if (refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
+ mutex_exit(&dn->dn_mtx);
+
/* Now we can rely on the hold to prevent the dnode from moving. */
zrl_remove(&dnh->dnh_zrlock);
@@ -1205,12 +1226,18 @@ dnode_add_ref(dnode_t *dn, void *tag)
void
dnode_rele(dnode_t *dn, void *tag)
{
+ mutex_enter(&dn->dn_mtx);
+ dnode_rele_and_unlock(dn, tag);
+}
+
+void
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
+{
uint64_t refs;
/* Get while the hold prevents the dnode from moving. */
dmu_buf_impl_t *db = dn->dn_dbuf;
dnode_handle_t *dnh = dn->dn_handle;
- mutex_enter(&dn->dn_mtx);
refs = refcount_remove(&dn->dn_holds, tag);
mutex_exit(&dn->dn_mtx);
@@ -1384,7 +1411,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
goto fail;
/* resize the old block */
- err = dbuf_hold_impl(dn, 0, 0, TRUE, FTAG, &db);
+ err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
if (err == 0)
dbuf_new_size(db, size, tx);
else if (err != ENOENT)
@@ -1557,8 +1584,8 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
ASSERT3U(blkoff + head, ==, blksz);
if (len < head)
head = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE,
- FTAG, &db) == 0) {
+ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
+ TRUE, FALSE, FTAG, &db) == 0) {
caddr_t data;
/* don't dirty if it isn't on disk and isn't dirty */
@@ -1595,8 +1622,8 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if (tail) {
if (len < tail)
tail = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len),
- TRUE, FTAG, &db) == 0) {
+ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
+ TRUE, FALSE, FTAG, &db) == 0) {
/* don't dirty if not on disk and not dirty */
if (db->db_last_dirty ||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
@@ -1825,7 +1852,7 @@ dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
*/
static int
dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
- int lvl, uint64_t blkfill, uint64_t txg)
+ int lvl, uint64_t blkfill, uint64_t txg)
{
dmu_buf_impl_t *db = NULL;
void *data = NULL;
@@ -1847,8 +1874,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
epb = dn->dn_phys->dn_nblkptr;
data = dn->dn_phys->dn_blkptr;
} else {
- uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl);
- error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db);
+ uint64_t blkid = dbuf_whichblock(dn, lvl, *offset);
+ error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db);
if (error) {
if (error != ENOENT)
return (error);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
index 418d318..0787885 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -76,7 +77,8 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
/* set dbuf's parent pointers to new indirect buf */
for (i = 0; i < nblkptr; i++) {
- dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);
+ dmu_buf_impl_t *child =
+ dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);
if (child == NULL)
continue;
@@ -186,7 +188,7 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
rw_enter(&dn->dn_struct_rwlock, RW_READER);
err = dbuf_hold_impl(dn, db->db_level-1,
- (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
+ (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
rw_exit(&dn->dn_struct_rwlock);
if (err == ENOENT)
continue;
@@ -282,7 +284,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
continue;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
- i, B_TRUE, FTAG, &subdb));
+ i, TRUE, FALSE, FTAG, &subdb));
rw_exit(&dn->dn_struct_rwlock);
ASSERT3P(bp, ==, subdb->db_blkptr);
@@ -355,7 +357,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
continue;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
- TRUE, FTAG, &db));
+ TRUE, FALSE, FTAG, &db));
rw_exit(&dn->dn_struct_rwlock);
free_children(db, blkid, nblks, tx);
@@ -396,50 +398,44 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
void
dnode_evict_dbufs(dnode_t *dn)
{
- int progress;
- int pass = 0;
+ dmu_buf_impl_t db_marker;
+ dmu_buf_impl_t *db, *db_next;
- do {
- dmu_buf_impl_t *db, *db_next;
- int evicting = FALSE;
+ mutex_enter(&dn->dn_dbufs_mtx);
+ for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- progress = FALSE;
- mutex_enter(&dn->dn_dbufs_mtx);
- for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- db_next = AVL_NEXT(&dn->dn_dbufs, db);
#ifdef DEBUG
- DB_DNODE_ENTER(db);
- ASSERT3P(DB_DNODE(db), ==, dn);
- DB_DNODE_EXIT(db);
+ DB_DNODE_ENTER(db);
+ ASSERT3P(DB_DNODE(db), ==, dn);
+ DB_DNODE_EXIT(db);
#endif /* DEBUG */
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_EVICTING) {
- progress = TRUE;
- evicting = TRUE;
- mutex_exit(&db->db_mtx);
- } else if (refcount_is_zero(&db->db_holds)) {
- progress = TRUE;
- dbuf_clear(db); /* exits db_mtx for us */
- } else {
- mutex_exit(&db->db_mtx);
- }
-
+ mutex_enter(&db->db_mtx);
+ if (db->db_state != DB_EVICTING &&
+ refcount_is_zero(&db->db_holds)) {
+ db_marker.db_level = db->db_level;
+ db_marker.db_blkid = db->db_blkid;
+ db_marker.db_state = DB_SEARCH;
+ avl_insert_here(&dn->dn_dbufs, &db_marker, db,
+ AVL_BEFORE);
+
+ dbuf_clear(db);
+
+ db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker);
+ avl_remove(&dn->dn_dbufs, &db_marker);
+ } else {
+ mutex_exit(&db->db_mtx);
+ db_next = AVL_NEXT(&dn->dn_dbufs, db);
}
- /*
- * NB: we need to drop dn_dbufs_mtx between passes so
- * that any DB_EVICTING dbufs can make progress.
- * Ideally, we would have some cv we could wait on, but
- * since we don't, just wait a bit to give the other
- * thread a chance to run.
- */
- mutex_exit(&dn->dn_dbufs_mtx);
- if (evicting)
- delay(1);
- pass++;
- ASSERT(pass < 100); /* sanity check */
- } while (progress);
+ }
+ mutex_exit(&dn->dn_dbufs_mtx);
+
+ dnode_evict_bonus(dn);
+}
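
The rewrite replaces the old retry-until-no-progress loop with a marker node: before calling dbuf_clear() (which exits db_mtx and may reshape dn_dbufs), a DB_SEARCH marker is inserted just ahead of the victim so the walk can resume from a stable position afterwards. An illustrative list-based analogue of the technique (TAILQ instead of the real AVL tree, and evict_one() is a made-up placeholder):

    #include <sys/queue.h>

    struct entry {
            TAILQ_ENTRY(entry) link;
            int is_marker;
    };
    TAILQ_HEAD(entry_list, entry);

    extern void evict_one(struct entry *);  /* may drop and retake locks */

    static void
    walk_and_evict(struct entry_list *list)
    {
            struct entry marker = { .is_marker = 1 };
            struct entry *e, *next;

            for (e = TAILQ_FIRST(list); e != NULL; e = next) {
                    if (e->is_marker) {     /* skip other walkers' markers */
                            next = TAILQ_NEXT(e, link);
                            continue;
                    }
                    /* Park a marker before the victim, then do the work. */
                    TAILQ_INSERT_BEFORE(e, &marker, link);
                    evict_one(e);
                    /* Resume from the marker; the list may have changed. */
                    next = TAILQ_NEXT(&marker, link);
                    TAILQ_REMOVE(list, &marker, link);
            }
    }
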
+void
+dnode_evict_bonus(dnode_t *dn)
+{
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);
@@ -497,7 +493,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT(avl_is_empty(&dn->dn_dbufs));
- ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c
index 5fb7f96..bc08d1c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c
@@ -120,7 +120,7 @@ dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
int error;
zfs_bookmark_phys_t bmark_phys;
- if (!dsl_dataset_is_snapshot(snapds))
+ if (!snapds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
error = dsl_bookmark_hold_ds(dp, bookmark_name,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
index 7319c76..f4fdaf9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
@@ -21,9 +21,10 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2011 Martin Matuska <mm@FreeBSD.org>
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/dmu_objset.h>
@@ -77,7 +78,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, max_recordsize, CTLFLAG_RWTUN,
#define DS_REF_MAX (1ULL << 62)
extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
-extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
/*
 * Figure out how much of this delta should be propagated to the dsl_dir
@@ -130,8 +130,10 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
dsl_dataset_phys(ds)->ds_unique_bytes += used;
- if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
- ds->ds_need_large_blocks = B_TRUE;
+ if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
+ ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
+ B_TRUE;
+ }
mutex_exit(&ds->ds_lock);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
compressed, uncompressed, tx);
@@ -161,7 +163,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
}
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
- ASSERT(!dsl_dataset_is_snapshot(ds));
+ ASSERT(!ds->ds_is_snapshot);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
@@ -259,14 +261,15 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
return (B_TRUE);
}
-/* ARGSUSED */
static void
-dsl_dataset_evict(dmu_buf_t *db, void *dsv)
+dsl_dataset_evict(void *dbu)
{
- dsl_dataset_t *ds = dsv;
+ dsl_dataset_t *ds = dbu;
ASSERT(ds->ds_owner == NULL);
+ ds->ds_dbuf = NULL;
+
unique_remove(ds->ds_fsid_guid);
if (ds->ds_objset != NULL)
@@ -278,10 +281,10 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
}
bplist_destroy(&ds->ds_pending_deadlist);
- if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
+ if (ds->ds_deadlist.dl_os != NULL)
dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_dir)
- dsl_dir_rele(ds->ds_dir, ds);
+ dsl_dir_async_rele(ds->ds_dir, ds);
ASSERT(!list_link_active(&ds->ds_synced_link));
@@ -369,6 +372,24 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
return (err);
}
+boolean_t
+dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
+{
+ dmu_buf_t *dbuf = ds->ds_dbuf;
+ boolean_t result = B_FALSE;
+
+ if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,
+ ds->ds_object, DMU_BONUS_BLKID, tag)) {
+
+ if (ds == dmu_buf_get_user(dbuf))
+ result = B_TRUE;
+ else
+ dmu_buf_rele(dbuf, tag);
+ }
+
+ return (result);
+}
+
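+
A hedged usage sketch of the new helper, mirroring how dsl_prop_notify_all_cb() further down in this patch employs it (do_callback_work() is a made-up placeholder):

    if (!dsl_dataset_try_add_ref(dp, ds, FTAG))
            continue;               /* already being evicted; skip it */
    do_callback_work(ds);           /* ds cannot be evicted while held */
    dsl_dataset_rele(ds, FTAG);

The double check inside the helper matters: dmu_buf_try_add_ref() only pins the bonus buffer, so the user pointer is compared afterwards to confirm the buffer still belongs to this dataset; on a mismatch the hold is dropped and B_FALSE returned.
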
int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **dsp)
@@ -399,6 +420,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
ds->ds_dbuf = dbuf;
ds->ds_object = dsobj;
+ ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -413,19 +435,23 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
offsetof(dmu_sendarg_t, dsa_link));
if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
- int zaperr = zap_contains(mos, dsobj,
- DS_FIELD_LARGE_BLOCKS);
- if (zaperr != ENOENT) {
- VERIFY0(zaperr);
- ds->ds_large_blocks = B_TRUE;
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (!(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET))
+ continue;
+ err = zap_contains(mos, dsobj,
+ spa_feature_table[f].fi_guid);
+ if (err == 0) {
+ ds->ds_feature_inuse[f] = B_TRUE;
+ } else {
+ ASSERT3U(err, ==, ENOENT);
+ err = 0;
+ }
}
}
- if (err == 0) {
- err = dsl_dir_hold_obj(dp,
- dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
- &ds->ds_dir);
- }
+ err = dsl_dir_hold_obj(dp,
+ dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir);
if (err != 0) {
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
@@ -438,7 +464,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
return (err);
}
- if (!dsl_dataset_is_snapshot(ds)) {
+ if (!ds->ds_is_snapshot) {
ds->ds_snapname[0] = '\0';
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
err = dsl_dataset_hold_obj(dp,
@@ -465,7 +491,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
}
}
- if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
+ if (err == 0 && !ds->ds_is_snapshot) {
err = dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
&ds->ds_reserved);
@@ -478,8 +504,11 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds->ds_reserved = ds->ds_quota = 0;
}
- if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
- dsl_dataset_evict)) != NULL) {
+ dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+ if (err == 0)
+ winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
+
+ if (err != 0 || winner != NULL) {
bplist_destroy(&ds->ds_pending_deadlist);
dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_prev)
@@ -517,6 +546,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
const char *snapname;
uint64_t obj;
int err = 0;
+ dsl_dataset_t *ds;
err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
if (err != 0)
@@ -525,36 +555,37 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
ASSERT(dsl_pool_config_held(dp));
obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
if (obj != 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
+ err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
else
err = SET_ERROR(ENOENT);
/* we may be looking for a snapshot */
if (err == 0 && snapname != NULL) {
- dsl_dataset_t *ds;
+ dsl_dataset_t *snap_ds;
if (*snapname++ != '@') {
- dsl_dataset_rele(*dsp, tag);
+ dsl_dataset_rele(ds, tag);
dsl_dir_rele(dd, FTAG);
return (SET_ERROR(ENOENT));
}
dprintf("looking for snapshot '%s'\n", snapname);
- err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
+ err = dsl_dataset_snap_lookup(ds, snapname, &obj);
if (err == 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
- dsl_dataset_rele(*dsp, tag);
+ err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds);
+ dsl_dataset_rele(ds, tag);
if (err == 0) {
- mutex_enter(&ds->ds_lock);
- if (ds->ds_snapname[0] == 0)
- (void) strlcpy(ds->ds_snapname, snapname,
- sizeof (ds->ds_snapname));
- mutex_exit(&ds->ds_lock);
- *dsp = ds;
+ mutex_enter(&snap_ds->ds_lock);
+ if (snap_ds->ds_snapname[0] == 0)
+ (void) strlcpy(snap_ds->ds_snapname, snapname,
+ sizeof (snap_ds->ds_snapname));
+ mutex_exit(&snap_ds->ds_lock);
+ ds = snap_ds;
}
}
-
+ if (err == 0)
+ *dsp = ds;
dsl_dir_rele(dd, FTAG);
return (err);
}
@@ -676,6 +707,34 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
return (gotit);
}
+static void
+dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+ uint64_t zero = 0;
+
+ VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
+
+ spa_feature_incr(spa, f, tx);
+ dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
+
+ VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
+ sizeof (zero), 1, &zero, tx));
+}
+
+void
+dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+
+ VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
+
+ VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));
+ spa_feature_decr(spa, f, tx);
+}
+
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx)
@@ -736,8 +795,10 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags &
(DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);
- if (origin->ds_large_blocks)
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (origin->ds_feature_inuse[f])
+ dsl_dataset_activate_feature(dsobj, f, tx);
+ }
dmu_buf_will_dirty(origin->ds_dbuf, tx);
dsl_dataset_phys(origin)->ds_num_children++;
@@ -901,7 +962,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
uint64_t mrs_used;
uint64_t dlused, dlcomp, dluncomp;
- ASSERT(!dsl_dataset_is_snapshot(ds));
+ ASSERT(!ds->ds_is_snapshot);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
@@ -1299,8 +1360,10 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
dmu_buf_rele(dbuf, FTAG);
- if (ds->ds_large_blocks)
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f])
+ dsl_dataset_activate_feature(dsobj, f, tx);
+ }
ASSERT3U(ds->ds_prev != 0, ==,
dsl_dataset_phys(ds)->ds_prev_snap_obj != 0);
@@ -1592,9 +1655,13 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_objset_sync(ds->ds_objset, zio, tx);
- if (ds->ds_need_large_blocks && !ds->ds_large_blocks) {
- dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
- ds->ds_large_blocks = B_TRUE;
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_activation_needed[f]) {
+ if (ds->ds_feature_inuse[f])
+ continue;
+ dsl_dataset_activate_feature(ds->ds_object, f, tx);
+ ds->ds_feature_inuse[f] = B_TRUE;
+ }
}
}
@@ -1657,7 +1724,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
dsl_dataset_phys(ds)->ds_uncompressed_bytes);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
dsl_dataset_phys(ds)->ds_unique_bytes);
@@ -1725,7 +1792,7 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
stat->dds_origin[0] = '\0';
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
stat->dds_is_snapshot = B_TRUE;
stat->dds_num_clones =
dsl_dataset_phys(ds)->ds_num_children - 1;
@@ -2005,7 +2072,7 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
return (error);
/* must not be a snapshot */
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -2606,7 +2673,7 @@ promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
return (error);
dd = ddpa->ddpa_clone->ds_dir;
- if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
+ if (ddpa->ddpa_clone->ds_is_snapshot ||
!dsl_dir_is_clone(dd)) {
dsl_dataset_rele(ddpa->ddpa_clone, tag);
return (SET_ERROR(EINVAL));
@@ -2698,8 +2765,8 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
int64_t unused_refres_delta;
/* they should both be heads */
- if (dsl_dataset_is_snapshot(clone) ||
- dsl_dataset_is_snapshot(origin_head))
+ if (clone->ds_is_snapshot ||
+ origin_head->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/* if we are not forcing, the branch point should be just before them */
@@ -2758,6 +2825,40 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
+ /*
+ * Swap per-dataset feature flags.
+ */
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (!(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET)) {
+ ASSERT(!clone->ds_feature_inuse[f]);
+ ASSERT(!origin_head->ds_feature_inuse[f]);
+ continue;
+ }
+
+ boolean_t clone_inuse = clone->ds_feature_inuse[f];
+ boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f];
+
+ if (clone_inuse) {
+ dsl_dataset_deactivate_feature(clone->ds_object, f, tx);
+ clone->ds_feature_inuse[f] = B_FALSE;
+ }
+ if (origin_head_inuse) {
+ dsl_dataset_deactivate_feature(origin_head->ds_object,
+ f, tx);
+ origin_head->ds_feature_inuse[f] = B_FALSE;
+ }
+ if (clone_inuse) {
+ dsl_dataset_activate_feature(origin_head->ds_object,
+ f, tx);
+ origin_head->ds_feature_inuse[f] = B_TRUE;
+ }
+ if (origin_head_inuse) {
+ dsl_dataset_activate_feature(clone->ds_object, f, tx);
+ clone->ds_feature_inuse[f] = B_TRUE;
+ }
+ }
+
dmu_buf_will_dirty(clone->ds_dbuf, tx);
dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
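
Worked out as a table (T = feature in use), the four possible input states show the swap block above is a pure exchange of the two flags; and because each deactivate runs before the corresponding activate, the pool-wide spa_feature refcount stays balanced at every step and zap_add() never collides with a still-present entry:

    clone  origin_head   ->   clone  origin_head
      F        F               F        F
      T        F               F        T
      F        T               T        F
      T        T               T        T
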
@@ -2978,7 +3079,7 @@ dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
if (error != 0)
return (error);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -3061,7 +3162,7 @@ dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
if (error != 0)
return (error);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -3275,8 +3376,8 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
uint64_t snapobj;
dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
- ASSERT(dsl_dataset_is_snapshot(firstsnap));
- ASSERT(dsl_dataset_is_snapshot(lastsnap));
+ ASSERT(firstsnap->ds_is_snapshot);
+ ASSERT(lastsnap->ds_is_snapshot);
/*
* Check that the snapshots are in the same dsl_dir, and firstsnap
@@ -3312,77 +3413,6 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
return (err);
}
-static int
-dsl_dataset_activate_large_blocks_check(void *arg, dmu_tx_t *tx)
-{
- const char *dsname = arg;
- dsl_dataset_t *ds;
- dsl_pool_t *dp = dmu_tx_pool(tx);
- int error = 0;
-
- if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
- return (SET_ERROR(ENOTSUP));
-
- ASSERT(spa_feature_is_enabled(dp->dp_spa,
- SPA_FEATURE_EXTENSIBLE_DATASET));
-
- error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
- if (error != 0)
- return (error);
-
- if (ds->ds_large_blocks)
- error = EALREADY;
- dsl_dataset_rele(ds, FTAG);
-
- return (error);
-}
-
-void
-dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx)
-{
- spa_t *spa = dmu_tx_pool(tx)->dp_spa;
- objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
- uint64_t zero = 0;
-
- spa_feature_incr(spa, SPA_FEATURE_LARGE_BLOCKS, tx);
- dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
-
- VERIFY0(zap_add(mos, dsobj, DS_FIELD_LARGE_BLOCKS,
- sizeof (zero), 1, &zero, tx));
-}
-
-static void
-dsl_dataset_activate_large_blocks_sync(void *arg, dmu_tx_t *tx)
-{
- const char *dsname = arg;
- dsl_dataset_t *ds;
-
- VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), dsname, FTAG, &ds));
-
- dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
- ASSERT(!ds->ds_large_blocks);
- ds->ds_large_blocks = B_TRUE;
- dsl_dataset_rele(ds, FTAG);
-}
-
-int
-dsl_dataset_activate_large_blocks(const char *dsname)
-{
- int error;
-
- error = dsl_sync_task(dsname,
- dsl_dataset_activate_large_blocks_check,
- dsl_dataset_activate_large_blocks_sync, (void *)dsname,
- 1, ZFS_SPACE_CHECK_RESERVED);
-
- /*
- * EALREADY indicates that this dataset already supports large blocks.
- */
- if (error == EALREADY)
- error = 0;
- return (error);
-}
-
/*
* Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
* For example, they could both be snapshots of the same filesystem, and
@@ -3401,12 +3431,12 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
boolean_t ret;
ASSERT(dsl_pool_config_held(dp));
- ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
+ ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
if (earlier_txg == 0)
earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
- if (dsl_dataset_is_snapshot(later) &&
+ if (later->ds_is_snapshot &&
earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
return (B_FALSE);
@@ -3427,7 +3457,6 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
return (ret);
}
-
void
dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
index 4ac562b..d26c6cd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/dsl_dataset.h>
@@ -119,6 +120,8 @@ dsl_deadlist_close(dsl_deadlist_t *dl)
void *cookie = NULL;
dsl_deadlist_entry_t *dle;
+ dl->dl_os = NULL;
+
if (dl->dl_oldfmt) {
dl->dl_oldfmt = B_FALSE;
bpobj_close(&dl->dl_bpobj);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
index 2912e33..9e9c19d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
@@ -561,7 +561,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
SPA_VERSION_DELEGATED_PERMS)
return (SET_ERROR(EPERM));
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
/*
		 * Snapshots are treated as descendants only,
* local permissions do not apply.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
index a776e14..c7a623c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
*/
@@ -51,7 +51,7 @@ typedef struct dmu_snapshots_destroy_arg {
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
- if (!dsl_dataset_is_snapshot(ds))
+ if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
if (dsl_dataset_long_held(ds))
@@ -267,9 +267,11 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
obj = ds->ds_object;
- if (ds->ds_large_blocks) {
- ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS));
- spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f]) {
+ dsl_dataset_deactivate_feature(obj, f, tx);
+ ds->ds_feature_inuse[f] = B_FALSE;
+ }
}
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
ASSERT3P(ds->ds_prev, ==, NULL);
@@ -354,7 +356,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dsl_dataset_remove_clones_key(ds,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
- if (dsl_dataset_is_snapshot(ds_next)) {
+ if (ds_next->ds_is_snapshot) {
dsl_dataset_t *ds_nextnext;
/*
@@ -552,7 +554,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
@@ -604,8 +606,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
uint64_t count;
objset_t *mos;
- ASSERT(!dsl_dataset_is_snapshot(ds));
- if (dsl_dataset_is_snapshot(ds))
+ ASSERT(!ds->ds_is_snapshot);
+ if (ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
if (refcount_count(&ds->ds_longholds) != expected_holds)
@@ -736,12 +738,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT0(ds->ds_reserved);
}
- if (ds->ds_large_blocks)
- spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
+ obj = ds->ds_object;
- dsl_scan_ds_destroyed(ds, tx);
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f]) {
+ dsl_dataset_deactivate_feature(obj, f, tx);
+ ds->ds_feature_inuse[f] = B_FALSE;
+ }
+ }
- obj = ds->ds_object;
+ dsl_scan_ds_destroyed(ds, tx);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
/* This is a clone */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
index 26f5c2d..4b268da 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
@@ -24,6 +24,7 @@
* All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -131,14 +132,15 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
-/* ARGSUSED */
static void
-dsl_dir_evict(dmu_buf_t *db, void *arg)
+dsl_dir_evict(void *dbu)
{
- dsl_dir_t *dd = arg;
+ dsl_dir_t *dd = dbu;
dsl_pool_t *dp = dd->dd_pool;
int t;
+ dd->dd_dbuf = NULL;
+
for (t = 0; t < TXG_SIZE; t++) {
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
ASSERT(dd->dd_tempreserved[t] == 0);
@@ -146,9 +148,9 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
- dsl_dir_rele(dd->dd_parent, dd);
+ dsl_dir_async_rele(dd->dd_parent, dd);
- spa_close(dd->dd_pool->dp_spa, dd);
+ spa_async_close(dd->dd_pool->dp_spa, dd);
/*
* The props callback list should have been cleaned up by
@@ -244,8 +246,9 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_buf_rele(origin_bonus, FTAG);
}
- winner = dmu_buf_set_user_ie(dbuf, dd, dsl_dir_evict);
- if (winner) {
+ dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
+ winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
+ if (winner != NULL) {
if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
@@ -289,6 +292,21 @@ dsl_dir_rele(dsl_dir_t *dd, void *tag)
dmu_buf_rele(dd->dd_dbuf, tag);
}
+/*
+ * Remove a reference to the given dsl dir that is being asynchronously
+ * released. Async releases occur from a taskq performing eviction of
+ * dsl datasets and dirs. This process is identical to a normal release
+ * with the exception of using the async API for releasing the reference on
+ * the spa.
+ */
+void
+dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
+{
+ dprintf_dd(dd, "%s\n", "");
+ spa_async_close(dd->dd_pool->dp_spa, tag);
+ dmu_buf_rele(dd->dd_dbuf, tag);
+}
+
/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
@@ -419,7 +437,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
}
while (next != NULL) {
- dsl_dir_t *child_ds;
+ dsl_dir_t *child_dd;
err = getcomponent(next, buf, &nextnext);
if (err != 0)
break;
@@ -438,11 +456,11 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
break;
}
- err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
+ err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
if (err != 0)
break;
dsl_dir_rele(dd, tag);
- dd = child_ds;
+ dd = child_dd;
next = nextnext;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
index 3e9710d..2bb74c8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/dsl_pool.h>
@@ -418,9 +419,18 @@ dsl_pool_close(dsl_pool_t *dp)
txg_list_destroy(&dp->dp_sync_tasks);
txg_list_destroy(&dp->dp_dirty_dirs);
- arc_flush(dp->dp_spa);
+ /*
+ * We can't set retry to TRUE since we're explicitly specifying
+ * a spa to flush. This is good enough; any missed buffers for
+ * this spa won't cause trouble, and they'll eventually fall
+ * out of the ARC just like any other unused buffer.
+ */
+ arc_flush(dp->dp_spa, FALSE);
+
txg_fini(dp);
dsl_scan_fini(dp);
+ dmu_buf_user_evict_wait();
+
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_vnrele_taskq);
@@ -852,7 +862,7 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dp->dp_origin_snap != NULL);
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
- tx, DS_FIND_CHILDREN));
+ tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE));
}
/* ARGSUSED */
@@ -906,7 +916,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
- upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
+ upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE));
}
void
@@ -1140,6 +1150,13 @@ dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
}
void
+dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag)
+{
+ ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
+ rrw_enter_read_prio(&dp->dp_config_rwlock, tag);
+}
+
+void
dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
{
rrw_exit(&dp->dp_config_rwlock, tag);
@@ -1150,3 +1167,9 @@ dsl_pool_config_held(dsl_pool_t *dp)
{
return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
}
+
+boolean_t
+dsl_pool_config_held_writer(dsl_pool_t *dp)
+{
+ return (RRW_WRITE_HELD(&dp->dp_config_rwlock));
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
index 398c571..094b07d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c
@@ -162,19 +162,17 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
{
zfs_prop_t prop = zfs_name_to_prop(propname);
boolean_t inheritable;
- boolean_t snapshot;
uint64_t zapobj;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
- snapshot = dsl_dataset_is_snapshot(ds);
zapobj = dsl_dataset_phys(ds)->ds_props_obj;
if (zapobj != 0) {
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int err;
- ASSERT(snapshot);
+ ASSERT(ds->ds_is_snapshot);
/* Check for a local value. */
err = zap_lookup(mos, zapobj, propname, intsz, numints, buf);
@@ -214,7 +212,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
}
return (dsl_prop_get_dd(ds->ds_dir, propname,
- intsz, numints, buf, setpoint, snapshot));
+ intsz, numints, buf, setpoint, ds->ds_is_snapshot));
}
/*
@@ -442,9 +440,31 @@ dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
uint64_t value;
+ /*
+ * Callback entries do not have holds on their datasets
+ * so that datasets with registered callbacks are still
+ * eligible for eviction. Unlike operations on callbacks
+ * for a single dataset, we are performing a recursive
+ * descent of related datasets and the calling context
+ * for this iteration only has a dataset hold on the root.
+ * Without a hold, the callback's pointer to the dataset
+ * could be invalidated by eviction at any time.
+ *
+ * Use dsl_dataset_try_add_ref() to verify that the
+ * dataset has not begun eviction processing and to
+ * prevent eviction from occurring for the duration
+ * of the callback. If the hold attempt fails, this
+ * object is already being evicted and the callback can
+ * be safely ignored.
+ */
+ if (!dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
+ continue;
+
if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
sizeof (value), 1, &value, NULL) == 0)
cbr->cbr_func(cbr->cbr_arg, value);
+
+ dsl_dataset_rele(cbr->cbr_ds, FTAG);
}
mutex_exit(&dd->dd_lock);
@@ -497,19 +517,28 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- uint64_t propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj;
+ uint64_t propobj;
- if (strcmp(cbr->cbr_propname, propname) != 0)
+ /*
+		 * cbr->cbr_ds may be invalidated due to eviction,
+ * requiring the use of dsl_dataset_try_add_ref().
+ * See comment block in dsl_prop_notify_all_cb()
+ * for details.
+ */
+ if (strcmp(cbr->cbr_propname, propname) != 0 ||
+ !dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
continue;
+ propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj;
+
/*
- * If the property is set on this ds, then it is not
- * inherited here; don't call the callback.
+ * If the property is not set on this ds, then it is
+ * inherited here; call the callback.
*/
- if (propobj && 0 == zap_contains(mos, propobj, propname))
- continue;
+ if (propobj == 0 || zap_contains(mos, propobj, propname) != 0)
+ cbr->cbr_func(cbr->cbr_arg, value);
- cbr->cbr_func(cbr->cbr_arg, value);
+ dsl_dataset_rele(cbr->cbr_ds, FTAG);
}
mutex_exit(&dd->dd_lock);
@@ -544,7 +573,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
isint = (dodefault(propname, 8, 1, &intval) == 0);
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
ASSERT(version >= SPA_VERSION_SNAP_PROPS);
if (dsl_dataset_phys(ds)->ds_props_obj == 0) {
dmu_buf_will_dirty(ds->ds_dbuf, tx);
@@ -641,7 +670,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
if (isint) {
VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
dsl_prop_cb_record_t *cbr;
/*
* It's a snapshot; nothing can inherit this
@@ -759,7 +788,7 @@ dsl_props_set_check(void *arg, dmu_tx_t *tx)
}
}
- if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
+ if (ds->ds_is_snapshot && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(ENOTSUP));
}
@@ -982,7 +1011,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- if (dsl_dataset_is_snapshot(ds))
+ if (ds->ds_is_snapshot)
flags |= DSL_PROP_GET_SNAPSHOT;
ASSERT(dsl_pool_config_held(dp));
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index fcdb9a9..06cfced 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -409,7 +409,7 @@ static uint64_t
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
{
uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
- if (dsl_dataset_is_snapshot(ds))
+ if (ds->ds_is_snapshot)
return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg));
return (smt);
}
@@ -609,7 +609,8 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
* If we already visited this bp & everything below (in
* a prior txg sync), don't bother doing it again.
*/
- if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+ if (zbookmark_subtree_completed(dnp, zb,
+ &scn->scn_phys.scn_bookmark))
return (B_TRUE);
/*
@@ -845,7 +846,7 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
return;
if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
/* Note, scn_cur_{min,max}_txg stays the same. */
scn->scn_phys.scn_bookmark.zb_objset =
dsl_dataset_phys(ds)->ds_next_snap_obj;
@@ -867,7 +868,7 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
- if (dsl_dataset_is_snapshot(ds)) {
+ if (ds->ds_is_snapshot) {
/*
* We keep the same mintxg; it could be >
* ds_creation_txg if the previous snapshot was
@@ -1051,7 +1052,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
* ZIL here, rather than in scan_recurse(), because the regular
* snapshot block-sharing rules don't apply to it.
*/
- if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds))
+ if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !ds->ds_is_snapshot)
dsl_scan_zil(dp, &os->os_zil_header);
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
index 0985ec9..a67db5f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
@@ -352,7 +352,7 @@ dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
objset_t *mos;
int numholds;
- if (!dsl_dataset_is_snapshot(ds))
+ if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
if (nvlist_empty(holds))
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
new file mode 100644
index 0000000..8296057
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
@@ -0,0 +1,366 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/multilist.h>
+
+/* needed for spa_get_random() */
+#include <sys/spa.h>
+
+/*
+ * Given an object contained on the list, return a pointer to the
+ * multilist_node_t structure it embeds.
+ */
+static multilist_node_t *
+multilist_d2l(multilist_t *ml, void *obj)
+{
+ return ((multilist_node_t *)((char *)obj + ml->ml_offset));
+}
+
+/*
+ * Initialize a new multilist using the parameters specified.
+ *
+ * - 'size' denotes the size of the structure containing the
+ * multilist_node_t.
+ * - 'offset' denotes the byte offset of the multilist_node_t within
+ * the structure that contains it.
+ * - 'num' specifies the number of internal sublists to create.
+ * - 'index_func' is used to determine which sublist to insert into
+ * when the multilist_insert() function is called, as well as which
+ * sublist to remove from when multilist_remove() is called. The
+ * requirements this function must meet are the following:
+ *
+ * - It must always return the same value when called on the same
+ * object (to ensure the object is removed from the list it was
+ * inserted into).
+ *
+ * - It must return a value in the range [0, number of sublists).
+ * The multilist_get_num_sublists() function may be used to
+ * determine the number of sublists in the multilist.
+ *
+ * Also, in order to reduce internal contention between the sublists
+ * during insertion and removal, this function should choose evenly
+ * between all available sublists when inserting. This isn't a hard
+ * requirement, but a general rule of thumb for getting the best
+ * multi-threaded performance out of the data structure.
+ */
+void
+multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
+ multilist_sublist_index_func_t *index_func)
+{
+ ASSERT3P(ml, !=, NULL);
+ ASSERT3U(size, >, 0);
+ ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
+ ASSERT3U(num, >, 0);
+ ASSERT3P(index_func, !=, NULL);
+
+ ml->ml_offset = offset;
+ ml->ml_num_sublists = num;
+ ml->ml_index_func = index_func;
+
+ ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
+ ml->ml_num_sublists, KM_SLEEP);
+
+ ASSERT3P(ml->ml_sublists, !=, NULL);
+
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+ mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&mls->mls_list, size, offset);
+ }
+}
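A minimal sketch of an index_func satisfying the rules stated above multilist_create(): deterministic per object and always in range (illustrative, not part of the patch; the pointer hash is an assumption, real consumers typically hash a stable identity field):

static unsigned int
example_index_func(multilist_t *ml, void *obj)
{
	/* Same object always maps to the same, in-range sublist. */
	return (((uintptr_t)obj >> 7) % multilist_get_num_sublists(ml));
}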
+
+/*
+ * Destroy the given multilist object, and free up any memory it holds.
+ */
+void
+multilist_destroy(multilist_t *ml)
+{
+ ASSERT(multilist_is_empty(ml));
+
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+
+ ASSERT(list_is_empty(&mls->mls_list));
+
+ list_destroy(&mls->mls_list);
+ mutex_destroy(&mls->mls_lock);
+ }
+
+ ASSERT3P(ml->ml_sublists, !=, NULL);
+ kmem_free(ml->ml_sublists,
+ sizeof (multilist_sublist_t) * ml->ml_num_sublists);
+
+ ml->ml_num_sublists = 0;
+ ml->ml_offset = 0;
+}
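A create/destroy lifecycle sketch showing how 'size' and 'offset' relate to the embedding structure (illustrative, not part of the patch; example_obj_t and example_index_func are hypothetical):

typedef struct example_obj {
	uint64_t	 eo_id;
	multilist_node_t eo_node;	/* embedded link */
} example_obj_t;

multilist_t ml;
multilist_create(&ml, sizeof (example_obj_t),
    offsetof(example_obj_t, eo_node), 4, example_index_func);
/* ... insert and remove example_obj_t instances ... */
multilist_destroy(&ml);			/* all sublists must be empty */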
+
+/*
+ * Insert the given object into the multilist.
+ *
+ * This function will insert the object specified into the sublist
+ * determined using the function given at multilist creation time.
+ *
+ * The sublist locks are automatically acquired if not already held, to
+ * ensure consistency when inserting and removing from multiple threads.
+ */
+void
+multilist_insert(multilist_t *ml, void *obj)
+{
+ unsigned int sublist_idx = ml->ml_index_func(ml, obj);
+ multilist_sublist_t *mls;
+ boolean_t need_lock;
+
+ DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
+ unsigned int, sublist_idx, void *, obj);
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+
+ mls = &ml->ml_sublists[sublist_idx];
+
+ /*
+ * Note: Callers may already hold the sublist lock by calling
+ * multilist_sublist_lock(). Here we rely on MUTEX_HELD()
+ * returning TRUE if and only if the current thread holds the
+ * lock. While it's a little ugly to make the lock recursive in
+ * this way, it works and allows the calling code to be much
+ * simpler -- otherwise it would have to pass around a flag
+ * indicating that it already has the lock.
+ */
+ need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
+
+ multilist_sublist_insert_head(mls, obj);
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+}
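A sketch of the reentrant locking described in the comment above (illustrative, not part of the patch; assumes the index function maps obj to idx, so the lock already held is the one multilist_insert() would take):

multilist_sublist_t *mls = multilist_sublist_lock(&ml, idx);
/* ... traverse the sublist under mls_lock ... */
multilist_insert(&ml, obj);	/* MUTEX_HELD() sees our lock; no deadlock */
multilist_sublist_unlock(mls);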
+
+/*
+ * Remove the given object from the multilist.
+ *
+ * This function will remove the object specified from the sublist
+ * determined using the function given at multilist creation time.
+ *
+ * The necessary sublist locks are automatically acquired, to ensure
+ * consistency when inserting and removing from multiple threads.
+ */
+void
+multilist_remove(multilist_t *ml, void *obj)
+{
+ unsigned int sublist_idx = ml->ml_index_func(ml, obj);
+ multilist_sublist_t *mls;
+ boolean_t need_lock;
+
+ DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
+ unsigned int, sublist_idx, void *, obj);
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+
+ mls = &ml->ml_sublists[sublist_idx];
+ /* See comment in multilist_insert(). */
+ need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
+
+ multilist_sublist_remove(mls, obj);
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+}
+
+/*
+ * Check to see if this multilist object is empty.
+ *
+ * This will return TRUE if it finds all of the sublists of this
+ * multilist to be empty, and FALSE otherwise. Each sublist lock will be
+ * automatically acquired as necessary.
+ *
+ * If concurrent insertions and removals are occurring, the semantics
+ * of this function become a little fuzzy. Instead of locking all
+ * sublists for the entire call time of the function, each sublist is
+ * only locked as it is individually checked for emptiness. Thus, it's
+ * possible for this function to return TRUE with non-empty sublists at
+ * the time the function returns. This would be due to another thread
+ * inserting into a given sublist, after that specific sublist was checked
+ * and deemed empty, but before all sublists have been checked.
+ */
+int
+multilist_is_empty(multilist_t *ml)
+{
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
+ multilist_sublist_t *mls = &ml->ml_sublists[i];
+ /* See comment in multilist_insert(). */
+ boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
+
+ if (need_lock)
+ mutex_enter(&mls->mls_lock);
+
+ if (!list_is_empty(&mls->mls_list)) {
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+
+ return (FALSE);
+ }
+
+ if (need_lock)
+ mutex_exit(&mls->mls_lock);
+ }
+
+ return (TRUE);
+}
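A caller that needs a race-free answer can pin every sublist first; multilist_is_empty() then skips re-acquisition via MUTEX_HELD() and the result cannot be raced. A sketch (illustrative, not part of the patch; uses a C99 VLA for brevity):

unsigned int n = multilist_get_num_sublists(&ml);
multilist_sublist_t *held[n];
boolean_t empty;

for (unsigned int i = 0; i < n; i++)
	held[i] = multilist_sublist_lock(&ml, i);
empty = multilist_is_empty(&ml);	/* stable: all sublists pinned */
for (unsigned int i = 0; i < n; i++)
	multilist_sublist_unlock(held[i]);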
+
+/* Return the number of sublists composing this multilist */
+unsigned int
+multilist_get_num_sublists(multilist_t *ml)
+{
+ return (ml->ml_num_sublists);
+}
+
+/* Return a randomly selected, valid sublist index for this multilist */
+unsigned int
+multilist_get_random_index(multilist_t *ml)
+{
+ return (spa_get_random(ml->ml_num_sublists));
+}
+
+/* Lock and return the sublist specified at the given index */
+multilist_sublist_t *
+multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
+{
+ multilist_sublist_t *mls;
+
+ ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
+ mls = &ml->ml_sublists[sublist_idx];
+ mutex_enter(&mls->mls_lock);
+
+ return (mls);
+}
+
+void
+multilist_sublist_unlock(multilist_sublist_t *mls)
+{
+ mutex_exit(&mls->mls_lock);
+}
+
+/*
+ * We're allowing any object to be inserted into this specific sublist,
+ * but this can lead to trouble if multilist_remove() is called to
+ * remove this object. Specifically, if calling ml_index_func on this
+ * object returns an index for a sublist different from the one passed as
+ * a parameter here, any call to multilist_remove() with this newly
+ * inserted object is undefined! (the call to multilist_remove() will
+ * remove the object from a list that it isn't contained in)
+ */
+void
+multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_head(&mls->mls_list, obj);
+}
+
+/* please see comment above multilist_sublist_insert_head */
+void
+multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_insert_tail(&mls->mls_list, obj);
+}
+
+/*
+ * Move the object one element forward in the list.
+ *
+ * This function will move the given object forward in the list (towards
+ * the head) by one object. So, in essence, it will swap its position in
+ * the list with its "prev" pointer. If the given object is already at the
+ * head of the list, it cannot be moved forward any more than it already
+ * is, so no action is taken.
+ *
+ * NOTE: This function **must not** remove any object from the list other
+ * than the object given as the parameter. This is relied upon in
+ * arc_evict_state_impl().
+ */
+void
+multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
+{
+ void *prev = list_prev(&mls->mls_list, obj);
+
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ ASSERT(!list_is_empty(&mls->mls_list));
+
+ /* 'obj' is already at the head of the list; nothing to do */
+ if (prev == NULL)
+ return;
+
+ list_remove(&mls->mls_list, obj);
+ list_insert_before(&mls->mls_list, prev, obj);
+}
+
+void
+multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ list_remove(&mls->mls_list, obj);
+}
+
+void *
+multilist_sublist_head(multilist_sublist_t *mls)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_head(&mls->mls_list));
+}
+
+void *
+multilist_sublist_tail(multilist_sublist_t *mls)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_tail(&mls->mls_list));
+}
+
+void *
+multilist_sublist_next(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_next(&mls->mls_list, obj));
+}
+
+void *
+multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
+{
+ ASSERT(MUTEX_HELD(&mls->mls_lock));
+ return (list_prev(&mls->mls_list, obj));
+}
+
+void
+multilist_link_init(multilist_node_t *link)
+{
+ list_link_init(link);
+}
+
+int
+multilist_link_active(multilist_node_t *link)
+{
+ return (list_link_active(link));
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
index e3aa01a..51394c0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c
@@ -159,8 +159,8 @@ rrw_destroy(rrwlock_t *rrl)
refcount_destroy(&rrl->rr_linked_rcount);
}
-void
-rrw_enter_read(rrwlock_t *rrl, void *tag)
+static void
+rrw_enter_read_impl(rrwlock_t *rrl, boolean_t prio, void *tag)
{
mutex_enter(&rrl->rr_lock);
#if !defined(DEBUG) && defined(_KERNEL)
@@ -176,7 +176,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
- refcount_is_zero(&rrl->rr_anon_rcount) &&
+ refcount_is_zero(&rrl->rr_anon_rcount) && !prio &&
rrn_find(rrl) == NULL))
cv_wait(&rrl->rr_cv, &rrl->rr_lock);
@@ -192,6 +192,25 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
}
void
+rrw_enter_read(rrwlock_t *rrl, void *tag)
+{
+ rrw_enter_read_impl(rrl, B_FALSE, tag);
+}
+
+/*
+ * Take a read lock even if there are pending write lock requests. If we want
+ * to take the lock reentrantly, but from different threads (that have a
+ * relationship to each other), the normal detection mechanism to overrule
+ * the pending writer does not work, so we have to give an explicit hint here.
+ */
+void
+rrw_enter_read_prio(rrwlock_t *rrl, void *tag)
+{
+ rrw_enter_read_impl(rrl, B_TRUE, tag);
+}
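A usage sketch (illustrative, not part of the patch): a helper thread doing read-side work on behalf of a thread that already holds the lock as reader uses the _prio variant, since rrn_find() cannot match holds across threads and a queued writer would otherwise wedge the pair:

	rrw_enter_read_prio(&rrl, FTAG);
	/* ... read-side work delegated by the original reader ... */
	rrw_exit(&rrl, FTAG);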
+
+void
rrw_enter_write(rrwlock_t *rrl)
{
mutex_enter(&rrl->rr_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
index e40361e..90355a9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
@@ -23,6 +23,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 iXsystems, Inc
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -210,12 +211,6 @@ sa_cache_constructor(void *buf, void *unused, int kmflag)
{
sa_handle_t *hdl = buf;
- hdl->sa_bonus_tab = NULL;
- hdl->sa_spill_tab = NULL;
- hdl->sa_os = NULL;
- hdl->sa_userp = NULL;
- hdl->sa_bonus = NULL;
- hdl->sa_spill = NULL;
mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
return (0);
}
@@ -1304,10 +1299,10 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
}
/*ARGSUSED*/
-void
-sa_evict(dmu_buf_t *db, void *sap)
+static void
+sa_evict(void *dbu)
{
- panic("evicting sa dbuf %p\n", (void *)db);
+ panic("evicting sa dbuf\n");
}
static void
@@ -1346,18 +1341,16 @@ sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
void
sa_handle_destroy(sa_handle_t *hdl)
{
+ dmu_buf_t *db = hdl->sa_bonus;
+
mutex_enter(&hdl->sa_lock);
- (void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl,
- NULL, NULL);
+ (void) dmu_buf_remove_user(db, &hdl->sa_dbu);
- if (hdl->sa_bonus_tab) {
+ if (hdl->sa_bonus_tab)
sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
- hdl->sa_bonus_tab = NULL;
- }
- if (hdl->sa_spill_tab) {
+
+ if (hdl->sa_spill_tab)
sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
- hdl->sa_spill_tab = NULL;
- }
dmu_buf_rele(hdl->sa_bonus, NULL);
@@ -1374,7 +1367,7 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
{
int error = 0;
dmu_object_info_t doi;
- sa_handle_t *handle;
+ sa_handle_t *handle = NULL;
#ifdef ZFS_DEBUG
dmu_object_info_from_db(db, &doi);
@@ -1384,22 +1377,31 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
/* find handle, if it exists */
/* if one doesn't exist then create a new one, and initialize it */
- handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL;
+ if (hdl_type == SA_HDL_SHARED)
+ handle = dmu_buf_get_user(db);
+
if (handle == NULL) {
- sa_handle_t *newhandle;
+ sa_handle_t *winner = NULL;
+
handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
+ handle->sa_dbu.dbu_evict_func = NULL;
handle->sa_userp = userp;
handle->sa_bonus = db;
handle->sa_os = os;
handle->sa_spill = NULL;
+ handle->sa_bonus_tab = NULL;
+ handle->sa_spill_tab = NULL;
error = sa_build_index(handle, SA_BONUS);
- newhandle = (hdl_type == SA_HDL_SHARED) ?
- dmu_buf_set_user_ie(db, handle, sa_evict) : NULL;
- if (newhandle != NULL) {
+ if (hdl_type == SA_HDL_SHARED) {
+ dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
+ winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
+ }
+
+ if (winner != NULL) {
kmem_cache_free(sa_cache, handle);
- handle = newhandle;
+ handle = winner;
}
}
*handlepp = handle;
@@ -1917,14 +1919,6 @@ sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
}
void
-sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl)
-{
- (void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus,
- oldhdl, newhdl, sa_evict);
- oldhdl->sa_bonus = NULL;
-}
-
-void
sa_set_userp(sa_handle_t *hdl, void *ptr)
{
hdl->sa_userp = ptr;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index c07737e..b57eb95 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
/*
@@ -1127,6 +1128,8 @@ spa_activate(spa_t *spa, int mode)
list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_config_dirty_node));
+ list_create(&spa->spa_evicting_os_list, sizeof (objset_t),
+ offsetof(objset_t, os_evicting_node));
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_state_dirty_node));
@@ -1159,9 +1162,12 @@ spa_deactivate(spa_t *spa)
*/
trim_thread_destroy(spa);
+ spa_evicting_os_wait(spa);
+
txg_list_destroy(&spa->spa_vdev_txg_list);
list_destroy(&spa->spa_config_dirty_list);
+ list_destroy(&spa->spa_evicting_os_list);
list_destroy(&spa->spa_state_dirty_list);
for (int t = 0; t < ZIO_TYPES; t++) {
@@ -1624,6 +1630,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db);
if (error != 0)
return (error);
+
nvsize = *(uint64_t *)db->db_data;
dmu_buf_rele(db, FTAG);
@@ -1783,13 +1790,14 @@ static boolean_t
spa_check_logs(spa_t *spa)
{
boolean_t rv = B_FALSE;
+ dsl_pool_t *dp = spa_get_dsl(spa);
switch (spa->spa_log_state) {
case SPA_LOG_MISSING:
/* need to recheck in case slog has been restored */
case SPA_LOG_UNKNOWN:
- rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
- NULL, DS_FIND_CHILDREN) != 0);
+ rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0);
if (rv)
spa_set_log_state(spa, SPA_LOG_MISSING);
break;
@@ -1933,7 +1941,7 @@ static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
/*
* Note: normally this routine will not be called if
@@ -2176,6 +2184,11 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
mosconfig, &ereport);
}
+ /*
+ * Don't count references from objsets that are already closed
+ * and are making their way through the eviction process.
+ */
+ spa_evicting_os_wait(spa);
spa->spa_minref = refcount_count(&spa->spa_refcount);
if (error) {
if (error != EEXIST) {
@@ -2741,7 +2754,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
return (SET_ERROR(ENXIO));
- if (spa_check_logs(spa)) {
+ if (spa_writeable(spa) && spa_check_logs(spa)) {
*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
}
@@ -2772,6 +2785,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa->spa_load_max_txg == UINT64_MAX)) {
dmu_tx_t *tx;
int need_update = B_FALSE;
+ dsl_pool_t *dp = spa_get_dsl(spa);
ASSERT(state != SPA_LOAD_TRYIMPORT);
@@ -2784,9 +2798,8 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
*/
spa->spa_claiming = B_TRUE;
- tx = dmu_tx_create_assigned(spa_get_dsl(spa),
- spa_first_txg(spa));
- (void) dmu_objset_find(spa_name(spa),
+ tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
+ (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
zil_claim, tx, DS_FIND_CHILDREN);
dmu_tx_commit(tx);
@@ -3761,6 +3774,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_history_log_version(spa, "create");
+ /*
+ * Don't count references from objsets that are already closed
+ * and are making their way through the eviction process.
+ */
+ spa_evicting_os_wait(spa);
spa->spa_minref = refcount_count(&spa->spa_refcount);
mutex_exit(&spa_namespace_lock);
@@ -4496,6 +4514,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
* have to force it to sync before checking spa_refcnt.
*/
txg_wait_synced(spa->spa_dsl_pool, 0);
+ spa_evicting_os_wait(spa);
/*
* A pool cannot be exported or destroyed if there are active
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index bc84a08..baffee0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -603,6 +604,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -611,6 +613,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
@@ -723,6 +726,7 @@ spa_remove(spa_t *spa)
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
+ ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
nvlist_free(spa->spa_config_splitting);
@@ -767,6 +771,7 @@ spa_remove(spa_t *spa)
bplist_destroy(&spa->spa_free_bplist[t]);
cv_destroy(&spa->spa_async_cv);
+ cv_destroy(&spa->spa_evicting_os_cv);
cv_destroy(&spa->spa_proc_cv);
cv_destroy(&spa->spa_scrub_io_cv);
cv_destroy(&spa->spa_suspend_cv);
@@ -774,6 +779,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_async_lock);
mutex_destroy(&spa->spa_errlist_lock);
mutex_destroy(&spa->spa_errlog_lock);
+ mutex_destroy(&spa->spa_evicting_os_lock);
mutex_destroy(&spa->spa_history_lock);
mutex_destroy(&spa->spa_proc_lock);
mutex_destroy(&spa->spa_props_lock);
@@ -830,6 +836,20 @@ spa_close(spa_t *spa, void *tag)
}
/*
+ * Remove a reference to the given spa_t held by a dsl dir that is
+ * being asynchronously released. Async releases occur from a taskq
+ * performing eviction of dsl datasets and dirs. The namespace lock
+ * isn't held and the hold by the object being evicted may contribute to
+ * spa_minref (e.g. dataset or directory released during pool export),
+ * so the asserts in spa_close() do not apply.
+ */
+void
+spa_async_close(spa_t *spa, void *tag)
+{
+ (void) refcount_remove(&spa->spa_refcount, tag);
+}
+
+/*
* Check to see if the spa refcount is zero. Must be called with
* spa_namespace_lock held. We really compare against spa_minref, which is the
* number of references acquired when opening a pool
@@ -1725,6 +1745,34 @@ spa_log_class(spa_t *spa)
return (spa->spa_log_class);
}
+void
+spa_evicting_os_register(spa_t *spa, objset_t *os)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ list_insert_head(&spa->spa_evicting_os_list, os);
+ mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_deregister(spa_t *spa, objset_t *os)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ list_remove(&spa->spa_evicting_os_list, os);
+ cv_broadcast(&spa->spa_evicting_os_cv);
+ mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_wait(spa_t *spa)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ while (!list_is_empty(&spa->spa_evicting_os_list))
+ cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
+ mutex_exit(&spa->spa_evicting_os_lock);
+
+ dmu_buf_user_evict_wait();
+}
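The three functions above form a simple barrier. A lifecycle sketch (illustrative, not part of the patch):

	spa_evicting_os_register(spa, os);	/* objset enters async eviction */
	/* ... eviction taskq tears the objset down ... */
	spa_evicting_os_deregister(spa, os);	/* cv_broadcast() wakes waiters */

	/* Teardown and export paths block until the list drains: */
	spa_evicting_os_wait(spa);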
+
int
spa_max_replication(spa_t *spa)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
index aeac124..1ea829f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
@@ -80,8 +80,8 @@ space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
mutex_exit(sm->sm_lock);
if (end > bufsize) {
- dmu_prefetch(sm->sm_os, space_map_object(sm), bufsize,
- end - bufsize);
+ dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
+ end - bufsize, ZIO_PRIORITY_SYNC_READ);
}
mutex_enter(sm->sm_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
index eb21867..04a80f7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
@@ -37,6 +37,12 @@ extern "C" {
#include <sys/dmu.h>
#include <sys/spa.h>
+/*
+ * Used by arc_flush() to inform arc_evict_state() that it should evict
+ * all available buffers from the arc state being passed in.
+ */
+#define ARC_EVICT_ALL -1ULL
+
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
@@ -58,22 +64,30 @@ typedef enum arc_flags
ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */
ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */
ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */
+ ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 7, /* I/O from zfetch */
/*
* Private ARC flags. These flags are private ARC only flags that
* will show up in b_flags in the arc_hdr_buf_t. These flags should
* only be set by ARC code.
*/
- ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */
- ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */
- ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */
- ARC_FLAG_FREED_IN_READ = 1 << 10, /* freed during read */
- ARC_FLAG_BUF_AVAILABLE = 1 << 11, /* block not in use */
- ARC_FLAG_INDIRECT = 1 << 12, /* indirect block */
- ARC_FLAG_FREE_IN_PROGRESS = 1 << 13, /* about to be freed */
- ARC_FLAG_L2_WRITING = 1 << 14, /* write in progress */
- ARC_FLAG_L2_EVICTED = 1 << 15, /* evicted during I/O */
- ARC_FLAG_L2_WRITE_HEAD = 1 << 16, /* head of write list */
+ ARC_FLAG_IN_HASH_TABLE = 1 << 8, /* buffer is hashed */
+ ARC_FLAG_IO_IN_PROGRESS = 1 << 9, /* I/O in progress */
+ ARC_FLAG_IO_ERROR = 1 << 10, /* I/O failed for buf */
+ ARC_FLAG_FREED_IN_READ = 1 << 11, /* freed during read */
+ ARC_FLAG_BUF_AVAILABLE = 1 << 12, /* block not in use */
+ ARC_FLAG_INDIRECT = 1 << 13, /* indirect block */
+ /* Indicates that block was read with ASYNC priority. */
+ ARC_FLAG_PRIO_ASYNC_READ = 1 << 14,
+ ARC_FLAG_L2_WRITING = 1 << 15, /* write in progress */
+ ARC_FLAG_L2_EVICTED = 1 << 16, /* evicted during I/O */
+ ARC_FLAG_L2_WRITE_HEAD = 1 << 17, /* head of write list */
+ /* indicates that the buffer contains metadata (otherwise, data) */
+ ARC_FLAG_BUFC_METADATA = 1 << 18,
+
+ /* Flags specifying whether optional hdr struct fields are defined */
+ ARC_FLAG_HAS_L1HDR = 1 << 19,
+ ARC_FLAG_HAS_L2HDR = 1 << 20,
} arc_flags_t;
struct arc_buf {
@@ -96,6 +110,7 @@ typedef enum arc_buf_contents {
*/
typedef enum arc_space_type {
ARC_SPACE_DATA,
+ ARC_SPACE_META,
ARC_SPACE_HDRS,
ARC_SPACE_L2HDRS,
ARC_SPACE_OTHER,
@@ -134,7 +149,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv);
boolean_t arc_clear_callback(arc_buf_t *buf);
-void arc_flush(spa_t *spa);
+void arc_flush(spa_t *spa, boolean_t retry);
void arc_tempreserve_clear(uint64_t reserve);
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bpobj.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bpobj.h
index af975c7..2a36519 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bpobj.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bpobj.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#ifndef _SYS_BPOBJ_H
@@ -77,7 +77,6 @@ void bpobj_close(bpobj_t *bpo);
int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *);
-int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h
new file mode 100644
index 0000000..63722df
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h
@@ -0,0 +1,54 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#ifndef _BQUEUE_H
+#define _BQUEUE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+
+typedef struct bqueue {
+ list_t bq_list;
+ kmutex_t bq_lock;
+ kcondvar_t bq_add_cv;
+ kcondvar_t bq_pop_cv;
+ uint64_t bq_size;
+ uint64_t bq_maxsize;
+ size_t bq_node_offset;
+} bqueue_t;
+
+typedef struct bqueue_node {
+ list_node_t bqn_node;
+ uint64_t bqn_size;
+} bqueue_node_t;
+
+
+int bqueue_init(bqueue_t *, uint64_t, size_t);
+void bqueue_destroy(bqueue_t *);
+void bqueue_enqueue(bqueue_t *, void *, uint64_t);
+void *bqueue_dequeue(bqueue_t *);
+boolean_t bqueue_empty(bqueue_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BQUEUE_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
index 319406a..482ccb0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_DBUF_H
@@ -226,9 +227,8 @@ typedef struct dmu_buf_impl {
/* Data which is unique to data (leaf) blocks: */
- /* stuff we store for the user (see dmu_buf_set_user) */
- void *db_user_ptr;
- dmu_buf_evict_func_t *db_evict_func;
+ /* User callback information. */
+ dmu_buf_user_t *db_user;
uint8_t db_immediate_evict;
uint8_t db_freed_in_flight;
@@ -245,8 +245,7 @@ typedef struct dbuf_hash_table {
kmutex_t hash_mutexes[DBUF_MUTEXES];
} dbuf_hash_table_t;
-
-uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
+uint64_t dbuf_whichblock(struct dnode *di, int64_t level, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
void dbuf_create_bonus(struct dnode *dn);
@@ -258,18 +257,23 @@ void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
void *tag);
-int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
+int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp);
-void dbuf_prefetch(struct dnode *dn, uint64_t blkid, zio_priority_t prio);
+void dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
+ zio_priority_t prio, arc_flags_t aflags);
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
+boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
+ uint64_t blkid, void *tag);
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
-dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
+dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
+ uint64_t blkid);
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index 3881d24..3b055cc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -26,6 +26,7 @@
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2013 DEY Storage Systems, Inc.
* Copyright 2014 HybridCluster. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -41,11 +42,10 @@
* dmu_spa.h.
*/
-#include <sys/types.h>
-#include <sys/param.h>
+#include <sys/zfs_context.h>
#include <sys/cred.h>
-#include <sys/time.h>
#include <sys/fs/zfs.h>
+#include <sys/zio_priority.h>
#ifdef __cplusplus
extern "C" {
@@ -244,6 +244,7 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DS_FIND_SNAPSHOTS (1<<0)
#define DS_FIND_CHILDREN (1<<1)
+#define DS_FIND_SERIALIZE (1<<2)
/*
* The maximum number of bytes that can be accessed as part of one
@@ -293,8 +294,6 @@ typedef struct dmu_buf {
void *db_data; /* data in buffer */
} dmu_buf_t;
-typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
-
/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
@@ -462,7 +461,23 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **, int flags);
+
+/*
+ * Add a reference to a dmu buffer that has already been held via
+ * dmu_buf_hold() in the current context.
+ */
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
+
+/*
+ * Attempt to add a reference to a dmu buffer that is in an unknown state,
+ * using a pointer that may have been invalidated by eviction processing.
+ * The request will succeed if the passed in dbuf still represents the
+ * same os/object/blkid, is ineligible for eviction, and has at least
+ * one hold by a user other than the syncer.
+ */
+boolean_t dmu_buf_try_add_ref(dmu_buf_t *, objset_t *os, uint64_t object,
+ uint64_t blkid, void *tag);
+
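A revalidation sketch per the contract above (illustrative, not part of the patch; use_buffer() is hypothetical):

	if (dmu_buf_try_add_ref(db, os, object, blkid, FTAG)) {
		/* db is pinned and still names the same os/object/blkid. */
		use_buffer(db);
		dmu_buf_rele(db, FTAG);
	}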
void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);
@@ -477,39 +492,130 @@ uint64_t dmu_buf_refcount(dmu_buf_t *db);
* individually with dmu_buf_rele.
*/
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
+ uint64_t length, boolean_t read, void *tag,
+ int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
+typedef void dmu_buf_evict_func_t(void *user_ptr);
+
+/*
+ * A DMU buffer user object may be associated with a dbuf for the
+ * duration of its lifetime. This allows the user of a dbuf (client)
+ * to attach private data to a dbuf (e.g. in-core only data such as a
+ * dnode_children_t, zap_t, or zap_leaf_t) and be optionally notified
+ * when that dbuf has been evicted. Clients typically respond to the
+ * eviction notification by freeing their private data, thus ensuring
+ * the same lifetime for both dbuf and private data.
+ *
+ * The mapping from a dmu_buf_user_t to any client private data is the
+ * client's responsibility. All current consumers of the API with private
+ * data embed a dmu_buf_user_t as the first member of the structure for
+ * their private data. This allows conversions between the two types
+ * with a simple cast. Since the DMU buf user API never needs access
+ * to the private data, other strategies can be employed if necessary
+ * or convenient for the client (e.g. using container_of() to do the
+ * conversion for private data that cannot have the dmu_buf_user_t as
+ * its first member).
+ *
+ * Eviction callbacks are executed without the dbuf mutex held or any
+ * other type of mechanism to guarantee that the dbuf is still available.
+ * For this reason, users must assume the dbuf has already been freed
+ * and not reference the dbuf from the callback context.
+ *
+ * Users requesting "immediate eviction" are notified as soon as the dbuf
+ * is only referenced by dirty records (dirties == holds). Otherwise the
+ * notification occurs after eviction processing for the dbuf begins.
+ */
+typedef struct dmu_buf_user {
+ /*
+ * Asynchronous user eviction callback state.
+ */
+ taskq_ent_t dbu_tqent;
+
+ /* This instance's eviction function pointer. */
+ dmu_buf_evict_func_t *dbu_evict_func;
+#ifdef ZFS_DEBUG
+ /*
+ * Pointer to user's dbuf pointer. NULL for clients that do
+ * not associate a dbuf with their user data.
+ *
+ * The dbuf pointer is cleared upon eviction so as to catch
+ * use-after-evict bugs in clients.
+ */
+ dmu_buf_t **dbu_clear_on_evict_dbufp;
+#endif
+} dmu_buf_user_t;
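A client sketch of the contract above (illustrative, not part of the patch; my_private_t and my_evict are hypothetical): the dmu_buf_user_t is the first member so the eviction callback recovers the private data with a simple cast, and the callback never touches the dbuf itself:

typedef struct my_private {
	dmu_buf_user_t	mp_dbu;		/* must be first for the cast */
	uint64_t	mp_cached_value;
} my_private_t;

static void
my_evict(void *dbu)
{
	my_private_t *mp = dbu;
	/* The dbuf may already be freed; touch only private data. */
	kmem_free(mp, sizeof (my_private_t));
}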
+
+/*
+ * Initialize the given dmu_buf_user_t instance with the eviction function
+ * evict_func, to be called when the user is evicted.
+ *
+ * NOTE: This function should only be called once on a given dmu_buf_user_t.
+ * To allow enforcement of this, dbu must already be zeroed on entry.
+ */
+#ifdef __lint
+/* Very ugly, but it beats issuing suppression directives in many Makefiles. */
+extern void
+dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
+ dmu_buf_t **clear_on_evict_dbufp);
+#else /* __lint */
+static inline void
+dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
+ dmu_buf_t **clear_on_evict_dbufp)
+{
+ ASSERT(dbu->dbu_evict_func == NULL);
+ ASSERT(evict_func != NULL);
+ dbu->dbu_evict_func = evict_func;
+#ifdef ZFS_DEBUG
+ dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
+#endif
+}
+#endif /* __lint */
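An attach sketch mirroring the winner/loser pattern used by sa_handle_get_from_db() earlier in this diff (illustrative, not part of the patch; reuses the hypothetical my_private_t from above):

	my_private_t *mp = kmem_zalloc(sizeof (my_private_t), KM_SLEEP);
	dmu_buf_init_user(&mp->mp_dbu, my_evict, NULL);
	if (dmu_buf_set_user(db, &mp->mp_dbu) != NULL) {
		/* Lost the race; another user already owns the buffer. */
		kmem_free(mp, sizeof (my_private_t));
	}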
+
/*
- * Returns NULL on success, or the existing user ptr if it's already
- * been set.
+ * Attach user data to a dbuf and mark it for normal (when the dbuf's
+ * data is cleared or its reference count goes to zero) eviction processing.
*
- * user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
+ * Returns NULL on success, or the existing user if another user currently
+ * owns the buffer.
+ */
+void *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);
+
+/*
+ * Attach user data to a dbuf and mark it for immediate (its dirty and
+ * reference counts are equal) eviction processing.
*
- * If non-NULL, pageout func will be called when this buffer is being
- * excised from the cache, so that you can clean up the data structure
- * pointed to by user_ptr.
+ * Returns NULL on success, or the existing user if another user currently
+ * owns the buffer.
+ */
+void *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);
+
+/*
+ * Replace the current user of a dbuf.
*
- * dmu_evict_user() will call the pageout func for all buffers in a
- * objset with a given pageout func.
+ * If given the current user of a dbuf, replaces the dbuf's user with
+ * "new_user" and returns the user data pointer that was replaced.
+ * Otherwise returns the current, and unmodified, dbuf user pointer.
*/
-void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr,
- dmu_buf_evict_func_t *pageout_func);
+void *dmu_buf_replace_user(dmu_buf_t *db,
+ dmu_buf_user_t *old_user, dmu_buf_user_t *new_user);
+
/*
- * set_user_ie is the same as set_user, but request immediate eviction
- * when hold count goes to zero.
+ * Remove the specified user data for a DMU buffer.
+ *
+ * Returns the user that was removed on success, or the current user if
+ * another user currently owns the buffer.
*/
-void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
- dmu_buf_evict_func_t *pageout_func);
-void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
- void *user_ptr, dmu_buf_evict_func_t *pageout_func);
-void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
+void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
/*
- * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
+ * Returns the user data (dmu_buf_user_t *) associated with this dbuf.
*/
void *dmu_buf_get_user(dmu_buf_t *db);
+/* Block until any in-progress dmu buf user evictions complete. */
+void dmu_buf_user_evict_wait(void);
+
/*
* Returns the blkptr associated with this dbuf, or NULL if not set.
*/
@@ -638,14 +744,14 @@ void dmu_xuio_clear(struct xuio *uio, int i);
void xuio_stat_wbuf_copied();
void xuio_stat_wbuf_nocopy();
-extern int zfs_prefetch_disable;
+extern boolean_t zfs_prefetch_disable;
extern int zfs_max_recordsize;
/*
* Asynchronously try to read in the data.
*/
-void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t len);
+void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
+ uint64_t len, enum zio_priority pri);
typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */
@@ -810,6 +916,15 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
uint64_t *off);
/*
+ * Check if a DMU object has any dirty blocks. If so, sync out
+ * all pending transaction groups. Otherwise, this function
+ * does not alter DMU state. This could be improved to only sync
+ * out the necessary transaction groups for this particular
+ * object.
+ */
+int dmu_object_wait_synced(objset_t *os, uint64_t object);
+
+/*
* Initial setup and final teardown.
*/
extern void dmu_init(void);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
index 804f0c1..9e98350 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -74,22 +75,25 @@ struct objset {
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
/*
- * The following "special" dnodes have no parent and are exempt from
- * dnode_move(), but they root their descendents in this objset using
- * handles anyway, so that all access to dnodes from dbufs consistently
- * uses handles.
+ * The following "special" dnodes have no parent, are exempt
+ * from dnode_move(), and are not recorded in os_dnodes, but they
+ * root their descendents in this objset using handles anyway, so
+ * that all access to dnodes from dbufs consistently uses handles.
*/
dnode_handle_t os_meta_dnode;
dnode_handle_t os_userused_dnode;
dnode_handle_t os_groupused_dnode;
zilog_t *os_zil;
+ list_node_t os_evicting_node;
+
/* can change, under dsl_dir's locks: */
enum zio_checksum os_checksum;
enum zio_compress os_compress;
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
+ boolean_t os_evicting;
zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache;
@@ -138,6 +142,8 @@ struct objset {
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
+int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
+ dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_refresh_ownership(objset_t *os, void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
@@ -170,6 +176,8 @@ int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
int dmu_fsname(const char *snapname, char *buf);
+void dmu_objset_evict_done(objset_t *os);
+
void dmu_objset_init(void);
void dmu_objset_fini(void);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
index b03cb09..143d43f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
@@ -45,6 +45,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
#endif
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
+int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
+ uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok,
#ifdef illumos
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
index 78cadd2..6f61198 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h
@@ -23,8 +23,12 @@
* Use is subject to license terms.
*/
-#ifndef _DFETCH_H
-#define _DFETCH_H
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#ifndef _DMU_ZFETCH_H
+#define _DMU_ZFETCH_H
#include <sys/zfs_context.h>
@@ -36,41 +40,30 @@ extern uint64_t zfetch_array_rd_sz;
struct dnode; /* so we can reference dnode */
-typedef enum zfetch_dirn {
- ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
- ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
-} zfetch_dirn_t;
-
typedef struct zstream {
- uint64_t zst_offset; /* offset of starting block in range */
- uint64_t zst_len; /* length of range, in blocks */
- zfetch_dirn_t zst_direction; /* direction of prefetch */
- uint64_t zst_stride; /* length of stride, in blocks */
- uint64_t zst_ph_offset; /* prefetch offset, in blocks */
- uint64_t zst_cap; /* prefetch limit (cap), in blocks */
- kmutex_t zst_lock; /* protects stream */
- clock_t zst_last; /* lbolt of last prefetch */
- avl_node_t zst_node; /* embed avl node here */
+ uint64_t zs_blkid; /* expect next access at this blkid */
+ uint64_t zs_pf_blkid; /* next block to prefetch */
+ kmutex_t zs_lock; /* protects stream */
+ hrtime_t zs_atime; /* time last prefetch issued */
+ list_node_t zs_node; /* link for zf_stream */
} zstream_t;
typedef struct zfetch {
krwlock_t zf_rwlock; /* protects zfetch structure */
- list_t zf_stream; /* AVL tree of zstream_t's */
+ list_t zf_stream; /* list of zstream_t's */
struct dnode *zf_dnode; /* dnode that owns this zfetch */
- uint32_t zf_stream_cnt; /* # of active streams */
- uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
void zfetch_init(void);
void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
-void dmu_zfetch_rele(zfetch_t *);
-void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
+void dmu_zfetch_fini(zfetch_t *);
+void dmu_zfetch(zfetch_t *, uint64_t, uint64_t);
#ifdef __cplusplus
}
#endif
-#endif /* _DFETCH_H */
+#endif /* _DMU_ZFETCH_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
index 5668af1..69cc54d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_DNODE_H
@@ -255,6 +256,7 @@ typedef struct dnode_handle {
} dnode_handle_t;
typedef struct dnode_children {
+ dmu_buf_user_t dnc_dbu; /* User evict data */
size_t dnc_count; /* number of children */
dnode_handle_t dnc_children[]; /* sized dynamically */
} dnode_children_t;
@@ -265,7 +267,7 @@ typedef struct free_range {
uint64_t fr_nblks;
} free_range_t;
-dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
+void dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
uint64_t object, dnode_handle_t *dnh);
void dnode_special_close(dnode_handle_t *dnh);
@@ -279,6 +281,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
+void dnode_rele_and_unlock(dnode_t *dn, void *tag);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
@@ -300,6 +303,7 @@ void dnode_fini(void);
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
int minlvl, uint64_t blkfill, uint64_t txg);
void dnode_evict_dbufs(dnode_t *dn);
+void dnode_evict_bonus(dnode_t *dn);
#ifdef ZFS_DEBUG
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
index 3160a05..001bff5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -20,9 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_DSL_DATASET_H
@@ -37,6 +38,7 @@
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
+#include <zfeature_common.h>
#ifdef __cplusplus
extern "C" {
@@ -132,17 +134,18 @@ typedef struct dsl_dataset_phys {
} dsl_dataset_phys_t;
typedef struct dsl_dataset {
+ dmu_buf_user_t ds_dbu;
+
/* Immutable: */
struct dsl_dir *ds_dir;
dmu_buf_t *ds_dbuf;
uint64_t ds_object;
uint64_t ds_fsid_guid;
+ boolean_t ds_is_snapshot;
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */
- boolean_t ds_large_blocks;
- boolean_t ds_need_large_blocks;
/* has internal locking: */
dsl_deadlist_t ds_deadlist;
@@ -181,6 +184,18 @@ typedef struct dsl_dataset {
kmutex_t ds_sendstream_lock;
list_t ds_sendstreams;
+ /*
+ * For ZFEATURE_FLAG_PER_DATASET features, set if this dataset
+ * uses this feature.
+ */
+ uint8_t ds_feature_inuse[SPA_FEATURES];
+
+ /*
+ * Set if we need to activate the feature on this dataset this txg
+ * (used only in syncing context).
+ */
+ uint8_t ds_feature_activation_needed[SPA_FEATURES];
+
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
@@ -197,17 +212,16 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/
#define MAX_TAG_PREFIX_LEN 17
-inline boolean_t
-dsl_dataset_is_snapshot(dsl_dataset_t *ds)
-{
- return (dsl_dataset_phys(ds)->ds_num_children != 0);
-}
+#define dsl_dataset_is_snapshot(ds) \
+ (dsl_dataset_phys(ds)->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
+boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
+ void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
@@ -261,8 +275,6 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds);
-int dsl_dataset_activate_large_blocks(const char *dsname);
-void dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
@@ -302,6 +314,9 @@ void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result);
+void dsl_dataset_deactivate_feature(uint64_t dsobj,
+ spa_feature_t f, dmu_tx_t *tx);
+
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
index 772bfbe..f50014d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@@ -84,6 +85,8 @@ typedef struct dsl_dir_phys {
} dsl_dir_phys_t;
struct dsl_dir {
+ dmu_buf_user_t dd_dbu;
+
/* These are immutable; no lock needed: */
uint64_t dd_object;
dsl_pool_t *dd_pool;
@@ -119,6 +122,7 @@ dsl_dir_phys(dsl_dir_t *dd)
}
void dsl_dir_rele(dsl_dir_t *dd, void *tag);
+void dsl_dir_async_rele(dsl_dir_t *dd, void *tag);
int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **, const char **tail);
int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
index a6fb201..0e27a53 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
@@ -152,8 +152,10 @@ void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
int64_t used, int64_t comp, int64_t uncomp);
void dsl_pool_config_enter(dsl_pool_t *dp, void *tag);
+void dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag);
void dsl_pool_config_exit(dsl_pool_t *dp, void *tag);
boolean_t dsl_pool_config_held(dsl_pool_t *dp);
+boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
boolean_t dsl_pool_need_dirty_delay(dsl_pool_t *dp);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
new file mode 100644
index 0000000..5ebb7fe
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
@@ -0,0 +1,106 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_MULTILIST_H
+#define _SYS_MULTILIST_H
+
+#include <sys/zfs_context.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef list_node_t multilist_node_t;
+typedef struct multilist multilist_t;
+typedef struct multilist_sublist multilist_sublist_t;
+typedef unsigned int multilist_sublist_index_func_t(multilist_t *, void *);
+
+struct multilist_sublist {
+ /*
+ * The mutex used internally to implement thread safe insertions
+ * and removals to this individual sublist. It can also be locked
+ * by a consumer using multilist_sublist_{lock,unlock}, which is
+ * useful if a consumer needs to traverse the list in a thread
+ * safe manner.
+ */
+ kmutex_t mls_lock;
+ /*
+ * The actual list object containing all objects in this sublist.
+ */
+ list_t mls_list;
+ /*
+	 * Pad to cache line (64 bytes) in an effort to prevent
+ * cache line contention.
+ */
+ uint8_t mls_pad[24];
+};
+
+struct multilist {
+ /*
+ * This is used to get to the multilist_node_t structure given
+ * the void *object contained on the list.
+ */
+ size_t ml_offset;
+ /*
+ * The number of sublists used internally by this multilist.
+ */
+ uint64_t ml_num_sublists;
+ /*
+	 * Pointer to the array holding the actual sublists.
+ */
+ multilist_sublist_t *ml_sublists;
+ /*
+ * Pointer to function which determines the sublist to use
+ * when inserting and removing objects from this multilist.
+ * Please see the comment above multilist_create for details.
+ */
+ multilist_sublist_index_func_t *ml_index_func;
+};
+
+void multilist_destroy(multilist_t *);
+void multilist_create(multilist_t *, size_t, size_t, unsigned int,
+ multilist_sublist_index_func_t *);
+
+void multilist_insert(multilist_t *, void *);
+void multilist_remove(multilist_t *, void *);
+int multilist_is_empty(multilist_t *);
+
+unsigned int multilist_get_num_sublists(multilist_t *);
+unsigned int multilist_get_random_index(multilist_t *);
+
+multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
+void multilist_sublist_unlock(multilist_sublist_t *);
+
+void multilist_sublist_insert_head(multilist_sublist_t *, void *);
+void multilist_sublist_insert_tail(multilist_sublist_t *, void *);
+void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj);
+void multilist_sublist_remove(multilist_sublist_t *, void *);
+
+void *multilist_sublist_head(multilist_sublist_t *);
+void *multilist_sublist_tail(multilist_sublist_t *);
+void *multilist_sublist_next(multilist_sublist_t *, void *);
+void *multilist_sublist_prev(multilist_sublist_t *, void *);
+
+void multilist_link_init(multilist_node_t *);
+int multilist_link_active(multilist_node_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MULTILIST_H */
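
The new header splits one logical list into several sublists, each guarded by
its own mutex, so concurrent inserts and removals mostly contend on different
locks. A minimal consumer sketch using only the declarations above; the item
type, index function, and sublist count here are hypothetical:

	typedef struct my_item {
		uint64_t		mi_id;
		multilist_node_t	mi_node;  /* linkage owned by the multilist */
	} my_item_t;

	/* Hypothetical index function: spread items across sublists by id. */
	static unsigned int
	my_item_index_func(multilist_t *ml, void *obj)
	{
		my_item_t *mi = obj;

		return ((unsigned int)(mi->mi_id %
		    multilist_get_num_sublists(ml)));
	}

	static void
	my_item_example(multilist_t *ml, my_item_t *mi)
	{
		/* Four sublists; the offset locates mi_node inside my_item_t. */
		multilist_create(ml, sizeof (my_item_t),
		    offsetof(my_item_t, mi_node), 4, my_item_index_func);

		multilist_link_init(&mi->mi_node);
		multilist_insert(ml, mi);	/* locks only one sublist */

		/* Traverse a single sublist under its lock. */
		multilist_sublist_t *mls = multilist_sublist_lock(ml, 0);
		for (void *obj = multilist_sublist_head(mls); obj != NULL;
		    obj = multilist_sublist_next(mls, obj))
			;			/* inspect obj */
		multilist_sublist_unlock(mls);

		multilist_remove(ml, mi);
		multilist_destroy(ml);
	}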
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
index 5ac2794..a1844f0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h
@@ -69,6 +69,7 @@ void rrw_init(rrwlock_t *rrl, boolean_t track_all);
void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_enter_read(rrwlock_t *rrl, void *tag);
+void rrw_enter_read_prio(rrwlock_t *rrl, void *tag);
void rrw_enter_write(rrwlock_t *rrl);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h
index e125201..a6578f6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa.h
@@ -134,7 +134,6 @@ int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
-void sa_update_user(sa_handle_t *, sa_handle_t *);
void *sa_get_userdata(sa_handle_t *);
void sa_set_userp(sa_handle_t *, void *);
dmu_buf_t *sa_get_db(sa_handle_t *);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h
index 6b9af2e..50874c6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_SA_IMPL_H
@@ -210,11 +211,12 @@ typedef enum sa_data_op {
*/
struct sa_handle {
+ dmu_buf_user_t sa_dbu;
kmutex_t sa_lock;
dmu_buf_t *sa_bonus;
dmu_buf_t *sa_spill;
objset_t *sa_os;
- void *sa_userp;
+ void *sa_userp;
sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */
sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */
};
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index cb5e011..09f16bc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_SPA_H
@@ -458,6 +459,19 @@ _NOTE(CONSTCOND) } while (0)
((zc1).zc_word[2] - (zc2).zc_word[2]) | \
((zc1).zc_word[3] - (zc2).zc_word[3])))
+#define ZIO_CHECKSUM_IS_ZERO(zc) \
+ (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
+ (zc)->zc_word[2] | (zc)->zc_word[3]))
+
+#define ZIO_CHECKSUM_BSWAP(zcp) \
+{ \
+ (zcp)->zc_word[0] = BSWAP_64((zcp)->zc_word[0]); \
+ (zcp)->zc_word[1] = BSWAP_64((zcp)->zc_word[1]); \
+ (zcp)->zc_word[2] = BSWAP_64((zcp)->zc_word[2]); \
+ (zcp)->zc_word[3] = BSWAP_64((zcp)->zc_word[3]); \
+}
+
+
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
@@ -522,12 +536,13 @@ _NOTE(CONSTCOND) } while (0)
if (bp == NULL) { \
len += func(buf + len, size - len, "<NULL>"); \
} else if (BP_IS_HOLE(bp)) { \
- len += func(buf + len, size - len, "<hole>"); \
- if (bp->blk_birth > 0) { \
- len += func(buf + len, size - len, \
- " birth=%lluL", \
- (u_longlong_t)bp->blk_birth); \
- } \
+ len += func(buf + len, size - len, \
+ "HOLE [L%llu %s] " \
+ "size=%llxL birth=%lluL", \
+ (u_longlong_t)BP_GET_LEVEL(bp), \
+ type, \
+ (u_longlong_t)BP_GET_LSIZE(bp), \
+ (u_longlong_t)bp->blk_birth); \
} else if (BP_IS_EMBEDDED(bp)) { \
len = func(buf + len, size - len, \
"EMBEDDED [L%llu %s] et=%u %s " \
@@ -702,6 +717,7 @@ extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */
extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
+extern void spa_async_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_NONE 0x00
@@ -774,6 +790,9 @@ extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
+extern void spa_evicting_os_register(spa_t *, objset_t *os);
+extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
+extern void spa_evicting_os_wait(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa);
extern int spa_busy(void);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index 6b6d4bd..626d9d5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -145,6 +146,9 @@ struct spa {
uint64_t spa_claim_max_txg; /* highest claimed birth txg */
timespec_t spa_loaded_ts; /* 1st successful open time */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
+ kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */
+ list_t spa_evicting_os_list; /* Objsets being evicted. */
+ kcondvar_t spa_evicting_os_cv; /* Objset Eviction Completion */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
int spa_min_ashift; /* of vdevs in normal class */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index 2cbcf9c..ff46ba6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -61,6 +61,7 @@ extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
extern boolean_t vdev_is_bootable(vdev_t *vd);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
+extern int vdev_count_leaves(spa_t *spa);
extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d,
uint64_t txg, uint64_t size);
extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
index 6934245..a31d04c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
@@ -113,6 +113,8 @@ struct vdev_queue {
vdev_t *vq_vdev;
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
avl_tree_t vq_active_tree;
+ avl_tree_t vq_read_offset_tree;
+ avl_tree_t vq_write_offset_tree;
uint64_t vq_last_offset;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
kmutex_t vq_lock;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
index d95cd5d..09b052e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_ZAP_IMPL_H
@@ -139,6 +140,7 @@ typedef struct zap_phys {
typedef struct zap_table_phys zap_table_phys_t;
typedef struct zap {
+ dmu_buf_user_t zap_dbu;
objset_t *zap_objset;
uint64_t zap_object;
struct dmu_buf *zap_dbuf;
@@ -195,7 +197,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
-void zap_evict(dmu_buf_t *db, void *vmzap);
+void zap_evict(void *dbu);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
index cd8b74a..76b3ecc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap_leaf.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#ifndef _SYS_ZAP_LEAF_H
@@ -152,6 +153,7 @@ typedef union zap_leaf_chunk {
} zap_leaf_chunk_t;
typedef struct zap_leaf {
+ dmu_buf_user_t l_dbu;
krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
index 71ca044..f9eca27 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -242,6 +242,22 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
+
+ /*
+	 * Note: drr_checksum is overlaid with all record types
+ * except DRR_BEGIN. Therefore its (non-pad) members
+ * must not overlap with members from the other structs.
+ * We accomplish this by putting its members at the very
+ * end of the struct.
+ */
+ struct drr_checksum {
+ uint64_t drr_pad[34];
+ /*
+ * fletcher-4 checksum of everything preceding the
+ * checksum.
+ */
+ zio_cksum_t drr_checksum;
+ } drr_checksum;
} drr_u;
} dmu_replay_record_t;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
index 895d632..d3fe6e9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
@@ -37,6 +37,9 @@
extern "C" {
#endif
+struct dsl_pool;
+struct dsl_dataset;
+
/*
* Intent log format:
*
@@ -404,8 +407,10 @@ extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t oid);
extern int zil_vdev_offline(const char *osname, void *txarg);
-extern int zil_claim(const char *osname, void *txarg);
-extern int zil_check_log_chain(const char *osname, void *txarg);
+extern int zil_claim(struct dsl_pool *dp,
+ struct dsl_dataset *ds, void *txarg);
+extern int zil_check_log_chain(struct dsl_pool *dp,
+ struct dsl_dataset *ds, void *tx);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog, uint64_t synced_txg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index df7edf4..ad35273 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -29,6 +29,7 @@
#ifndef _ZIO_H
#define _ZIO_H
+#include <sys/zio_priority.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
@@ -125,33 +126,25 @@ enum zio_compress {
*/
#define ZIO_COMPRESS_LEGACY_FUNCTIONS ZIO_COMPRESS_LZ4
-/* N.B. when altering this value, also change BOOTFS_COMPRESS_VALID below */
-#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
-#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
+/*
+ * The meaning of "compress = on" selected by the compression features enabled
+ * on a given pool.
+ */
+#define ZIO_COMPRESS_LEGACY_ON_VALUE ZIO_COMPRESS_LZJB
+#define ZIO_COMPRESS_LZ4_ON_VALUE ZIO_COMPRESS_LZ4
+
+#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
(compress) == ZIO_COMPRESS_LZ4 || \
- ((compress) == ZIO_COMPRESS_ON && \
- ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
+ (compress) == ZIO_COMPRESS_ON || \
(compress) == ZIO_COMPRESS_OFF)
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
-typedef enum zio_priority {
- ZIO_PRIORITY_SYNC_READ,
- ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
- ZIO_PRIORITY_ASYNC_READ, /* prefetch */
- ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
- ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
- ZIO_PRIORITY_TRIM, /* free requests used for TRIM */
- ZIO_PRIORITY_NUM_QUEUEABLE,
-
- ZIO_PRIORITY_NOW /* non-queued I/Os (e.g. ioctl) */
-} zio_priority_t;
-
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
@@ -259,6 +252,7 @@ extern const char *zio_type_name[ZIO_TYPES];
* Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>.
* ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
* dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
+ * dnode visit bookmarks are <objset, object id of dnode, -3, 0>.
*
* Note: this structure is called a bookmark because its original purpose
* was to remember where to resume a pool-wide traverse.
@@ -291,6 +285,9 @@ typedef struct zbookmark_phys {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
+#define ZB_DNODE_LEVEL (-3LL)
+#define ZB_DNODE_BLKID (0ULL)
+
#define ZB_IS_ZERO(zb) \
((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
@@ -457,6 +454,7 @@ struct zio {
uint64_t io_offset;
hrtime_t io_timestamp;
avl_node_t io_queue_node;
+ avl_node_t io_offset_node;
/* Internal pipeline state */
enum zio_flag io_flags;
@@ -581,8 +579,8 @@ extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
enum zio_checksum parent);
extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
enum zio_checksum child, enum zio_checksum parent);
-extern enum zio_compress zio_compress_select(enum zio_compress child,
- enum zio_compress parent);
+extern enum zio_compress zio_compress_select(spa_t *spa,
+ enum zio_compress child, enum zio_compress parent);
extern void zio_suspend(spa_t *spa, zio_t *zio);
extern int zio_resume(spa_t *spa);
@@ -631,8 +629,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
extern void spa_handle_ignored_writes(spa_t *spa);
/* zbookmark_phys functions */
-boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
- const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
+boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
+int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
+ uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
index 0956c04..0c293ab 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
@@ -34,16 +35,16 @@ extern "C" {
/*
* Signature for checksum functions.
*/
-typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
+typedef void zio_checksum_func_t(const void *, uint64_t, zio_cksum_t *);
/*
* Information about each checksum function.
*/
typedef struct zio_checksum_info {
- zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
+ zio_checksum_func_t *ci_func[2]; /* checksum function per byteorder */
int ci_correctable; /* number of correctable bits */
int ci_eck; /* uses zio embedded checksum? */
- int ci_dedup; /* strong enough for dedup? */
+ boolean_t ci_dedup; /* strong enough for dedup? */
char *ci_name; /* descriptive name */
} zio_checksum_info_t;
@@ -61,7 +62,7 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*/
-extern zio_checksum_t zio_checksum_SHA256;
+extern zio_checksum_func_t zio_checksum_SHA256;
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
void *data, uint64_t size);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h
new file mode 100644
index 0000000..32e90e2
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+#ifndef _ZIO_PRIORITY_H
+#define _ZIO_PRIORITY_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum zio_priority {
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
+ ZIO_PRIORITY_ASYNC_READ, /* prefetch */
+ ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
+ ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
+ ZIO_PRIORITY_TRIM, /* free requests used for TRIM */
+ ZIO_PRIORITY_NUM_QUEUEABLE,
+
+ ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
+} zio_priority_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZIO_PRIORITY_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 5a59bae..24d4744 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -272,6 +272,26 @@ vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
return (NULL);
}
+static int
+vdev_count_leaves_impl(vdev_t *vd)
+{
+ int n = 0;
+
+ if (vd->vdev_ops->vdev_op_leaf)
+ return (1);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ n += vdev_count_leaves_impl(vd->vdev_child[c]);
+
+ return (n);
+}
+
+int
+vdev_count_leaves(spa_t *spa)
+{
+ return (vdev_count_leaves_impl(spa->spa_root_vdev));
+}
+
void
vdev_add_child(vdev_t *pvd, vdev_t *cvd)
{
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
index 1dba319..49d87e6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
@@ -302,6 +302,23 @@ vdev_queue_offset_compare(const void *x1, const void *x2)
return (0);
}
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+ return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+ if (t == ZIO_TYPE_READ)
+ return (&vq->vq_read_offset_tree);
+ else if (t == ZIO_TYPE_WRITE)
+ return (&vq->vq_write_offset_tree);
+ else
+ return (NULL);
+}
+
int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
@@ -336,19 +353,27 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+ int (*compfn) (const void *, const void *);
+
/*
- * The synchronous i/o queues are FIFO rather than LBA ordered.
- * This provides more consistent latency for these i/os, and
- * they tend to not be tightly clustered anyway so there is
- * little to no throughput loss.
+ * The synchronous i/o queues are dispatched in FIFO rather
+ * than LBA order. This provides more consistent latency for
+ * these i/os.
*/
- boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
- p == ZIO_PRIORITY_SYNC_WRITE);
- avl_create(&vq->vq_class[p].vqc_queued_tree,
- fifo ? vdev_queue_timestamp_compare :
- vdev_queue_offset_compare,
+ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+ compfn = vdev_queue_timestamp_compare;
+ else
+ compfn = vdev_queue_offset_compare;
+
+ avl_create(vdev_queue_class_tree(vq, p), compfn,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
}
@@ -361,8 +386,10 @@ vdev_queue_fini(vdev_t *vd)
vdev_queue_t *vq = &vd->vdev_queue;
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
- avl_destroy(&vq->vq_class[p].vqc_queued_tree);
+ avl_destroy(vdev_queue_class_tree(vq, p));
avl_destroy(&vq->vq_active_tree);
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
mutex_destroy(&vq->vq_lock);
}
@@ -371,9 +398,13 @@ static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
+ avl_tree_t *qtt;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ qtt = vdev_queue_type_tree(vq, zio->io_type);
+ if (qtt)
+ avl_add(qtt, zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -388,9 +419,13 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
+ avl_tree_t *qtt;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ qtt = vdev_queue_type_tree(vq, zio->io_type);
+ if (qtt)
+ avl_remove(qtt, zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -563,7 +598,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
/* find a queue that has not reached its minimum # outstanding i/os */
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_min_active(p))
return (p);
@@ -574,7 +609,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
* maximum # outstanding i/os.
*/
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_max_active(spa, p))
return (p);
@@ -608,15 +643,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
return (NULL);
- /*
- * The synchronous i/o queues are not sorted by LBA, so we can't
- * find adjacent i/os. These i/os tend to not be tightly clustered,
- * or too large to aggregate, so this has little impact on performance.
- */
- if (zio->io_priority == ZIO_PRIORITY_SYNC_READ ||
- zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
- return (NULL);
-
first = last = zio;
if (zio->io_type == ZIO_TYPE_READ)
@@ -642,8 +668,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
* recording the last non-option I/O.
*/
flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
- t = &vq->vq_class[zio->io_priority].vqc_queued_tree;
- while ((dio = AVL_PREV(t, first)) != NULL &&
+ t = vdev_queue_type_tree(vq, zio->io_type);
+ while (t != NULL && (dio = AVL_PREV(t, first)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
IO_GAP(dio, first) <= maxgap) {
@@ -751,7 +777,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
zio_t *zio, *aio;
zio_priority_t p;
avl_index_t idx;
- vdev_queue_class_t *vqc;
+ avl_tree_t *tree;
zio_t search;
again:
@@ -770,13 +796,13 @@ again:
*
* For FIFO queues (sync), issue the i/o with the lowest timestamp.
*/
- vqc = &vq->vq_class[p];
+ tree = vdev_queue_class_tree(vq, p);
search.io_timestamp = 0;
search.io_offset = vq->vq_last_offset + 1;
- VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL);
- zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+ VERIFY3P(avl_find(tree, &search, &idx), ==, NULL);
+ zio = avl_nearest(tree, idx, AVL_AFTER);
if (zio == NULL)
- zio = avl_first(&vqc->vqc_queued_tree);
+ zio = avl_first(tree);
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
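
With the two offset trees in place, a queued zio is linked onto two AVL trees
at once; a summary of the arrangement established above:

	/*
	 * vdev_queue_class_tree(vq, zio->io_priority) - dispatch order:
	 *     timestamp-compared (FIFO) for SYNC_READ/SYNC_WRITE, offset-
	 *     compared (LBA) for the async classes; linked via io_queue_node.
	 * vdev_queue_type_tree(vq, zio->io_type) - LBA order across all
	 *     priorities, consulted by vdev_queue_aggregate() so that sync
	 *     i/os can now be aggregated too; linked via io_offset_node.
	 */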
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
index 397acb9..44919d2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
/*
@@ -52,7 +53,6 @@ int fzap_default_block_shift = 14; /* 16k blocksize */
extern inline zap_phys_t *zap_f_phys(zap_t *zap);
-static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
void
@@ -81,7 +81,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
zap->zap_ismicro = FALSE;
- (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, zap_evict);
+ zap->zap_dbu.dbu_evict_func = zap_evict;
mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
@@ -162,8 +162,9 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
tbl->zt_nextblk = newblk;
ASSERT0(tbl->zt_blks_copied);
- dmu_prefetch(zap->zap_objset, zap->zap_object,
- tbl->zt_blk << bs, tbl->zt_numblks << bs);
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
+ tbl->zt_blk << bs, tbl->zt_numblks << bs,
+ ZIO_PRIORITY_SYNC_READ);
}
/*
@@ -387,11 +388,20 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
return (newblk);
}
+static void
+zap_leaf_pageout(void *dbu)
+{
+ zap_leaf_t *l = dbu;
+
+ rw_destroy(&l->l_rwlock);
+ kmem_free(l, sizeof (zap_leaf_t));
+}
+
static zap_leaf_t *
zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
{
void *winner;
- zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
+ zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
@@ -403,7 +413,8 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH));
- winner = dmu_buf_set_user(l->l_dbuf, l, zap_leaf_pageout);
+ dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+ winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
ASSERT(winner == NULL);
dmu_buf_will_dirty(l->l_dbuf, tx);
@@ -435,16 +446,6 @@ zap_put_leaf(zap_leaf_t *l)
dmu_buf_rele(l->l_dbuf, NULL);
}
-_NOTE(ARGSUSED(0))
-static void
-zap_leaf_pageout(dmu_buf_t *db, void *vl)
-{
- zap_leaf_t *l = vl;
-
- rw_destroy(&l->l_rwlock);
- kmem_free(l, sizeof (zap_leaf_t));
-}
-
static zap_leaf_t *
zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
{
@@ -452,19 +453,20 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
ASSERT(blkid != 0);
- l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
+ l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
rw_init(&l->l_rwlock, 0, 0, 0);
rw_enter(&l->l_rwlock, RW_WRITER);
l->l_blkid = blkid;
l->l_bs = highbit64(db->db_size) - 1;
l->l_dbuf = db;
- winner = dmu_buf_set_user(db, l, zap_leaf_pageout);
+ dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+ winner = dmu_buf_set_user(db, &l->l_dbu);
rw_exit(&l->l_rwlock);
if (winner != NULL) {
/* someone else set it first */
- zap_leaf_pageout(NULL, l);
+ zap_leaf_pageout(&l->l_dbu);
l = winner;
}
@@ -938,7 +940,8 @@ fzap_prefetch(zap_name_t *zn)
if (zap_idx_to_blk(zap, idx, &blk) != 0)
return;
bs = FZAP_BLOCK_SHIFT(zap);
- dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0, blk << bs, 1 << bs,
+ ZIO_PRIORITY_SYNC_READ);
}
/*
@@ -1309,9 +1312,10 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
} else {
int b;
- dmu_prefetch(zap->zap_objset, zap->zap_object,
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
- zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs);
+ zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
+ ZIO_PRIORITY_SYNC_READ);
for (b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
b++) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
index 26377b6..8ca68b8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zio.h>
@@ -393,7 +394,8 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
* it, because zap_lockdir() checks zap_ismicro without the lock
* held.
*/
- winner = dmu_buf_set_user(db, zap, zap_evict);
+ dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
+ winner = dmu_buf_set_user(db, &zap->zap_dbu);
if (winner != NULL) {
rw_exit(&zap->zap_rwlock);
@@ -547,7 +549,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
sz = zap->zap_dbuf->db_size;
- mzp = kmem_alloc(sz, KM_SLEEP);
+ mzp = zio_buf_alloc(sz);
bcopy(zap->zap_dbuf->db_data, mzp, sz);
nchunks = zap->zap_m.zap_num_chunks;
@@ -555,7 +557,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
1ULL << fzap_default_block_shift, 0, tx);
if (err) {
- kmem_free(mzp, sz);
+ zio_buf_free(mzp, sz);
return (err);
}
}
@@ -581,7 +583,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
if (err)
break;
}
- kmem_free(mzp, sz);
+ zio_buf_free(mzp, sz);
*zapp = zap;
return (err);
}
@@ -690,11 +692,10 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
return (dmu_object_free(os, zapobj, tx));
}
-_NOTE(ARGSUSED(0))
void
-zap_evict(dmu_buf_t *db, void *vzap)
+zap_evict(void *dbu)
{
- zap_t *zap = vzap;
+ zap_t *zap = dbu;
rw_destroy(&zap->zap_rwlock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
index 7540320..80a3f0b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -245,7 +245,7 @@ feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
{
int err;
uint64_t refcount;
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
/*
@@ -296,7 +296,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
dmu_tx_t *tx)
{
ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
@@ -322,7 +322,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
if (refcount == 0)
spa_deactivate_mos_feature(spa, feature->fi_guid);
- else if (feature->fi_mos)
+ else if (feature->fi_flags & ZFEATURE_FLAG_MOS)
spa_activate_mos_feature(spa, feature->fi_guid, tx);
}
@@ -333,8 +333,9 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
void
feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
- uint64_t initial_refcount = feature->fi_activate_on_enable ? 1 : 0;
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t initial_refcount =
+ (feature->fi_flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE) ? 1 : 0;
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
ASSERT(0 != zapobj);
@@ -379,7 +380,7 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
{
uint64_t refcount;
zfeature_info_t *feature = &spa_feature_table[fid];
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
ASSERT(VALID_FEATURE_FID(fid));
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
index e6c627c..22bbfb2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
*/
/*
@@ -1149,10 +1150,11 @@ zfsctl_shares_lookup(ap)
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTSUP));
}
- if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
+ if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
error = VOP_LOOKUP(ZTOV(dzp), vpp, cnp);
+ VN_RELE(ZTOV(dzp));
+ }
- VN_RELE(ZTOV(dzp));
ZFS_EXIT(zfsvfs);
return (error);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
index cf42cae..d3a339e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
@@ -5189,6 +5189,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
if ((error = get_nvlist(zc->zc_nvlist_src,
zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
VN_RELE(vp);
+ VN_RELE(ZTOV(sharedir));
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -5377,7 +5378,7 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
- if (error == 0 && !dsl_dataset_is_snapshot(new)) {
+ if (error == 0 && !new->ds_is_snapshot) {
dsl_dataset_rele(new, FTAG);
error = SET_ERROR(EINVAL);
}
@@ -5386,7 +5387,7 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
- if (error == 0 && !dsl_dataset_is_snapshot(old)) {
+ if (error == 0 && !old->ds_is_snapshot) {
dsl_dataset_rele(old, FTAG);
error = SET_ERROR(EINVAL);
}
@@ -5488,7 +5489,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
*
* innvl: {
- * (optional) "fromsnap" -> full snap name to send an incremental from
+ * (optional) "from" -> full snap or bookmark name to send an incremental
+ * from
* }
*
* outnvl: {
@@ -5499,7 +5501,6 @@ static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
dsl_pool_t *dp;
- dsl_dataset_t *fromsnap = NULL;
dsl_dataset_t *tosnap;
int error;
char *fromname;
@@ -5515,27 +5516,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
- error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+ error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
- error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
- if (error != 0) {
- dsl_dataset_rele(tosnap, FTAG);
- dsl_pool_rele(dp, FTAG);
- return (error);
+ if (strchr(fromname, '@') != NULL) {
+ /*
+ * If from is a snapshot, hold it and use the more
+ * efficient dmu_send_estimate to estimate send space
+ * size using deadlists.
+ */
+ dsl_dataset_t *fromsnap;
+ error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+ if (error != 0)
+ goto out;
+ error = dmu_send_estimate(tosnap, fromsnap, &space);
+ dsl_dataset_rele(fromsnap, FTAG);
+ } else if (strchr(fromname, '#') != NULL) {
+ /*
+ * If from is a bookmark, fetch the creation TXG of the
+ * snapshot it was created from and use that to find
+ * blocks that were born after it.
+ */
+ zfs_bookmark_phys_t frombm;
+
+ error = dsl_bookmark_lookup(dp, fromname, tosnap,
+ &frombm);
+ if (error != 0)
+ goto out;
+ error = dmu_send_estimate_from_txg(tosnap,
+ frombm.zbm_creation_txg, &space);
+ } else {
+ /*
+ * from is not properly formatted as a snapshot or
+ * bookmark
+ */
+ error = SET_ERROR(EINVAL);
+ goto out;
}
+ } else {
+		/* If estimating the size of a full send, use dmu_send_estimate. */
+ error = dmu_send_estimate(tosnap, NULL, &space);
}
- error = dmu_send_estimate(tosnap, fromsnap, &space);
fnvlist_add_uint64(outnvl, "space", space);
- if (fromsnap != NULL)
- dsl_dataset_rele(fromsnap, FTAG);
+out:
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
-
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void
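
The rewritten zfs_ioc_send_space() dispatches on the separator in the renamed
"from" value: an '@' names a snapshot, a '#' names a bookmark, and anything
else is rejected. A hedged sketch of just that rule (helper name hypothetical):

	/* Hypothetical classifier mirroring the zfs_ioc_send_space() dispatch. */
	static int
	send_space_from_kind(const char *fromname)
	{
		if (strchr(fromname, '@') != NULL)
			return (0);	/* snapshot: dmu_send_estimate() w/ deadlists */
		else if (strchr(fromname, '#') != NULL)
			return (1);	/* bookmark: dmu_send_estimate_from_txg() */
		return (-1);		/* neither: caller maps this to EINVAL */
	}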
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c
index ed5f276..3a472aa 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c
@@ -22,8 +22,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#include <sys/types.h>
-#include <sys/param.h>
+#include <sys/zfs_context.h>
#include <sys/vnode.h>
#include <sys/sa.h>
#include <sys/zfs_acl.h>
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index b690b69..201627a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -939,7 +939,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
&sa_obj);
if (error)
- return (error);
+ goto out;
} else {
/*
* Pre SA versions file systems should never touch
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index d90c830..3a4f348 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -293,25 +293,32 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
int *rvalp, caller_context_t *ct)
{
offset_t off;
+ offset_t ndata;
+ dmu_object_info_t doi;
int error;
zfsvfs_t *zfsvfs;
znode_t *zp;
switch (com) {
case _FIOFFS:
+ {
return (0);
/*
* The following two ioctls are used by bfu. Faking them
* out is necessary to avoid bfu errors.
*/
+ }
case _FIOGDIO:
case _FIOSDIO:
+ {
return (0);
+ }
case _FIO_SEEK_DATA:
case _FIO_SEEK_HOLE:
-#ifdef sun
+ {
+#ifdef illumos
if (ddi_copyin((void *)data, &off, sizeof (off), flag))
return (SET_ERROR(EFAULT));
#else
@@ -335,6 +342,48 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
#endif
return (0);
}
+#ifdef illumos
+ case _FIO_COUNT_FILLED:
+ {
+ /*
+ * _FIO_COUNT_FILLED adds a new ioctl command which
+ * exposes the number of filled blocks in a
+ * ZFS object.
+ */
+ zp = VTOZ(vp);
+ zfsvfs = zp->z_zfsvfs;
+ ZFS_ENTER(zfsvfs);
+ ZFS_VERIFY_ZP(zp);
+
+ /*
+ * Wait for all dirty blocks for this object
+ * to get synced out to disk, and the DMU info
+ * updated.
+ */
+ error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
+ if (error) {
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+
+ /*
+ * Retrieve fill count from DMU object.
+ */
+ error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
+ if (error) {
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+
+ ndata = doi.doi_fill_count;
+
+ ZFS_EXIT(zfsvfs);
+ if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
+ return (SET_ERROR(EFAULT));
+ return (0);
+ }
+#endif
+ }
return (SET_ERROR(ENOTTY));
}
@@ -2665,7 +2714,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon
/* Prefetch znode */
if (prefetch)
- dmu_prefetch(os, objnum, 0, 0);
+ dmu_prefetch(os, objnum, 0, 0, 0,
+ ZIO_PRIORITY_SYNC_READ);
skip_entry:
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
index 7f64ec6..32a9ee7 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -486,7 +486,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
dsl_pool_t *dp = zilog->zl_dmu_pool;
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
- if (dsl_dataset_is_snapshot(ds))
+ if (ds->ds_is_snapshot)
panic("dirtying snapshot!");
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
@@ -639,7 +639,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
}
int
-zil_claim(const char *osname, void *txarg)
+zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
{
dmu_tx_t *tx = txarg;
uint64_t first_txg = dmu_tx_get_txg(tx);
@@ -648,15 +648,16 @@ zil_claim(const char *osname, void *txarg)
objset_t *os;
int error;
- error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
+ error = dmu_objset_own_obj(dp, ds->ds_object,
+ DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error != 0) {
/*
* EBUSY indicates that the objset is inconsistent, in which
* case it can not have a ZIL.
*/
if (error != EBUSY) {
- cmn_err(CE_WARN, "can't open objset for %s, error %u",
- osname, error);
+ cmn_err(CE_WARN, "can't open objset for %llu, error %u",
+ (unsigned long long)ds->ds_object, error);
}
return (0);
}
@@ -703,8 +704,9 @@ zil_claim(const char *osname, void *txarg)
* Checksum errors are ok as they indicate the end of the chain.
* Any other error (no device or read failure) returns an error.
*/
+/* ARGSUSED */
int
-zil_check_log_chain(const char *osname, void *tx)
+zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
{
zilog_t *zilog;
objset_t *os;
@@ -713,9 +715,10 @@ zil_check_log_chain(const char *osname, void *tx)
ASSERT(tx == NULL);
- error = dmu_objset_hold(osname, FTAG, &os);
+ error = dmu_objset_from_ds(ds, &os);
if (error != 0) {
- cmn_err(CE_WARN, "can't open objset for %s", osname);
+ cmn_err(CE_WARN, "can't open objset %llu, error %d",
+ (unsigned long long)ds->ds_object, error);
return (0);
}
@@ -738,10 +741,8 @@ zil_check_log_chain(const char *osname, void *tx)
valid = vdev_log_state_valid(vd);
spa_config_exit(os->os_spa, SCL_STATE, FTAG);
- if (!valid) {
- dmu_objset_rele(os, FTAG);
+ if (!valid)
return (0);
- }
}
/*
@@ -754,8 +755,6 @@ zil_check_log_chain(const char *osname, void *tx)
error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
- dmu_objset_rele(os, FTAG);
-
return ((error == ECKSUM || error == ENOENT) ? 0 : error);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 39200a5..8548b2d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -93,6 +93,9 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
extern vmem_t *zio_alloc_arena;
#endif
+#define BP_SPANB(indblkshift, level) \
+ (((uint64_t)1) << ((level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
+#define COMPARE_META_LEVEL 0x80000000ul
/*
* The following actions directly effect the spa's sync-to-convergence logic.
* The values below define the sync pass when we start performing the action.
@@ -1216,8 +1219,6 @@ zio_write_bp_init(zio_t *zio)
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
return (ZIO_PIPELINE_CONTINUE);
}
- zio->io_bp_override = NULL;
- BP_ZERO(bp);
}
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
@@ -3455,37 +3456,127 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_done
};
-/* dnp is the dnode for zb1->zb_object */
-boolean_t
-zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1,
- const zbookmark_phys_t *zb2)
-{
- uint64_t zb1nextL0, zb2thisobj;
- ASSERT(zb1->zb_objset == zb2->zb_objset);
- ASSERT(zb2->zb_level == 0);
- /* The objset_phys_t isn't before anything. */
- if (dnp == NULL)
- return (B_FALSE);
- zb1nextL0 = (zb1->zb_blkid + 1) <<
- ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+/*
+ * Compare two zbookmark_phys_t's to see which we would reach first in a
+ * pre-order traversal of the object tree.
+ *
+ * This is simple in every case aside from the meta-dnode object. For all other
+ * objects, we traverse them in order (object 1 before object 2, and so on).
+ * However, all of these objects are traversed while traversing object 0, since
+ * the data it points to is the list of objects. Thus, we need to convert to a
+ * canonical representation so we can compare meta-dnode bookmarks to
+ * non-meta-dnode bookmarks.
+ *
+ * We do this by calculating "equivalents" for each field of the zbookmark.
+ * zbookmarks outside of the meta-dnode use their own object and level, and
+ * calculate the level 0 equivalent (the first L0 blkid that is contained in the
+ * blocks this bookmark refers to) by multiplying their blkid by their span
+ * (the number of L0 blocks contained within one block at their level).
+ * zbookmarks inside the meta-dnode calculate their object equivalent
+ * (which is L0equiv * dnodes per data block), use 0 for their L0equiv, and use
+ * level + 1<<31 (any value larger than a level could ever be) for their level.
+ * This causes them to always compare before a bookmark in their object
+ * equivalent, compare appropriately to bookmarks in other objects, and to
+ * compare appropriately to other bookmarks in the meta-dnode.
+ */
+int
+zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2, uint8_t ibs2,
+ const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2)
+{
+ /*
+ * These variables represent the "equivalent" values for the zbookmark,
+ * after converting zbookmarks inside the meta dnode to their
+ * normal-object equivalents.
+ */
+ uint64_t zb1obj, zb2obj;
+ uint64_t zb1L0, zb2L0;
+ uint64_t zb1level, zb2level;
+
+ if (zb1->zb_object == zb2->zb_object &&
+ zb1->zb_level == zb2->zb_level &&
+ zb1->zb_blkid == zb2->zb_blkid)
+ return (0);
- zb2thisobj = zb2->zb_object ? zb2->zb_object :
- zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+ /*
+ * BP_SPANB calculates the span in blocks.
+ */
+ zb1L0 = (zb1->zb_blkid) * BP_SPANB(ibs1, zb1->zb_level);
+ zb2L0 = (zb2->zb_blkid) * BP_SPANB(ibs2, zb2->zb_level);
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t nextobj = zb1nextL0 *
- (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
- return (nextobj <= zb2thisobj);
+ zb1obj = zb1L0 * (dbss1 << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));
+ zb1L0 = 0;
+ zb1level = zb1->zb_level + COMPARE_META_LEVEL;
+ } else {
+ zb1obj = zb1->zb_object;
+ zb1level = zb1->zb_level;
}
- if (zb1->zb_object < zb2thisobj)
- return (B_TRUE);
- if (zb1->zb_object > zb2thisobj)
- return (B_FALSE);
- if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+ if (zb2->zb_object == DMU_META_DNODE_OBJECT) {
+ zb2obj = zb2L0 * (dbss2 << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));
+ zb2L0 = 0;
+ zb2level = zb2->zb_level + COMPARE_META_LEVEL;
+ } else {
+ zb2obj = zb2->zb_object;
+ zb2level = zb2->zb_level;
+ }
+
+ /* Now that we have a canonical representation, do the comparison. */
+ if (zb1obj != zb2obj)
+ return (zb1obj < zb2obj ? -1 : 1);
+ else if (zb1L0 != zb2L0)
+ return (zb1L0 < zb2L0 ? -1 : 1);
+ else if (zb1level != zb2level)
+ return (zb1level > zb2level ? -1 : 1);
+ /*
+ * This can (theoretically) happen if the bookmarks have the same object
+	 * and level but different blkids, when the block sizes are not the same.
+	 * There is presently no way to change the indirect block sizes.
+ */
+ return (0);
+}
+
+/*
+ * This function checks the following: given that last_block is the place that
+ * our traversal stopped last time, does that guarantee that we've visited
+ * every node under subtree_root? Therefore, we can't just use the raw output
+ * of zbookmark_compare. We have to pass in a modified version of
+ * subtree_root; by incrementing the block id, and then checking whether
+ * last_block is before or equal to that, we can tell whether or not having
+ * visited last_block implies that all of subtree_root's children have been
+ * visited.
+ */
+boolean_t
+zbookmark_subtree_completed(const dnode_phys_t *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block)
+{
+ zbookmark_phys_t mod_zb = *subtree_root;
+ mod_zb.zb_blkid++;
+ ASSERT(last_block->zb_level == 0);
+
+ /* The objset_phys_t isn't before anything. */
+ if (dnp == NULL)
return (B_FALSE);
- return (zb1nextL0 <= zb2->zb_blkid);
+
+ /*
+ * We pass in 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT) for the
+ * data block size in sectors, because that variable is only used if
+ * the bookmark refers to a block in the meta-dnode. Since we don't
+ * know without examining it what object it refers to, and there's no
+ * harm in passing in this value in other cases, we always pass it in.
+ *
+ * We pass in 0 for the indirect block size shift because zb2 must be
+ * level 0. The indirect block size is only used to calculate the span
+ * of the bookmark, but since the bookmark must be level 0, the span is
+ * always 1, so the math works out.
+ *
+ * If you make changes to how the zbookmark_compare code works, be sure
+ * that this code still works afterwards.
+ */
+ return (zbookmark_compare(dnp->dn_datablkszsec, dnp->dn_indblkshift,
+ 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT), 0, &mod_zb,
+ last_block) <= 0);
}
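
A hypothetical worked example of the canonicalization in zbookmark_compare(),
assuming 16K dnode blocks (dbss = 32 sectors) and
DNODE_SHIFT == SPA_MINBLOCKSHIFT == 9:

	/*
	 * zb1 = <meta-dnode (object 0), level 0, blkid 2>
	 *     zb1L0    = 2 * BP_SPANB(ibs, 0) = 2
	 *     zb1obj   = 2 * (32 << 0)        = 64  (first dnode in that block)
	 *     zb1L0    = 0
	 *     zb1level = 0 + COMPARE_META_LEVEL
	 *
	 * zb1 therefore compares after every bookmark in objects 0..63 and,
	 * because its level equivalent exceeds any real level, before every
	 * bookmark inside object 64 itself -- exactly the pre-order position
	 * of the dnode block in the traversal.
	 */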
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
index 87f3262..309d913 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
@@ -35,6 +35,7 @@
#include <sys/compress.h>
#include <sys/kstat.h>
#include <sys/spa.h>
+#include <sys/zfeature.h>
#include <sys/zio.h>
#include <sys/zio_compress.h>
@@ -81,19 +82,27 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
};
enum zio_compress
-zio_compress_select(enum zio_compress child, enum zio_compress parent)
+zio_compress_select(spa_t *spa, enum zio_compress child,
+ enum zio_compress parent)
{
+ enum zio_compress result;
+
ASSERT(child < ZIO_COMPRESS_FUNCTIONS);
ASSERT(parent < ZIO_COMPRESS_FUNCTIONS);
- ASSERT(parent != ZIO_COMPRESS_INHERIT && parent != ZIO_COMPRESS_ON);
+ ASSERT(parent != ZIO_COMPRESS_INHERIT);
- if (child == ZIO_COMPRESS_INHERIT)
- return (parent);
+ result = child;
+ if (result == ZIO_COMPRESS_INHERIT)
+ result = parent;
- if (child == ZIO_COMPRESS_ON)
- return (ZIO_COMPRESS_ON_VALUE);
+ if (result == ZIO_COMPRESS_ON) {
+ if (spa_feature_is_active(spa, SPA_FEATURE_LZ4_COMPRESS))
+ result = ZIO_COMPRESS_LZ4_ON_VALUE;
+ else
+ result = ZIO_COMPRESS_LEGACY_ON_VALUE;
+ }
- return (child);
+ return (result);
}
size_t
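
A hedged sketch of the resolution order a caller of the new
zio_compress_select() observes (enum and feature names as in the hunk
above):

	/*
	 * child = ZIO_COMPRESS_INHERIT, parent = ZIO_COMPRESS_ON:
	 * INHERIT resolves to the parent value first, then ON is mapped to
	 * a concrete algorithm --
	 *   lz4_compress feature active -> ZIO_COMPRESS_LZ4_ON_VALUE
	 *   feature not active          -> ZIO_COMPRESS_LEGACY_ON_VALUE
	 */
	enum zio_compress c =
	    zio_compress_select(spa, ZIO_COMPRESS_INHERIT, ZIO_COMPRESS_ON);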
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
index 991a0a3..0a7f4e4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
@@ -438,7 +438,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
* fault injection isn't a performance critical path.
*/
if (flags & ZINJECT_FLUSH_ARC)
- arc_flush(NULL);
+ /*
+ * We must use FALSE to ensure arc_flush returns, since
+ * we're not preventing concurrent ARC insertions.
+ */
+ arc_flush(NULL, FALSE);
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c
index ec94b08..2215184 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 by Delphix. All rights reserved.
*/
/*
@@ -42,7 +43,7 @@
* A ZRL can be locked only while there are zero references, so ZRL_LOCKED is
* treated as zero references.
*/
-#define ZRL_LOCKED ((uint32_t)-1)
+#define ZRL_LOCKED -1
#define ZRL_DESTROYED -2
void
@@ -60,7 +61,7 @@ zrl_init(zrlock_t *zrl)
void
zrl_destroy(zrlock_t *zrl)
{
- ASSERT(zrl->zr_refcount == 0);
+ ASSERT0(zrl->zr_refcount);
mutex_destroy(&zrl->zr_mtx);
zrl->zr_refcount = ZRL_DESTROYED;
@@ -80,7 +81,7 @@ zrl_add(zrlock_t *zrl)
uint32_t cas = atomic_cas_32(
(uint32_t *)&zrl->zr_refcount, n, n + 1);
if (cas == n) {
- ASSERT((int32_t)n >= 0);
+ ASSERT3S((int32_t)n, >=, 0);
#ifdef ZFS_DEBUG
if (zrl->zr_owner == curthread) {
DTRACE_PROBE2(zrlock__reentry,
@@ -98,7 +99,7 @@ zrl_add(zrlock_t *zrl)
while (zrl->zr_refcount == ZRL_LOCKED) {
cv_wait(&zrl->zr_cv, &zrl->zr_mtx);
}
- ASSERT(zrl->zr_refcount >= 0);
+ ASSERT3S(zrl->zr_refcount, >=, 0);
zrl->zr_refcount++;
#ifdef ZFS_DEBUG
zrl->zr_owner = curthread;
@@ -112,14 +113,14 @@ zrl_remove(zrlock_t *zrl)
{
uint32_t n;
- n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount);
- ASSERT((int32_t)n >= 0);
#ifdef ZFS_DEBUG
if (zrl->zr_owner == curthread) {
zrl->zr_owner = NULL;
zrl->zr_caller = NULL;
}
#endif
+ n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount);
+ ASSERT3S((int32_t)n, >=, 0);
}
int
@@ -132,14 +133,14 @@ zrl_tryenter(zrlock_t *zrl)
(uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED);
if (cas == 0) {
#ifdef ZFS_DEBUG
- ASSERT(zrl->zr_owner == NULL);
+ ASSERT3P(zrl->zr_owner, ==, NULL);
zrl->zr_owner = curthread;
#endif
return (1);
}
}
- ASSERT((int32_t)n > ZRL_DESTROYED);
+ ASSERT3S((int32_t)n, >, ZRL_DESTROYED);
return (0);
}
@@ -147,11 +148,11 @@ zrl_tryenter(zrlock_t *zrl)
void
zrl_exit(zrlock_t *zrl)
{
- ASSERT(zrl->zr_refcount == ZRL_LOCKED);
+ ASSERT3S(zrl->zr_refcount, ==, ZRL_LOCKED);
mutex_enter(&zrl->zr_mtx);
#ifdef ZFS_DEBUG
- ASSERT(zrl->zr_owner == curthread);
+ ASSERT3P(zrl->zr_owner, ==, curthread);
zrl->zr_owner = NULL;
membar_producer(); /* make sure the owner store happens first */
#endif
@@ -163,7 +164,7 @@ zrl_exit(zrlock_t *zrl)
int
zrl_refcount(zrlock_t *zrl)
{
- ASSERT(zrl->zr_refcount > ZRL_DESTROYED);
+ ASSERT3S(zrl->zr_refcount, >, ZRL_DESTROYED);
int n = (int)zrl->zr_refcount;
return (n <= 0 ? 0 : n);
@@ -172,7 +173,7 @@ zrl_refcount(zrlock_t *zrl)
int
zrl_is_zero(zrlock_t *zrl)
{
- ASSERT(zrl->zr_refcount > ZRL_DESTROYED);
+ ASSERT3S(zrl->zr_refcount, >, ZRL_DESTROYED);
return (zrl->zr_refcount <= 0);
}
@@ -180,7 +181,7 @@ zrl_is_zero(zrlock_t *zrl)
int
zrl_is_locked(zrlock_t *zrl)
{
- ASSERT(zrl->zr_refcount > ZRL_DESTROYED);
+ ASSERT3S(zrl->zr_refcount, >, ZRL_DESTROYED);
return (zrl->zr_refcount == ZRL_LOCKED);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
index 234e13e..491c365 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
@@ -359,7 +359,7 @@ zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zvol_extent_t *ze;
int bs = ma->ma_zv->zv_volblocksize;
- if (BP_IS_HOLE(bp) ||
+ if (bp == NULL || BP_IS_HOLE(bp) ||
zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
return (0);
@@ -2776,8 +2776,12 @@ zvol_geom_worker(void *arg)
break;
case BIO_READ:
case BIO_WRITE:
+ case BIO_DELETE:
zvol_strategy(bp);
break;
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ break;
}
}
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h b/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h
index 0d98b26..0d5a35a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h
@@ -32,6 +32,9 @@
#include <sys/param.h>
#include <sys/isa_defs.h>
+#if defined(__FreeBSD__) && defined(_KERNEL)
+#include <sys/libkern.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -382,6 +385,9 @@ extern unsigned char bcd_to_byte[256];
static __inline int
highbit(ulong_t i)
{
+#if defined(__FreeBSD__) && defined(_KERNEL) && defined(HAVE_INLINE_FLSL)
+ return (flsl(i));
+#else
register int h = 1;
if (i == 0)
@@ -407,6 +413,7 @@ highbit(ulong_t i)
h += 1;
}
return (h);
+#endif
}
/*
@@ -416,6 +423,9 @@ highbit(ulong_t i)
static __inline int
highbit64(uint64_t i)
{
+#if defined(__FreeBSD__) && defined(_KERNEL) && defined(HAVE_INLINE_FLSLL)
+ return (flsll(i));
+#else
int h = 1;
if (i == 0)
@@ -439,6 +449,7 @@ highbit64(uint64_t i)
h += 1;
}
return (h);
+#endif
}
#ifdef __cplusplus
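
The substitution is valid because flsl()/flsll() return the same 1-based
index of the highest set bit as the fallback loops, with 0 for a zero
argument.  A quick userland check (assumes FreeBSD's <strings.h>, which
declares fls(), flsl() and flsll()):

	#include <assert.h>
	#include <strings.h>

	int
	main(void)
	{
		assert(flsl(0) == 0);		/* no bits set */
		assert(flsl(1) == 1);		/* bit 0 -> index 1 */
		assert(flsl(0x80) == 8);	/* bit 7 -> index 8 */
		assert(flsll(1ULL << 63) == 64);
		return (0);
	}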
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 714b864..47bb13b 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -1042,20 +1042,16 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
ino = 0;
if (lobj) {
off = IDX_TO_OFF(lobj->size);
- if (lobj->type == OBJT_VNODE) {
- vp = lobj->handle;
- if (vp)
- vref(vp);
- }
- else
- vp = NULL;
+ vp = vm_object_vnode(lobj);
+ if (vp != NULL)
+ vref(vp);
if (lobj != obj)
VM_OBJECT_RUNLOCK(lobj);
flags = obj->flags;
ref_count = obj->ref_count;
shadow_count = obj->shadow_count;
VM_OBJECT_RUNLOCK(obj);
- if (vp) {
+ if (vp != NULL) {
vn_fullpath(td, vp, &name, &freename);
vn_lock(vp, LK_SHARED | LK_RETRY);
VOP_GETATTR(vp, &vat, td->td_ucred);
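
This hunk and the hwpmc_mod.c hunk below replace the same open-coded
OBJT_VNODE test with vm_object_vnode().  The contract these call sites
assume, inferred from the diff itself rather than from the helper's own
definition:

	/*
	 * struct vnode *vm_object_vnode(vm_object_t object) returns the
	 * backing vnode of a vnode-backed object and NULL for any other
	 * object type, so callers test the result against NULL instead of
	 * checking object->type == OBJT_VNODE and object->handle themselves.
	 */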
diff --git a/sys/conf/files b/sys/conf/files
index 123a602..94a05f5 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -83,8 +83,9 @@ cam/ctl/ctl_backend_ramdisk.c optional ctl
cam/ctl/ctl_cmd_table.c optional ctl
cam/ctl/ctl_frontend.c optional ctl
cam/ctl/ctl_frontend_cam_sim.c optional ctl
-cam/ctl/ctl_frontend_internal.c optional ctl
+cam/ctl/ctl_frontend_ioctl.c optional ctl
cam/ctl/ctl_frontend_iscsi.c optional ctl
+cam/ctl/ctl_ha.c optional ctl
cam/ctl/ctl_scsi_all.c optional ctl
cam/ctl/ctl_tpc.c optional ctl
cam/ctl/ctl_tpc_local.c optional ctl
@@ -145,6 +146,7 @@ cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c optional zfs compile-with
cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c optional zfs compile-with "${ZFS_C}"
+cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}"
@@ -174,6 +176,7 @@ cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c optional zfs compile-with "$
cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c optional zfs compile-with "${ZFS_C}"
+cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c optional zfs compile-with "${ZFS_C}"
cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c optional zfs compile-with "${ZFS_C}"
@@ -3257,6 +3260,7 @@ kgssapi/gsstest.c optional kgssapi_debug
# the file should be moved to conf/files.<arch> from here.
#
libkern/arc4random.c standard
+libkern/asprintf.c standard
libkern/bcd.c standard
libkern/bsearch.c standard
libkern/crc32.c standard
diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c
index ad2bf38..a3930ea 100644
--- a/sys/contrib/ipfilter/netinet/ip_state.c
+++ b/sys/contrib/ipfilter/netinet/ip_state.c
@@ -3646,7 +3646,8 @@ ipf_state_del(softc, is, why)
is->is_me = NULL;
is->is_ref--;
}
- if (is->is_ref > 1) {
+ is->is_ref--;
+ if (is->is_ref > 0) {
int refs;
is->is_ref--;
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index 7b80807..1f5ef7b 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -326,6 +326,9 @@ ahci_probe(device_t dev)
pci_get_subclass(dev) == PCIS_STORAGE_SATA &&
pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0)
valid = 1;
+ else if (pci_get_class(dev) == PCIC_STORAGE &&
+ pci_get_subclass(dev) == PCIS_STORAGE_RAID)
+ valid = 2;
/* Is this a known AHCI chip? */
for (i = 0; ahci_ids[i].id != 0; i++) {
if (ahci_ids[i].id == devid &&
@@ -342,7 +345,7 @@ ahci_probe(device_t dev)
return (BUS_PROBE_DEFAULT);
}
}
- if (!valid)
+ if (valid != 1)
return (ENXIO);
device_set_desc_copy(dev, "AHCI SATA controller");
return (BUS_PROBE_DEFAULT);
diff --git a/sys/dev/bxe/ecore_hsi.h b/sys/dev/bxe/ecore_hsi.h
index 005bb2e..f78f4ea 100644
--- a/sys/dev/bxe/ecore_hsi.h
+++ b/sys/dev/bxe/ecore_hsi.h
@@ -2536,9 +2536,9 @@ struct shmem2_region {
#define SHMEM_EEE_SUPPORTED_MASK 0x000f0000
#define SHMEM_EEE_SUPPORTED_SHIFT 16
#define SHMEM_EEE_ADV_STATUS_MASK 0x00f00000
- #define SHMEM_EEE_100M_ADV (1<<0)
- #define SHMEM_EEE_1G_ADV (1<<1)
- #define SHMEM_EEE_10G_ADV (1<<2)
+ #define SHMEM_EEE_100M_ADV (1U<<0)
+ #define SHMEM_EEE_1G_ADV (1U<<1)
+ #define SHMEM_EEE_10G_ADV (1U<<2)
#define SHMEM_EEE_ADV_STATUS_SHIFT 20
#define SHMEM_EEE_LP_ADV_STATUS_MASK 0x0f000000
#define SHMEM_EEE_LP_ADV_STATUS_SHIFT 24
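
A note on the U suffix: the bits touched here (0-2) are harmless either
way, but with a 32-bit int, (1 << 31) shifts into the sign bit, which is
undefined behavior in C, while (1U << 31) is well defined.  Using 1U keeps
the whole SHMEM_EEE_* bit-macro family consistently unsigned (hypothetical
names below):

	/* #define BAD_BIT31 (1 << 31)   -- undefined behavior at bit 31 */
	#define OK_BIT31  (1U << 31)  /* well defined for any bit 0..31 */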
diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c
index 80c40e7..cc5159c 100644
--- a/sys/dev/hwpmc/hwpmc_core.c
+++ b/sys/dev/hwpmc/hwpmc_core.c
@@ -2572,7 +2572,7 @@ core_intr(int cpu, struct trapframe *tf)
TRAPF_USERMODE(tf));
v = pm->pm_sc.pm_reloadcount;
- v = iaf_reload_count_to_perfctr_value(v);
+ v = iap_reload_count_to_perfctr_value(v);
/*
* Stop the counter, reload it but only restart it if
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 9e369fe..10f9304 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -1660,7 +1660,8 @@ pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
continue;
}
- if (lobj->type != OBJT_VNODE || lobj->handle == NULL) {
+ vp = vm_object_vnode(lobj);
+ if (vp == NULL) {
if (lobj != obj)
VM_OBJECT_RUNLOCK(lobj);
VM_OBJECT_RUNLOCK(obj);
@@ -1672,7 +1673,7 @@ pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
* vnode, so we don't emit redundant MAP-IN
* directives.
*/
- if (entry->start == last_end && lobj->handle == last_vp) {
+ if (entry->start == last_end && vp == last_vp) {
last_end = entry->end;
if (lobj != obj)
VM_OBJECT_RUNLOCK(lobj);
@@ -1695,7 +1696,6 @@ pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
last_timestamp = map->timestamp;
vm_map_unlock_read(map);
- vp = lobj->handle;
vref(vp);
if (lobj != obj)
VM_OBJECT_RUNLOCK(lobj);
diff --git a/sys/dev/isp/isp.c b/sys/dev/isp/isp.c
index a347143..9e8e29b 100644
--- a/sys/dev/isp/isp.c
+++ b/sys/dev/isp/isp.c
@@ -65,16 +65,18 @@ __FBSDID("$FreeBSD$");
*/
#define MBOX_DELAY_COUNT 1000000 / 100
#define ISP_MARK_PORTDB(a, b, c) \
- isp_prt(isp, ISP_LOG_SANCFG, \
- "Chan %d ISP_MARK_PORTDB@LINE %d", b, __LINE__); \
- isp_mark_portdb(a, b, c)
+ do { \
+ isp_prt(isp, ISP_LOG_SANCFG, \
+ "Chan %d ISP_MARK_PORTDB@LINE %d", (b), __LINE__); \
+ isp_mark_portdb((a), (b), (c)); \
+ } while (0)
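	/*
	 * Why the do/while (0) wrapper matters (hypothetical caller):
	 * without it, the old two-statement macro broke under an unbraced
	 * if --
	 *
	 *	if (cond)
	 *		ISP_MARK_PORTDB(isp, chan, 1);
	 *	else
	 *		...
	 *
	 * expanded so that only the isp_prt() call was guarded and the else
	 * no longer attached (a syntax error).  do { ... } while (0) turns
	 * the macro into a single statement that also swallows the trailing
	 * semicolon.
	 */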
/*
* Local static data
*/
static const char fconf[] = "Chan %d PortDB[%d] changed:\n current =(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)\n database=(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)";
static const char notresp[] = "Not RESPONSE in RESPONSE Queue (type 0x%x) @ idx %d (next %d) nlooked %d";
-static const char topology[] = "Chan %d WWPN 0x%08x%08x PortID 0x%06x N-Port Handle %d, Connection '%s'";
+static const char topology[] = "Chan %d WWPN 0x%08x%08x PortID 0x%06x handle 0x%x, Connection '%s'";
static const char bun[] = "bad underrun (count %d, resid %d, status %s)";
static const char lipd[] = "Chan %d LIP destroyed %d active commands";
static const char sacq[] = "unable to acquire scratch area";
@@ -2226,6 +2228,7 @@ static void
isp_mark_portdb(ispsoftc_t *isp, int chan, int disposition)
{
fcparam *fcp = FCPARAM(isp, chan);
+ fcportdb_t *lp;
int i;
if (chan < 0 || chan >= isp->isp_nchan) {
@@ -2233,32 +2236,28 @@ isp_mark_portdb(ispsoftc_t *isp, int chan, int disposition)
return;
}
for (i = 0; i < MAX_FC_TARG; i++) {
- if (fcp->portdb[i].target_mode) {
- if (disposition < 0) {
- isp_prt(isp, ISP_LOGTINFO, "isp_mark_portdb: Chan %d zeroing handle 0x" "%04x port 0x%06x", chan,
- fcp->portdb[i].handle, fcp->portdb[i].portid);
- ISP_MEMZERO(&fcp->portdb[i], sizeof (fcportdb_t));
- }
- continue;
- }
- if (disposition == 0) {
- ISP_MEMZERO(&fcp->portdb[i], sizeof (fcportdb_t));
- } else {
- switch (fcp->portdb[i].state) {
- case FC_PORTDB_STATE_CHANGED:
- case FC_PORTDB_STATE_PENDING_VALID:
- case FC_PORTDB_STATE_VALID:
- case FC_PORTDB_STATE_PROBATIONAL:
- fcp->portdb[i].state = FC_PORTDB_STATE_PROBATIONAL;
- break;
- case FC_PORTDB_STATE_ZOMBIE:
- break;
- case FC_PORTDB_STATE_NIL:
- default:
- ISP_MEMZERO(&fcp->portdb[i], sizeof (fcportdb_t));
- fcp->portdb[i].state = FC_PORTDB_STATE_NIL;
- break;
+ lp = &fcp->portdb[i];
+ switch (lp->state) {
+ case FC_PORTDB_STATE_PROBATIONAL:
+ case FC_PORTDB_STATE_DEAD:
+ case FC_PORTDB_STATE_CHANGED:
+ case FC_PORTDB_STATE_PENDING_VALID:
+ case FC_PORTDB_STATE_VALID:
+ if (disposition > 0)
+ lp->state = FC_PORTDB_STATE_PROBATIONAL;
+ else {
+ lp->state = FC_PORTDB_STATE_NIL;
+ isp_async(isp, ISPASYNC_DEV_GONE, chan, lp);
}
+ break;
+ case FC_PORTDB_STATE_ZOMBIE:
+ break;
+ case FC_PORTDB_STATE_NIL:
+ case FC_PORTDB_STATE_NEW:
+ default:
+ ISP_MEMZERO(lp, sizeof(*lp));
+ lp->state = FC_PORTDB_STATE_NIL;
+ break;
}
}
}
@@ -2446,7 +2445,7 @@ isp_port_login(ispsoftc_t *isp, uint16_t handle, uint32_t portid)
return (MBOX_PORT_ID_USED | (mbs.param[1] << 16));
case MBOX_LOOP_ID_USED:
- isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: handle 0x%04x in use for port id 0x%02xXXXX", handle, mbs.param[1] & 0xff);
+ isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: handle 0x%x in use for port id 0x%02xXXXX", handle, mbs.param[1] & 0xff);
return (MBOX_LOOP_ID_USED);
case MBOX_COMMAND_COMPLETE:
@@ -2532,7 +2531,7 @@ isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb, int dolock)
pdb->portid = BITS2WORD_24XX(un.bill.pdb_portid_bits);
ISP_MEMCPY(pdb->portname, un.bill.pdb_portname, 8);
ISP_MEMCPY(pdb->nodename, un.bill.pdb_nodename, 8);
- isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Port 0x%06x flags 0x%x curstate %x", chan, pdb->portid, un.bill.pdb_flags, un.bill.pdb_curstate);
+ isp_prt(isp, ISP_LOG_SANCFG, "Chan %d handle 0x%x Port 0x%06x flags 0x%x curstate %x", chan, id, pdb->portid, un.bill.pdb_flags, un.bill.pdb_curstate);
if (un.bill.pdb_curstate < PDB2400_STATE_PLOGI_DONE || un.bill.pdb_curstate > PDB2400_STATE_LOGGED_IN) {
mbs.param[0] = MBOX_NOT_LOGGED_IN;
if (dolock) {
@@ -2560,6 +2559,7 @@ isp_dump_chip_portdb(ispsoftc_t *isp, int chan, int dolock)
isp_pdb_t pdb;
int lim, loopid;
+ isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGINFO, "Chan %d chip port dump", chan);
if (ISP_CAP_2KLOGIN(isp)) {
lim = NPH_MAX_2K;
} else {
@@ -2961,25 +2961,14 @@ isp_pdb_sync(ispsoftc_t *isp, int chan)
for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) {
lp = &fcp->portdb[dbidx];
- if (lp->state == FC_PORTDB_STATE_NIL || lp->target_mode) {
- continue;
- }
-
- if (lp->state == FC_PORTDB_STATE_VALID) {
- if (dbidx != FL_ID) {
- isp_prt(isp,
- ISP_LOGERR, "portdb idx %d already valid",
- dbidx);
- }
+ if (lp->state == FC_PORTDB_STATE_NIL ||
+ lp->state == FC_PORTDB_STATE_VALID) {
continue;
}
switch (lp->state) {
case FC_PORTDB_STATE_PROBATIONAL:
case FC_PORTDB_STATE_DEAD:
- /*
- * It's up to the outer layers to clear isp_dev_map.
- */
lp->state = FC_PORTDB_STATE_NIL;
isp_async(isp, ISPASYNC_DEV_GONE, chan, lp);
if (lp->autologin == 0) {
@@ -2999,48 +2988,36 @@ isp_pdb_sync(ispsoftc_t *isp, int chan)
*/
break;
case FC_PORTDB_STATE_NEW:
- /*
- * It's up to the outer layers to assign a virtual
- * target id in isp_dev_map (if any).
- */
lp->portid = lp->new_portid;
lp->prli_word3 = lp->new_prli_word3;
lp->state = FC_PORTDB_STATE_VALID;
isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp);
lp->new_prli_word3 = 0;
lp->new_portid = 0;
- lp->announced = 0;
break;
case FC_PORTDB_STATE_CHANGED:
-/*
- * XXXX FIX THIS
- */
lp->state = FC_PORTDB_STATE_VALID;
isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp);
+ lp->portid = lp->new_portid;
+ lp->prli_word3 = lp->new_prli_word3;
lp->new_prli_word3 = 0;
lp->new_portid = 0;
- lp->announced = 0;
break;
case FC_PORTDB_STATE_PENDING_VALID:
lp->portid = lp->new_portid;
lp->prli_word3 = lp->new_prli_word3;
- if (lp->dev_map_idx) {
- int t = lp->dev_map_idx - 1;
- fcp->isp_dev_map[t] = dbidx + 1;
- }
lp->state = FC_PORTDB_STATE_VALID;
isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp);
if (dbidx != FL_ID) {
lp->new_prli_word3 = 0;
lp->new_portid = 0;
}
- lp->announced = 0;
break;
case FC_PORTDB_STATE_ZOMBIE:
break;
default:
isp_prt(isp, ISP_LOGWARN,
- "isp_scan_loop: state %d for idx %d",
+ "isp_pdb_sync: state %d for idx %d",
lp->state, dbidx);
isp_dump_portdb(isp, chan);
}
@@ -3111,7 +3088,6 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop 0..%d", chan, lim-1);
-
/*
* Run through the list and get the port database info for each one.
*/
@@ -3195,6 +3171,9 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
*/
if (tmp.node_wwn == 0 || tmp.port_wwn == 0 || tmp.portid == 0) {
int a, b, c;
+ isp_prt(isp, ISP_LOGWARN,
+ "Chan %d bad pdb (WWNN %016jx, WWPN %016jx, PortID %06x, W3 0x%x, H 0x%x) @ handle 0x%x",
+ chan, tmp.node_wwn, tmp.port_wwn, tmp.portid, tmp.prli_word3, tmp.handle, handle);
a = (tmp.node_wwn == 0);
b = (tmp.port_wwn == 0);
c = (tmp.portid == 0);
@@ -3204,13 +3183,10 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
tmp.port_wwn =
isp_get_wwn(isp, chan, handle, 0);
if (tmp.node_wwn && tmp.port_wwn) {
- isp_prt(isp, ISP_LOGINFO, "DODGED!");
+ isp_prt(isp, ISP_LOGWARN, "DODGED!");
goto cont;
}
}
- isp_prt(isp, ISP_LOGWARN,
- "Chan %d bad pdb (%1d%1d%1d) @ handle 0x%x", chan,
- a, b, c, handle);
isp_dump_portdb(isp, chan);
continue;
}
@@ -3218,30 +3194,19 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
/*
* Now search the entire port database
- * for the same Port and Node WWN.
+ * for the same Port WWN.
*/
- for (i = 0; i < MAX_FC_TARG; i++) {
- lp = &fcp->portdb[i];
-
- if (lp->state == FC_PORTDB_STATE_NIL || lp->target_mode) {
- continue;
- }
- if (lp->node_wwn != tmp.node_wwn) {
- continue;
- }
- if (lp->port_wwn != tmp.port_wwn) {
- continue;
- }
-
+ if (isp_find_pdb_by_wwn(isp, chan, tmp.port_wwn, &lp)) {
/*
* Okay- we've found a non-nil entry that matches.
* Check to make sure it's probational or a zombie.
*/
if (lp->state != FC_PORTDB_STATE_PROBATIONAL &&
- lp->state != FC_PORTDB_STATE_ZOMBIE) {
+ lp->state != FC_PORTDB_STATE_ZOMBIE &&
+ lp->state != FC_PORTDB_STATE_VALID) {
isp_prt(isp, ISP_LOGERR,
"Chan %d [%d] not probational/zombie (0x%x)",
- chan, i, lp->state);
+ chan, FC_PORTDB_TGT(isp, chan, lp), lp->state);
isp_dump_portdb(isp, chan);
ISP_MARK_PORTDB(isp, chan, 1);
isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop DONE (bad)", chan);
@@ -3253,6 +3218,7 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
* automatically.
*/
lp->autologin = 1;
+ lp->node_wwn = tmp.node_wwn;
/*
* Check to make see if really still the same
@@ -3263,7 +3229,7 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
lp->new_prli_word3 = tmp.prli_word3;
lp->state = FC_PORTDB_STATE_PENDING_VALID;
isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x Pending Valid", chan, tmp.portid, tmp.handle);
- break;
+ continue;
}
/*
@@ -3280,13 +3246,6 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
lp->state = FC_PORTDB_STATE_CHANGED;
lp->new_portid = tmp.portid;
lp->new_prli_word3 = tmp.prli_word3;
- break;
- }
-
- /*
- * Did we find and update an old entry?
- */
- if (i < MAX_FC_TARG) {
continue;
}
@@ -3295,9 +3254,6 @@ isp_scan_loop(ispsoftc_t *isp, int chan)
* for it and save info for later disposition.
*/
for (i = 0; i < MAX_FC_TARG; i++) {
- if (fcp->portdb[i].target_mode) {
- continue;
- }
if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL) {
break;
}
@@ -3697,7 +3653,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
* that have the same domain and area code as our own
* portid.
*/
- if (ISP_CAP_MULTI_ID(isp)) {
+ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) {
if ((portid >> 8) == (fcp->isp_portid >> 8)) {
isp_prt(isp, ISP_LOG_SANCFG,
"Chan %d skip PortID 0x%06x",
@@ -3723,7 +3679,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) {
lp = &fcp->portdb[dbidx];
- if (lp->state != FC_PORTDB_STATE_PROBATIONAL || lp->target_mode) {
+ if (lp->state != FC_PORTDB_STATE_PROBATIONAL) {
continue;
}
if (lp->portid == portid) {
@@ -3765,7 +3721,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
if (r != 0) {
lp->new_portid = portid;
lp->state = FC_PORTDB_STATE_DEAD;
- isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x is dead", chan, portid);
+ isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric PortID 0x%06x handle 0x%x is dead (%d)", chan, portid, lp->handle, r);
continue;
}
@@ -3781,7 +3737,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
if (pdb.handle != lp->handle ||
pdb.portid != portid ||
wwpn != lp->port_wwn ||
- wwnn != lp->node_wwn) {
+ (lp->node_wwn != 0 && wwnn != lp->node_wwn)) {
isp_prt(isp, ISP_LOG_SANCFG,
fconf, chan, dbidx, pdb.handle, pdb.portid,
(uint32_t) (wwnn >> 32), (uint32_t) wwnn,
@@ -3799,8 +3755,9 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
* portid consistency after re-login.
*
*/
- if (isp_login_device(isp, chan, portid, &pdb,
- &oldhandle)) {
+ if ((fcp->role & ISP_ROLE_INITIATOR) == 0 ||
+ isp_login_device(isp, chan, portid, &pdb,
+ &oldhandle)) {
lp->new_portid = portid;
lp->state = FC_PORTDB_STATE_DEAD;
if (fcp->isp_loopstate !=
@@ -3821,7 +3778,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename);
MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname);
if (wwpn != lp->port_wwn ||
- wwnn != lp->node_wwn) {
+ (lp->node_wwn != 0 && wwnn != lp->node_wwn)) {
isp_prt(isp, ISP_LOGWARN, "changed WWN"
" after relogin");
lp->new_portid = portid;
@@ -3859,6 +3816,9 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
continue;
}
+ if ((fcp->role & ISP_ROLE_INITIATOR) == 0)
+ continue;
+
/*
* Ah- a new entry. Search the database again for all non-NIL
* entries to make sure we never ever make a new database entry
@@ -3872,12 +3832,6 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
lp <= &fcp->portdb[SNS_ID]) {
continue;
}
- /*
- * Skip any target mode entries.
- */
- if (lp->target_mode) {
- continue;
- }
if (lp->state == FC_PORTDB_STATE_NIL) {
if (dbidx == MAX_FC_TARG) {
dbidx = lp - fcp->portdb;
@@ -3950,10 +3904,9 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
if (dbidx >= FL_ID && dbidx <= SNS_ID) {
continue;
}
- if (fcp->portdb[dbidx].target_mode) {
- continue;
- }
- if (fcp->portdb[dbidx].node_wwn == wwnn && fcp->portdb[dbidx].port_wwn == wwpn) {
+ if ((fcp->portdb[dbidx].node_wwn == wwnn ||
+ fcp->portdb[dbidx].node_wwn == 0) &&
+ fcp->portdb[dbidx].port_wwn == wwpn) {
break;
}
}
@@ -3991,6 +3944,7 @@ isp_scan_fabric(ispsoftc_t *isp, int chan)
*/
lp = &fcp->portdb[dbidx];
lp->handle = handle;
+ lp->node_wwn = wwnn;
lp->new_portid = portid;
lp->new_prli_word3 = nr;
if (lp->portid != portid || lp->prli_word3 != nr) {
@@ -4324,7 +4278,8 @@ isp_start(XS_T *xs)
ispreq_t *reqp;
void *cdbp, *qep;
uint16_t *tptr;
- int target, dmaresult, hdlidx = 0;
+ fcportdb_t *lp;
+ int target, dmaresult;
XS_INITERR(xs);
isp = XS_ISP(xs);
@@ -4373,29 +4328,22 @@ isp_start(XS_T *xs)
return (CMD_RQLATER);
}
- if (XS_TGT(xs) >= MAX_FC_TARG) {
- isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%d target too big", XS_CHANNEL(xs), target, XS_LUN(xs));
+ isp_prt(isp, ISP_LOGDEBUG2, "XS_TGT(xs)=%d", target);
+ lp = &fcp->portdb[target];
+ if (target < 0 || target >= MAX_FC_TARG ||
+ lp->is_target == 0) {
XS_SETERR(xs, HBA_SELTIMEOUT);
return (CMD_COMPLETE);
}
-
- hdlidx = fcp->isp_dev_map[XS_TGT(xs)] - 1;
- isp_prt(isp, ISP_LOGDEBUG2, "XS_TGT(xs)=%d- hdlidx value %d", XS_TGT(xs), hdlidx);
- if (hdlidx < 0 || hdlidx >= MAX_FC_TARG) {
- XS_SETERR(xs, HBA_SELTIMEOUT);
- return (CMD_COMPLETE);
- }
- if (fcp->portdb[hdlidx].state == FC_PORTDB_STATE_ZOMBIE) {
+ if (lp->state == FC_PORTDB_STATE_ZOMBIE) {
isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%d target zombie", XS_CHANNEL(xs), target, XS_LUN(xs));
return (CMD_RQLATER);
}
- if (fcp->portdb[hdlidx].state != FC_PORTDB_STATE_VALID) {
- isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%d bad db port state 0x%x", XS_CHANNEL(xs), target, XS_LUN(xs), fcp->portdb[hdlidx].state);
+ if (lp->state != FC_PORTDB_STATE_VALID) {
+ isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%d bad db port state 0x%x", XS_CHANNEL(xs), target, XS_LUN(xs), lp->state);
XS_SETERR(xs, HBA_SELTIMEOUT);
return (CMD_COMPLETE);
}
- target = fcp->portdb[hdlidx].handle;
- fcp->portdb[hdlidx].dirty = 1;
} else {
sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs));
if ((sdp->role & ISP_ROLE_INITIATOR) == 0) {
@@ -4406,6 +4354,7 @@ isp_start(XS_T *xs)
if (sdp->update) {
isp_spi_update(isp, XS_CHANNEL(xs));
}
+ lp = NULL;
}
start_again:
@@ -4537,7 +4486,6 @@ isp_start(XS_T *xs)
reqp->req_cdblen = cdblen;
} else if (IS_24XX(isp)) {
ispreqt7_t *t7 = (ispreqt7_t *)local;
- fcportdb_t *lp;
if (cdblen > sizeof (t7->req_cdb)) {
isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen);
@@ -4545,8 +4493,7 @@ isp_start(XS_T *xs)
return (CMD_COMPLETE);
}
- lp = &FCPARAM(isp, XS_CHANNEL(xs))->portdb[hdlidx];
- t7->req_nphdl = target;
+ t7->req_nphdl = lp->handle;
t7->req_tidlo = lp->portid;
t7->req_tidhi = lp->portid >> 16;
t7->req_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(xs));
@@ -4566,14 +4513,12 @@ isp_start(XS_T *xs)
cdbp = t7->req_cdb;
} else {
ispreqt2_t *t2 = (ispreqt2_t *)local;
- fcportdb_t *lp;
if (cdblen > sizeof t2->req_cdb) {
isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen);
XS_SETERR(xs, HBA_BOTCH);
return (CMD_COMPLETE);
}
- lp = &FCPARAM(isp, XS_CHANNEL(xs))->portdb[hdlidx];
if (FCPARAM(isp, XS_CHANNEL(xs))->fctape_enabled && (lp->prli_word3 & PRLI_WD3_RETRY)) {
if (FCP_NEXT_CRN(isp, &t2->req_crn, xs)) {
isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%d cannot generate next CRN", XS_CHANNEL(xs), target, XS_LUN(xs));
@@ -4583,16 +4528,16 @@ isp_start(XS_T *xs)
}
if (ISP_CAP_2KLOGIN(isp)) {
ispreqt2e_t *t2e = (ispreqt2e_t *)local;
- t2e->req_target = target;
+ t2e->req_target = lp->handle;
t2e->req_scclun = XS_LUN(xs);
cdbp = t2e->req_cdb;
} else if (ISP_CAP_SCCFW(isp)) {
ispreqt2_t *t2 = (ispreqt2_t *)local;
- t2->req_target = target;
+ t2->req_target = lp->handle;
t2->req_scclun = XS_LUN(xs);
cdbp = t2->req_cdb;
} else {
- t2->req_target = target;
+ t2->req_target = lp->handle;
t2->req_lun_trn = XS_LUN(xs);
cdbp = t2->req_cdb;
}
@@ -4690,16 +4635,15 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...)
isp24xx_statusreq_t *sp;
fcparam *fcp = FCPARAM(isp, chan);
fcportdb_t *lp;
- int hdlidx;
- hdlidx = fcp->isp_dev_map[tgt] - 1;
- if (hdlidx < 0 || hdlidx >= MAX_FC_TARG) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d bad handle %d trying to reset target %d", chan, hdlidx, tgt);
+ if (tgt < 0 || tgt >= MAX_FC_TARG) {
+ isp_prt(isp, ISP_LOGWARN, "Chan %d trying to reset bad target %d", chan, tgt);
break;
}
- lp = &fcp->portdb[hdlidx];
- if (lp->state != FC_PORTDB_STATE_VALID) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d for abort of target %d no longer valid", chan, hdlidx, tgt);
+ lp = &fcp->portdb[tgt];
+ if (lp->is_target == 0 ||
+ lp->state != FC_PORTDB_STATE_VALID) {
+ isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt);
break;
}
@@ -4780,17 +4724,16 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...)
isp24xx_abrt_t local, *ab = &local, *ab2;
fcparam *fcp;
fcportdb_t *lp;
- int hdlidx;
fcp = FCPARAM(isp, chan);
- hdlidx = fcp->isp_dev_map[tgt] - 1;
- if (hdlidx < 0 || hdlidx >= MAX_FC_TARG) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d bad handle %d trying to abort target %d", chan, hdlidx, tgt);
+ if (tgt < 0 || tgt >= MAX_FC_TARG) {
+ isp_prt(isp, ISP_LOGWARN, "Chan %d trying to abort bad target %d", chan, tgt);
break;
}
- lp = &fcp->portdb[hdlidx];
- if (lp->state != FC_PORTDB_STATE_VALID) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d for abort of target %d no longer valid", chan, hdlidx, tgt);
+ lp = &fcp->portdb[tgt];
+ if (lp->is_target == 0 ||
+ lp->state != FC_PORTDB_STATE_VALID) {
+ isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt);
break;
}
isp_prt(isp, ISP_LOGALL, "Chan %d Abort Cmd for N-Port 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid);
@@ -4830,7 +4773,7 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...)
if (ab->abrt_nphdl == ISP24XX_ABRT_OKAY) {
return (0);
}
- isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d abort returned 0x%x", chan, hdlidx, ab->abrt_nphdl);
+ isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d abort returned 0x%x", chan, tgt, ab->abrt_nphdl);
break;
} else if (IS_FC(isp)) {
if (ISP_CAP_SCCFW(isp)) {
@@ -4990,6 +4933,22 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...)
} while ((r & 0xffff) == MBOX_LOOP_ID_USED);
return (r);
}
+ case ISPCTL_CHANGE_ROLE:
+ {
+ int role, r;
+
+ va_start(ap, ctl);
+ chan = va_arg(ap, int);
+ role = va_arg(ap, int);
+ va_end(ap);
+ if (IS_FC(isp)) {
+ r = isp_fc_change_role(isp, chan, role);
+ } else {
+ SDPARAM(isp, chan)->role = role;
+ r = 0;
+ }
+ return (r);
+ }
default:
isp_prt(isp, ISP_LOGERR, "Unknown Control Opcode 0x%x", ctl);
break;
@@ -7860,26 +7819,26 @@ isp_setdfltfcparm(ispsoftc_t *isp, int chan)
* not disturb an already active list of commands.
*/
-void
+int
isp_reinit(ispsoftc_t *isp, int do_load_defaults)
{
- int i;
+ int i, res = 0;
isp_reset(isp, do_load_defaults);
-
if (isp->isp_state != ISP_RESETSTATE) {
+ res = EIO;
isp_prt(isp, ISP_LOGERR, "%s: cannot reset card", __func__);
ISP_DISABLE_INTS(isp);
goto cleanup;
}
isp_init(isp);
-
if (isp->isp_state == ISP_INITSTATE) {
isp->isp_state = ISP_RUNSTATE;
}
if (isp->isp_state != ISP_RUNSTATE) {
+ res = EIO;
#ifndef ISP_TARGET_MODE
isp_prt(isp, ISP_LOGWARN, "%s: not at runstate", __func__);
#endif
@@ -7896,18 +7855,16 @@ isp_reinit(ispsoftc_t *isp, int do_load_defaults)
ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS);
}
}
- }
+ }
cleanup:
-
isp->isp_nactive = 0;
-
isp_clear_commands(isp);
if (IS_FC(isp)) {
- for (i = 0; i < isp->isp_nchan; i++) {
+ for (i = 0; i < isp->isp_nchan; i++)
ISP_MARK_PORTDB(isp, i, -1);
- }
}
+ return (res);
}
/*
diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c
index e125327..1ff64e1 100644
--- a/sys/dev/isp/isp_freebsd.c
+++ b/sys/dev/isp/isp_freebsd.c
@@ -52,11 +52,10 @@ MODULE_DEPEND(isp, cam, 1, 1, 1);
int isp_announced = 0;
int isp_fabric_hysteresis = 5;
int isp_loop_down_limit = 60; /* default loop down limit */
-int isp_change_is_bad = 0; /* "changed" devices are bad */
int isp_quickboot_time = 7; /* don't wait more than N secs for loop up */
int isp_gone_device_time = 30; /* grace time before reporting device lost */
int isp_autoconfig = 1; /* automatically attach/detach devices */
-static const char prom3[] = "Chan %d PortID 0x%06x Departed from Target %u because of %s";
+static const char prom3[] = "Chan %d [%u] PortID 0x%06x Departed because of %s";
static void isp_freeze_loopdown(ispsoftc_t *, int, char *);
static d_ioctl_t ispioctl;
@@ -102,20 +101,17 @@ isp_role_sysctl(SYSCTL_HANDLER_ARGS)
ISP_LOCK(isp);
old = FCPARAM(isp, chan)->role;
+ /* We don't allow target mode switch from here. */
+ value = (old & ISP_ROLE_TARGET) | (value & ISP_ROLE_INITIATOR);
+
/* If nothing has changed -- we are done. */
if (value == old) {
ISP_UNLOCK(isp);
return (0);
}
- /* We don't allow target mode switch from here. */
- if ((value ^ old) & ISP_ROLE_TARGET) {
- ISP_UNLOCK(isp);
- return (EPERM);
- }
-
/* Actually change the role. */
- error = isp_fc_change_role(isp, chan, value);
+ error = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, value);
ISP_UNLOCK(isp);
return (error);
}
@@ -170,6 +166,7 @@ isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan)
ISP_SET_PC(isp, chan, proc_active, 0);
isp_prt(isp, ISP_LOGERR, "cannot create test target thread");
}
+ ISP_SPI_PC(isp, chan)->num_threads += 1;
#endif
} else {
fcparam *fcp = FCPARAM(isp, chan);
@@ -208,12 +205,14 @@ isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan)
cam_sim_free(fc->sim, FALSE);
return (ENOMEM);
}
+ ISP_FC_PC(isp, chan)->num_threads += 1;
#ifdef ISP_INTERNAL_TARGET
ISP_SET_PC(isp, chan, proc_active, 1);
if (THREAD_CREATE(isp_target_thread_fc, fc, &fc->target_proc, 0, 0, "%s: isp_test_tgt%d", device_get_nameunit(isp->isp_osinfo.dev), chan)) {
ISP_SET_PC(isp, chan, proc_active, 0);
isp_prt(isp, ISP_LOGERR, "cannot create test target thread");
}
+ ISP_FC_PC(isp, chan)->num_threads += 1;
#endif
if (chan > 0) {
snprintf(name, sizeof(name), "chan%d", chan);
@@ -234,6 +233,47 @@ isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan)
return (0);
}
+static void
+isp_detach_internal_target(ispsoftc_t *isp, int chan)
+{
+#ifdef ISP_INTERNAL_TARGET
+ void *wchan;
+
+ ISP_GET_PC(isp, chan, target_proc, wchan);
+ ISP_SET_PC(isp, chan, proc_active, 0);
+ wakeup(wchan);
+#endif
+}
+
+static void
+isp_detach_chan(ispsoftc_t *isp, int chan)
+{
+ struct cam_sim *sim;
+ struct cam_path *path;
+ struct ccb_setasync csa;
+ int *num_threads;
+
+ ISP_GET_PC(isp, chan, sim, sim);
+ ISP_GET_PC(isp, chan, path, path);
+ ISP_GET_PC_ADDR(isp, chan, num_threads, num_threads);
+
+ xpt_setup_ccb(&csa.ccb_h, path, 5);
+ csa.ccb_h.func_code = XPT_SASYNC_CB;
+ csa.event_enable = 0;
+ csa.callback = isp_cam_async;
+ csa.callback_arg = sim;
+ xpt_action((union ccb *)&csa);
+ xpt_free_path(path);
+ xpt_bus_deregister(cam_sim_path(sim));
+ cam_sim_free(sim, FALSE);
+
+ /* Wait for the channel's spawned threads to exit. */
+ wakeup(isp->isp_osinfo.pc.ptr);
+ isp_detach_internal_target(isp, chan);
+ while (*num_threads != 0)
+ mtx_sleep(isp, &isp->isp_osinfo.lock, PRIBIO, "isp_reap", 100);
+}
+
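	/*
	 * The detach path above pairs with the is_exiting / num_threads /
	 * kthread_exit() changes later in this file to form a simple reap
	 * handshake; condensed (locking elided, names as in this diff):
	 *
	 *   worker (isp_kthread et al.):       reaper (isp_detach_chan):
	 *     while (osinfo.is_exiting == 0)     osinfo.is_exiting = 1;
	 *             ...work, sleep...          wakeup(<thread wchan>);
	 *     fc->num_threads -= 1;              while (*num_threads != 0)
	 *     kthread_exit();                            mtx_sleep(...);
	 */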
int
isp_attach(ispsoftc_t *isp)
{
@@ -284,13 +324,9 @@ unwind:
while (--chan >= 0) {
struct cam_sim *sim;
struct cam_path *path;
- if (IS_FC(isp)) {
- sim = ISP_FC_PC(isp, chan)->sim;
- path = ISP_FC_PC(isp, chan)->path;
- } else {
- sim = ISP_SPI_PC(isp, chan)->sim;
- path = ISP_SPI_PC(isp, chan)->path;
- }
+
+ ISP_GET_PC(isp, chan, sim, sim);
+ ISP_GET_PC(isp, chan, path, path);
xpt_free_path(path);
ISP_LOCK(isp);
xpt_bus_deregister(cam_sim_path(sim));
@@ -314,49 +350,26 @@ int
isp_detach(ispsoftc_t *isp)
{
struct cam_sim *sim;
- struct cam_path *path;
- struct ccb_setasync csa;
int chan;
ISP_LOCK(isp);
for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) {
- if (IS_FC(isp)) {
- sim = ISP_FC_PC(isp, chan)->sim;
- path = ISP_FC_PC(isp, chan)->path;
- } else {
- sim = ISP_SPI_PC(isp, chan)->sim;
- path = ISP_SPI_PC(isp, chan)->path;
- }
+ ISP_GET_PC(isp, chan, sim, sim);
if (sim->refcount > 2) {
ISP_UNLOCK(isp);
return (EBUSY);
}
}
+ /* Tell spawned threads that we're exiting. */
+ isp->isp_osinfo.is_exiting = 1;
if (isp->isp_osinfo.timer_active) {
callout_stop(&isp->isp_osinfo.tmo);
isp->isp_osinfo.timer_active = 0;
}
- for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) {
- if (IS_FC(isp)) {
- sim = ISP_FC_PC(isp, chan)->sim;
- path = ISP_FC_PC(isp, chan)->path;
- } else {
- sim = ISP_SPI_PC(isp, chan)->sim;
- path = ISP_SPI_PC(isp, chan)->path;
- }
- xpt_setup_ccb(&csa.ccb_h, path, 5);
- csa.ccb_h.func_code = XPT_SASYNC_CB;
- csa.event_enable = 0;
- csa.callback = isp_cam_async;
- csa.callback_arg = sim;
- ISP_LOCK(isp);
- xpt_action((union ccb *)&csa);
- ISP_UNLOCK(isp);
- xpt_free_path(path);
- xpt_bus_deregister(cam_sim_path(sim));
- cam_sim_free(sim, FALSE);
- }
+ for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1)
+ isp_detach_chan(isp, chan);
ISP_UNLOCK(isp);
+
if (isp->isp_osinfo.cdev) {
destroy_dev(isp->isp_osinfo.cdev);
isp->isp_osinfo.cdev = NULL;
@@ -445,38 +458,18 @@ ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td)
retval = EINVAL;
break;
}
- if (IS_FC(isp)) {
- /*
- * We don't really support dual role at present on FC cards.
- *
- * We should, but a bunch of things are currently broken,
- * so don't allow it.
- */
- if (nr == ISP_ROLE_BOTH) {
- isp_prt(isp, ISP_LOGERR, "cannot support dual role at present");
- retval = EINVAL;
- break;
- }
+ ISP_LOCK(isp);
+ if (IS_FC(isp))
*(int *)addr = FCPARAM(isp, chan)->role;
-#ifdef ISP_INTERNAL_TARGET
- ISP_LOCK(isp);
- retval = isp_fc_change_role(isp, chan, nr);
- ISP_UNLOCK(isp);
-#else
- FCPARAM(isp, chan)->role = nr;
-#endif
- } else {
+ else
*(int *)addr = SDPARAM(isp, chan)->role;
- SDPARAM(isp, chan)->role = nr;
- }
+ retval = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, nr);
+ ISP_UNLOCK(isp);
retval = 0;
break;
case ISP_RESETHBA:
ISP_LOCK(isp);
-#ifdef ISP_TARGET_MODE
- isp_del_all_wwn_entries(isp, ISP_NOCHAN);
-#endif
isp_reinit(isp, 0);
ISP_UNLOCK(isp);
retval = 0;
@@ -528,7 +521,7 @@ ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td)
break;
}
lp = &FCPARAM(isp, ifc->chan)->portdb[ifc->loopid];
- if (lp->state == FC_PORTDB_STATE_VALID || lp->target_mode) {
+ if (lp->state != FC_PORTDB_STATE_NIL) {
ifc->role = (lp->prli_word3 & SVC3_ROLE_MASK) >> SVC3_ROLE_SHIFT;
ifc->loopid = lp->handle;
ifc->portid = lp->portid;
@@ -1249,11 +1242,6 @@ isp_enable_lun(ispsoftc_t *isp, union ccb *ccb)
target = ccb->ccb_h.target_id;
lun = ccb->ccb_h.target_lun;
ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "enabling lun %u\n", lun);
- if (target != CAM_TARGET_WILDCARD && target != 0) {
- ccb->ccb_h.status = CAM_TID_INVALID;
- xpt_done(ccb);
- return;
- }
if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) {
ccb->ccb_h.status = CAM_LUN_INVALID;
xpt_done(ccb);
@@ -1466,12 +1454,6 @@ isp_disable_lun(ispsoftc_t *isp, union ccb *ccb)
target = ccb->ccb_h.target_id;
lun = ccb->ccb_h.target_lun;
ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "disabling lun %u\n", lun);
- if (target != CAM_TARGET_WILDCARD && target != 0) {
- ccb->ccb_h.status = CAM_TID_INVALID;
- xpt_done(ccb);
- return;
- }
-
if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) {
ccb->ccb_h.status = CAM_LUN_INVALID;
xpt_done(ccb);
@@ -1523,13 +1505,7 @@ isp_disable_lun(ispsoftc_t *isp, union ccb *ccb)
done:
if (status == CAM_REQ_CMP) {
tptr->enabled = 0;
- /*
- * If we have no more luns enabled for this bus,
- * delete all tracked wwns for it (if we are FC),
- * and disable target mode.
- */
if (is_any_lun_enabled(isp, bus) == 0) {
- isp_del_all_wwn_entries(isp, bus);
if (isp_disable_target_mode(isp, bus)) {
status = CAM_REQ_CMP_ERR;
}
@@ -1922,10 +1898,10 @@ isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how)
cto->ct_header.rqs_entry_count = 1;
cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM;
ATPD_SET_SEQNO(cto, atp);
- if (ISP_CAP_2KLOGIN(isp) == 0) {
- ((ct2e_entry_t *)cto)->ct_iid = cso->init_id;
+ if (ISP_CAP_2KLOGIN(isp)) {
+ ((ct2e_entry_t *)cto)->ct_iid = atp->nphdl;
} else {
- cto->ct_iid = cso->init_id;
+ cto->ct_iid = atp->nphdl;
if (ISP_CAP_SCCFW(isp) == 0) {
cto->ct_lun = ccb->ccb_h.target_lun;
}
@@ -2465,26 +2441,24 @@ isp_handle_platform_atio2(ispsoftc_t *isp, at2_entry_t *aep)
* We don't get 'suggested' sense data as we do with SCSI cards.
*/
atiop->sense_len = 0;
- if (ISP_CAP_2KLOGIN(isp)) {
- /*
- * NB: We could not possibly have 2K logins if we
- * NB: also did not have SCC FW.
- */
- atiop->init_id = ((at2e_entry_t *)aep)->at_iid;
- } else {
- atiop->init_id = aep->at_iid;
- }
/*
* If we're not in the port database, add ourselves.
*/
- if (!IS_2100(isp) && isp_find_pdb_by_loopid(isp, 0, atiop->init_id, &lp) == 0) {
- uint64_t iid =
- (((uint64_t) aep->at_wwpn[0]) << 48) |
- (((uint64_t) aep->at_wwpn[1]) << 32) |
- (((uint64_t) aep->at_wwpn[2]) << 16) |
- (((uint64_t) aep->at_wwpn[3]) << 0);
- isp_add_wwn_entry(isp, 0, iid, atiop->init_id, PORT_ANY, 0);
+ if (IS_2100(isp))
+ atiop->init_id = nphdl;
+ else {
+ if ((isp_find_pdb_by_handle(isp, 0, nphdl, &lp) == 0 ||
+ lp->state == FC_PORTDB_STATE_ZOMBIE)) {
+ uint64_t iid =
+ (((uint64_t) aep->at_wwpn[0]) << 48) |
+ (((uint64_t) aep->at_wwpn[1]) << 32) |
+ (((uint64_t) aep->at_wwpn[2]) << 16) |
+ (((uint64_t) aep->at_wwpn[3]) << 0);
+ isp_add_wwn_entry(isp, 0, iid, nphdl, PORT_ANY, 0);
+ isp_find_pdb_by_handle(isp, 0, nphdl, &lp);
+ }
+ atiop->init_id = FC_PORTDB_TGT(isp, 0, lp);
}
atiop->cdb_len = ATIO2_CDBLEN;
ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cdb, ATIO2_CDBLEN);
@@ -2513,7 +2487,7 @@ isp_handle_platform_atio2(ispsoftc_t *isp, at2_entry_t *aep)
atp->orig_datalen = aep->at_datalen;
atp->bytes_xfered = 0;
atp->lun = lun;
- atp->nphdl = atiop->init_id;
+ atp->nphdl = nphdl;
atp->sid = PORT_ANY;
atp->oxid = aep->at_oxid;
atp->cdb0 = aep->at_cdb[0];
@@ -2542,7 +2516,6 @@ isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep)
int cdbxlen;
uint16_t lun, chan, nphdl = NIL_HANDLE;
uint32_t did, sid;
- uint64_t wwn = INI_NONE;
fcportdb_t *lp;
tstate_t *tptr;
struct ccb_accept_tio *atiop;
@@ -2560,7 +2533,7 @@ isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep)
* If we can't, we're somewhat in trouble because we can't actually respond w/o that information.
* We also, as a matter of course, need to know the WWN of the initiator too.
*/
- if (ISP_CAP_MULTI_ID(isp)) {
+ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) {
/*
* Find the right channel based upon D_ID
*/
@@ -2605,11 +2578,11 @@ isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep)
*/
isp_prt(isp, ISP_LOGTINFO, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x wasn't in PDB already",
__func__, aep->at_rxid, did, chan, sid);
+ isp_dump_portdb(isp, chan);
isp_endcmd(isp, aep, NIL_HANDLE, chan, ECMD_TERMINATE, 0);
return;
}
nphdl = lp->handle;
- wwn = lp->port_wwn;
/*
* Get the tstate pointer
@@ -2697,7 +2670,7 @@ isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep)
SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle);
tptr->atio_count--;
ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO count now %d\n", tptr->atio_count);
- atiop->init_id = nphdl;
+ atiop->init_id = FC_PORTDB_TGT(isp, chan, lp);
atiop->ccb_h.target_id = FCPARAM(isp, chan)->isp_loopid;
atiop->ccb_h.target_lun = lun;
atiop->sense_len = 0;
@@ -2952,9 +2925,9 @@ isp_handle_platform_ctio(ispsoftc_t *isp, void *arg)
}
if (atp == NULL) {
/*
- * In case of target mode disable at least ISP2532 return
- * invalid zero ct_rxid value. Try to workaround that using
- * tag_id from the CCB, pointed by valid ct_syshandle.
+	 * XXX: isp_clear_commands() generates a fake CTIO with a zero
+	 * ct_rxid value, filling only ct_syshandle.  Work around that by
+	 * using the tag_id from the CCB pointed to by ct_syshandle.
*/
atp = isp_find_atpd(isp, tptr, ccb->csio.tag_id);
}
@@ -3111,7 +3084,7 @@ isp_handle_platform_notify_fc(ispsoftc_t *isp, in_fcentry_t *inp)
{
tstate_t *tptr;
uint16_t lun;
- uint32_t loopid;
+ uint32_t loopid, sid;
uint64_t wwn;
atio_private_data_t *atp;
fcportdb_t *lp;
@@ -3127,10 +3100,12 @@ isp_handle_platform_notify_fc(ispsoftc_t *isp, in_fcentry_t *inp)
} else {
loopid = inp->in_iid;
}
- if (isp_find_pdb_by_loopid(isp, 0, loopid, &lp)) {
+ if (isp_find_pdb_by_handle(isp, 0, loopid, &lp)) {
wwn = lp->port_wwn;
+ sid = lp->portid;
} else {
wwn = INI_ANY;
+ sid = PORT_ANY;
}
tptr = get_lun_statep(isp, 0, lun);
if (tptr == NULL) {
@@ -3162,7 +3137,7 @@ isp_handle_platform_notify_fc(ispsoftc_t *isp, in_fcentry_t *inp)
nt->nt_tgt = FCPARAM(isp, 0)->isp_wwpn;
nt->nt_wwn = wwn;
nt->nt_nphdl = loopid;
- nt->nt_sid = PORT_ANY;
+ nt->nt_sid = sid;
nt->nt_did = PORT_ANY;
nt->nt_lun = lun;
nt->nt_need_ack = 1;
@@ -3281,7 +3256,7 @@ isp_handle_platform_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *inot)
case IN24XX_PORT_LOGOUT:
ptr = "PORT LOGOUT";
- if (isp_find_pdb_by_loopid(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), nphdl, &lp)) {
+ if (isp_find_pdb_by_handle(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), nphdl, &lp)) {
isp_del_wwn_entry(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), lp->port_wwn, nphdl, lp->portid);
}
/* FALLTHROUGH */
@@ -3474,10 +3449,11 @@ isp_handle_platform_target_tmf(ispsoftc_t *isp, isp_notify_t *notify)
goto bad;
}
- if (isp_find_pdb_by_sid(isp, notify->nt_channel, notify->nt_sid, &lp) == 0) {
+ if (isp_find_pdb_by_sid(isp, notify->nt_channel, notify->nt_sid, &lp) == 0 &&
+ isp_find_pdb_by_handle(isp, notify->nt_channel, notify->nt_nphdl, &lp) == 0) {
inot->initiator_id = CAM_TARGET_WILDCARD;
} else {
- inot->initiator_id = lp->handle;
+ inot->initiator_id = FC_PORTDB_TGT(isp, notify->nt_channel, lp);
}
inot->seq_id = notify->nt_tagval;
inot->tag_id = notify->nt_tagval >> 32;
@@ -4269,7 +4245,7 @@ isp_target_thread(ispsoftc_t *isp, int chan)
/*
* Add resources
*/
- ISP_GET_PC_ADDR(isp, chan, target_proc, wchan);
+ ISP_GET_PC(isp, chan, target_proc, wchan);
for (i = 0; i < 4; i++) {
ccb = malloc(sizeof (*ccb), M_ISPTARG, M_WAITOK | M_ZERO);
xpt_setup_ccb(&ccb->ccb_h, wperiph->path, 1);
@@ -4357,14 +4333,24 @@ static void
isp_target_thread_pi(void *arg)
{
struct isp_spi *pi = arg;
- isp_target_thread(cam_sim_softc(pi->sim), cam_sim_bus(pi->sim));
+ ispsoftc_t *isp = cam_sim_softc(pi->sim);
+ int chan = cam_sim_bus(pi->sim);
+
+ isp_target_thread(isp, chan);
+ ISP_SPI_PC(isp, chan)->num_threads -= 1;
+ kthread_exit();
}
static void
isp_target_thread_fc(void *arg)
{
struct isp_fc *fc = arg;
- isp_target_thread(cam_sim_softc(fc->sim), cam_sim_bus(fc->sim));
+	ispsoftc_t *isp = cam_sim_softc(fc->sim);
+	int chan = cam_sim_bus(fc->sim);
+
+ isp_target_thread(isp, chan);
+ ISP_FC_PC(isp, chan)->num_threads -= 1;
+ kthread_exit();
}
static int
@@ -4607,13 +4593,6 @@ isp_make_here(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt)
xpt_free_ccb(ccb);
return;
}
-
- /*
- * Since we're about to issue a rescan, mark this device as not
- * reported gone.
- */
- fcp->reported_gone = 0;
-
xpt_rescan(ccb);
}
@@ -4627,11 +4606,6 @@ isp_make_gone(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt)
return;
}
if (xpt_create_path(&tp, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) == CAM_REQ_CMP) {
- /*
- * We're about to send out the lost device async
- * notification, so indicate that we have reported it gone.
- */
- fcp->reported_gone = 1;
xpt_async(AC_LOST_DEVICE, tp, NULL);
xpt_free_path(tp);
}
@@ -4661,7 +4635,9 @@ isp_gdt_task(void *arg, int pending)
ispsoftc_t *isp = fc->isp;
int chan = fc - isp->isp_osinfo.pc.fc;
fcportdb_t *lp;
- int dbidx, tgt, more_to_do = 0;
+ struct ac_contract ac;
+ struct ac_device_changed *adc;
+ int dbidx, more_to_do = 0;
ISP_LOCK(isp);
isp_prt(isp, ISP_LOGDEBUG0, "Chan %d GDT timer expired", chan);
@@ -4671,21 +4647,27 @@ isp_gdt_task(void *arg, int pending)
if (lp->state != FC_PORTDB_STATE_ZOMBIE) {
continue;
}
- if (lp->dev_map_idx == 0 || lp->target_mode) {
- continue;
- }
if (lp->gone_timer != 0) {
- isp_prt(isp, ISP_LOG_SANCFG, "%s: Chan %d more to do for target %u (timer=%u)", __func__, chan, lp->dev_map_idx - 1, lp->gone_timer);
lp->gone_timer -= 1;
more_to_do++;
continue;
}
- tgt = lp->dev_map_idx - 1;
- FCPARAM(isp, chan)->isp_dev_map[tgt] = 0;
- lp->dev_map_idx = 0;
+ isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Gone Device Timeout");
+ if (lp->is_target) {
+ lp->is_target = 0;
+ isp_make_gone(isp, lp, chan, dbidx);
+ }
+ if (lp->is_initiator) {
+ lp->is_initiator = 0;
+ ac.contract_number = AC_CONTRACT_DEV_CHG;
+ adc = (struct ac_device_changed *) ac.contract_data;
+ adc->wwpn = lp->port_wwn;
+ adc->port = lp->portid;
+ adc->target = dbidx;
+ adc->arrived = 0;
+ xpt_async(AC_CONTRACT, fc->path, &ac);
+ }
lp->state = FC_PORTDB_STATE_NIL;
- isp_prt(isp, ISP_LOGCONFIG, prom3, chan, lp->portid, tgt, "Gone Device Timeout");
- isp_make_gone(isp, lp, chan, tgt);
}
if (fc->ready) {
if (more_to_do) {
@@ -4721,7 +4703,9 @@ isp_ldt_task(void *arg, int pending)
ispsoftc_t *isp = fc->isp;
int chan = fc - isp->isp_osinfo.pc.fc;
fcportdb_t *lp;
- int dbidx, tgt, i;
+ struct ac_contract ac;
+ struct ac_device_changed *adc;
+ int dbidx, i;
ISP_LOCK(isp);
isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop Down Timer expired @ %lu", chan, (unsigned long) time_uptime);
@@ -4733,18 +4717,12 @@ isp_ldt_task(void *arg, int pending)
for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) {
lp = &FCPARAM(isp, chan)->portdb[dbidx];
- if (lp->state != FC_PORTDB_STATE_PROBATIONAL) {
- continue;
- }
- if (lp->dev_map_idx == 0 || lp->target_mode) {
+ if (lp->state == FC_PORTDB_STATE_NIL)
continue;
- }
/*
* XXX: CLEAN UP AND COMPLETE ANY PENDING COMMANDS FIRST!
*/
-
-
for (i = 0; i < isp->isp_maxcmds; i++) {
struct ccb_scsiio *xs;
@@ -4754,36 +4732,31 @@ isp_ldt_task(void *arg, int pending)
if ((xs = isp->isp_xflist[i].cmd) == NULL) {
continue;
}
- if (dbidx != (FCPARAM(isp, chan)->isp_dev_map[XS_TGT(xs)] - 1)) {
+ if (dbidx != XS_TGT(xs)) {
continue;
}
isp_prt(isp, ISP_LOGWARN, "command handle 0x%x for %d.%d.%d orphaned by loop down timeout",
isp->isp_xflist[i].handle, chan, XS_TGT(xs), XS_LUN(xs));
}
- /*
- * Mark that we've announced that this device is gone....
- */
- lp->announced = 1;
-
- /*
- * but *don't* change the state of the entry. Just clear
- * any target id stuff and announce to CAM that the
- * device is gone. This way any necessary PLOGO stuff
- * will happen when loop comes back up.
- */
-
- tgt = lp->dev_map_idx - 1;
- FCPARAM(isp, chan)->isp_dev_map[tgt] = 0;
- lp->dev_map_idx = 0;
- lp->state = FC_PORTDB_STATE_NIL;
- isp_prt(isp, ISP_LOGCONFIG, prom3, chan, lp->portid, tgt, "Loop Down Timeout");
- isp_make_gone(isp, lp, chan, tgt);
+ isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Loop Down Timeout");
+ if (lp->is_target) {
+ lp->is_target = 0;
+ isp_make_gone(isp, lp, chan, dbidx);
+ }
+ if (lp->is_initiator) {
+ lp->is_initiator = 0;
+ ac.contract_number = AC_CONTRACT_DEV_CHG;
+ adc = (struct ac_device_changed *) ac.contract_data;
+ adc->wwpn = lp->port_wwn;
+ adc->port = lp->portid;
+ adc->target = dbidx;
+ adc->arrived = 0;
+ xpt_async(AC_CONTRACT, fc->path, &ac);
+ }
}
- if (FCPARAM(isp, chan)->role & ISP_ROLE_INITIATOR) {
- isp_unfreeze_loopdown(isp, chan);
- }
+ isp_unfreeze_loopdown(isp, chan);
/*
* The loop down timer has expired. Wake up the kthread
* to notice that fact (or make it false).
@@ -4804,7 +4777,7 @@ isp_kthread(void *arg)
mtx_lock(&isp->isp_osinfo.lock);
- for (;;) {
+ while (isp->isp_osinfo.is_exiting == 0) {
int lb, lim;
isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d checking FC state", __func__, chan);
@@ -4900,7 +4873,9 @@ isp_kthread(void *arg)
mtx_lock(&isp->isp_osinfo.lock);
}
}
+ fc->num_threads -= 1;
mtx_unlock(&isp->isp_osinfo.lock);
+ kthread_exit();
}
static void
@@ -5272,7 +5247,6 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
fcparam *fcp = FCPARAM(isp, bus);
struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi;
struct ccb_trans_settings_fc *fc = &cts->xport_specific.fc;
- unsigned int hdlidx;
cts->protocol = PROTO_SCSI;
cts->protocol_version = SCSI_REV_2;
@@ -5284,9 +5258,8 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
fc->valid = CTS_FC_VALID_SPEED;
fc->bitrate = 100000;
fc->bitrate *= fcp->isp_gbspeed;
- hdlidx = fcp->isp_dev_map[tgt] - 1;
- if (hdlidx < MAX_FC_TARG) {
- fcportdb_t *lp = &fcp->portdb[hdlidx];
+ if (tgt < MAX_FC_TARG) {
+ fcportdb_t *lp = &fcp->portdb[tgt];
fc->wwnn = lp->node_wwn;
fc->wwpn = lp->port_wwn;
fc->port = lp->portid;
@@ -5426,21 +5399,10 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
}
break;
case KNOB_ROLE_BOTH:
-#if 0
if (fcp->role != ISP_ROLE_BOTH) {
rchange = 1;
newrole = ISP_ROLE_BOTH;
}
-#else
- /*
- * We don't really support dual role at present on FC cards.
- *
- * We should, but a bunch of things are currently broken,
- * so don't allow it.
- */
- isp_prt(isp, ISP_LOGERR, "cannot support dual role at present");
- ccb->ccb_h.status = CAM_REQ_INVALID;
-#endif
break;
}
if (rchange) {
@@ -5449,7 +5411,8 @@ isp_action(struct cam_sim *sim, union ccb *ccb)
ISP_SET_PC(isp, bus, tm_enabled, 0);
ISP_SET_PC(isp, bus, tm_luns_enabled, 0);
#endif
- if (isp_fc_change_role(isp, bus, newrole) != 0) {
+ if (isp_control(isp, ISPCTL_CHANGE_ROLE,
+ bus, newrole) != 0) {
ccb->ccb_h.status = CAM_REQ_CMP_ERR;
xpt_done(ccb);
break;
@@ -5606,17 +5569,9 @@ isp_done(XS_T *sccb)
else if ((IS_FC(isp))
&& (XS_TGT(sccb) < MAX_FC_TARG)) {
fcparam *fcp;
- int hdlidx;
fcp = FCPARAM(isp, XS_CHANNEL(sccb));
- hdlidx = fcp->isp_dev_map[XS_TGT(sccb)] - 1;
- /*
- * Note that we have reported that this device is
- * gone. If it reappears, we'll need to issue a
- * rescan.
- */
- if (hdlidx >= 0 && hdlidx < MAX_FC_TARG)
- fcp->portdb[hdlidx].reported_gone = 1;
+ fcp->portdb[XS_TGT(sccb)].is_target = 0;
}
if ((sccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
sccb->ccb_h.status |= CAM_DEV_QFRZN;
@@ -5640,14 +5595,15 @@ void
isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
{
int bus;
- static const char prom0[] = "Chan %d PortID 0x%06x handle 0x%x %s %s WWPN 0x%08x%08x";
- static const char prom2[] = "Chan %d PortID 0x%06x handle 0x%x %s %s tgt %u WWPN 0x%08x%08x";
+ static const char prom[] = "Chan %d [%d] WWPN 0x%16jx PortID 0x%06x handle 0x%x %s %s";
char buf[64];
char *msg = NULL;
target_id_t tgt;
fcportdb_t *lp;
struct isp_fc *fc;
struct cam_path *tmppath;
+ struct ac_contract ac;
+ struct ac_device_changed *adc;
va_list ap;
switch (cmd) {
@@ -5749,10 +5705,10 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
if (fc->path) {
isp_freeze_loopdown(isp, bus, msg);
}
- if (!callout_active(&fc->ldt)) {
- callout_reset(&fc->ldt, fc->loop_down_limit * hz, isp_ldt, fc);
- isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Loop Down Timer @ %lu", (unsigned long) time_uptime);
- }
+ }
+ if (!callout_active(&fc->ldt)) {
+ callout_reset(&fc->ldt, fc->loop_down_limit * hz, isp_ldt, fc);
+ isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Loop Down Timer @ %lu", (unsigned long) time_uptime);
}
}
isp_fcp_reset_crn(fc, /*tgt*/0, /*tgt_set*/ 0);
@@ -5782,36 +5738,25 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
lp = va_arg(ap, fcportdb_t *);
va_end(ap);
fc = ISP_FC_PC(isp, bus);
- lp->announced = 0;
- lp->gone_timer = 0;
- if ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->prli_word3 & PRLI_WD3_TARGET_FUNCTION)) {
- int dbidx = lp - FCPARAM(isp, bus)->portdb;
- int i;
-
- for (i = 0; i < MAX_FC_TARG; i++) {
- if (i >= FL_ID && i <= SNS_ID) {
- continue;
- }
- if (FCPARAM(isp, bus)->isp_dev_map[i] == 0) {
- break;
- }
- }
- if (i < MAX_FC_TARG) {
- FCPARAM(isp, bus)->isp_dev_map[i] = dbidx + 1;
- lp->dev_map_idx = i + 1;
- } else {
- isp_prt(isp, ISP_LOGWARN, "out of target ids");
- isp_dump_portdb(isp, bus);
- }
- }
+ tgt = FC_PORTDB_TGT(isp, bus, lp);
isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- if (lp->dev_map_idx) {
- tgt = lp->dev_map_idx - 1;
- isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "arrived at", tgt, (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
+ isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "arrived");
+ if ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) &&
+ (lp->prli_word3 & PRLI_WD3_TARGET_FUNCTION)) {
+ lp->is_target = 1;
isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1);
isp_make_here(isp, lp, bus, tgt);
- } else {
- isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "arrived", (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
+ }
+ if ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) &&
+ (lp->prli_word3 & PRLI_WD3_INITIATOR_FUNCTION)) {
+ lp->is_initiator = 1;
+ ac.contract_number = AC_CONTRACT_DEV_CHG;
+ adc = (struct ac_device_changed *) ac.contract_data;
+ adc->wwpn = lp->port_wwn;
+ adc->port = lp->portid;
+ adc->target = tgt;
+ adc->arrived = 1;
+ xpt_async(AC_CONTRACT, fc->path, &ac);
}
break;
case ISPASYNC_DEV_CHANGED:
@@ -5820,92 +5765,68 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
lp = va_arg(ap, fcportdb_t *);
va_end(ap);
fc = ISP_FC_PC(isp, bus);
- lp->announced = 0;
- lp->gone_timer = 0;
- if (isp_change_is_bad) {
- lp->state = FC_PORTDB_STATE_NIL;
- if (lp->dev_map_idx) {
- tgt = lp->dev_map_idx - 1;
- FCPARAM(isp, bus)->isp_dev_map[tgt] = 0;
- lp->dev_map_idx = 0;
- isp_prt(isp, ISP_LOGCONFIG, prom3, bus, lp->portid, tgt, "change is bad");
- isp_make_gone(isp, lp, bus, tgt);
- } else {
- isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "changed and departed",
- (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
- }
- } else {
- lp->portid = lp->new_portid;
- lp->prli_word3 = lp->new_prli_word3;
- isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- if (lp->dev_map_idx) {
- int t = lp->dev_map_idx - 1;
- FCPARAM(isp, bus)->isp_dev_map[t] = (lp - FCPARAM(isp, bus)->portdb) + 1;
- tgt = lp->dev_map_idx - 1;
- isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "changed at", tgt,
- (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
+ tgt = FC_PORTDB_TGT(isp, bus, lp);
+ isp_gen_role_str(buf, sizeof (buf), lp->new_prli_word3);
+ isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->new_portid, lp->handle, buf, "changed");
+changed:
+ if (lp->is_target !=
+ ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) &&
+ (lp->new_prli_word3 & PRLI_WD3_TARGET_FUNCTION))) {
+ lp->is_target = !lp->is_target;
+ if (lp->is_target) {
isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1);
+ isp_make_here(isp, lp, bus, tgt);
} else {
- isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "changed", (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
+ isp_make_gone(isp, lp, bus, tgt);
+ isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1);
}
}
+ if (lp->is_initiator !=
+ ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) &&
+ (lp->new_prli_word3 & PRLI_WD3_INITIATOR_FUNCTION))) {
+ lp->is_initiator = !lp->is_initiator;
+ ac.contract_number = AC_CONTRACT_DEV_CHG;
+ adc = (struct ac_device_changed *) ac.contract_data;
+ adc->wwpn = lp->port_wwn;
+ adc->port = lp->portid;
+ adc->target = tgt;
+ adc->arrived = lp->is_initiator;
+ xpt_async(AC_CONTRACT, fc->path, &ac);
+ }
break;
case ISPASYNC_DEV_STAYED:
va_start(ap, cmd);
bus = va_arg(ap, int);
lp = va_arg(ap, fcportdb_t *);
va_end(ap);
+ fc = ISP_FC_PC(isp, bus);
+ tgt = FC_PORTDB_TGT(isp, bus, lp);
isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- if (lp->dev_map_idx) {
- fc = ISP_FC_PC(isp, bus);
- tgt = lp->dev_map_idx - 1;
- isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "stayed at", tgt,
- (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
- /*
- * Only issue a rescan if we've actually reported
- * that this device is gone.
- */
- if (lp->reported_gone != 0) {
- isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "rescanned at", tgt,
- (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
- isp_make_here(isp, lp, bus, tgt);
- }
- } else {
- isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "stayed",
- (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
- }
- break;
+ isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "stayed");
+ goto changed;
case ISPASYNC_DEV_GONE:
va_start(ap, cmd);
bus = va_arg(ap, int);
lp = va_arg(ap, fcportdb_t *);
va_end(ap);
fc = ISP_FC_PC(isp, bus);
+ tgt = FC_PORTDB_TGT(isp, bus, lp);
/*
- * If this has a virtual target and we haven't marked it
- * that we're going to have isp_gdt tell the OS it's gone,
- * set the isp_gdt timer running on it.
- *
- * If it isn't marked that isp_gdt is going to get rid of it,
- * announce that it's gone.
- *
+ * If this has a virtual target or initiator, set the isp_gdt
+ * timer running on it to delay its departure.
*/
isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- if (lp->dev_map_idx && lp->announced == 0) {
- lp->announced = 1;
+ if (lp->is_target || lp->is_initiator) {
lp->state = FC_PORTDB_STATE_ZOMBIE;
- lp->gone_timer = ISP_FC_PC(isp, bus)->gone_device_time;
+ lp->gone_timer = fc->gone_device_time;
+ isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone zombie");
if (fc->ready && !callout_active(&fc->gdt)) {
isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Starting Gone Device Timer with %u seconds time now %lu", bus, lp->gone_timer, (unsigned long)time_uptime);
callout_reset(&fc->gdt, hz, isp_gdt, fc);
}
- tgt = lp->dev_map_idx - 1;
- isp_prt(isp, ISP_LOGCONFIG, prom2, bus, lp->portid, lp->handle, buf, "gone zombie at", tgt, (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
- isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1);
- } else if (lp->announced == 0) {
- isp_prt(isp, ISP_LOGCONFIG, prom0, bus, lp->portid, lp->handle, buf, "departed", (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn);
+ break;
}
+ isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone");
break;
case ISPASYNC_CHANGE_NOTIFY:
{
@@ -5971,13 +5892,11 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
case NT_LIP_RESET:
case NT_LINK_UP:
case NT_LINK_DOWN:
+ case NT_HBA_RESET:
/*
* No action need be taken here.
*/
break;
- case NT_HBA_RESET:
- isp_del_all_wwn_entries(isp, ISP_NOCHAN);
- break;
case NT_GLOBAL_LOGOUT:
case NT_LOGOUT:
/*
@@ -5985,34 +5904,6 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
*/
isp_handle_platform_target_notify_ack(isp, notify);
break;
- case NT_ARRIVED:
- {
- struct ac_contract ac;
- struct ac_device_changed *fc;
-
- ac.contract_number = AC_CONTRACT_DEV_CHG;
- fc = (struct ac_device_changed *) ac.contract_data;
- fc->wwpn = notify->nt_wwn;
- fc->port = notify->nt_sid;
- fc->target = notify->nt_nphdl;
- fc->arrived = 1;
- xpt_async(AC_CONTRACT, ISP_FC_PC(isp, notify->nt_channel)->path, &ac);
- break;
- }
- case NT_DEPARTED:
- {
- struct ac_contract ac;
- struct ac_device_changed *fc;
-
- ac.contract_number = AC_CONTRACT_DEV_CHG;
- fc = (struct ac_device_changed *) ac.contract_data;
- fc->wwpn = notify->nt_wwn;
- fc->port = notify->nt_sid;
- fc->target = notify->nt_nphdl;
- fc->arrived = 0;
- xpt_async(AC_CONTRACT, ISP_FC_PC(isp, notify->nt_channel)->path, &ac);
- break;
- }
default:
isp_prt(isp, ISP_LOGALL, "target notify code 0x%x", notify->nt_ncode);
isp_handle_platform_target_notify_ack(isp, notify);
@@ -6103,7 +5994,7 @@ isp_async(ispsoftc_t *isp, ispasync_t cmd, ...)
nt->nt_tgt = TGT_ANY;
} else {
nt->nt_tgt = FCPARAM(isp, chan)->isp_wwpn;
- if (isp_find_pdb_by_loopid(isp, chan, abts->abts_nphdl, &lp)) {
+ if (isp_find_pdb_by_handle(isp, chan, abts->abts_nphdl, &lp)) {
nt->nt_wwn = lp->port_wwn;
} else {
nt->nt_wwn = INI_ANY;
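
The ISPASYNC_DEV_ARRIVED/CHANGED handlers above replace the deleted NT_ARRIVED/NT_DEPARTED notifications: initiator arrival and departure are now reported to CAM through an AC_CONTRACT async whose contract_data area carries a struct ac_device_changed. A minimal stand-alone sketch of that payload construction, using stand-in type definitions rather than the real <cam/cam_ccb.h> ones (field layout is inferred from the diff and AC_CONTRACT_DEV_CHG's value here is illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins mimicking the CAM structures used in the hunks above. */
struct ac_device_changed {
	uint64_t	wwpn;
	uint32_t	port;
	uint32_t	target;
	uint8_t		arrived;
};

struct ac_contract {
	uint64_t	contract_number;
	uint8_t		contract_data[64];
};

#define	AC_CONTRACT_DEV_CHG	1	/* illustrative value */

static void
announce_dev_changed(uint64_t wwpn, uint32_t portid, uint32_t tgt, int arrived)
{
	struct ac_contract ac;
	struct ac_device_changed *adc;

	memset(&ac, 0, sizeof(ac));
	ac.contract_number = AC_CONTRACT_DEV_CHG;
	adc = (struct ac_device_changed *)ac.contract_data;
	adc->wwpn = wwpn;
	adc->port = portid;
	adc->target = tgt;
	adc->arrived = arrived;
	/* The driver would hand this to xpt_async(AC_CONTRACT, path, &ac). */
	printf("dev %s: WWPN 0x%016llx PortID 0x%06x tgt %u\n",
	    arrived ? "arrived" : "departed",
	    (unsigned long long)adc->wwpn, (unsigned)adc->port,
	    (unsigned)adc->target);
}

int
main(void)
{
	announce_dev_changed(0x21000024ff0a1b2cULL, 0x010203, 5, 1);
	return (0);
}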
diff --git a/sys/dev/isp/isp_freebsd.h b/sys/dev/isp/isp_freebsd.h
index b9a7d6ac..78b597f 100644
--- a/sys/dev/isp/isp_freebsd.h
+++ b/sys/dev/isp/isp_freebsd.h
@@ -270,6 +270,7 @@ struct isp_fc {
unsigned int inject_lost_data_frame;
#endif
#endif
+ int num_threads;
};
struct isp_spi {
@@ -293,6 +294,7 @@ struct isp_spi {
struct proc * target_proc;
#endif
#endif
+ int num_threads;
};
struct isposinfo {
@@ -367,6 +369,8 @@ struct isposinfo {
struct isp_spi *spi;
void *ptr;
} pc;
+
+ int is_exiting;
};
#define ISP_FC_PC(isp, chan) (&(isp)->isp_osinfo.pc.fc[(chan)])
#define ISP_SPI_PC(isp, chan) (&(isp)->isp_osinfo.pc.spi[(chan)])
diff --git a/sys/dev/isp/isp_library.c b/sys/dev/isp/isp_library.c
index b56b1dc..f7ee7bb 100644
--- a/sys/dev/isp/isp_library.c
+++ b/sys/dev/isp/isp_library.c
@@ -411,10 +411,6 @@ isp_fc_runstate(ispsoftc_t *isp, int chan, int tval)
}
}
- if ((fcp->role & ISP_ROLE_INITIATOR) == 0) {
- return (0);
- }
-
if (isp_control(isp, ISPCTL_SCAN_LOOP, chan) != 0) {
isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: scan loop fails on channel %d", chan);
return (LOOP_PDB_RCVD);
@@ -444,7 +440,7 @@ isp_dump_portdb(ispsoftc_t *isp, int chan)
int i;
for (i = 0; i < MAX_FC_TARG; i++) {
- char mb[4], buf1[64], buf2[64];
+ char buf1[64], buf2[64];
const char *dbs[8] = {
"NIL ",
"PROB",
@@ -457,18 +453,13 @@ isp_dump_portdb(ispsoftc_t *isp, int chan)
};
fcportdb_t *lp = &fcp->portdb[i];
- if (lp->state == FC_PORTDB_STATE_NIL && lp->target_mode == 0) {
+ if (lp->state == FC_PORTDB_STATE_NIL) {
continue;
}
- if (lp->dev_map_idx) {
- ISP_SNPRINTF(mb, sizeof (mb), "%3d", ((int) lp->dev_map_idx) - 1);
- } else {
- ISP_SNPRINTF(mb, sizeof (mb), "---");
- }
isp_gen_role_str(buf1, sizeof (buf1), lp->prli_word3);
isp_gen_role_str(buf2, sizeof (buf2), lp->new_prli_word3);
- isp_prt(isp, ISP_LOGALL, "Chan %d [%d]: hdl 0x%x %s al%d tgt %s %s 0x%06x =>%s 0x%06x; WWNN 0x%08x%08x WWPN 0x%08x%08x",
- chan, i, lp->handle, dbs[lp->state], lp->autologin, mb, buf1, lp->portid, buf2, lp->new_portid,
+ isp_prt(isp, ISP_LOGALL, "Chan %d [%d]: hdl 0x%x %s al%d %s 0x%06x =>%s 0x%06x; WWNN 0x%08x%08x WWPN 0x%08x%08x",
+ chan, i, lp->handle, dbs[lp->state], lp->autologin, buf1, lp->portid, buf2, lp->new_portid,
(uint32_t) (lp->node_wwn >> 32), (uint32_t) (lp->node_wwn), (uint32_t) (lp->port_wwn >> 32), (uint32_t) (lp->port_wwn));
}
}
@@ -603,31 +594,14 @@ isp_fc_change_role(ispsoftc_t *isp, int chan, int new_role)
return (ENXIO);
}
if (chan == 0) {
-#ifdef ISP_TARGET_MODE
- isp_del_all_wwn_entries(isp, chan);
-#endif
- isp_clear_commands(isp);
- isp_reset(isp, 0);
- if (isp->isp_state != ISP_RESETSTATE) {
- isp_prt(isp, ISP_LOGERR, "%s: cannot reset card", __func__);
- return (EIO);
- }
fcp->role = new_role;
- isp_init(isp);
- if (isp->isp_state != ISP_INITSTATE) {
- isp_prt(isp, ISP_LOGERR, "%s: cannot init card", __func__);
- return (EIO);
- }
- isp->isp_state = ISP_RUNSTATE;
- return (0);
+ return (isp_reinit(isp, 0));
} else if (ISP_CAP_MULTI_ID(isp)) {
mbreg_t mbs;
vp_modify_t *vp;
uint8_t qe[QENTRY_LEN], *scp;
ISP_MEMZERO(qe, QENTRY_LEN);
- /* Acquire Scratch */
-
if (FC_SCRATCH_ACQUIRE(isp, chan)) {
return (EBUSY);
}
@@ -671,12 +645,6 @@ isp_fc_change_role(ispsoftc_t *isp, int chan, int new_role)
MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan);
isp_get_vp_modify(isp, (vp_modify_t *)&scp[QENTRY_LEN], vp);
-#ifdef ISP_TARGET_MODE
- isp_del_all_wwn_entries(isp, chan);
-#endif
- /*
- * Release Scratch
- */
FC_SCRATCH_RELEASE(isp, chan);
if (vp->vp_mod_status != VP_STS_OK) {
@@ -2358,8 +2326,10 @@ isp_destroy_tgt_handle(ispsoftc_t *isp, uint32_t handle)
}
}
+#endif
+
/*
- * Find target mode entries
+ * Find port database entries
*/
int
isp_find_pdb_by_wwn(ispsoftc_t *isp, int chan, uint64_t wwn, fcportdb_t **lptr)
@@ -2370,10 +2340,10 @@ isp_find_pdb_by_wwn(ispsoftc_t *isp, int chan, uint64_t wwn, fcportdb_t **lptr)
if (chan >= isp->isp_nchan)
return (0);
fcp = FCPARAM(isp, chan);
- for (i = MAX_FC_TARG - 1; i >= 0; i--) {
+ for (i = 0; i < MAX_FC_TARG; i++) {
fcportdb_t *lp = &fcp->portdb[i];
- if (lp->target_mode == 0)
+ if (lp->state == FC_PORTDB_STATE_NIL)
continue;
if (lp->port_wwn == wwn) {
*lptr = lp;
@@ -2383,8 +2353,10 @@ isp_find_pdb_by_wwn(ispsoftc_t *isp, int chan, uint64_t wwn, fcportdb_t **lptr)
return (0);
}
+#ifdef ISP_TARGET_MODE
+
int
-isp_find_pdb_by_loopid(ispsoftc_t *isp, int chan, uint32_t loopid, fcportdb_t **lptr)
+isp_find_pdb_by_handle(ispsoftc_t *isp, int chan, uint32_t handle, fcportdb_t **lptr)
{
fcparam *fcp;
int i;
@@ -2392,9 +2364,15 @@ isp_find_pdb_by_loopid(ispsoftc_t *isp, int chan, uint32_t loopid, fcportdb_t **
if (chan >= isp->isp_nchan)
return (0);
fcp = FCPARAM(isp, chan);
- if ((i = fcp->isp_tgt_map[loopid]) > 0) {
- *lptr = &fcp->portdb[i - 1];
- return (1);
+ for (i = 0; i < MAX_FC_TARG; i++) {
+ fcportdb_t *lp = &fcp->portdb[i];
+
+ if (lp->state == FC_PORTDB_STATE_NIL)
+ continue;
+ if (lp->handle == handle) {
+ *lptr = lp;
+ return (1);
+ }
}
return (0);
}
@@ -2408,10 +2386,10 @@ isp_find_pdb_by_sid(ispsoftc_t *isp, int chan, uint32_t sid, fcportdb_t **lptr)
if (chan >= isp->isp_nchan)
return (0);
fcp = FCPARAM(isp, chan);
- for (i = MAX_FC_TARG - 1; i >= 0; i--) {
+ for (i = 0; i < MAX_FC_TARG; i++) {
fcportdb_t *lp = &fcp->portdb[i];
- if (lp->target_mode == 0)
+ if (lp->state == FC_PORTDB_STATE_NIL)
continue;
if (lp->portid == sid) {
*lptr = lp;
@@ -2448,14 +2426,13 @@ isp_add_wwn_entry(ispsoftc_t *isp, int chan, uint64_t ini, uint16_t nphdl, uint3
char buf[64];
fcparam *fcp;
fcportdb_t *lp;
- isp_notify_t nt;
- int i, something, take, taken;
+ int i, change;
fcp = FCPARAM(isp, chan);
if (nphdl >= MAX_NPORT_HANDLE) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d IID 0x%016llx "
- "N-Port handle 0x%04x Port ID 0x%06x -- bad handle",
- chan, (unsigned long long) ini, nphdl, s_id);
+ isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN, "Chan %d WWPN 0x%016llx "
+ "PortID 0x%06x handle 0x%x -- bad handle",
+ chan, (unsigned long long) ini, s_id, nphdl);
return;
}
@@ -2464,146 +2441,118 @@ isp_add_wwn_entry(ispsoftc_t *isp, int chan, uint64_t ini, uint16_t nphdl, uint3
* with new parameters. Some cases of update can be suspicious,
* so log them verbosely and dump the whole port database.
*/
- if ((i = fcp->isp_tgt_map[nphdl]) > 0) {
- take = taken = i - 1;
- lp = &fcp->portdb[taken];
- something = 0;
+ if ((VALID_INI(ini) && isp_find_pdb_by_wwn(isp, chan, ini, &lp)) ||
+ (s_id != PORT_NONE && isp_find_pdb_by_sid(isp, chan, s_id, &lp))) {
+ change = 0;
+ lp->new_portid = lp->portid;
+ lp->new_prli_word3 = lp->prli_word3;
if (s_id != PORT_NONE && lp->portid != s_id) {
if (lp->portid == PORT_NONE) {
isp_prt(isp, ISP_LOGTINFO,
- "Chan %d IID 0x%016llx N-port handle 0x%04x "
- "gets Port ID 0x%06x",
+ "Chan %d WWPN 0x%016llx handle 0x%x "
+ "gets PortID 0x%06x",
chan, (unsigned long long) lp->port_wwn,
nphdl, s_id);
} else {
isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
- "Chan %d IID 0x%016llx N-port handle 0x%04x "
- "changes Port ID 0x%06x to 0x%06x",
+ "Chan %d WWPN 0x%016llx handle 0x%x "
+ "changes PortID 0x%06x to 0x%06x",
chan, (unsigned long long) lp->port_wwn,
nphdl, lp->portid, s_id);
if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
isp_dump_portdb(isp, chan);
}
- lp->portid = s_id;
- something++;
+ lp->new_portid = s_id;
+ change++;
}
if (VALID_INI(ini) && lp->port_wwn != ini) {
if (!VALID_INI(lp->port_wwn)) {
isp_prt(isp, ISP_LOGTINFO,
- "Chan %d N-port handle 0x%04x Port ID "
- "0x%06x gets WWN 0x%016llxx",
- chan, nphdl, lp->portid,
+ "Chan %d PortID 0x%06x handle 0x%x "
+ "gets WWN 0x%016llxx",
+ chan, lp->portid, nphdl,
(unsigned long long) ini);
} else if (lp->port_wwn != ini) {
isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
- "Chan %d N-port handle 0x%04x Port ID "
- "0x%06x changes WWN 0x%016llx to 0x%016llx",
- chan, nphdl, lp->portid,
+ "Chan %d PortID 0x%06x handle 0x%x "
+ "changes WWN 0x%016llx to 0x%016llx",
+ chan, lp->portid, nphdl,
(unsigned long long) lp->port_wwn,
(unsigned long long) ini);
if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
isp_dump_portdb(isp, chan);
}
lp->port_wwn = ini;
- something++;
+ change++;
}
- if (lp->prli_word3 != prli_params) {
- lp->prli_word3 = prli_params;
- isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
+ if (prli_params != 0 && lp->prli_word3 != prli_params) {
+ isp_gen_role_str(buf, sizeof (buf), prli_params);
isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
- "Chan %d IID 0x%016llx N-Port Handle 0x%04x "
- "Port ID 0x%06x changes PRLI Word 3 %s",
+ "Chan %d WWPN 0x%016llx PortID 0x%06x "
+ "handle 0x%x changes PRLI Word 3 %s",
chan, (unsigned long long) lp->port_wwn,
- lp->handle, lp->portid, buf);
- something++;
+ lp->portid, lp->handle, buf);
+ lp->new_prli_word3 = prli_params;
+ change++;
}
- if (!something) {
- isp_prt(isp, ISP_LOGTINFO,
- "Chan %d IID 0x%016llx N-Port Handle 0x%04x "
- "Port ID 0x%06x reentered",
+ if (lp->handle != nphdl) {
+ isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
+ "Chan %d WWPN 0x%016llx PortID 0x%06x "
+ "changes handle 0x%x to 0x%x",
chan, (unsigned long long) lp->port_wwn,
- lp->handle, lp->portid);
- }
- } else
- take = taken = -1;
-
- /*
- * Search for records colliding on handler, Port ID or WWN.
- * Remove any found collisions, logging suspicious cases of
- * still valid records.
- */
- for (i = 0; i < MAX_FC_TARG; i++) {
- lp = &fcp->portdb[i];
- if (lp->target_mode == 0 || i == take)
- continue;
- if (lp->handle != nphdl && lp->portid != s_id &&
- lp->port_wwn != ini)
- continue;
- if (lp->state == FC_PORTDB_STATE_VALID) {
- isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
- "Chan %d IID 0x%016llx N-Port Handle 0x%04x "
- "Port ID 0x%06x is conflicting",
+ lp->portid, lp->handle, nphdl);
+ lp->handle = nphdl;
+ change++;
+ }
+ lp->state = FC_PORTDB_STATE_VALID;
+ if (change) {
+ isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp);
+ lp->portid = lp->new_portid;
+ lp->prli_word3 = lp->new_prli_word3;
+ lp->new_prli_word3 = 0;
+ lp->new_portid = 0;
+ } else {
+ isp_prt(isp, ISP_LOGTINFO,
+ "Chan %d WWPN 0x%016llx PortID 0x%06x "
+ "handle 0x%x reentered",
chan, (unsigned long long) lp->port_wwn,
- lp->handle, lp->portid);
- if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
- isp_dump_portdb(isp, chan);
- isp_del_wwn_entry(isp, chan,
- lp->port_wwn, lp->handle, lp->portid);
+ lp->portid, lp->handle);
+ isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp);
}
- ISP_MEMZERO(lp, sizeof (fcportdb_t));
- take = i;
- }
-
- /* If valid record already exists -- we are done. */
- if (taken >= 0)
return;
+ }
/* Search for room to insert new record. */
- if (take < 0) {
- for (i = MAX_FC_TARG - 1; i >= 0; i--) {
- if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL) {
- take = i;
- break;
- }
- }
+ for (i = 0; i < MAX_FC_TARG; i++) {
+ if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL)
+ break;
}
- if (take < 0) {
+ if (i >= MAX_FC_TARG) {
isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
- "Chan %d IID 0x%016llx N-Port Handle 0x%04x Port ID 0x%06x "
+ "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x "
"-- no room in port database",
- chan, (unsigned long long) ini, nphdl, s_id);
+ chan, (unsigned long long) ini, s_id, nphdl);
if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
isp_dump_portdb(isp, chan);
return;
}
/* Insert new record and mark it valid. */
- lp = &fcp->portdb[take];
+ lp = &fcp->portdb[i];
ISP_MEMZERO(lp, sizeof (fcportdb_t));
- lp->target_mode = 1;
lp->handle = nphdl;
lp->portid = s_id;
lp->port_wwn = ini;
- lp->prli_word3 = prli_params;
+ lp->prli_word3 = (prli_params != 0) ? prli_params : PRLI_WD3_INITIATOR_FUNCTION;
lp->state = FC_PORTDB_STATE_VALID;
- fcp->isp_tgt_map[nphdl] = take + 1;
isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
- isp_prt(isp, ISP_LOGTINFO, "Chan %d IID 0x%016llx N-Port Handle 0x%04x"
- " Port ID 0x%06x vtgt %d %s added", chan,
- (unsigned long long) ini, nphdl, s_id, take, buf);
-
- /* Notify above levels about new initiator arrival. */
- ISP_MEMZERO(&nt, sizeof (nt));
- nt.nt_hba = isp;
- nt.nt_wwn = ini;
- nt.nt_tgt = FCPARAM(isp, chan)->isp_wwpn;
- nt.nt_sid = s_id;
- nt.nt_did = FCPARAM(isp, chan)->isp_portid;
- nt.nt_nphdl = nphdl;
- nt.nt_channel = chan;
- nt.nt_ncode = NT_ARRIVED;
- isp_async(isp, ISPASYNC_TARGET_NOTIFY, &nt);
+ isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx "
+ "PortID 0x%06x handle 0x%x vtgt %d %s added", chan,
+ (unsigned long long) ini, s_id, nphdl, i, buf);
+
+ /* Notify above levels about new port arrival. */
+ isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp);
}
/*
@@ -2613,45 +2562,27 @@ void
isp_del_wwn_entry(ispsoftc_t *isp, int chan, uint64_t ini, uint16_t nphdl, uint32_t s_id)
{
fcparam *fcp;
- isp_notify_t nt;
fcportdb_t *lp;
if (nphdl >= MAX_NPORT_HANDLE) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d IID 0x%016llx bad N-Port handle 0x%04x Port ID 0x%06x",
- chan, (unsigned long long) ini, nphdl, s_id);
+ isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x bad handle 0x%x",
+ chan, (unsigned long long) ini, s_id, nphdl);
return;
}
fcp = FCPARAM(isp, chan);
- if (fcp->isp_tgt_map[nphdl] == 0) {
- lp = NULL;
- } else {
- lp = &fcp->portdb[fcp->isp_tgt_map[nphdl] - 1];
- if (lp->target_mode == 0) {
- lp = NULL;
- }
- }
- if (lp == NULL) {
- isp_prt(isp, ISP_LOGWARN, "Chan %d IID 0x%016llx N-Port Handle 0x%04x Port ID 0x%06x cannot be found to be deleted",
- chan, (unsigned long long) ini, nphdl, s_id);
+ if (isp_find_pdb_by_handle(isp, chan, nphdl, &lp) == 0) {
+ isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x cannot be found to be deleted",
+ chan, (unsigned long long) ini, s_id, nphdl);
isp_dump_portdb(isp, chan);
return;
}
- isp_prt(isp, ISP_LOGTINFO, "Chan %d IID 0x%016llx N-Port Handle 0x%04x Port ID 0x%06x vtgt %d deleted",
- chan, (unsigned long long) lp->port_wwn, nphdl, lp->portid, fcp->isp_tgt_map[nphdl] - 1);
- fcp->isp_tgt_map[nphdl] = 0;
- lp->state = FC_PORTDB_STATE_DEAD;
+ isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x vtgt %d deleted",
+ chan, (unsigned long long) lp->port_wwn, lp->portid, nphdl, FC_PORTDB_TGT(isp, chan, lp));
+ lp->state = FC_PORTDB_STATE_NIL;
- ISP_MEMZERO(&nt, sizeof (nt));
- nt.nt_hba = isp;
- nt.nt_wwn = lp->port_wwn;
- nt.nt_tgt = FCPARAM(isp, chan)->isp_wwpn;
- nt.nt_sid = lp->portid;
- nt.nt_did = FCPARAM(isp, chan)->isp_portid;
- nt.nt_nphdl = nphdl;
- nt.nt_channel = chan;
- nt.nt_ncode = NT_DEPARTED;
- isp_async(isp, ISPASYNC_TARGET_NOTIFY, &nt);
+ /* Notify above levels about gone port. */
+ isp_async(isp, ISPASYNC_DEV_GONE, chan, lp);
}
void
@@ -2682,11 +2613,11 @@ isp_del_all_wwn_entries(ispsoftc_t *isp, int chan)
if (fcp == NULL) {
return;
}
- for (i = 0; i < MAX_NPORT_HANDLE; i++) {
- if (fcp->isp_tgt_map[i]) {
- fcportdb_t *lp = &fcp->portdb[fcp->isp_tgt_map[i] - 1];
+ for (i = 0; i < MAX_FC_TARG; i++) {
+ fcportdb_t *lp = &fcp->portdb[i];
+
+ if (lp->state != FC_PORTDB_STATE_NIL)
isp_del_wwn_entry(isp, chan, lp->port_wwn, lp->handle, lp->portid);
- }
}
}
@@ -2713,7 +2644,7 @@ isp_del_wwn_entries(ispsoftc_t *isp, isp_notify_t *mp)
* We need to find the actual entry so we can delete it.
*/
if (mp->nt_nphdl != NIL_HANDLE) {
- if (isp_find_pdb_by_loopid(isp, mp->nt_channel, mp->nt_nphdl, &lp)) {
+ if (isp_find_pdb_by_handle(isp, mp->nt_channel, mp->nt_nphdl, &lp)) {
isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid);
return;
}
@@ -2730,8 +2661,8 @@ isp_del_wwn_entries(ispsoftc_t *isp, isp_notify_t *mp)
return;
}
}
- isp_prt(isp, ISP_LOGWARN, "Chan %d unable to find entry to delete N-port handle 0x%04x initiator WWN 0x%016llx Port ID 0x%06x",
- mp->nt_channel, mp->nt_nphdl, (unsigned long long) mp->nt_wwn, mp->nt_sid);
+ isp_prt(isp, ISP_LOGWARN, "Chan %d unable to find entry to delete WWPN 0x%016jx PortID 0x%06x handle 0x%x",
+ mp->nt_channel, mp->nt_wwn, mp->nt_sid, mp->nt_nphdl);
}
void
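
isp_find_pdb_by_handle() above replaces the isp_tgt_map side table with a direct scan of the port database, skipping NIL slots. A small userland sketch of the same lookup, with stand-in types (names and sizes are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

#define	STATE_NIL	0	/* stand-in for FC_PORTDB_STATE_NIL */
#define	MAX_TARG	16	/* stand-in for MAX_FC_TARG */

struct pdb_entry {		/* stand-in for fcportdb_t */
	uint16_t	handle;
	int		state;
};

/* Return 1 and set *out when a non-NIL entry matches the handle. */
static int
find_pdb_by_handle(struct pdb_entry *pdb, uint32_t handle,
    struct pdb_entry **out)
{
	int i;

	for (i = 0; i < MAX_TARG; i++) {
		if (pdb[i].state == STATE_NIL)
			continue;
		if (pdb[i].handle == handle) {
			*out = &pdb[i];
			return (1);
		}
	}
	return (0);
}

int
main(void)
{
	struct pdb_entry pdb[MAX_TARG] = { { 0 } };
	struct pdb_entry *lp;

	pdb[3].state = 1;
	pdb[3].handle = 0x81;
	printf("found: %d\n", find_pdb_by_handle(pdb, 0x81, &lp));
	return (0);
}

The scan is O(MAX_FC_TARG) where the map was O(1), but the table is small and dropping the map removes the whole class of map/portdb consistency bugs that the deleted collision-handling code in isp_add_wwn_entry() existed to paper over.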
diff --git a/sys/dev/isp/isp_library.h b/sys/dev/isp/isp_library.h
index a326bfa..cc0cfaa 100644
--- a/sys/dev/isp/isp_library.h
+++ b/sys/dev/isp/isp_library.h
@@ -165,9 +165,10 @@ int isp_allocate_xs_tgt(ispsoftc_t *, void *, uint32_t *);
void *isp_find_xs_tgt(ispsoftc_t *, uint32_t);
uint32_t isp_find_tgt_handle(ispsoftc_t *, void *);
void isp_destroy_tgt_handle(ispsoftc_t *, uint32_t);
-
+#endif
int isp_find_pdb_by_wwn(ispsoftc_t *, int, uint64_t, fcportdb_t **);
-int isp_find_pdb_by_loopid(ispsoftc_t *, int, uint32_t, fcportdb_t **);
+#ifdef ISP_TARGET_MODE
+int isp_find_pdb_by_handle(ispsoftc_t *, int, uint32_t, fcportdb_t **);
int isp_find_pdb_by_sid(ispsoftc_t *, int, uint32_t, fcportdb_t **);
void isp_find_chan_by_did(ispsoftc_t *, uint32_t, uint16_t *);
void isp_add_wwn_entry(ispsoftc_t *, int, uint64_t, uint16_t, uint32_t, uint16_t);
diff --git a/sys/dev/isp/isp_target.h b/sys/dev/isp/isp_target.h
index 40a1732..8922a33 100644
--- a/sys/dev/isp/isp_target.h
+++ b/sys/dev/isp/isp_target.h
@@ -51,8 +51,6 @@ typedef enum {
NT_LOGOUT,
NT_GLOBAL_LOGOUT,
NT_CHANGED,
- NT_ARRIVED,
- NT_DEPARTED,
NT_HBA_RESET
} isp_ncode_t;
diff --git a/sys/dev/isp/ispmbox.h b/sys/dev/isp/ispmbox.h
index 85b31e5..d2a29b2 100644
--- a/sys/dev/isp/ispmbox.h
+++ b/sys/dev/isp/ispmbox.h
@@ -1442,7 +1442,7 @@ typedef struct {
uint16_t snscb_addr[4]; /* response buffer address */
uint16_t snscb_sblen; /* subcommand buffer length (words) */
uint16_t snscb_reserved1;
- uint16_t snscb_data[1]; /* variable data */
+ uint16_t snscb_data[]; /* variable data */
} sns_screq_t; /* Subcommand Request Structure */
typedef struct {
@@ -1503,7 +1503,7 @@ typedef struct {
uint8_t snscb_port_type;
uint8_t snscb_port_id[3];
uint8_t snscb_portname[8];
- uint16_t snscb_data[1]; /* variable data */
+ uint16_t snscb_data[]; /* variable data */
} sns_scrsp_t; /* Subcommand Response Structure */
typedef struct {
diff --git a/sys/dev/isp/ispvar.h b/sys/dev/isp/ispvar.h
index 4ac91b9..2d5379a 100644
--- a/sys/dev/isp/ispvar.h
+++ b/sys/dev/isp/ispvar.h
@@ -344,12 +344,7 @@ typedef struct {
* devices) or by the driver (e.g., for fabric devices).
*
* It has a state. If the state is VALID, that means that we've logged into
- * the device. We also *may* have a initiator map index entry. This is a value
- * from 0..MAX_FC_TARG that is used to index into the isp_dev_map array. If
- * the value therein is non-zero, then that value minus one is used to index
- * into the Port Database to find the handle for forming commands. There is
- * back-index minus one value within to Port Database entry that tells us
- * which entry in isp_dev_map points to us (to avoid searching).
+ * the device.
*
* Local loop devices the firmware automatically performs PLOGI on for us
* (which is why that handle is imposed upon us). Fabric devices we assign
@@ -381,9 +376,6 @@ typedef struct {
* duples.
*
* + There can never be two non-NIL entries with the same handle.
- *
- * + There can never be two non-NIL entries which have the same dev_map_idx
- * value.
*/
typedef struct {
/*
@@ -394,9 +386,6 @@ typedef struct {
uint16_t handle;
/*
- * The dev_map_idx, if nonzero, is the system virtual target ID (+1)
- * as a cross-reference with the isp_dev_map.
- *
* A device is 'autologin' if the firmware automatically logs into
* it (re-logins as needed). Basically, local private loop devices.
*
@@ -404,27 +393,24 @@ typedef struct {
*
* The state is the current state of this entry.
*
+ * is_target reflects whether this port currently acts as a target.
+ *
+ * is_initiator reflects whether this port currently acts as an initiator.
+ *
* Portid is obvious, as are node && port WWNs. The new_role and
* new_portid is for when we are pending a change.
- *
- * The 'target_mode' tag means that this entry arrived via a
- * target mode command and is immune from normal flushing rules.
- * You should also never see anything with an initiator role
- * with this set.
*/
uint16_t prli_word3; /* PRLI parameters */
uint16_t new_prli_word3; /* Incoming new PRLI parameters */
- uint16_t dev_map_idx : 12,
+ uint16_t : 12,
autologin : 1, /* F/W does PLOGI/PLOGO */
state : 3;
- uint32_t : 7,
- target_mode : 1,
+ uint32_t : 6,
+ is_target : 1,
+ is_initiator : 1,
portid : 24;
uint32_t
- : 5,
- reported_gone : 1,
- announced : 1,
- dirty : 1, /* commands have been run */
+ : 8,
new_portid : 24;
uint64_t node_wwn;
uint64_t port_wwn;
@@ -440,6 +426,8 @@ typedef struct {
#define FC_PORTDB_STATE_ZOMBIE 6
#define FC_PORTDB_STATE_VALID 7
+#define FC_PORTDB_TGT(isp, bus, lp) ((int)((lp) - FCPARAM(isp, bus)->portdb))
+
/*
* FC card specific information
*
@@ -492,27 +480,6 @@ typedef struct {
fcportdb_t portdb[MAX_FC_TARG];
/*
- * This maps system virtual 'target' id to a portdb entry.
- *
- * The mapping function is to take any non-zero entry and
- * subtract one to get the portdb index. This means that
- * entries which are zero are unmapped (i.e., don't exist).
- */
- uint16_t isp_dev_map[MAX_FC_TARG];
-
-#ifdef ISP_TARGET_MODE
- /*
- * This maps N-Port Handle to portdb entry so we
- * don't have to search for every incoming command.
- *
- * The mapping function is to take any non-zero entry and
- * subtract one to get the portdb index. This means that
- * entries which are zero are unmapped (i.e., don't exist).
- */
- uint16_t isp_tgt_map[MAX_NPORT_HANDLE];
-#endif
-
- /*
* Scratch DMA mapped in area to fetch Port Database stuff, etc.
*/
void * isp_scratch;
@@ -855,7 +822,7 @@ void isp_init(ispsoftc_t *);
/*
* Reset the ISP and call completion for any orphaned commands.
*/
-void isp_reinit(ispsoftc_t *, int);
+int isp_reinit(ispsoftc_t *, int);
/*
* Internal Interrupt Service Routine
@@ -915,6 +882,8 @@ void isp_done(XS_T *);
* Get PDB on this channel for this N-port handle
* ... ISPCTL_PLOGX, isp_plcmd_t *)
* Perform a port login/logout
+ * ... ISPCTL_CHANGE_ROLE, int channel, int role);
+ * Change role of specified channel
*
* ISPCTL_PDB_SYNC is somewhat misnamed. It actually is the final step, in
* order, of ISPCTL_FCLINK_TEST, ISPCTL_SCAN_FABRIC, and ISPCTL_SCAN_LOOP.
@@ -937,7 +906,8 @@ typedef enum {
ISPCTL_GET_NAMES,
ISPCTL_RUN_MBOXCMD,
ISPCTL_GET_PDB,
- ISPCTL_PLOGX
+ ISPCTL_PLOGX,
+ ISPCTL_CHANGE_ROLE
} ispctl_t;
int isp_control(ispsoftc_t *, ispctl_t, ...);
diff --git a/sys/dev/uart/uart_bus_pci.c b/sys/dev/uart/uart_bus_pci.c
index b297502..c9ddc76 100644
--- a/sys/dev/uart/uart_bus_pci.c
+++ b/sys/dev/uart/uart_bus_pci.c
@@ -120,6 +120,7 @@ static const struct pci_id pci_ns8250_ids[] = {
8 * DEFAULT_RCLK },
{ 0x8086, 0x1c3d, 0xffff, 0, "Intel AMT - KT Controller", 0x10 },
{ 0x8086, 0x1d3d, 0xffff, 0, "Intel C600/X79 Series Chipset KT Controller", 0x10 },
+{ 0x8086, 0x1e3d, 0xffff, 0, "Intel Panther Point KT Controller", 0x10 },
{ 0x8086, 0x2a07, 0xffff, 0, "Intel AMT - PM965/GM965 KT Controller", 0x10 },
{ 0x8086, 0x2a47, 0xffff, 0, "Mobile 4 Series Chipset KT Controller", 0x10 },
{ 0x8086, 0x2e17, 0xffff, 0, "4 Series Chipset Serial KT Controller", 0x10 },
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index e544bba..716faa3 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -64,6 +64,8 @@ struct fifoinfo {
struct pipe *fi_pipe;
long fi_readers;
long fi_writers;
+ u_int fi_rgen;
+ u_int fi_wgen;
};
static vop_print_t fifo_print;
@@ -137,6 +139,7 @@ fifo_open(ap)
struct thread *td;
struct fifoinfo *fip;
struct pipe *fpipe;
+ u_int gen;
int error, stops_deferred;
vp = ap->a_vp;
@@ -164,6 +167,7 @@ fifo_open(ap)
PIPE_LOCK(fpipe);
if (ap->a_mode & FREAD) {
fip->fi_readers++;
+ fip->fi_rgen++;
if (fip->fi_readers == 1) {
fpipe->pipe_state &= ~PIPE_EOF;
if (fip->fi_writers > 0)
@@ -179,6 +183,7 @@ fifo_open(ap)
return (ENXIO);
}
fip->fi_writers++;
+ fip->fi_wgen++;
if (fip->fi_writers == 1) {
fpipe->pipe_state &= ~PIPE_EOF;
if (fip->fi_readers > 0)
@@ -187,6 +192,7 @@ fifo_open(ap)
}
if ((ap->a_mode & O_NONBLOCK) == 0) {
if ((ap->a_mode & FREAD) && fip->fi_writers == 0) {
+ gen = fip->fi_wgen;
VOP_UNLOCK(vp, 0);
stops_deferred = sigallowstop();
error = msleep(&fip->fi_readers, PIPE_MTX(fpipe),
@@ -194,7 +200,7 @@ fifo_open(ap)
if (stops_deferred)
sigdeferstop();
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- if (error) {
+ if (error != 0 && gen == fip->fi_wgen) {
fip->fi_readers--;
if (fip->fi_readers == 0) {
PIPE_LOCK(fpipe);
@@ -214,6 +220,7 @@ fifo_open(ap)
*/
}
if ((ap->a_mode & FWRITE) && fip->fi_readers == 0) {
+ gen = fip->fi_rgen;
VOP_UNLOCK(vp, 0);
stops_deferred = sigallowstop();
error = msleep(&fip->fi_writers, PIPE_MTX(fpipe),
@@ -221,7 +228,7 @@ fifo_open(ap)
if (stops_deferred)
sigdeferstop();
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- if (error) {
+ if (error != 0 && gen == fip->fi_rgen) {
fip->fi_writers--;
if (fip->fi_writers == 0) {
PIPE_LOCK(fpipe);
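
The fifo_open() change guards the error rollback with a generation counter: the sleeper snapshots the peer side's generation before msleep(), and after an interrupted sleep it only undoes its own registration when the generation is unchanged, i.e. no peer arrived in the window. A userland sketch of the same guard using POSIX condition variables (a toy analogue, not the fifofs code):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int readers, writers;
static unsigned wgen;		/* bumped on every writer arrival */

static void
writer_open(void)
{
	pthread_mutex_lock(&lock);
	writers++;
	wgen++;			/* record that a writer showed up */
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lock);
}

static int
reader_open(const struct timespec *deadline)
{
	unsigned gen;
	int error = 0;

	pthread_mutex_lock(&lock);
	readers++;
	gen = wgen;		/* snapshot before sleeping */
	while (writers == 0 && error == 0)
		error = pthread_cond_timedwait(&cv, &lock, deadline);
	/*
	 * Roll back only if no writer arrived while we slept; if one
	 * did, it already observed our registration and acted on it.
	 */
	if (error != 0 && gen == wgen)
		readers--;
	pthread_mutex_unlock(&lock);
	return (error);
}

int
main(void)
{
	struct timespec dl;

	clock_gettime(CLOCK_REALTIME, &dl);
	dl.tv_sec += 1;
	writer_open();		/* a writer arrives first */
	printf("reader_open -> %d (readers %d)\n", reader_open(&dl), readers);
	return (0);
}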
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
index eae70229..b00aac3 100644
--- a/sys/fs/procfs/procfs_map.c
+++ b/sys/fs/procfs/procfs_map.c
@@ -159,11 +159,11 @@ procfs_doprocmap(PFS_FILL_ARGS)
freepath = NULL;
fullpath = "-";
if (lobj) {
+ vp = NULL;
switch (lobj->type) {
default:
case OBJT_DEFAULT:
type = "default";
- vp = NULL;
break;
case OBJT_VNODE:
type = "vnode";
@@ -171,13 +171,19 @@ procfs_doprocmap(PFS_FILL_ARGS)
vref(vp);
break;
case OBJT_SWAP:
- type = "swap";
- vp = NULL;
+ if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+ type = "vnode";
+ if ((lobj->flags & OBJ_TMPFS) != 0) {
+ vp = lobj->un_pager.swp.swp_tmpfs;
+ vref(vp);
+ }
+ } else {
+ type = "swap";
+ }
break;
case OBJT_SG:
case OBJT_DEVICE:
type = "device";
- vp = NULL;
break;
}
if (lobj != obj)
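
This procfs change (and the matching kern_proc.c hunks below) stops reporting tmpfs-backed mappings as anonymous swap: a swap object flagged OBJ_TMPFS_NODE belongs to a tmpfs file, so it is classified as a vnode mapping, and when OBJ_TMPFS is also set the tmpfs vnode is referenced for pathname resolution. A compact sketch of just the classification step (the flag values here are illustrative, not the kernel's):

#include <stdio.h>

#define	OBJ_TMPFS_NODE	0x01	/* object belongs to a tmpfs node */
#define	OBJ_TMPFS	0x02	/* tmpfs vnode reference is valid */

static const char *
classify_swap_object(unsigned flags, int *want_vref)
{
	*want_vref = 0;
	if (flags & OBJ_TMPFS_NODE) {
		/* tmpfs file: report as a vnode mapping */
		if (flags & OBJ_TMPFS)
			*want_vref = 1;	/* take a vnode reference */
		return ("vnode");
	}
	return ("swap");		/* plain anonymous swap object */
}

int
main(void)
{
	int vref;

	printf("%s\n", classify_swap_object(OBJ_TMPFS_NODE | OBJ_TMPFS, &vref));
	printf("%s\n", classify_swap_object(0, &vref));
	return (0);
}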
diff --git a/sys/gnu/fs/reiserfs/reiserfs_vfsops.c b/sys/gnu/fs/reiserfs/reiserfs_vfsops.c
index ad2aab0..0c351dc 100644
--- a/sys/gnu/fs/reiserfs/reiserfs_vfsops.c
+++ b/sys/gnu/fs/reiserfs/reiserfs_vfsops.c
@@ -1022,6 +1022,7 @@ uint32_t find_hash_out(struct reiserfs_mount *rmp)
}
} while (0);
+ free(ip, M_REISERFSNODE);
pathrelse(&path);
return (hash);
}
diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c
index 8cd4440..6acd32a 100644
--- a/sys/i386/i386/elf_machdep.c
+++ b/sys/i386/i386/elf_machdep.c
@@ -177,6 +177,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Word rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -212,8 +213,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_386_32: /* S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
addr += addend;
if (*where != addr)
@@ -221,8 +222,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_386_PC32: /* S + A - P */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
addr += addend - (Elf_Addr)where;
if (*where != addr)
@@ -239,8 +240,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_386_GLOB_DAT: /* S */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
if (*where != addr)
*where = addr;
diff --git a/sys/ia64/ia64/elf_machdep.c b/sys/ia64/ia64/elf_machdep.c
index 05cb641..f1ab4a0 100644
--- a/sys/ia64/ia64/elf_machdep.c
+++ b/sys/ia64/ia64/elf_machdep.c
@@ -127,17 +127,18 @@ elf64_dump_thread(struct thread *td, void *dst, size_t *off __unused)
}
-static Elf_Addr
-lookup_fdesc(linker_file_t lf, Elf_Size symidx, elf_lookup_fn lookup)
+static int
+lookup_fdesc(linker_file_t lf, Elf_Size symidx, elf_lookup_fn lookup,
+ Elf_Addr *addr1)
{
linker_file_t top;
Elf_Addr addr;
const char *symname;
- int i;
+ int i, error;
static int eot = 0;
- addr = lookup(lf, symidx, 0);
- if (addr == 0) {
+ error = lookup(lf, symidx, 0, &addr);
+ if (error != 0) {
top = lf;
symname = elf_get_symname(top, symidx);
for (i = 0; i < top->ndeps; i++) {
@@ -148,30 +149,33 @@ lookup_fdesc(linker_file_t lf, Elf_Size symidx, elf_lookup_fn lookup)
break;
}
if (addr == 0)
- return (0);
+ return (EINVAL);
}
if (eot)
- return (0);
+ return (EINVAL);
/*
* Lookup and/or construct OPD
*/
for (i = 0; i < 8192; i += 2) {
- if (fptr_storage[i] == addr)
- return (Elf_Addr)(fptr_storage + i);
+ if (fptr_storage[i] == addr) {
+ *addr1 = (Elf_Addr)(fptr_storage + i);
+ return (0);
+ }
if (fptr_storage[i] == 0) {
fptr_storage[i] = addr;
fptr_storage[i+1] = link_elf_get_gp(lf);
- return (Elf_Addr)(fptr_storage + i);
+ *addr1 = (Elf_Addr)(fptr_storage + i);
+ return (0);
}
}
printf("%s: fptr table full\n", __func__);
eot = 1;
- return (0);
+ return (EINVAL);
}
/* Process one elf relocation with addend. */
@@ -184,6 +188,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Size rtype, symidx;
const Elf_Rel *rel;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -223,8 +228,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
case R_IA_64_NONE:
break;
case R_IA_64_DIR64LSB: /* word64 LSB S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
*where = addr + addend;
break;
@@ -233,16 +238,16 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
printf("%s: addend ignored for OPD relocation\n",
__func__);
}
- addr = lookup_fdesc(lf, symidx, lookup);
- if (addr == 0)
+ error = lookup_fdesc(lf, symidx, lookup, &addr);
+ if (error != 0)
return (-1);
*where = addr;
break;
case R_IA_64_REL64LSB: /* word64 LSB BD + A */
break;
case R_IA_64_IPLTLSB:
- addr = lookup_fdesc(lf, symidx, lookup);
- if (addr == 0)
+ error = lookup_fdesc(lf, symidx, lookup, &addr);
+ if (error != 0)
return (-1);
where[0] = *((Elf_Addr*)addr) + addend;
where[1] = *((Elf_Addr*)addr + 1);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 63ecf0f..8986a58 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -692,7 +692,7 @@ orphanpg(pg)
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
- if (P_SHOULDSTOP(p)) {
+ if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
PROC_UNLOCK(p);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
@@ -2074,7 +2074,15 @@ sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
vref(vp);
break;
case OBJT_SWAP:
- kve->kve_type = KVME_TYPE_SWAP;
+ if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+ kve->kve_type = KVME_TYPE_VNODE;
+ if ((lobj->flags & OBJ_TMPFS) != 0) {
+ vp = lobj->un_pager.swp.swp_tmpfs;
+ vref(vp);
+ }
+ } else {
+ kve->kve_type = KVME_TYPE_SWAP;
+ }
break;
case OBJT_DEVICE:
kve->kve_type = KVME_TYPE_DEVICE;
@@ -2300,7 +2308,15 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
vref(vp);
break;
case OBJT_SWAP:
- kve->kve_type = KVME_TYPE_SWAP;
+ if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+ kve->kve_type = KVME_TYPE_VNODE;
+ if ((lobj->flags & OBJ_TMPFS) != 0) {
+ vp = lobj->un_pager.swp.swp_tmpfs;
+ vref(vp);
+ }
+ } else {
+ kve->kve_type = KVME_TYPE_SWAP;
+ }
break;
case OBJT_DEVICE:
kve->kve_type = KVME_TYPE_DEVICE;
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 1ac08de..194c603 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -402,9 +402,11 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
return (0);
}
} else {
- if (rm->lock_object.lo_flags & LO_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
+ THREAD_SLEEPING_OK();
sx_xlock(&rm->rm_lock_sx);
- else
+ THREAD_NO_SLEEPING();
+ } else
mtx_lock(&rm->rm_lock_mtx);
}
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 08b4f66..e9240ce 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1888,20 +1888,27 @@ SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
static int cpu_tick_variable;
static uint64_t cpu_tick_frequency;
+static DPCPU_DEFINE(uint64_t, tc_cpu_ticks_base);
+static DPCPU_DEFINE(unsigned, tc_cpu_ticks_last);
+
static uint64_t
tc_cpu_ticks(void)
{
- static uint64_t base;
- static unsigned last;
- unsigned u;
struct timecounter *tc;
+ uint64_t res, *base;
+ unsigned u, *last;
+ critical_enter();
+ base = DPCPU_PTR(tc_cpu_ticks_base);
+ last = DPCPU_PTR(tc_cpu_ticks_last);
tc = timehands->th_counter;
u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
- if (u < last)
- base += (uint64_t)tc->tc_counter_mask + 1;
- last = u;
- return (u + base);
+ if (u < *last)
+ *base += (uint64_t)tc->tc_counter_mask + 1;
+ *last = u;
+ res = u + *base;
+ critical_exit();
+ return (res);
}
void
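
tc_cpu_ticks() turns a masked, wrapping hardware counter into a monotonic 64-bit count by adding mask+1 to a running base whenever the raw reading moves backwards; the change moves that base/last state into per-CPU variables, accessed inside a critical section, so two CPUs can no longer race on the shared statics and double-add a wrap. A single-threaded userland sketch of the wrap-extension arithmetic itself (the DPCPU plumbing is elided):

#include <stdint.h>
#include <stdio.h>

#define	COUNTER_MASK	0xffffu	/* pretend a 16-bit free-running counter */

static uint64_t base;		/* accumulated wraparounds */
static unsigned last;		/* previous raw reading */

/* Extend one raw reading into a monotonically increasing count. */
static uint64_t
extend(unsigned raw)
{
	raw &= COUNTER_MASK;
	if (raw < last)		/* counter wrapped since the last call */
		base += (uint64_t)COUNTER_MASK + 1;
	last = raw;
	return (base + raw);
}

int
main(void)
{
	/* Readings that wrap once: 0xfff0 -> 0x0004. */
	printf("%llu\n", (unsigned long long)extend(0xfff0));
	printf("%llu\n", (unsigned long long)extend(0x0004));
	return (0);
}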
diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c
index bd2b051..595e869 100644
--- a/sys/kern/kern_umtx.c
+++ b/sys/kern/kern_umtx.c
@@ -1273,7 +1273,7 @@ kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
return (ret);
umtxq_lock(&key);
- ret = umtxq_signal(&key, n_wake);
+ umtxq_signal(&key, n_wake);
umtxq_unlock(&key);
umtx_key_release(&key);
return (0);
@@ -1805,7 +1805,7 @@ umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
uq_owner = owner->td_umtxq;
mtx_assert(&umtx_lock, MA_OWNED);
if (pi->pi_owner != NULL)
- panic("pi_ower != NULL");
+ panic("pi_owner != NULL");
pi->pi_owner = owner;
TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}
@@ -1829,9 +1829,8 @@ umtx_pi_disown(struct umtx_pi *pi)
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
- struct umtx_q *uq, *uq_owner;
+ struct umtx_q *uq;
- uq_owner = owner->td_umtxq;
mtx_lock(&umtx_lock);
if (pi->pi_owner == owner) {
mtx_unlock(&umtx_lock);
@@ -1977,11 +1976,8 @@ umtx_pi_unref(struct umtx_pi *pi)
KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
if (--pi->pi_refcount == 0) {
mtx_lock(&umtx_lock);
- if (pi->pi_owner != NULL) {
- TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
- pi, pi_link);
- pi->pi_owner = NULL;
- }
+ if (pi->pi_owner != NULL)
+ umtx_pi_disown(pi);
KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
("blocked queue not empty"));
mtx_unlock(&umtx_lock);
@@ -2241,7 +2237,7 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
mtx_lock(&umtx_lock);
pi = uq_first->uq_pi_blocked;
KASSERT(pi != NULL, ("pi == NULL?"));
- if (pi->pi_owner != curthread) {
+ if (pi->pi_owner != td) {
mtx_unlock(&umtx_lock);
umtxq_unbusy(&key);
umtxq_unlock(&key);
@@ -2249,7 +2245,7 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
/* userland messed the mutex */
return (EPERM);
}
- uq_me = curthread->td_umtxq;
+ uq_me = td->td_umtxq;
umtx_pi_disown(pi);
/* get highest priority thread which is still sleeping. */
uq_first = TAILQ_FIRST(&pi->pi_blocked);
@@ -2265,9 +2261,9 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
pri = UPRI(uq_first2->uq_thread);
}
}
- thread_lock(curthread);
- sched_lend_user_prio(curthread, pri);
- thread_unlock(curthread);
+ thread_lock(td);
+ sched_lend_user_prio(td, pri);
+ thread_unlock(td);
mtx_unlock(&umtx_lock);
if (uq_first)
umtxq_signal_thread(uq_first);
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 631ba75..b4f6586 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -158,7 +158,7 @@ static int link_elf_each_function_nameval(linker_file_t,
static void link_elf_reloc_local(linker_file_t);
static long link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long link_elf_strtab_get(linker_file_t, caddr_t *);
-static Elf_Addr elf_lookup(linker_file_t, Elf_Size, int);
+static int elf_lookup(linker_file_t, Elf_Size, int, Elf_Addr *);
static kobj_method_t link_elf_methods[] = {
KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol),
@@ -1498,8 +1498,8 @@ elf_get_symname(linker_file_t lf, Elf_Size symidx)
* This is not only more efficient, it's also more correct. It's not always
* the case that the symbol can be found through the hash table.
*/
-static Elf_Addr
-elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
+static int
+elf_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
{
elf_file_t ef = (elf_file_t)lf;
const Elf_Sym *sym;
@@ -1507,8 +1507,10 @@ elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
Elf_Addr addr, start, base;
/* Don't even try to lookup the symbol if the index is bogus. */
- if (symidx >= ef->nchains)
- return (0);
+ if (symidx >= ef->nchains) {
+ *res = 0;
+ return (EINVAL);
+ }
sym = ef->symtab + symidx;
@@ -1518,9 +1520,12 @@ elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
*/
if (ELF_ST_BIND(sym->st_info) == STB_LOCAL) {
/* Force lookup failure when we have an insanity. */
- if (sym->st_shndx == SHN_UNDEF || sym->st_value == 0)
- return (0);
- return ((Elf_Addr)ef->address + sym->st_value);
+ if (sym->st_shndx == SHN_UNDEF || sym->st_value == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
+ *res = ((Elf_Addr)ef->address + sym->st_value);
+ return (0);
}
/*
@@ -1533,8 +1538,10 @@ elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
symbol = ef->strtab + sym->st_name;
/* Force a lookup failure if the symbol name is bogus. */
- if (*symbol == 0)
- return (0);
+ if (*symbol == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
addr = ((Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps));
@@ -1544,7 +1551,8 @@ elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
else if (elf_set_find(&set_vnet_list, addr, &start, &base))
addr = addr - start + base;
#endif
- return addr;
+ *res = addr;
+ return (0);
}
static void
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 4f40258..453d8ce 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -144,7 +144,8 @@ static void link_elf_reloc_local(linker_file_t);
static long link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long link_elf_strtab_get(linker_file_t, caddr_t *);
-static Elf_Addr elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps);
+static int elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
+ Elf_Addr *);
static kobj_method_t link_elf_methods[] = {
KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol),
@@ -1224,38 +1225,46 @@ elf_obj_cleanup_globals_cache(elf_file_t ef)
* This is not only more efficient, it's also more correct. It's not always
* the case that the symbol can be found through the hash table.
*/
-static Elf_Addr
-elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps)
+static int
+elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
{
elf_file_t ef = (elf_file_t)lf;
Elf_Sym *sym;
const char *symbol;
- Elf_Addr ret;
+ Elf_Addr res1;
/* Don't even try to lookup the symbol if the index is bogus. */
- if (symidx >= ef->ddbsymcnt)
- return (0);
+ if (symidx >= ef->ddbsymcnt) {
+ *res = 0;
+ return (EINVAL);
+ }
sym = ef->ddbsymtab + symidx;
/* Quick answer if there is a definition included. */
- if (sym->st_shndx != SHN_UNDEF)
- return (sym->st_value);
+ if (sym->st_shndx != SHN_UNDEF) {
+ *res = sym->st_value;
+ return (0);
+ }
/* If we get here, then it is undefined and needs a lookup. */
switch (ELF_ST_BIND(sym->st_info)) {
case STB_LOCAL:
/* Local, but undefined? huh? */
- return (0);
+ *res = 0;
+ return (EINVAL);
case STB_GLOBAL:
+ case STB_WEAK:
/* Relative to Data or Function name */
symbol = ef->ddbstrtab + sym->st_name;
/* Force a lookup failure if the symbol name is bogus. */
- if (*symbol == 0)
- return (0);
- ret = ((Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps));
+ if (*symbol == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
+ res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
/*
* Cache global lookups during module relocation. The failure
@@ -1267,18 +1276,20 @@ elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps)
* restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
* above.
*/
- if (ret != 0) {
+ if (res1 != 0) {
sym->st_shndx = SHN_FBSD_CACHED;
- sym->st_value = ret;
+ sym->st_value = res1;
+ *res = res1;
+ return (0);
+ } else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
+ sym->st_value = 0;
+ *res = 0;
+ return (0);
}
- return (ret);
-
- case STB_WEAK:
- printf("link_elf_obj: Weak symbols not supported\n");
- return (0);
+ return (EINVAL);
default:
- return (0);
+ return (EINVAL);
}
}
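
The elf_lookup()/elf_obj_lookup() signature change swaps the old address-return convention, where 0 doubled as the failure code, for an errno-style return with the address passed back through a pointer. That distinction is what makes the new STB_WEAK handling possible: an unresolved weak symbol legitimately resolves to address 0 without being mistaken for a lookup failure. A minimal illustration of the pattern:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t addr_t;

/*
 * errno-style return plus an out-parameter: the value 0 is now a
 * legitimate result, not the failure sentinel it used to be.
 */
static int
lookup(int kind, addr_t *res)
{
	switch (kind) {
	case 0:			/* defined symbol */
		*res = 0x1000;
		return (0);
	case 1:			/* unresolved weak symbol: address 0, no error */
		*res = 0;
		return (0);
	default:		/* unresolved strong symbol: a real failure */
		*res = 0;
		return (EINVAL);
	}
}

int
main(void)
{
	addr_t addr;
	int kind;

	for (kind = 0; kind <= 2; kind++) {
		if (lookup(kind, &addr) != 0)
			printf("kind %d: lookup failed\n", kind);
		else
			printf("kind %d: addr %#llx\n", kind,
			    (unsigned long long)addr);
	}
	return (0);
}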
diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh
index 8391d3d..8be4896 100644
--- a/sys/kern/makesyscalls.sh
+++ b/sys/kern/makesyscalls.sh
@@ -57,7 +57,7 @@ case $# in
;;
esac
-if [ -n "$2" -a -f "$2" ]; then
+if [ -n "$2" ]; then
. $2
fi
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 1aa79df..6498ae7 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -793,6 +793,8 @@ sched_fork_thread(struct thread *td, struct thread *childtd)
{
struct td_sched *ts;
+ childtd->td_oncpu = NOCPU;
+ childtd->td_lastcpu = NOCPU;
childtd->td_estcpu = td->td_estcpu;
childtd->td_lock = &sched_lock;
childtd->td_cpuset = cpuset_ref(td->td_cpuset);
@@ -1672,6 +1674,8 @@ sched_throw(struct thread *td)
} else {
lock_profile_release_lock(&sched_lock.lock_object);
MPASS(td->td_lock == &sched_lock);
+ td->td_lastcpu = td->td_oncpu;
+ td->td_oncpu = NOCPU;
}
mtx_assert(&sched_lock, MA_OWNED);
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 961f80d..30aad12 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -2071,6 +2071,8 @@ sched_fork_thread(struct thread *td, struct thread *child)
*/
ts = td->td_sched;
ts2 = child->td_sched;
+ child->td_oncpu = NOCPU;
+ child->td_lastcpu = NOCPU;
child->td_lock = TDQ_LOCKPTR(tdq);
child->td_cpuset = cpuset_ref(td->td_cpuset);
ts2->ts_cpu = ts->ts_cpu;
@@ -2694,6 +2696,8 @@ sched_throw(struct thread *td)
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td);
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
+ td->td_lastcpu = td->td_oncpu;
+ td->td_oncpu = NOCPU;
}
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
newtd = choosethread();
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 25927f7..5786e90 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -76,7 +76,7 @@ __FBSDID("$FreeBSD$");
#define NBUF 0
#endif
#ifndef MAXFILES
-#define MAXFILES (maxproc * 2)
+#define MAXFILES (40 + 32 * maxusers)
#endif
static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS);
@@ -261,6 +261,8 @@ init_param2(long physpages)
TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
if (maxproc > (physpages / 12))
maxproc = physpages / 12;
+ if (maxproc > pid_max)
+ maxproc = pid_max;
maxprocperuid = (maxproc * 9) / 10;
/*
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index ebc08e7..5698bd4 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -305,7 +305,7 @@ log(int level, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- (void)_vprintf(level, log_open ? TOLOG : TOCONS, fmt, ap);
+ (void)_vprintf(level, log_open ? TOLOG : TOCONS | TOLOG, fmt, ap);
va_end(ap);
msgbuftrigger = 1;
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index 9eeadb4..925d732 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -64,14 +64,14 @@ syscallenter(struct thread *td, struct syscall_args *sa)
td->td_pticks = 0;
if (td->td_ucred != p->p_ucred)
cred_update_thread(td);
- if (p->p_flag & P_TRACED) {
- traced = 1;
+ traced = (p->p_flag & P_TRACED) != 0;
+ if (traced || td->td_dbgflags & TDB_USERWR) {
PROC_LOCK(p);
td->td_dbgflags &= ~TDB_USERWR;
- td->td_dbgflags |= TDB_SCE;
+ if (traced)
+ td->td_dbgflags |= TDB_SCE;
PROC_UNLOCK(p);
- } else
- traced = 0;
+ }
error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSCALL))
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 6ad4694..123dd10 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -402,7 +402,7 @@ ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve)
lobj = tobj;
pve->pve_offset += tobj->backing_object_offset;
}
- vp = (lobj->type == OBJT_VNODE) ? lobj->handle : NULL;
+ vp = vm_object_vnode(lobj);
if (vp != NULL)
vref(vp);
if (lobj != obj)
diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
index 9ec7340..473cd79 100644
--- a/sys/kern/vfs_mountroot.c
+++ b/sys/kern/vfs_mountroot.c
@@ -245,7 +245,7 @@ vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
return (error);
}
-static int
+static void
vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
{
struct nameidata nd;
@@ -355,8 +355,6 @@ vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
printf("mountroot: unable to unlink /dev/dev "
"(error %d)\n", error);
}
-
- return (0);
}
/*
@@ -948,12 +946,10 @@ vfs_mountroot(void)
while (!error) {
error = vfs_mountroot_parse(sb, mp);
if (!error) {
- error = vfs_mountroot_shuffle(td, mp);
- if (!error) {
- sbuf_clear(sb);
- error = vfs_mountroot_readconf(td, sb);
- sbuf_finish(sb);
- }
+ vfs_mountroot_shuffle(td, mp);
+ sbuf_clear(sb);
+ error = vfs_mountroot_readconf(td, sb);
+ sbuf_finish(sb);
}
}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index a9e17f1..10b5b28 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -3010,8 +3010,8 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
"cleanbuf %d dirtybuf %d\n",
vp->v_object, vp->v_object->ref_count,
vp->v_object->resident_page_count,
- vp->v_bufobj.bo_dirty.bv_cnt,
- vp->v_bufobj.bo_clean.bv_cnt);
+ vp->v_bufobj.bo_clean.bv_cnt,
+ vp->v_bufobj.bo_dirty.bv_cnt);
printf(" ");
lockmgr_printinfo(vp->v_vnlock);
if (vp->v_data != NULL)
diff --git a/sys/libkern/asprintf.c b/sys/libkern/asprintf.c
new file mode 100644
index 0000000..dbae786
--- /dev/null
+++ b/sys/libkern/asprintf.c
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/stdarg.h>
+
+int
+vasprintf(char **buf, struct malloc_type *mtp, const char *format, va_list va)
+{
+ int len, ret;
+ va_list tmp_va;
+ char dummy;
+
+ va_copy(tmp_va, va);
+ len = vsnprintf(&dummy, 0, format, tmp_va);
+ va_end(tmp_va);
+ if (len < 0) {
+ *buf = NULL;
+ return (len);
+ }
+
+ /* Account for null terminator. */
+ len += 1;
+ *buf = malloc(len, mtp, M_NOWAIT);
+ if (*buf == NULL)
+ return (-1);
+
+ ret = vsnprintf(*buf, len, format, va);
+ if (ret < 0) {
+ free(*buf, mtp);
+ *buf = NULL;
+ }
+
+ return (ret);
+}
+
+int
+asprintf(char **buf, struct malloc_type *mtp, const char *format, ...)
+{
+ int ret;
+ va_list va;
+
+ va_start(va, format);
+ ret = vasprintf(buf, mtp, format, va);
+ va_end(va);
+
+ return (ret);
+}
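
The new kernel vasprintf() sizes its buffer with a measuring vsnprintf() pass into a dummy byte — va_copy() keeps the argument list reusable for the real pass — then formats into an exact-size M_NOWAIT allocation. A userland analogue of the same two-pass technique, leaning on the C99 guarantee that vsnprintf(NULL, 0, ...) returns the would-be length (the kernel's malloc_type argument has no counterpart here):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static int
my_asprintf(char **buf, const char *fmt, ...)
{
	va_list ap, tmp;
	int len, ret;

	va_start(ap, fmt);
	va_copy(tmp, ap);		/* first pass only measures */
	len = vsnprintf(NULL, 0, fmt, tmp);
	va_end(tmp);
	if (len < 0) {
		va_end(ap);
		*buf = NULL;
		return (len);
	}
	*buf = malloc(len + 1);		/* room for the terminator */
	if (*buf == NULL) {
		va_end(ap);
		return (-1);
	}
	ret = vsnprintf(*buf, len + 1, fmt, ap);
	va_end(ap);
	return (ret);
}

int
main(void)
{
	char *s;

	if (my_asprintf(&s, "pid %d: %s", 42, "hello") >= 0) {
		puts(s);
		free(s);
	}
	return (0);
}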
diff --git a/sys/mips/mips/elf_machdep.c b/sys/mips/mips/elf_machdep.c
index d374713..2e22281 100644
--- a/sys/mips/mips/elf_machdep.c
+++ b/sys/mips/mips/elf_machdep.c
@@ -174,6 +174,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Word rtype = (Elf_Word)0, symidx;
const Elf_Rel *rel = NULL;
const Elf_Rela *rela = NULL;
+ int error;
/*
* Stash R_MIPS_HI16 info so we can use it when processing R_MIPS_LO16
@@ -213,8 +214,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_MIPS_32: /* S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
if (*where != addr)
@@ -222,8 +223,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_MIPS_26: /* ((A << 2) | (P & 0xf0000000) + S) >> 2 */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addend &= 0x03ffffff;
@@ -241,8 +242,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_MIPS_64: /* S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
if (*(Elf64_Addr*)where != addr)
@@ -251,8 +252,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
case R_MIPS_HI16: /* ((AHL + S) - ((short)(AHL + S)) >> 16 */
if (rela != NULL) {
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
*where &= 0xffff0000;
@@ -266,8 +267,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
case R_MIPS_LO16: /* AHL + S */
if (rela != NULL) {
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
*where &= 0xffff0000;
@@ -275,8 +276,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
}
else {
ahl += (int16_t)addend;
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addend &= 0xffff0000;
@@ -292,8 +293,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_MIPS_HIGHER: /* %higher(A+S) */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
*where &= 0xffff0000;
@@ -301,8 +302,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_MIPS_HIGHEST: /* %highest(A+S) */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
addr += addend;
*where &= 0xffff0000;
diff --git a/sys/modules/ctl/Makefile b/sys/modules/ctl/Makefile
index 0ba5836..dc64a4b 100644
--- a/sys/modules/ctl/Makefile
+++ b/sys/modules/ctl/Makefile
@@ -11,8 +11,9 @@ SRCS+= ctl_backend_ramdisk.c
SRCS+= ctl_cmd_table.c
SRCS+= ctl_frontend.c
SRCS+= ctl_frontend_cam_sim.c
-SRCS+= ctl_frontend_internal.c
+SRCS+= ctl_frontend_ioctl.c
SRCS+= ctl_frontend_iscsi.c
+SRCS+= ctl_ha.c
SRCS+= ctl_scsi_all.c
SRCS+= ctl_tpc.c
SRCS+= ctl_tpc_local.c
diff --git a/sys/netgraph/ng_pppoe.c b/sys/netgraph/ng_pppoe.c
index 6c2ed67..382410c 100644
--- a/sys/netgraph/ng_pppoe.c
+++ b/sys/netgraph/ng_pppoe.c
@@ -168,6 +168,13 @@ static const struct ng_cmdlist ng_pppoe_cmds[] = {
&ng_parse_enaddr_type,
NULL
},
+ {
+ NGM_PPPOE_COOKIE,
+ NGM_PPPOE_SETMAXP,
+ "setmaxp",
+ &ng_parse_uint16_type,
+ NULL
+ },
{ 0 }
};
@@ -262,6 +269,7 @@ struct PPPoE {
struct ether_header eh;
LIST_HEAD(, sess_con) listeners;
struct sess_hash_entry sesshash[SESSHASHSIZE];
+ struct maxptag max_payload; /* PPP-Max-Payload (RFC4638) */
};
typedef struct PPPoE *priv_p;
@@ -1004,6 +1012,13 @@ ng_pppoe_rcvmsg(node_p node, item_p item, hook_p lasthook)
bcopy(msg->data, &privp->eh.ether_shost,
ETHER_ADDR_LEN);
break;
+ case NGM_PPPOE_SETMAXP:
+ if (msg->header.arglen != sizeof(uint16_t))
+ LEAVE(EINVAL);
+ privp->max_payload.hdr.tag_type = PTT_MAX_PAYL;
+ privp->max_payload.hdr.tag_len = htons(sizeof(uint16_t));
+ privp->max_payload.data = htons(*((uint16_t *)msg->data));
+ break;
default:
LEAVE(EINVAL);
}
@@ -1071,6 +1086,8 @@ pppoe_start(sessp sp)
init_tags(sp);
insert_tag(sp, &uniqtag.hdr);
insert_tag(sp, &neg->service.hdr);
+ if (privp->max_payload.data != 0)
+ insert_tag(sp, &privp->max_payload.hdr);
make_packet(sp);
/*
* Send packet and prepare to retransmit it after timeout.
@@ -1124,6 +1141,28 @@ send_sessionid(sessp sp)
return (error);
}
+static int
+send_maxp(sessp sp, const struct pppoe_tag *tag)
+{
+ int error;
+ struct ng_mesg *msg;
+ struct ngpppoe_maxp *maxp;
+
+ CTR2(KTR_NET, "%20s: called %d", __func__, sp->Session_ID);
+
+ NG_MKMESSAGE(msg, NGM_PPPOE_COOKIE, NGM_PPPOE_SETMAXP,
+ sizeof(struct ngpppoe_maxp), M_NOWAIT);
+ if (msg == NULL)
+ return (ENOMEM);
+
+ maxp = (struct ngpppoe_maxp *)msg->data;
+ strncpy(maxp->hook, NG_HOOK_NAME(sp->hook), NG_HOOKSIZ);
+ maxp->data = ntohs(((const struct maxptag *)tag)->data);
+ NG_SEND_MSG_ID(error, NG_HOOK_NODE(sp->hook), msg, sp->creator, 0);
+
+ return (error);
+}
+
/*
* Receive data from session hook and do something with it.
*/
@@ -1464,6 +1503,9 @@ ng_pppoe_rcvdata_ether(hook_p hook, item_p item)
insert_tag(sp, tag); /* return it */
send_acname(sp, tag);
}
+ if ((tag = get_tag(ph, PTT_MAX_PAYL)) &&
+ (privp->max_payload.data != 0))
+ insert_tag(sp, tag); /* return it */
insert_tag(sp, &neg->service.hdr); /* Service */
scan_tags(sp, ph);
make_packet(sp);
@@ -1602,6 +1644,9 @@ ng_pppoe_rcvdata_ether(hook_p hook, item_p item)
m_freem(neg->m);
free(sp->neg, M_NETGRAPH_PPPOE);
sp->neg = NULL;
+ if ((tag = get_tag(ph, PTT_MAX_PAYL)) &&
+ (privp->max_payload.data != 0))
+ send_maxp(sp, tag);
pppoe_send_event(sp, NGM_PPPOE_SUCCESS);
break;
case PADT_CODE:
diff --git a/sys/netgraph/ng_pppoe.h b/sys/netgraph/ng_pppoe.h
index 6ef81b7..3b74f07 100644
--- a/sys/netgraph/ng_pppoe.h
+++ b/sys/netgraph/ng_pppoe.h
@@ -51,6 +51,7 @@
#define NG_PPPOE_NODE_TYPE "pppoe"
#define NGM_PPPOE_COOKIE 1089893072
+#define NGM_PPPOE_SETMAXP_COOKIE 1441624322
#define PPPOE_SERVICE_NAME_SIZE 64 /* for now */
@@ -83,6 +84,7 @@ enum cmd {
NGM_PPPOE_SETMODE = 12, /* set to standard or compat modes */
NGM_PPPOE_GETMODE = 13, /* see current mode */
NGM_PPPOE_SETENADDR = 14, /* set Ethernet address */
+ NGM_PPPOE_SETMAXP = 15 /* Set PPP-Max-Payload value */
};
/***********************
@@ -147,6 +149,13 @@ struct ngpppoe_sts {
{ NULL } \
}
+/*
+ * This structure is used to send the PPP-Max-Payload value from server to client.
+ */
+struct ngpppoe_maxp {
+ char hook[NG_HOOKSIZ]; /* hook associated with event session */
+ uint16_t data;
+};
/********************************************************************
* Constants and definitions specific to pppoe
@@ -229,6 +238,10 @@ struct datatag {
u_int8_t data[PPPOE_SERVICE_NAME_SIZE];
};
+struct maxptag {
+ struct pppoe_tag hdr;
+ uint16_t data;
+};
/*
* Define the order in which we will place tags in packets
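
A hedged sketch of driving the new control message from userland via netgraph(3) (hypothetical helper; the node path is a caller-supplied placeholder and the program links against -lnetgraph). The kernel handler added above rejects any argument whose length differs from sizeof(uint16_t), so the payload is passed as exactly that:

#include <sys/types.h>
#include <stdint.h>
#include <unistd.h>
#include <netgraph.h>
#include <netgraph/ng_pppoe.h>

/* Hypothetical helper: set PPP-Max-Payload (RFC 4638) on a pppoe node. */
static int
set_maxp(const char *path, uint16_t maxp)
{
	int cs, ret;

	if (NgMkSockNode(NULL, &cs, NULL) < 0)
		return (-1);
	ret = NgSendMsg(cs, path, NGM_PPPOE_COOKIE, NGM_PPPOE_SETMAXP,
	    &maxp, sizeof(maxp));
	close(cs);
	return (ret);
}
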
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index be9e0e7..c848306 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1746,9 +1746,9 @@ tcp_usrclosed(struct tcpcb *tp)
#ifdef TCP_OFFLOAD
tcp_offload_listen_stop(tp);
#endif
+ tcp_state_change(tp, TCPS_CLOSED);
/* FALLTHROUGH */
case TCPS_CLOSED:
- tcp_state_change(tp, TCPS_CLOSED);
tp = tcp_close(tp);
/*
* tcp_close() should never return NULL here as the socket is
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 231b269..28a830f 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -2388,7 +2388,7 @@ in6if_do_dad(struct ifnet *ifp)
* However, some interfaces can be up before the RUNNING
* status. Additionally, users may try to assign addresses
* before the interface becomes up (or running).
- * This function returns EAGAIN in that case.
+ * This function returns EAGAIN in that case.
* The caller should mark "tentative" on the address instead of
* performing DAD immediately.
*/
diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c
index dbe58df..b8f4bed 100644
--- a/sys/powerpc/powerpc/elf32_machdep.c
+++ b/sys/powerpc/powerpc/elf32_machdep.c
@@ -164,6 +164,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Addr addend;
Elf_Word rtype, symidx;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -183,20 +184,20 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
switch (rtype) {
- case R_PPC_NONE:
- break;
+ case R_PPC_NONE:
+ break;
case R_PPC_ADDR32: /* word32 S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
- return -1;
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return -1;
addr += addend;
*where = addr;
- break;
+ break;
- case R_PPC_ADDR16_LO: /* #lo(S) */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ case R_PPC_ADDR16_LO: /* #lo(S) */
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
/*
* addend values are sometimes relative to sections
@@ -211,8 +212,8 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
break;
case R_PPC_ADDR16_HA: /* #ha(S) */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return -1;
/*
* addend values are sometimes relative to sections
@@ -223,17 +224,17 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
addr = relocbase + addend;
else
addr += addend;
- *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0))
+ *hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0))
& 0xffff;
break;
case R_PPC_RELATIVE: /* word32 B + A */
- *where = elf_relocaddr(lf, relocbase + addend);
- break;
+ *where = elf_relocaddr(lf, relocbase + addend);
+ break;
default:
- printf("kldload: unexpected relocation type %d\n",
- (int) rtype);
+ printf("kldload: unexpected relocation type %d\n",
+ (int) rtype);
return -1;
}
return(0);
diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c
index 0c41a8d..558cf0a 100644
--- a/sys/powerpc/powerpc/elf64_machdep.c
+++ b/sys/powerpc/powerpc/elf64_machdep.c
@@ -135,6 +135,7 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
Elf_Addr addend;
Elf_Word rtype, symidx;
const Elf_Rela *rela;
+ int error;
switch (type) {
case ELF_RELOC_REL:
@@ -153,30 +154,30 @@ elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
switch (rtype) {
- case R_PPC_NONE:
- break;
+ case R_PPC_NONE:
+ break;
case R_PPC64_ADDR64: /* doubleword64 S + A */
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
- return -1;
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
+ return -1;
addr += addend;
- *where = addr;
- break;
+ *where = addr;
+ break;
case R_PPC_RELATIVE: /* doubleword64 B + A */
- *where = elf_relocaddr(lf, relocbase + addend);
- break;
+ *where = elf_relocaddr(lf, relocbase + addend);
+ break;
case R_PPC_JMP_SLOT: /* function descriptor copy */
- addr = lookup(lf, symidx, 1);
+ lookup(lf, symidx, 1, &addr);
memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr));
__asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory");
break;
default:
- printf("kldload: unexpected relocation type %d\n",
- (int) rtype);
+ printf("kldload: unexpected relocation type %d\n",
+ (int) rtype);
return -1;
}
return(0);
diff --git a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
index 64e691e..b3a920a 100644
--- a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
+++ b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
@@ -121,9 +121,6 @@ enum svc_rpc_gss_client_state {
};
#define SVC_RPC_GSS_SEQWINDOW 128
-#ifndef RPCAUTH_UNIXGIDS
-#define RPCAUTH_UNIXGIDS 16
-#endif
struct svc_rpc_gss_clientid {
unsigned long ci_hostid;
@@ -150,7 +147,7 @@ struct svc_rpc_gss_client {
int cl_rpcflavor; /* RPC pseudo sec flavor */
bool_t cl_done_callback; /* TRUE after call */
void *cl_cookie; /* user cookie from callback */
- gid_t cl_gid_storage[RPCAUTH_UNIXGIDS];
+ gid_t cl_gid_storage[NGROUPS];
gss_OID cl_mech; /* mechanism */
gss_qop_t cl_qop; /* quality of protection */
uint32_t cl_seqlast; /* sequence window origin */
@@ -776,7 +773,7 @@ svc_rpc_gss_build_ucred(struct svc_rpc_gss_client *client,
uc->gid = 65534;
uc->gidlist = client->cl_gid_storage;
- numgroups = RPCAUTH_UNIXGIDS;
+ numgroups = NGROUPS;
maj_stat = gss_pname_to_unix_cred(&min_stat, name, client->cl_mech,
&uc->uid, &uc->gid, &numgroups, &uc->gidlist[0]);
if (GSS_ERROR(maj_stat))
diff --git a/sys/sparc64/sparc64/elf_machdep.c b/sys/sparc64/sparc64/elf_machdep.c
index 4d55717..621e643 100644
--- a/sys/sparc64/sparc64/elf_machdep.c
+++ b/sys/sparc64/sparc64/elf_machdep.c
@@ -343,6 +343,7 @@ elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
Elf_Addr value;
Elf_Addr mask;
Elf_Addr addr;
+ int error;
if (type != ELF_RELOC_RELA)
return (-1);
@@ -371,8 +372,8 @@ elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
value = rela->r_addend;
if (RELOC_RESOLVE_SYMBOL(rtype)) {
- addr = lookup(lf, symidx, 1);
- if (addr == 0)
+ error = lookup(lf, symidx, 1, &addr);
+ if (error != 0)
return (-1);
value += addr;
if (RELOC_BARE_SYMBOL(rtype))
diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h
index 889b65c..b334fe9 100644
--- a/sys/sys/elf_common.h
+++ b/sys/sys/elf_common.h
@@ -114,6 +114,8 @@ typedef struct {
#define ELFOSABI_OPENVMS 13 /* Open VMS */
#define ELFOSABI_NSK 14 /* HP Non-Stop Kernel */
#define ELFOSABI_AROS 15 /* Amiga Research OS */
+#define ELFOSABI_FENIXOS 16 /* FenixOS */
+#define ELFOSABI_CLOUDABI 17 /* Nuxi CloudABI */
#define ELFOSABI_ARM 97 /* ARM */
#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
@@ -144,6 +146,7 @@ typedef struct {
#define EM_386 3 /* Intel i386. */
#define EM_68K 4 /* Motorola 68000. */
#define EM_88K 5 /* Motorola 88000. */
+#define EM_IAMCU 6 /* Intel MCU. */
#define EM_860 7 /* Intel i860. */
#define EM_MIPS 8 /* MIPS R3000 Big-Endian only. */
#define EM_S370 9 /* IBM System/370. */
@@ -238,6 +241,7 @@ typedef struct {
#define EM_UNICORE 110 /* Microprocessor series from PKU-Unity Ltd.
and MPRC of Peking University */
#define EM_AARCH64 183 /* AArch64 (64-bit ARM) */
+#define EM_RISCV 243 /* RISC-V */
/* Non-standard or deprecated. */
#define EM_486 6 /* Intel i486. */
@@ -514,6 +518,7 @@ typedef struct {
#define NT_PROCSTAT_OSREL 14 /* Procstat osreldate data. */
#define NT_PROCSTAT_PSSTRINGS 15 /* Procstat ps_strings data. */
#define NT_PROCSTAT_AUXV 16 /* Procstat auxv data. */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state. */
/* Symbol Binding - ELFNN_ST_BIND - st_info */
@@ -647,6 +652,23 @@ typedef struct {
#define R_386_TLS_TPOFF32 37 /* GOT entry of -ve static TLS offset */
#define R_386_IRELATIVE 42 /* PLT entry resolved indirectly at runtime */
+#define R_AARCH64_NONE 0 /* No relocation */
+#define R_AARCH64_ABS64 257 /* Absolute offset */
+#define R_AARCH64_ABS32 258 /* Absolute, 32-bit overflow check */
+#define R_AARCH64_ABS16 259 /* Absolute, 16-bit overflow check */
+#define R_AARCH64_PREL64 260 /* PC relative */
+#define R_AARCH64_PREL32 261 /* PC relative, 32-bit overflow check */
+#define R_AARCH64_PREL16 262 /* PC relative, 16-bit overflow check */
+#define R_AARCH64_COPY 1024 /* Copy data from shared object */
+#define R_AARCH64_GLOB_DAT 1025 /* Set GOT entry to data address */
+#define R_AARCH64_JUMP_SLOT 1026 /* Set GOT entry to code address */
+#define R_AARCH64_RELATIVE 1027 /* Add load address of shared object */
+#define R_AARCH64_TLS_DTPREL64 1028
+#define R_AARCH64_TLS_DTPMOD64 1029
+#define R_AARCH64_TLS_TPREL64 1030
+#define R_AARCH64_TLSDESC 1031 /* Identify the TLS descriptor */
+#define R_AARCH64_IRELATIVE 1032
+
#define R_ARM_NONE 0 /* No relocation. */
#define R_ARM_PC24 1
#define R_ARM_ABS32 2
diff --git a/sys/sys/linker.h b/sys/sys/linker.h
index b2942f2..ce4d86e 100644
--- a/sys/sys/linker.h
+++ b/sys/sys/linker.h
@@ -259,7 +259,7 @@ extern int kld_debug;
#endif
-typedef Elf_Addr elf_lookup_fn(linker_file_t, Elf_Size, int);
+typedef int elf_lookup_fn(linker_file_t, Elf_Size, int, Elf_Addr *);
/* Support functions */
int elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
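
This typedef change is the keystone of the relocator churn earlier in the diff: the old convention returned the symbol address directly and overloaded 0 as the failure sentinel, which presumably cannot represent a symbol that legitimately resolves to address zero. A before/after restatement, taken from the relocator hunks above rather than new logic:

	/* Old: address return; 0 doubles as "lookup failed". */
	addr = lookup(lf, symidx, 1);
	if (addr == 0)
		return (-1);

	/* New: explicit error code; any address value, including 0, is valid. */
	error = lookup(lf, symidx, 1, &addr);
	if (error != 0)
		return (-1);
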
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 618cd48..9bf3ffa 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -206,11 +206,15 @@ int kvprintf(char const *, void (*)(int, void*), void *, int,
__va_list) __printflike(1, 0);
void log(int, const char *, ...) __printflike(2, 3);
void log_console(struct uio *);
+int asprintf(char **ret, struct malloc_type *mtp, const char *format,
+ ...) __printflike(3, 4);
int printf(const char *, ...) __printflike(1, 2);
int snprintf(char *, size_t, const char *, ...) __printflike(3, 4);
int sprintf(char *buf, const char *, ...) __printflike(2, 3);
int uprintf(const char *, ...) __printflike(1, 2);
int vprintf(const char *, __va_list) __printflike(1, 0);
+int vasprintf(char **ret, struct malloc_type *mtp, const char *format,
+ __va_list ap) __printflike(3, 0);
int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0);
int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0);
int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0);
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 8576f6d..9374daf 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -483,6 +483,27 @@ struct kinfo_vmentry {
};
/*
+ * The "vm.objects" sysctl provides a list of all VM objects in the system
+ * via an array of these entries.
+ */
+struct kinfo_vmobject {
+ int kvo_structsize; /* Variable size of record. */
+ int kvo_type; /* Object type: KVME_TYPE_*. */
+ uint64_t kvo_size; /* Object size in pages. */
+ uint64_t kvo_vn_fileid; /* inode number if vnode. */
+ uint32_t kvo_vn_fsid; /* dev_t of vnode location. */
+ int kvo_ref_count; /* Reference count. */
+ int kvo_shadow_count; /* Shadow count. */
+ int kvo_memattr; /* Memory attribute. */
+ uint64_t kvo_resident; /* Number of resident pages. */
+ uint64_t kvo_active; /* Number of active pages. */
+ uint64_t kvo_inactive; /* Number of inactive pages. */
+ uint64_t _kvo_qspare[8];
+ uint32_t _kvo_ispare[8];
+ char kvo_path[PATH_MAX]; /* Pathname, if any. */
+};
+
+/*
* The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
* another process as a series of entries. Each stack is represented by a
* series of symbol names and offsets as generated by stack_sbuf_print(9).
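
A hedged userland sketch of consuming these records (hypothetical program, not part of the patch; the sysctl handler that emits them is added to vm_object.c later in this diff). Records are variable-sized, so the walk must advance by kvo_structsize rather than by sizeof(struct kinfo_vmobject):

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct kinfo_vmobject *kvo;
	char *buf, *p;
	size_t len;

	if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) != 0)
		return (1);
	if ((buf = malloc(len)) == NULL ||
	    sysctlbyname("vm.objects", buf, &len, NULL, 0) != 0)
		return (1);
	for (p = buf; p < buf + len;) {
		kvo = (struct kinfo_vmobject *)(void *)p;
		if (kvo->kvo_structsize <= 0)	/* guard against a short read */
			break;
		printf("type %d, %ju pages resident\n", kvo->kvo_type,
		    (uintmax_t)kvo->kvo_resident);
		p += kvo->kvo_structsize;	/* records are variable size */
	}
	free(buf);
	return (0);
}
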
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index eb691b8..db0f60d 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -788,7 +788,8 @@ void vop_rename_fail(struct vop_rename_args *ap);
#define VOP_WRITE_PRE(ap) \
struct vattr va; \
- int error, osize, ooffset, noffset; \
+ int error; \
+ off_t osize, ooffset, noffset; \
\
osize = ooffset = noffset = 0; \
if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \
@@ -796,7 +797,7 @@ void vop_rename_fail(struct vop_rename_args *ap);
if (error) \
return (error); \
ooffset = (ap)->a_uio->uio_offset; \
- osize = va.va_size; \
+ osize = (off_t)va.va_size; \
}
#define VOP_WRITE_POST(ap, ret) \
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index b97b954..291d0dd 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -159,11 +159,10 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
vm_paddr_t high, vm_memattr_t memattr)
{
vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
- vm_offset_t addr;
+ vm_offset_t addr, i;
vm_ooffset_t offset;
vm_page_t m;
int pflags, tries;
- int i;
size = round_page(size);
if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
@@ -184,18 +183,7 @@ retry:
tries++;
goto retry;
}
- /*
- * Unmap and free the pages.
- */
- if (i != 0)
- pmap_remove(kernel_pmap, addr, addr + i);
- while (i != 0) {
- i -= PAGE_SIZE;
- m = vm_page_lookup(object,
- OFF_TO_IDX(offset + i));
- vm_page_unwire(m, 0);
- vm_page_free(m);
- }
+ kmem_unback(object, addr, i);
vmem_free(vmem, addr, size);
return (0);
}
@@ -353,25 +341,13 @@ retry:
* aren't on any queues.
*/
if (m == NULL) {
+ VM_OBJECT_WUNLOCK(object);
if ((flags & M_NOWAIT) == 0) {
- VM_OBJECT_WUNLOCK(object);
VM_WAIT;
VM_OBJECT_WLOCK(object);
goto retry;
}
- /*
- * Unmap and free the pages.
- */
- if (i != 0)
- pmap_remove(kernel_pmap, addr, addr + i);
- while (i != 0) {
- i -= PAGE_SIZE;
- m = vm_page_lookup(object,
- OFF_TO_IDX(offset + i));
- vm_page_unwire(m, 0);
- vm_page_free(m);
- }
- VM_OBJECT_WUNLOCK(object);
+ kmem_unback(object, addr, i);
return (KERN_NO_SPACE);
}
if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
@@ -387,6 +363,15 @@ retry:
return (KERN_SUCCESS);
}
+/*
+ * kmem_unback:
+ *
+ * Unmap and free the physical pages underlying the specified virtual
+ * address range.
+ *
+ * A physical page must exist within the specified object at each index
+ * that is being unmapped.
+ */
void
kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
@@ -396,9 +381,9 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
KASSERT(object == kmem_object || object == kernel_object,
("kmem_unback: only supports kernel objects."));
+ pmap_remove(kernel_pmap, addr, addr + size);
offset = addr - VM_MIN_KERNEL_ADDRESS;
VM_OBJECT_WLOCK(object);
- pmap_remove(kernel_pmap, addr, addr + size);
for (i = 0; i < size; i += PAGE_SIZE) {
m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
vm_page_unwire(m, 0);
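
Both open-coded unwind loops above collapse into the new kmem_unback() helper; for reference, the consolidated error path of the first hunk reads as follows (a commented restatement, not new code):

	/*
	 * i bytes of [addr, addr + i) were mapped and wired before the
	 * allocation failed; kmem_unback() unmaps and frees exactly that
	 * prefix before the virtual range itself is released.
	 */
	kmem_unback(object, addr, i);
	vmem_free(vmem, addr, size);
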
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 36a2ead..9d08714 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
@@ -2269,6 +2270,154 @@ next_page:
}
}
+struct vnode *
+vm_object_vnode(vm_object_t object)
+{
+
+ VM_OBJECT_ASSERT_LOCKED(object);
+ if (object->type == OBJT_VNODE)
+ return (object->handle);
+ if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0)
+ return (object->un_pager.swp.swp_tmpfs);
+ return (NULL);
+}
+
+static int
+sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
+{
+ struct kinfo_vmobject kvo;
+ char *fullpath, *freepath;
+ struct vnode *vp;
+ struct vattr va;
+ vm_object_t obj;
+ vm_page_t m;
+ int count, error;
+
+ if (req->oldptr == NULL) {
+ /*
+ * If an old buffer has not been provided, generate an
+ * estimate of the space needed for a subsequent call.
+ */
+ mtx_lock(&vm_object_list_mtx);
+ count = 0;
+ TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+ if (obj->type == OBJT_DEAD)
+ continue;
+ count++;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
+ count * 11 / 10));
+ }
+
+ error = 0;
+
+ /*
+ * VM objects are type stable and are never removed from the
+ * list once added. This allows us to safely read obj->object_list
+ * after reacquiring the VM object lock.
+ */
+ mtx_lock(&vm_object_list_mtx);
+ TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+ if (obj->type == OBJT_DEAD)
+ continue;
+ VM_OBJECT_RLOCK(obj);
+ if (obj->type == OBJT_DEAD) {
+ VM_OBJECT_RUNLOCK(obj);
+ continue;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ kvo.kvo_size = ptoa(obj->size);
+ kvo.kvo_resident = obj->resident_page_count;
+ kvo.kvo_ref_count = obj->ref_count;
+ kvo.kvo_shadow_count = obj->shadow_count;
+ kvo.kvo_memattr = obj->memattr;
+ kvo.kvo_active = 0;
+ kvo.kvo_inactive = 0;
+ TAILQ_FOREACH(m, &obj->memq, listq) {
+ /*
+ * A page may belong to the object but be
+ * dequeued and set to PQ_NONE while the
+ * object lock is not held. This makes the
+ * reads of m->queue below racy, and we do not
+ * count pages set to PQ_NONE. However, this
+ * sysctl is only meant to give an
+ * approximation of the system anyway.
+ */
+ if (m->queue == PQ_ACTIVE)
+ kvo.kvo_active++;
+ else if (m->queue == PQ_INACTIVE)
+ kvo.kvo_inactive++;
+ }
+
+ kvo.kvo_vn_fileid = 0;
+ kvo.kvo_vn_fsid = 0;
+ freepath = NULL;
+ fullpath = "";
+ vp = NULL;
+ switch (obj->type) {
+ case OBJT_DEFAULT:
+ kvo.kvo_type = KVME_TYPE_DEFAULT;
+ break;
+ case OBJT_VNODE:
+ kvo.kvo_type = KVME_TYPE_VNODE;
+ vp = obj->handle;
+ vref(vp);
+ break;
+ case OBJT_SWAP:
+ kvo.kvo_type = KVME_TYPE_SWAP;
+ break;
+ case OBJT_DEVICE:
+ kvo.kvo_type = KVME_TYPE_DEVICE;
+ break;
+ case OBJT_PHYS:
+ kvo.kvo_type = KVME_TYPE_PHYS;
+ break;
+ case OBJT_DEAD:
+ kvo.kvo_type = KVME_TYPE_DEAD;
+ break;
+ case OBJT_SG:
+ kvo.kvo_type = KVME_TYPE_SG;
+ break;
+ case OBJT_MGTDEVICE:
+ kvo.kvo_type = KVME_TYPE_MGTDEVICE;
+ break;
+ default:
+ kvo.kvo_type = KVME_TYPE_UNKNOWN;
+ break;
+ }
+ VM_OBJECT_RUNLOCK(obj);
+ if (vp != NULL) {
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
+ kvo.kvo_vn_fileid = va.va_fileid;
+ kvo.kvo_vn_fsid = va.va_fsid;
+ }
+ vput(vp);
+ }
+
+ strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+
+ /* Pack record size down */
+ kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
+ strlen(kvo.kvo_path) + 1;
+ kvo.kvo_structsize = roundup(kvo.kvo_structsize,
+ sizeof(uint64_t));
+ error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
+ mtx_lock(&vm_object_list_mtx);
+ if (error)
+ break;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ return (error);
+}
+SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
+ "List of VM objects");
+
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 06111a1..ac8feae 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -297,6 +297,7 @@ boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
boolean_t);
void vm_object_unwire(vm_object_t object, vm_ooffset_t offset,
vm_size_t length, uint8_t queue);
+struct vnode *vm_object_vnode(vm_object_t object);
#endif /* _KERNEL */
#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 8024a7c..95bf6ca 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1710,6 +1710,7 @@ vm_page_alloc_contig_vdrop(struct spglist *lst)
*
* optional allocation flags:
* VM_ALLOC_NOBUSY do not exclusive busy the page
+ * VM_ALLOC_NODUMP do not include the page in a kernel core dump
* VM_ALLOC_NOOBJ page is not associated with an object and
* should not be exclusive busy
* VM_ALLOC_SBUSY shared busy the allocated page
@@ -3009,7 +3010,8 @@ vm_page_set_invalid(vm_page_t m, int base, int size)
bits = VM_PAGE_BITS_ALL;
else
bits = vm_page_bits(base, size);
- if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
+ if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL &&
+ bits != 0)
pmap_remove_all(m);
KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) ||
!pmap_page_is_mapped(m),
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 7a1f944..accf517 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -227,6 +227,7 @@ struct vm_domain {
long vmd_segs; /* bitmask of the segments */
boolean_t vmd_oom;
int vmd_pass; /* local pagedaemon pass */
+ int vmd_last_active_scan;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
};
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 6a56fd7..ed80b1b 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -408,10 +408,13 @@ more:
ib = 0;
break;
}
- vm_page_lock(p);
vm_page_test_dirty(p);
- if (p->dirty == 0 ||
- p->queue != PQ_INACTIVE ||
+ if (p->dirty == 0) {
+ ib = 0;
+ break;
+ }
+ vm_page_lock(p);
+ if (p->queue != PQ_INACTIVE ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
ib = 0;
@@ -435,10 +438,11 @@ more:
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
- vm_page_lock(p);
vm_page_test_dirty(p);
- if (p->dirty == 0 ||
- p->queue != PQ_INACTIVE ||
+ if (p->dirty == 0)
+ break;
+ vm_page_lock(p);
+ if (p->queue != PQ_INACTIVE ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
break;
@@ -922,9 +926,10 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_page_t m, next;
struct vm_pagequeue *pq;
vm_object_t object;
+ long min_scan;
int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
int vnodes_skipped = 0;
- int maxlaunder;
+ int maxlaunder, scan_tick, scanned;
int lockmode;
boolean_t queues_locked;
@@ -1115,9 +1120,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* then the page may still be modified until the last of those
* mappings are removed.
*/
- vm_page_test_dirty(m);
- if (m->dirty == 0 && object->ref_count != 0)
- pmap_remove_all(m);
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
if (m->valid == 0) {
/*
@@ -1353,34 +1360,37 @@ relock_queues:
* If we're just idle polling attempt to visit every
* active page within 'update_period' seconds.
*/
- if (pass == 0 && vm_pageout_update_period != 0) {
- maxscan /= vm_pageout_update_period;
- page_shortage = maxscan;
- }
+ scan_tick = ticks;
+ if (vm_pageout_update_period != 0) {
+ min_scan = pq->pq_cnt;
+ min_scan *= scan_tick - vmd->vmd_last_active_scan;
+ min_scan /= hz * vm_pageout_update_period;
+ } else
+ min_scan = 0;
+ if (min_scan > 0 || (page_shortage > 0 && maxscan > 0))
+ vmd->vmd_last_active_scan = scan_tick;
/*
- * Scan the active queue for things we can deactivate. We nominally
- * track the per-page activity counter and use it to locate
- * deactivation candidates.
+ * Scan the active queue for pages that can be deactivated. Update
+ * the per-page activity counter and use it to identify deactivation
+ * candidates.
*/
- m = TAILQ_FIRST(&pq->pq_pl);
- while (m != NULL && maxscan-- > 0 && page_shortage > 0) {
+ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
+ min_scan || (page_shortage > 0 && scanned < maxscan)); m = next,
+ scanned++) {
KASSERT(m->queue == PQ_ACTIVE,
("vm_pageout_scan: page %p isn't active", m));
next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0) {
- m = next;
+ if ((m->flags & PG_MARKER) != 0)
continue;
- }
KASSERT((m->flags & PG_FICTITIOUS) == 0,
("Fictitious page %p cannot be in active queue", m));
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("Unmanaged page %p cannot be in active queue", m));
if (!vm_pageout_page_lock(m, &next)) {
vm_page_unlock(m);
- m = next;
continue;
}
@@ -1433,7 +1443,6 @@ relock_queues:
} else
vm_page_requeue_locked(m);
vm_page_unlock(m);
- m = next;
}
vm_pagequeue_unlock(pq);
#if !defined(NO_SWAPPING)
@@ -1621,6 +1630,7 @@ vm_pageout_worker(void *arg)
*/
KASSERT(domain->vmd_segs != 0, ("domain without segments"));
+ domain->vmd_last_active_scan = ticks;
vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
/*
@@ -1641,9 +1651,15 @@ vm_pageout_worker(void *arg)
}
if (vm_pages_needed) {
/*
- * Still not done, take a second pass without waiting
- * (unlimited dirty cleaning), otherwise sleep a bit
- * and try again.
+ * We're still not done. Either vm_pages_needed was
+ * set by another thread during the previous scan
+ * (typically, this happens during a level 0 scan) or
+ * vm_pages_needed was already set and the scan failed
+ * to free enough pages. If we haven't yet performed
+ * a level >= 2 scan (unlimited dirty cleaning), then
+ * upgrade the level and scan again now. Otherwise,
+ * sleep a bit and try again later. While sleeping,
+ * vm_pages_needed can be cleared.
*/
if (domain->vmd_pass > 1)
msleep(&vm_pages_needed,
@@ -1654,15 +1670,14 @@ vm_pageout_worker(void *arg)
* Good enough, sleep until required to refresh
* stats.
*/
- domain->vmd_pass = 0;
msleep(&vm_pages_needed, &vm_page_queue_free_mtx,
PVM, "psleep", hz);
-
}
if (vm_pages_needed) {
cnt.v_pdwakeups++;
domain->vmd_pass++;
- }
+ } else
+ domain->vmd_pass = 0;
mtx_unlock(&vm_page_queue_free_mtx);
vm_pageout_scan(domain, domain->vmd_pass);
}
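
The new active-queue pacing replaces the old per-pass division with a scan target proportional to elapsed time; the same arithmetic as the hunk above, annotated:

	/*
	 * Visit the whole active queue roughly once per
	 * vm_pageout_update_period seconds: with pq_cnt pages queued and
	 * (scan_tick - vmd_last_active_scan) ticks elapsed at hz ticks
	 * per second, the slice to scan on this pass is
	 *
	 *	min_scan = pq_cnt * elapsed_ticks / (hz * update_period)
	 */
	min_scan = pq->pq_cnt;
	min_scan *= scan_tick - vmd->vmd_last_active_scan;
	min_scan /= hz * vm_pageout_update_period;
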
diff --git a/sys/x86/acpica/madt.c b/sys/x86/acpica/madt.c
index f54a358..368e951 100644
--- a/sys/x86/acpica/madt.c
+++ b/sys/x86/acpica/madt.c
@@ -53,8 +53,8 @@ static struct {
} *ioapics;
static struct lapic_info {
- u_int la_enabled:1;
- u_int la_acpi_id:8;
+ u_int la_enabled;
+ u_int la_acpi_id;
} lapics[MAX_APIC_ID + 1];
static int madt_found_sci_override;
@@ -220,34 +220,48 @@ madt_walk_table(acpi_subtable_handler *handler, void *arg)
}
static void
+madt_add_cpu(u_int acpi_id, u_int apic_id, u_int flags)
+{
+ struct lapic_info *la;
+
+ /*
+ * The MADT does not include a BSP flag, so we have to let the
+ * MP code figure out which CPU is the BSP on its own.
+ */
+ if (bootverbose)
+ printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n",
+ apic_id, acpi_id, flags & ACPI_MADT_ENABLED ?
+ "enabled" : "disabled");
+ if (!(flags & ACPI_MADT_ENABLED))
+ return;
+ if (apic_id > MAX_APIC_ID) {
+ printf("MADT: Ignoring local APIC ID %u (too high)\n",
+ apic_id);
+ return;
+ }
+
+ la = &lapics[apic_id];
+ KASSERT(la->la_enabled == 0, ("Duplicate local APIC ID %u", apic_id));
+ la->la_enabled = 1;
+ la->la_acpi_id = acpi_id;
+ lapic_create(apic_id, 0);
+}
+
+static void
madt_probe_cpus_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
ACPI_MADT_LOCAL_APIC *proc;
- struct lapic_info *la;
+ ACPI_MADT_LOCAL_X2APIC *x2apic;
switch (entry->Type) {
case ACPI_MADT_TYPE_LOCAL_APIC:
- /*
- * The MADT does not include a BSP flag, so we have to
- * let the MP code figure out which CPU is the BSP on
- * its own.
- */
proc = (ACPI_MADT_LOCAL_APIC *)entry;
- if (bootverbose)
- printf("MADT: Found CPU APIC ID %u ACPI ID %u: %s\n",
- proc->Id, proc->ProcessorId,
- (proc->LapicFlags & ACPI_MADT_ENABLED) ?
- "enabled" : "disabled");
- if (!(proc->LapicFlags & ACPI_MADT_ENABLED))
- break;
- if (proc->Id > MAX_APIC_ID)
- panic("%s: CPU ID %u too high", __func__, proc->Id);
- la = &lapics[proc->Id];
- KASSERT(la->la_enabled == 0,
- ("Duplicate local APIC ID %u", proc->Id));
- la->la_enabled = 1;
- la->la_acpi_id = proc->ProcessorId;
- lapic_create(proc->Id, 0);
+ madt_add_cpu(proc->ProcessorId, proc->Id, proc->LapicFlags);
+ break;
+ case ACPI_MADT_TYPE_LOCAL_X2APIC:
+ x2apic = (ACPI_MADT_LOCAL_X2APIC *)entry;
+ madt_add_cpu(x2apic->Uid, x2apic->LocalApicId,
+ x2apic->LapicFlags);
break;
}
}
@@ -503,29 +517,44 @@ madt_parse_nmi(ACPI_MADT_NMI_SOURCE *nmi)
* Parse an entry for an NMI routed to a local APIC LVT pin.
*/
static void
-madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi)
+madt_handle_local_nmi(u_int acpi_id, UINT8 Lint, UINT16 IntiFlags)
{
u_int apic_id, pin;
- if (nmi->ProcessorId == 0xff)
+ if (acpi_id == 0xffffffff)
apic_id = APIC_ID_ALL;
- else if (madt_find_cpu(nmi->ProcessorId, &apic_id) != 0) {
+ else if (madt_find_cpu(acpi_id, &apic_id) != 0) {
if (bootverbose)
printf("MADT: Ignoring local NMI routed to "
- "ACPI CPU %u\n", nmi->ProcessorId);
+ "ACPI CPU %u\n", acpi_id);
return;
}
- if (nmi->Lint == 0)
+ if (Lint == 0)
pin = APIC_LVT_LINT0;
else
pin = APIC_LVT_LINT1;
lapic_set_lvt_mode(apic_id, pin, APIC_LVT_DM_NMI);
- if (!(nmi->IntiFlags & ACPI_MADT_TRIGGER_CONFORMS))
+ if (!(IntiFlags & ACPI_MADT_TRIGGER_CONFORMS))
lapic_set_lvt_triggermode(apic_id, pin,
- interrupt_trigger(nmi->IntiFlags, 0));
- if (!(nmi->IntiFlags & ACPI_MADT_POLARITY_CONFORMS))
+ interrupt_trigger(IntiFlags, 0));
+ if (!(IntiFlags & ACPI_MADT_POLARITY_CONFORMS))
lapic_set_lvt_polarity(apic_id, pin,
- interrupt_polarity(nmi->IntiFlags, 0));
+ interrupt_polarity(IntiFlags, 0));
+}
+
+static void
+madt_parse_local_nmi(ACPI_MADT_LOCAL_APIC_NMI *nmi)
+{
+
+ madt_handle_local_nmi(nmi->ProcessorId == 0xff ? 0xffffffff :
+ nmi->ProcessorId, nmi->Lint, nmi->IntiFlags);
+}
+
+static void
+madt_parse_local_x2apic_nmi(ACPI_MADT_LOCAL_X2APIC_NMI *nmi)
+{
+
+ madt_handle_local_nmi(nmi->Uid, nmi->Lint, nmi->IntiFlags);
}
/*
@@ -546,6 +575,10 @@ madt_parse_ints(ACPI_SUBTABLE_HEADER *entry, void *arg __unused)
case ACPI_MADT_TYPE_LOCAL_APIC_NMI:
madt_parse_local_nmi((ACPI_MADT_LOCAL_APIC_NMI *)entry);
break;
+ case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI:
+ madt_parse_local_x2apic_nmi(
+ (ACPI_MADT_LOCAL_X2APIC_NMI *)entry);
+ break;
}
}
diff --git a/tests/sys/vm/Makefile b/tests/sys/vm/Makefile
index 1795eef..08fbb21 100644
--- a/tests/sys/vm/Makefile
+++ b/tests/sys/vm/Makefile
@@ -2,6 +2,6 @@
TESTSDIR= ${TESTSBASE}/sys/vm
-TAP_TESTS_C+= mmap_test
+ATF_TESTS_C+= mmap_test
.include <bsd.test.mk>
diff --git a/tests/sys/vm/mmap_test.c b/tests/sys/vm/mmap_test.c
index 7591a09..88013da 100644
--- a/tests/sys/vm/mmap_test.c
+++ b/tests/sys/vm/mmap_test.c
@@ -29,16 +29,18 @@
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
-#include <sys/types.h>
+#include <atf-c.h>
#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
#include <stdio.h>
-#include <string.h>
+#include <stdlib.h>
static const struct {
void *addr;
int ok[2]; /* Depending on security.bsd.map_at_zero {0, !=0}. */
-} tests[] = {
+} map_at_zero_tests[] = {
{ (void *)0, { 0, 1 } }, /* Test sysctl. */
{ (void *)1, { 0, 0 } },
{ (void *)(PAGE_SIZE - 1), { 0, 0 } },
@@ -52,54 +54,214 @@ static const struct {
#define MAP_AT_ZERO "security.bsd.map_at_zero"
-int
-main(void)
+ATF_TC_WITHOUT_HEAD(mmap__map_at_zero);
+ATF_TC_BODY(mmap__map_at_zero, tc)
{
void *p;
size_t len;
- int i, error, mib[3], map_at_zero;
-
- error = 0;
-
- /* Get the current sysctl value of security.bsd.map_at_zero. */
- len = sizeof(mib) / sizeof(*mib);
- if (sysctlnametomib(MAP_AT_ZERO, mib, &len) == -1) {
- printf("1..0 # SKIP: sysctlnametomib(\"%s\") failed: %s\n",
- MAP_AT_ZERO, strerror(errno));
- return (0);
- }
+ unsigned int i;
+ int map_at_zero;
len = sizeof(map_at_zero);
- if (sysctl(mib, 3, &map_at_zero, &len, NULL, 0) == -1) {
- printf("1..0 # SKIP: sysctl for %s failed: %s\n", MAP_AT_ZERO,
+ if (sysctlbyname(MAP_AT_ZERO, &map_at_zero, &len, NULL, 0) == -1) {
+ atf_tc_skip("sysctl for %s failed: %s\n", MAP_AT_ZERO,
strerror(errno));
- return (0);
+ return;
}
/* Normalize to 0 or 1 for array access. */
map_at_zero = !!map_at_zero;
- printf("1..%zu\n", nitems(tests));
- for (i = 0; i < (int)nitems(tests); i++) {
- p = mmap((void *)tests[i].addr, PAGE_SIZE,
+ for (i = 0; i < nitems(map_at_zero_tests); i++) {
+ p = mmap((void *)map_at_zero_tests[i].addr, PAGE_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_FIXED,
-1, 0);
if (p == MAP_FAILED) {
- if (tests[i].ok[map_at_zero] != 0)
- error++;
- printf("%sok %d # mmap(%p, ...) failed\n",
- tests[i].ok[map_at_zero] == 0 ? "" : "not ",
- i + 1,
- tests[i].addr);
+ ATF_CHECK_MSG(map_at_zero_tests[i].ok[map_at_zero] == 0,
+ "mmap(%p, ...) failed", map_at_zero_tests[i].addr);
} else {
- if (tests[i].ok[map_at_zero] != 1)
- error++;
- printf("%sok %d # mmap(%p, ...) succeeded: p=%p\n",
- tests[i].ok[map_at_zero] == 1 ? "" : "not ",
- i + 1,
- tests[i].addr, p);
+ ATF_CHECK_MSG(map_at_zero_tests[i].ok[map_at_zero] == 1,
+ "mmap(%p, ...) succeeded: p=%p\n",
+ map_at_zero_tests[i].addr, p);
}
}
+}
+
+static void
+checked_mmap(int prot, int flags, int fd, int error, const char *msg)
+{
+ void *p;
+
+ p = mmap(NULL, getpagesize(), prot, flags, fd, 0);
+ if (p == MAP_FAILED) {
+ if (error == 0)
+ ATF_CHECK_MSG(0, "%s failed with errno %d", msg,
+ errno);
+ else
+ ATF_CHECK_EQ_MSG(error, errno,
+ "%s failed with wrong errno %d (expected %d)", msg,
+ errno, error);
+ } else {
+ ATF_CHECK_MSG(error == 0, "%s succeeded", msg);
+ munmap(p, getpagesize());
+ }
+}
+
+ATF_TC_WITHOUT_HEAD(mmap__bad_arguments);
+ATF_TC_BODY(mmap__bad_arguments, tc)
+{
+ int devstatfd, shmfd, zerofd;
+
+ ATF_REQUIRE((devstatfd = open("/dev/devstat", O_RDONLY)) >= 0);
+ ATF_REQUIRE((shmfd = shm_open(SHM_ANON, O_RDWR, 0644)) >= 0);
+ ATF_REQUIRE(ftruncate(shmfd, getpagesize()) == 0);
+ ATF_REQUIRE((zerofd = open("/dev/zero", O_RDONLY)) >= 0);
+
+ /* These should work. */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON, -1, 0,
+ "simple MAP_ANON");
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_SHARED, shmfd, 0,
+ "simple shm fd shared");
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_PRIVATE, shmfd, 0,
+ "simple shm fd private");
+ checked_mmap(PROT_READ, MAP_SHARED, zerofd, 0,
+ "simple /dev/zero shared");
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_PRIVATE, zerofd, 0,
+ "simple /dev/zero private");
+ checked_mmap(PROT_READ, MAP_SHARED, devstatfd, 0,
+ "simple /dev/devstat shared");
+
+#if 0
+ /*
+ * These tests do not fail without r271635 and followup fixes.
+ * Those changes will not be merged to stable/10 since they
+ * are potentially disruptive.
+ */
+
+ /* Extra PROT flags. */
+ checked_mmap(PROT_READ | PROT_WRITE | 0x100000, MAP_ANON, -1, EINVAL,
+ "MAP_ANON with extra PROT flags");
+ checked_mmap(0xffff, MAP_SHARED, shmfd, EINVAL,
+ "shm fd with garbage PROT");
+
+ /* Undefined flag. */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON | MAP_RESERVED0080, -1,
+ EINVAL, "Undefined flag");
+
+ /* Both MAP_SHARED and MAP_PRIVATE */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE |
+ MAP_SHARED, -1, EINVAL, "MAP_ANON with both SHARED and PRIVATE");
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_SHARED, shmfd,
+ EINVAL, "shm fd with both SHARED and PRIVATE");
+
+ /* At least one of MAP_SHARED or MAP_PRIVATE without ANON */
+ checked_mmap(PROT_READ | PROT_WRITE, 0, shmfd, EINVAL,
+ "shm fd without sharing flag");
+#endif
+
+ /* MAP_ANON with either sharing flag (impacts fork). */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0,
+ "shared MAP_ANON");
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0,
+ "private MAP_ANON");
+
+ /* MAP_ANON should require an fd of -1. */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, EINVAL,
+ "MAP_ANON with fd != -1");
+
+ /* Writable MAP_SHARED should fail on read-only descriptors. */
+ checked_mmap(PROT_READ | PROT_WRITE, MAP_SHARED, zerofd, EACCES,
+ "MAP_SHARED of read-only /dev/zero");
+
+ /*
+ * Character devices other than /dev/zero do not support private
+ * mappings.
+ */
+ checked_mmap(PROT_READ, MAP_PRIVATE, devstatfd, EINVAL,
+ "MAP_PRIVATE of /dev/devstat");
+}
+
+ATF_TC_WITHOUT_HEAD(mmap__dev_zero_private);
+ATF_TC_BODY(mmap__dev_zero_private, tc)
+{
+ char *p1, *p2, *p3;
+ size_t i;
+ int fd;
+
+ ATF_REQUIRE((fd = open("/dev/zero", O_RDONLY)) >= 0);
+
+ p1 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+ 0);
+ ATF_REQUIRE(p1 != MAP_FAILED);
+
+ p2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+ 0);
+ ATF_REQUIRE(p2 != MAP_FAILED);
+
+ for (i = 0; i < getpagesize(); i++)
+ ATF_REQUIRE_EQ_MSG(0, p1[i], "byte at p1[%zu] is %x", i, p1[i]);
+
+ ATF_REQUIRE(memcmp(p1, p2, getpagesize()) == 0);
+
+ p1[0] = 1;
+
+ ATF_REQUIRE(p2[0] == 0);
+
+ p2[0] = 2;
+
+ ATF_REQUIRE(p1[0] == 1);
+
+ p3 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+ 0);
+ ATF_REQUIRE(p3 != MAP_FAILED);
+
+ ATF_REQUIRE(p3[0] == 0);
+}
+
+ATF_TC_WITHOUT_HEAD(mmap__dev_zero_shared);
+ATF_TC_BODY(mmap__dev_zero_shared, tc)
+{
+ char *p1, *p2, *p3;
+ size_t i;
+ int fd;
+
+ ATF_REQUIRE((fd = open("/dev/zero", O_RDWR)) >= 0);
+
+ p1 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ 0);
+ ATF_REQUIRE(p1 != MAP_FAILED);
+
+ p2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ 0);
+ ATF_REQUIRE(p2 != MAP_FAILED);
+
+ for (i = 0; i < getpagesize(); i++)
+ ATF_REQUIRE_EQ_MSG(0, p1[i], "byte at p1[%zu] is %x", i, p1[i]);
+
+ ATF_REQUIRE(memcmp(p1, p2, getpagesize()) == 0);
+
+ p1[0] = 1;
+
+ ATF_REQUIRE(p2[0] == 0);
+
+ p2[0] = 2;
+
+ ATF_REQUIRE(p1[0] == 1);
+
+ p3 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ 0);
+ ATF_REQUIRE(p3 != MAP_FAILED);
+
+ ATF_REQUIRE(p3[0] == 0);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+
+ ATF_TP_ADD_TC(tp, mmap__map_at_zero);
+ ATF_TP_ADD_TC(tp, mmap__bad_arguments);
+ ATF_TP_ADD_TC(tp, mmap__dev_zero_private);
+ ATF_TP_ADD_TC(tp, mmap__dev_zero_shared);
- return (error != 0);
+ return (atf_no_error());
}
diff --git a/tools/build/options/makeman b/tools/build/options/makeman
index 00309d9..0491bed 100755
--- a/tools/build/options/makeman
+++ b/tools/build/options/makeman
@@ -59,7 +59,7 @@ show_options()
fi
done
- cat $t/settings | while read opt targets ; do
+ while read opt targets ; do
if [ "${targets}" = "${ALL_TARGETS}" ] ; then
echo "WITHOUT_${opt}"
elif [ -z "${targets}" ] ; then
@@ -68,7 +68,7 @@ show_options()
echo "WITHOUT_${opt}" $(no_targets "${ALL_TARGETS}" "${targets}")
echo "WITH_${opt} ${targets}"
fi
- done
+ done < $t/settings
}
#
@@ -250,31 +250,33 @@ EOF
:> $t/deps2
fi
+ havedeps=0
if [ -s $t/deps ] ; then
+ havedeps=1
echo 'When set, it also enforces the following options:'
echo '.Pp'
echo '.Bl -item -compact'
- cat $t/deps | while read opt2 ; do
+ while read opt2 ; do
echo '.It'
echo ".Va ${opt2}"
- done
+ done < $t/deps
echo '.El'
fi
if [ -s $t/deps2 ] ; then
- if [ -s $t/deps ] ; then
+ if [ ${havedeps} -eq 1 ] ; then
echo '.Pp'
fi
echo 'When set, the following options are also in effect:'
echo '.Pp'
echo '.Bl -inset -compact'
- cat $t/deps2 | while read opt2 ; do
+ while read opt2 ; do
echo ".It Va ${opt2}"
noopt=$(echo ${opt2} | sed -e's/WITH_/WITHOUT_/;t' -e's/WITHOUT_/WITH_/')
echo '(unless'
echo ".Va ${noopt}"
echo 'is set explicitly)'
- done
+ done < $t/deps2
echo '.El'
fi
twiddle >&2
diff --git a/usr.bin/ar/ar.1 b/usr.bin/ar/ar.1
index b986bcf..079d262 100644
--- a/usr.bin/ar/ar.1
+++ b/usr.bin/ar/ar.1
@@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 31, 2015
+.Dd September 24, 2015
.Dt AR 1
.Os
.Sh NAME
@@ -210,6 +210,7 @@ and 0644 instead of file mode from the members named by arguments
.Ar .
This ensures that checksums on the resulting archives are reproducible
when member contents are identical.
+This option is enabled by default.
If multiple
.Fl D
and
diff --git a/usr.bin/ar/ar.c b/usr.bin/ar/ar.c
index 3d5e2b8..0c69315 100644
--- a/usr.bin/ar/ar.c
+++ b/usr.bin/ar/ar.c
@@ -100,10 +100,12 @@ main(int argc, char **argv)
struct bsdar *bsdar, bsdar_storage;
char *p;
size_t len;
- int i, opt;
+ int i, opt, Dflag, Uflag;
bsdar = &bsdar_storage;
memset(bsdar, 0, sizeof(*bsdar));
+ Dflag = 0;
+ Uflag = 0;
if ((bsdar->progname = getprogname()) == NULL)
bsdar->progname = "ar";
@@ -120,10 +122,12 @@ main(int argc, char **argv)
/* Ignored. */
break;
case 'D':
- bsdar->options |= AR_D;
+ Dflag = 1;
+ Uflag = 0;
break;
case 'U':
- bsdar->options &= ~AR_D;
+ Uflag = 1;
+ Dflag = 0;
break;
case 'V':
ranlib_version();
@@ -140,8 +144,11 @@ main(int argc, char **argv)
if (*argv == NULL)
ranlib_usage();
+ /* Enable deterministic mode unless -U is set. */
+ if (Uflag == 0)
+ bsdar->options |= AR_D;
bsdar->options |= AR_S;
- for (;(bsdar->filename = *argv++) != NULL;)
+ while ((bsdar->filename = *argv++) != NULL)
ar_mode_s(bsdar);
exit(EX_OK);
@@ -180,7 +187,8 @@ main(int argc, char **argv)
set_mode(bsdar, opt);
break;
case 'D':
- bsdar->options |= AR_D;
+ Dflag = 1;
+ Uflag = 0;
break;
case 'f':
case 'T':
@@ -220,7 +228,8 @@ main(int argc, char **argv)
set_mode(bsdar, opt);
break;
case 'U':
- bsdar->options &= ~AR_D;
+ Uflag = 1;
+ Dflag = 0;
break;
case 'u':
bsdar->options |= AR_U;
@@ -273,6 +282,10 @@ main(int argc, char **argv)
argv++;
}
+ /* Set deterministic mode for -D, and by default without -U. */
+ if (Dflag || (Uflag == 0 && (bsdar->mode == 'q' || bsdar->mode == 'r')))
+ bsdar->options |= AR_D;
+
if (bsdar->options & AR_A)
only_mode(bsdar, "-a", "mqr");
if (bsdar->options & AR_B)
@@ -281,8 +294,10 @@ main(int argc, char **argv)
only_mode(bsdar, "-c", "qr");
if (bsdar->options & AR_CC)
only_mode(bsdar, "-C", "x");
- if (bsdar->options & AR_D)
+ if (Dflag)
only_mode(bsdar, "-D", "qr");
+ if (Uflag)
+ only_mode(bsdar, "-U", "qr");
if (bsdar->options & AR_O)
only_mode(bsdar, "-o", "x");
if (bsdar->options & AR_SS)
diff --git a/usr.bin/bmake/Makefile.inc b/usr.bin/bmake/Makefile.inc
index 61ccb8e..2cbb87b 100644
--- a/usr.bin/bmake/Makefile.inc
+++ b/usr.bin/bmake/Makefile.inc
@@ -13,7 +13,7 @@
MK_BMAKE= yes
.endif
-.if defined(MK_BMAKE) && ${MK_BMAKE} != "no"
+.if defined(MK_BMAKE) && ${MK_BMAKE} != "no" && exists(${.CURDIR}/tests)
PROG= make
.endif
diff --git a/usr.bin/ctlstat/ctlstat.8 b/usr.bin/ctlstat/ctlstat.8
index 98a3369..77eb790 100644
--- a/usr.bin/ctlstat/ctlstat.8
+++ b/usr.bin/ctlstat/ctlstat.8
@@ -34,7 +34,7 @@
.\" $Id: //depot/users/kenm/FreeBSD-test2/usr.bin/ctlstat/ctlstat.8#2 $
.\" $FreeBSD$
.\"
-.Dd May 22, 2015
+.Dd September 21, 2015
.Dt CTLSTAT 8
.Os
.Sh NAME
@@ -50,6 +50,7 @@
.Op Fl j
.Op Fl l Ar lun
.Op Fl n Ar numdevs
+.Op Fl p Ar port
.Op Fl w Ar wait
.Sh DESCRIPTION
The
@@ -64,7 +65,7 @@ The options are as follows:
.Bl -tag -width 10n
.It Fl t
Total mode.
-This displays separate columns with the total CTL read and write output,
+This displays separate columns with the total read and write output,
and a combined total column that also includes non I/O operations.
.It Fl c Ar count
Display statistics this many times.
@@ -74,23 +75,20 @@ Disable CPU statistics display.
Display DMA operation time (latency) instead of overall I/O time (latency).
.It Fl D
Text dump mode.
-Dump all available statistics every 30 seconds in a text format suitable
-for parsing.
+Dump statistics every 30 seconds in a text format suitable for parsing.
No statistics are computed in this mode; only raw numbers are displayed.
.It Fl h
Suppress display of the header.
.It Fl j
JSON dump mode.
-Dump all available statistics every 30 seconds in JavaScript Object
-Notation (JSON) format.
+Dump statistics every 30 seconds in JavaScript Object Notation (JSON) format.
No statistics are computed in this mode; only raw numbers are displayed.
.It Fl l Ar lun
Request statistics for the specified LUN.
-This option is incompatible with total
-.Fl ( t )
-mode.
.It Fl n Ar numdevs
Display statistics for this many devices.
+.It Fl p Ar port
+Request statistics for the specified port.
.It Fl w Ar wait
Wait this many seconds in between displays.
If this option is not specified,
diff --git a/usr.bin/ctlstat/ctlstat.c b/usr.bin/ctlstat/ctlstat.c
index 3bfb248..3587586 100644
--- a/usr.bin/ctlstat/ctlstat.c
+++ b/usr.bin/ctlstat/ctlstat.c
@@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$");
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_util.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
@@ -79,7 +78,7 @@ __FBSDID("$FreeBSD$");
*/
#define CTL_STAT_LUN_BITS 1024L
-static const char *ctlstat_opts = "Cc:Ddhjl:n:tw:";
+static const char *ctlstat_opts = "Cc:Ddhjl:n:p:tw:";
static const char *ctlstat_usage = "Usage: ctlstat [-CDdjht] [-l lunnum]"
" [-c count] [-n numdevs] [-p port] [-w wait]\n";
@@ -103,12 +102,16 @@ typedef enum {
#define CTLSTAT_FLAG_TOTALS (1 << 3)
#define CTLSTAT_FLAG_DMA_TIME (1 << 4)
#define CTLSTAT_FLAG_LUN_TIME_VALID (1 << 5)
+#define CTLSTAT_FLAG_LUN_MASK (1 << 6)
+#define CTLSTAT_FLAG_PORT_MASK (1 << 7)
#define F_CPU(ctx) ((ctx)->flags & CTLSTAT_FLAG_CPU)
#define F_HDR(ctx) ((ctx)->flags & CTLSTAT_FLAG_HEADER)
#define F_FIRST(ctx) ((ctx)->flags & CTLSTAT_FLAG_FIRST_RUN)
#define F_TOTALS(ctx) ((ctx)->flags & CTLSTAT_FLAG_TOTALS)
#define F_DMA(ctx) ((ctx)->flags & CTLSTAT_FLAG_DMA_TIME)
#define F_LUNVAL(ctx) ((ctx)->flags & CTLSTAT_FLAG_LUN_TIME_VALID)
+#define F_LUNMASK(ctx) ((ctx)->flags & CTLSTAT_FLAG_LUN_MASK)
+#define F_PORTMASK(ctx) ((ctx)->flags & CTLSTAT_FLAG_PORT_MASK)
struct ctlstat_context {
ctlstat_mode_types mode;
@@ -121,6 +124,7 @@ struct ctlstat_context {
uint64_t cur_total_jiffies, prev_total_jiffies;
uint64_t cur_idle, prev_idle;
bitstr_t bit_decl(lun_mask, CTL_STAT_LUN_BITS);
+ bitstr_t bit_decl(port_mask, CTL_MAX_PORTS);
int num_luns;
int numdevs;
int header_interval;
@@ -134,7 +138,8 @@ static void usage(int error);
static int getstats(int fd, int *num_luns, struct ctl_lun_io_stats **xlun_stats,
struct timespec *cur_time, int *lun_time_valid);
static int getcpu(struct ctl_cpu_stats *cpu_stats);
-static void compute_stats(struct ctl_lun_io_stats *cur_stats,
+static void compute_stats(struct ctlstat_context *ctx,
+ struct ctl_lun_io_stats *cur_stats,
struct ctl_lun_io_stats *prev_stats,
long double etime, long double *mbsec,
long double *kb_per_transfer,
@@ -235,7 +240,7 @@ getcpu(struct ctl_cpu_stats *cpu_stats)
}
static void
-compute_stats(struct ctl_lun_io_stats *cur_stats,
+compute_stats(struct ctlstat_context *ctx, struct ctl_lun_io_stats *cur_stats,
struct ctl_lun_io_stats *prev_stats, long double etime,
long double *mbsec, long double *kb_per_transfer,
long double *transfers_per_second, long double *ms_per_transfer,
@@ -252,6 +257,9 @@ compute_stats(struct ctl_lun_io_stats *cur_stats,
bzero(&total_time_ts, sizeof(total_time_ts));
bzero(&total_dma_ts, sizeof(total_dma_ts));
for (port = 0; port < CTL_MAX_PORTS; port++) {
+ if (F_PORTMASK(ctx) &&
+ bit_test(ctx->port_mask, port) == 0)
+ continue;
for (i = 0; i < CTL_STATS_NUM_TYPES; i++) {
total_bytes += cur_stats->ports[port].bytes[i];
total_operations +=
@@ -327,8 +335,8 @@ compute_stats(struct ctl_lun_io_stats *cur_stats,
*/
#define PRINT_BINTIME(prefix, bt) \
- printf("%s %jd s %ju frac\n", prefix, (intmax_t)(bt).sec, \
- (uintmax_t)(bt).frac)
+ printf("%s %jd.%06ju\n", prefix, (intmax_t)(bt).sec, \
+ (uintmax_t)(((bt).frac >> 32) * 1000000 >> 32))
static const char *iotypes[] = {"NO IO", "READ", "WRITE"};
static void
@@ -337,8 +345,13 @@ ctlstat_dump(struct ctlstat_context *ctx) {
struct ctl_lun_io_stats *stats = ctx->cur_lun_stats;
for (lun = 0; lun < ctx->num_luns;lun++) {
+ if (F_LUNMASK(ctx) && bit_test(ctx->lun_mask, lun) == 0)
+ continue;
printf("lun %d\n", lun);
for (port = 0; port < CTL_MAX_PORTS; port++) {
+ if (F_PORTMASK(ctx) &&
+ bit_test(ctx->port_mask, port) == 0)
+ continue;
printf(" port %d\n",
stats[lun].ports[port].targ_port);
for (iotype = 0; iotype < CTL_STATS_NUM_TYPES;
@@ -361,9 +374,8 @@ ctlstat_dump(struct ctlstat_context *ctx) {
}
#define JSON_BINTIME(prefix, bt) \
- printf("\"%s\":{\"sec\":%jd,\"frac\":%ju},", \
- prefix, (intmax_t)(bt).sec, (uintmax_t)(bt).frac)
-
+ printf("\"%s\":%jd.%06ju,", prefix, (intmax_t)(bt).sec, \
+ (uintmax_t)(((bt).frac >> 32) * 1000000 >> 32))
static void
ctlstat_json(struct ctlstat_context *ctx) {
int iotype, lun, port;
@@ -371,8 +383,13 @@ ctlstat_json(struct ctlstat_context *ctx) {
printf("{\"luns\":[");
for (lun = 0; lun < ctx->num_luns; lun++) {
+ if (F_LUNMASK(ctx) && bit_test(ctx->lun_mask, lun) == 0)
+ continue;
printf("{\"ports\":[");
for (port = 0; port < CTL_MAX_PORTS;port++) {
+ if (F_PORTMASK(ctx) &&
+ bit_test(ctx->port_mask, port) == 0)
+ continue;
printf("{\"num\":%d,\"io\":[",
stats[lun].ports[port].targ_port);
for (iotype = 0; iotype < CTL_STATS_NUM_TYPES;
@@ -443,17 +460,16 @@ ctlstat_standard(struct ctlstat_context *ctx) {
hdr_devs = 0;
+ if (F_CPU(ctx))
+ fprintf(stdout, " CPU");
if (F_TOTALS(ctx)) {
- fprintf(stdout, "%s System Read %s"
- "System Write %sSystem Total%s\n",
- (F_LUNVAL(ctx) != 0) ? " " : "",
- (F_LUNVAL(ctx) != 0) ? " " : "",
- (F_LUNVAL(ctx) != 0) ? " " : "",
- (F_CPU(ctx)) ? " CPU" : "");
+ fprintf(stdout, "%s Read %s"
+ " Write %s Total\n",
+ (F_LUNVAL(ctx) != 0) ? " " : "",
+ (F_LUNVAL(ctx) != 0) ? " " : "",
+ (F_LUNVAL(ctx) != 0) ? " " : "");
hdr_devs = 3;
} else {
- if (F_CPU(ctx))
- fprintf(stdout, " CPU ");
for (i = 0; i < min(CTL_STAT_LUN_BITS,
ctx->num_luns); i++) {
int lun;
@@ -466,7 +482,8 @@ ctlstat_standard(struct ctlstat_context *ctx) {
lun = (int)ctx->cur_lun_stats[i
].lun_number;
- if (bit_test(ctx->lun_mask, lun) == 0)
+ if (F_LUNMASK(ctx) &&
+ bit_test(ctx->lun_mask, lun) == 0)
continue;
fprintf(stdout, "%15.6s%d %s",
"lun", lun,
@@ -475,17 +492,19 @@ ctlstat_standard(struct ctlstat_context *ctx) {
}
fprintf(stdout, "\n");
}
+ if (F_CPU(ctx))
+ fprintf(stdout, " ");
for (i = 0; i < hdr_devs; i++)
- fprintf(stdout, "%s %sKB/t %s MB/s ",
- ((F_CPU(ctx) != 0) && (i == 0) &&
- (F_TOTALS(ctx) == 0)) ? " " : "",
- (F_LUNVAL(ctx) != 0) ? " ms " : "",
+ fprintf(stdout, "%s KB/t %s MB/s",
+ (F_LUNVAL(ctx) != 0) ? " ms" : "",
(F_DMA(ctx) == 0) ? "tps" : "dps");
fprintf(stdout, "\n");
ctx->header_interval = 20;
}
}
+ if (F_CPU(ctx))
+ fprintf(stdout, "%3.0Lf%%", cpu_percentage);
if (F_TOTALS(ctx) != 0) {
long double mbsec[3];
long double kb_per_transfer[3];
@@ -517,7 +536,13 @@ ctlstat_standard(struct ctlstat_context *ctx) {
&ctx->cur_lun_stats[i].ports[p].dma_time[j])
for (i = 0; i < ctx->num_luns; i++) {
+ if (F_LUNMASK(ctx) && bit_test(ctx->lun_mask,
+ (int)ctx->cur_lun_stats[i].lun_number) == 0)
+ continue;
for (port = 0; port < CTL_MAX_PORTS; port++) {
+ if (F_PORTMASK(ctx) &&
+ bit_test(ctx->port_mask, port) == 0)
+ continue;
for (j = 0; j < CTL_STATS_NUM_TYPES; j++) {
ADD_STATS_BYTES(2, port, i, j);
ADD_STATS_OPERATIONS(2, port, i, j);
@@ -542,29 +567,24 @@ ctlstat_standard(struct ctlstat_context *ctx) {
}
for (i = 0; i < 3; i++) {
- compute_stats(&ctx->cur_total_stats[i],
+ compute_stats(ctx, &ctx->cur_total_stats[i],
F_FIRST(ctx) ? NULL : &ctx->prev_total_stats[i],
etime, &mbsec[i], &kb_per_transfer[i],
&transfers_per_sec[i],
&ms_per_transfer[i], &ms_per_dma[i],
&dmas_per_sec[i]);
if (F_DMA(ctx) != 0)
- fprintf(stdout, " %2.2Lf",
+ fprintf(stdout, " %5.1Lf",
ms_per_dma[i]);
else if (F_LUNVAL(ctx) != 0)
- fprintf(stdout, " %2.2Lf",
+ fprintf(stdout, " %5.1Lf",
ms_per_transfer[i]);
- fprintf(stdout, " %5.2Lf %3.0Lf %5.2Lf ",
+ fprintf(stdout, " %4.0Lf %5.0Lf %4.0Lf",
kb_per_transfer[i],
(F_DMA(ctx) == 0) ? transfers_per_sec[i] :
dmas_per_sec[i], mbsec[i]);
}
- if (F_CPU(ctx))
- fprintf(stdout, " %5.1Lf%%", cpu_percentage);
} else {
- if (F_CPU(ctx))
- fprintf(stdout, "%5.1Lf%% ", cpu_percentage);
-
for (i = 0; i < min(CTL_STAT_LUN_BITS, ctx->num_luns); i++) {
long double mbsec, kb_per_transfer;
long double transfers_per_sec;
@@ -572,21 +592,21 @@ ctlstat_standard(struct ctlstat_context *ctx) {
long double ms_per_dma;
long double dmas_per_sec;
- if (bit_test(ctx->lun_mask,
+ if (F_LUNMASK(ctx) && bit_test(ctx->lun_mask,
(int)ctx->cur_lun_stats[i].lun_number) == 0)
continue;
- compute_stats(&ctx->cur_lun_stats[i], F_FIRST(ctx) ?
- NULL : &ctx->prev_lun_stats[i], etime,
- &mbsec, &kb_per_transfer,
- &transfers_per_sec, &ms_per_transfer,
- &ms_per_dma, &dmas_per_sec);
+ compute_stats(ctx, &ctx->cur_lun_stats[i],
+ F_FIRST(ctx) ? NULL : &ctx->prev_lun_stats[i],
+ etime, &mbsec, &kb_per_transfer,
+ &transfers_per_sec, &ms_per_transfer,
+ &ms_per_dma, &dmas_per_sec);
if (F_DMA(ctx))
- fprintf(stdout, " %2.2Lf",
+ fprintf(stdout, " %5.1Lf",
ms_per_dma);
else if (F_LUNVAL(ctx) != 0)
- fprintf(stdout, " %2.2Lf",
+ fprintf(stdout, " %5.1Lf",
ms_per_transfer);
- fprintf(stdout, " %5.2Lf %3.0Lf %5.2Lf ",
+ fprintf(stdout, " %4.0Lf %5.0Lf %4.0Lf",
kb_per_transfer, (F_DMA(ctx) == 0) ?
transfers_per_sec : dmas_per_sec, mbsec);
}
@@ -598,7 +618,6 @@ main(int argc, char **argv)
{
int c;
int count, waittime;
- int set_lun;
int fd, retval;
struct ctlstat_context ctx;
@@ -642,20 +661,30 @@ main(int argc, char **argv)
if (cur_lun > CTL_STAT_LUN_BITS)
errx(1, "Invalid LUN number %d", cur_lun);
- bit_ffs(ctx.lun_mask, CTL_STAT_LUN_BITS, &set_lun);
- if (set_lun == -1)
+ if (!F_LUNMASK(&ctx))
ctx.numdevs = 1;
else
ctx.numdevs++;
bit_set(ctx.lun_mask, cur_lun);
+ ctx.flags |= CTLSTAT_FLAG_LUN_MASK;
break;
}
case 'n':
ctx.numdevs = atoi(optarg);
break;
+ case 'p': {
+ int cur_port;
+
+ cur_port = atoi(optarg);
+ if (cur_port < 0 || cur_port >= CTL_MAX_PORTS)
+ errx(1, "Invalid port number %d", cur_port);
+
+ bit_set(ctx.port_mask, cur_port);
+ ctx.flags |= CTLSTAT_FLAG_PORT_MASK;
+ break;
+ }
case 't':
ctx.flags |= CTLSTAT_FLAG_TOTALS;
- ctx.numdevs = 3;
break;
case 'w':
waittime = atoi(optarg);
@@ -668,13 +697,7 @@ main(int argc, char **argv)
}
}
- bit_ffs(ctx.lun_mask, CTL_STAT_LUN_BITS, &set_lun);
-
- if ((F_TOTALS(&ctx))
- && (set_lun != -1)) {
- errx(1, "Total Mode (-t) is incompatible with individual "
- "LUN mode (-l)");
- } else if (set_lun == -1) {
+ if (!F_TOTALS(&ctx) && !F_LUNMASK(&ctx)) {
/*
* Note that this just selects the first N LUNs to display,
* but at this point we have no knowledge of which LUN
@@ -683,6 +706,7 @@ main(int argc, char **argv)
*/
bit_nset(ctx.lun_mask, 0, min(ctx.numdevs - 1,
CTL_STAT_LUN_BITS - 1));
+ ctx.flags |= CTLSTAT_FLAG_LUN_MASK;
}
if ((fd = open(CTL_DEFAULT_DEV, O_RDWR)) == -1)
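The -l and -p handling above keeps the LUN and port selections in bitstring(3) masks: each option sets one bit, and the statistics loops skip any LUN or port whose bit is clear. A minimal sketch of that pattern, assuming nothing beyond <bitstring.h>; the mask size and names here are illustrative, not from this commit:

#include <bitstring.h>
#include <stdio.h>

#define EX_MAX_PORTS 128	/* illustrative; CTL uses its own constant */

int
main(void)
{
	bitstr_t bit_decl(port_mask, EX_MAX_PORTS);
	int p;

	bit_nclear(port_mask, 0, EX_MAX_PORTS - 1);	/* start with no bits set */
	bit_set(port_mask, 3);		/* as if "-p 3" had been given */
	bit_set(port_mask, 7);		/* as if "-p 7" had been given */
	for (p = 0; p < EX_MAX_PORTS; p++)
		if (bit_test(port_mask, p) != 0)	/* same test as the loops above */
			printf("port %d selected\n", p);
	return (0);
}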
diff --git a/usr.bin/elfdump/elfdump.c b/usr.bin/elfdump/elfdump.c
index 9640dd5..fe330f8 100644
--- a/usr.bin/elfdump/elfdump.c
+++ b/usr.bin/elfdump/elfdump.c
@@ -261,6 +261,7 @@ e_machines(u_int mach)
case EM_386: return "EM_386";
case EM_68K: return "EM_68K";
case EM_88K: return "EM_88K";
+ case EM_IAMCU: return "EM_IAMCU";
case EM_860: return "EM_860";
case EM_MIPS: return "EM_MIPS";
case EM_PPC: return "EM_PPC";
@@ -271,6 +272,7 @@ e_machines(u_int mach)
case EM_IA_64: return "EM_IA_64";
case EM_X86_64: return "EM_X86_64";
case EM_AARCH64:return "EM_AARCH64";
+ case EM_RISCV: return "EM_RISCV";
}
snprintf(machdesc, sizeof(machdesc),
"(unknown machine) -- type 0x%x", mach);
diff --git a/usr.bin/login/login.c b/usr.bin/login/login.c
index e2af83f..f637bc2 100644
--- a/usr.bin/login/login.c
+++ b/usr.bin/login/login.c
@@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <errno.h>
#include <grp.h>
-#include <libutil.h>
#include <login_cap.h>
#include <pwd.h>
#include <setjmp.h>
diff --git a/usr.bin/login/login_fbtab.c b/usr.bin/login/login_fbtab.c
index f642ea7..8faee56 100644
--- a/usr.bin/login/login_fbtab.c
+++ b/usr.bin/login/login_fbtab.c
@@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <errno.h>
#include <glob.h>
-#include <paths.h>
#include <stdio.h>
#include <string.h>
#include <syslog.h>
@@ -120,7 +119,7 @@ login_fbtab(char *tty, uid_t uid, gid_t gid)
/* login_protect - protect one device entry */
-void
+static void
login_protect(const char *table, char *pattern, int mask, uid_t uid, gid_t gid)
{
glob_t gl;
diff --git a/usr.bin/systat/iostat.c b/usr.bin/systat/iostat.c
index 3384f15..fa275eb 100644
--- a/usr.bin/systat/iostat.c
+++ b/usr.bin/systat/iostat.c
@@ -112,10 +112,8 @@ initiostat(void)
if ((num_devices = devstat_getnumdevs(NULL)) < 0)
return(0);
- cur.dinfo = (struct devinfo *)malloc(sizeof(struct devinfo));
- last.dinfo = (struct devinfo *)malloc(sizeof(struct devinfo));
- bzero(cur.dinfo, sizeof(struct devinfo));
- bzero(last.dinfo, sizeof(struct devinfo));
+ cur.dinfo = calloc(1, sizeof(struct devinfo));
+ last.dinfo = calloc(1, sizeof(struct devinfo));
/*
* This value for maxshowdevs (100) is bogus. I'm not sure exactly
@@ -196,7 +194,7 @@ numlabels(int row)
char tmpstr[10];
#define COLWIDTH 17
-#define DRIVESPERLINE ((wnd->_maxx - INSET) / COLWIDTH)
+#define DRIVESPERLINE ((getmaxx(wnd) - 1 - INSET) / COLWIDTH)
for (ndrives = 0, i = 0; i < num_devices; i++)
if (dev_select[i].selected)
ndrives++;
@@ -204,7 +202,7 @@ numlabels(int row)
/*
* Deduct -regions for blank line after each scrolling region.
*/
- linesperregion = (wnd->_maxy - row - regions) / regions;
+ linesperregion = (getmaxy(wnd) - 1 - row - regions) / regions;
/*
* Minimum region contains space for two
* label lines and one line of statistics.
@@ -214,9 +212,9 @@ numlabels(int row)
_col = INSET;
for (i = 0; i < num_devices; i++)
if (dev_select[i].selected) {
- if (_col + COLWIDTH >= wnd->_maxx - INSET) {
+ if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
_col = INSET, row += linesperregion + 1;
- if (row > wnd->_maxy - (linesperregion + 1))
+ if (row > getmaxy(wnd) - 1 - (linesperregion + 1))
break;
}
sprintf(tmpstr, "%s%d", dev_select[i].device_name,
@@ -241,7 +239,7 @@ barlabels(int row)
linesperregion = 2 + kbpt;
for (i = 0; i < num_devices; i++)
if (dev_select[i].selected) {
- if (row > wnd->_maxy - linesperregion)
+ if (row > getmaxy(wnd) - 1 - linesperregion)
break;
sprintf(tmpstr, "%s%d", dev_select[i].device_name,
dev_select[i].unit_number);
@@ -276,7 +274,7 @@ showiostat(void)
row += 2;
for (i = 0; i < num_devices; i++)
if (dev_select[i].selected) {
- if (row > wnd->_maxy - linesperregion)
+ if (row > getmaxy(wnd) - linesperregion)
break;
row = devstats(row, INSET, i);
}
@@ -289,9 +287,9 @@ showiostat(void)
winsertln(wnd);
for (i = 0; i < num_devices; i++)
if (dev_select[i].selected) {
- if (_col + COLWIDTH >= wnd->_maxx - INSET) {
+ if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
_col = INSET, row += linesperregion + 1;
- if (row > wnd->_maxy - (linesperregion + 1))
+ if (row > getmaxy(wnd) - 1 - (linesperregion + 1))
break;
wmove(wnd, row + linesperregion, 0);
wdeleteln(wnd);
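The allocation change above is behavior-preserving: calloc(1, size) hands back zeroed storage, so it folds the malloc()/bzero() pair into one call. A sketch of the idiom with a hypothetical structure standing in for struct devinfo; note that the original code, before and after, does not check for allocation failure, so the NULL handling here is an addition of the sketch:

#include <stdlib.h>

struct example_info {		/* hypothetical stand-in for struct devinfo */
	long	counters[16];
};

static struct example_info *
alloc_info(void)
{
	struct example_info *ip;

	/* old form: ip = malloc(sizeof(*ip)); bzero(ip, sizeof(*ip)); */
	ip = calloc(1, sizeof(*ip));	/* one call, memory arrives zeroed */
	return (ip);			/* may be NULL; caller should check */
}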
diff --git a/usr.bin/systat/netstat.c b/usr.bin/systat/netstat.c
index bb1318d..540da37 100644
--- a/usr.bin/systat/netstat.c
+++ b/usr.bin/systat/netstat.c
@@ -85,7 +85,7 @@ static char *inetname(struct sockaddr *);
static void inetprint(struct sockaddr *, const char *);
#define streq(a,b) (strcmp(a,b)==0)
-#define YMAX(w) ((w)->_maxy-1)
+#define YMAX(w) (getmaxy(w)-2)
WINDOW *
opennetstat(void)
diff --git a/usr.bin/systat/pigs.c b/usr.bin/systat/pigs.c
index a02a43f..d341f7d 100644
--- a/usr.bin/systat/pigs.c
+++ b/usr.bin/systat/pigs.c
@@ -94,8 +94,8 @@ showpigs(void)
qsort(pt, nproc, sizeof (struct p_times), compar);
y = 1;
i = nproc;
- if (i > wnd->_maxy-1)
- i = wnd->_maxy-1;
+ if (i > getmaxy(wnd)-2)
+ i = getmaxy(wnd)-2;
for (k = 0; i > 0 && pt[k].pt_pctcpu > 0.01; i--, y++, k++) {
uname = user_from_uid(pt[k].pt_kp->ki_uid, 0);
pname = pt[k].pt_kp->ki_comm;
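The systat edits above replace direct reads of the private WINDOW members _maxy/_maxx with the public getmaxy()/getmaxx() accessors. The accessors return the window dimensions (row and column counts), whereas the old members held the largest valid index, which is why the replacements subtract 1. A self-contained illustration, assuming a standard curses library (link with -lcurses or -lncurses):

#include <curses.h>

int
main(void)
{
	int rows, cols;

	initscr();
	rows = getmaxy(stdscr);	/* row count; the last usable row is rows - 1 */
	cols = getmaxx(stdscr);	/* column count */
	mvprintw(rows - 1, 0, "window is %d rows by %d cols", rows, cols);
	refresh();
	getch();
	endwin();
	return (0);
}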
diff --git a/usr.bin/systat/vmstat.c b/usr.bin/systat/vmstat.c
index cdb26a7..258e357 100644
--- a/usr.bin/systat/vmstat.c
+++ b/usr.bin/systat/vmstat.c
@@ -205,12 +205,9 @@ initkre(void)
return(0);
}
- cur.dinfo = (struct devinfo *)malloc(sizeof(struct devinfo));
- last.dinfo = (struct devinfo *)malloc(sizeof(struct devinfo));
- run.dinfo = (struct devinfo *)malloc(sizeof(struct devinfo));
- bzero(cur.dinfo, sizeof(struct devinfo));
- bzero(last.dinfo, sizeof(struct devinfo));
- bzero(run.dinfo, sizeof(struct devinfo));
+ cur.dinfo = calloc(1, sizeof(struct devinfo));
+ last.dinfo = calloc(1, sizeof(struct devinfo));
+ run.dinfo = calloc(1, sizeof(struct devinfo));
if (dsinit(MAXDRIVES, &cur, &last, &run) != 1)
return(0);
diff --git a/usr.bin/vmstat/vmstat.8 b/usr.bin/vmstat/vmstat.8
index f04cc42..93e75b9 100644
--- a/usr.bin/vmstat/vmstat.8
+++ b/usr.bin/vmstat/vmstat.8
@@ -28,7 +28,7 @@
.\" @(#)vmstat.8 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
-.Dd October 21, 2006
+.Dd August 8, 2014
.Dt VMSTAT 8
.Os
.Sh NAME
@@ -37,10 +37,9 @@
.Sh SYNOPSIS
.Nm
.\" .Op Fl fimst
-.Op Fl afHhimPsz
-.Op Fl c Ar count
+.Op Fl afHhimoPsz
.Op Fl M Ar core Op Fl N Ar system
-.Op Fl w Ar wait
+.Op Fl c Ar count
.Op Fl n Ar devs
.Oo
.Fl p
@@ -48,7 +47,9 @@
.Ar type , if , pass
.Sm on
.Oc
-.Op Ar disks
+.Op Fl w Ar wait
+.Op Ar disks ...
+.Op wait Op count
.Sh DESCRIPTION
The
.Nm
@@ -91,10 +92,12 @@ and
system calls since system startup, and the number of pages of virtual memory
involved in each.
.It Fl h
-Changes memory columns into more easily human readable form. Default if
+Changes memory columns into more easily human readable form.
+The default if
standard output is a terminal device.
.It Fl H
-Changes memory columns into straight numbers. Default if standard output
+Changes memory columns into straight numbers.
+The default if standard output
is not a terminal device (such as a script).
.It Fl i
Report on the number of interrupts taken by each device since system
@@ -116,6 +119,9 @@ Report on the usage of kernel dynamic memory allocated using
by type.
.It Fl n
Change the maximum number of disks to display from the default of 2.
+.It Fl o
+Display a list of virtual memory objects in the system and the resident
+memory used by each object.
.It Fl P
Report per-cpu system/user/idle cpu statistics.
.It Fl p
@@ -214,6 +220,21 @@ Report on memory used by the kernel zone allocator,
by zone.
.El
.Pp
+The
+.Ar wait
+and
+.Ar count
+arguments may be given after their respective flags at any point
+on the command line before the
+.Ar disks
+argument(s), or without their flags, as the final argument(s).
+The latter form is accepted for backwards compatibility, but it is
+preferred to use the forms with
+.Fl w
+and
+.Fl c
+to avoid ambiguity.
+.Pp
By default,
.Nm
displays the following information:
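For instance, under the wording added above, "vmstat -w 5 -c 10 ada0" and the backward-compatible "vmstat ada0 5 10" should request the same ten samples at five-second intervals (ada0 is just an illustrative disk name).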
diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c
index 24a3f2f..9da2973 100644
--- a/usr.bin/vmstat/vmstat.c
+++ b/usr.bin/vmstat/vmstat.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <sys/time.h>
+#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/pcpu.h>
@@ -143,12 +144,14 @@ static kvm_t *kd;
#define TIMESTAT 0x10
#define VMSTAT 0x20
#define ZMEMSTAT 0x40
+#define OBJSTAT 0x80
static void cpustats(void);
static void pcpustats(int, u_long, int);
static void devstats(void);
static void doforkst(void);
static void dointr(void);
+static void doobjstat(void);
static void dosum(void);
static void dovmstat(unsigned int, int);
static void domemstat_malloc(void);
@@ -181,7 +184,7 @@ main(int argc, char *argv[])
interval = reps = todo = 0;
maxshowdevs = 2;
hflag = isatty(1);
- while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:Pp:stw:z")) != -1) {
+ while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:oPp:stw:z")) != -1) {
switch (c) {
case 'a':
aflag++;
@@ -220,6 +223,9 @@ main(int argc, char *argv[])
errx(1, "number of devices %d is < 0",
maxshowdevs);
break;
+ case 'o':
+ todo |= OBJSTAT;
+ break;
case 'p':
if (devstat_buildmatch(optarg, &matches, &num_matches) != 0)
errx(1, "%s", devstat_errbuf);
@@ -289,15 +295,12 @@ main(int argc, char *argv[])
argv = getdrivedata(argv);
}
-#define BACKWARD_COMPATIBILITY
-#ifdef BACKWARD_COMPATIBILITY
if (*argv) {
f = atof(*argv);
interval = f * 1000;
if (*++argv)
reps = atoi(*argv);
}
-#endif
if (interval) {
if (!reps)
@@ -313,6 +316,8 @@ main(int argc, char *argv[])
domemstat_zone();
if (todo & SUMSTAT)
dosum();
+ if (todo & OBJSTAT)
+ doobjstat();
#ifdef notyet
if (todo & TIMESTAT)
dotimes();
@@ -1302,6 +1307,129 @@ domemstat_zone(void)
printf("\n");
}
+static void
+display_object(struct kinfo_vmobject *kvo)
+{
+ const char *str;
+
+ printf("%5jd ", (uintmax_t)kvo->kvo_resident);
+ printf("%5jd ", (uintmax_t)kvo->kvo_active);
+ printf("%5jd ", (uintmax_t)kvo->kvo_inactive);
+ printf("%3d ", kvo->kvo_ref_count);
+ printf("%3d ", kvo->kvo_shadow_count);
+ switch (kvo->kvo_memattr) {
+#ifdef VM_MEMATTR_UNCACHEABLE
+ case VM_MEMATTR_UNCACHEABLE:
+ str = "UC";
+ break;
+#endif
+#ifdef VM_MEMATTR_WRITE_COMBINING
+ case VM_MEMATTR_WRITE_COMBINING:
+ str = "WC";
+ break;
+#endif
+#ifdef VM_MEMATTR_WRITE_THROUGH
+ case VM_MEMATTR_WRITE_THROUGH:
+ str = "WT";
+ break;
+#endif
+#ifdef VM_MEMATTR_WRITE_PROTECTED
+ case VM_MEMATTR_WRITE_PROTECTED:
+ str = "WP";
+ break;
+#endif
+#ifdef VM_MEMATTR_WRITE_BACK
+ case VM_MEMATTR_WRITE_BACK:
+ str = "WB";
+ break;
+#endif
+#ifdef VM_MEMATTR_WEAK_UNCACHEABLE
+ case VM_MEMATTR_WEAK_UNCACHEABLE:
+ str = "UC-";
+ break;
+#endif
+#ifdef VM_MEMATTR_WB_WA
+ case VM_MEMATTR_WB_WA:
+ str = "WB";
+ break;
+#endif
+#ifdef VM_MEMATTR_NOCACHE
+ case VM_MEMATTR_NOCACHE:
+ str = "NC";
+ break;
+#endif
+#ifdef VM_MEMATTR_DEVICE
+ case VM_MEMATTR_DEVICE:
+ str = "DEV";
+ break;
+#endif
+#ifdef VM_MEMATTR_CACHEABLE
+ case VM_MEMATTR_CACHEABLE:
+ str = "C";
+ break;
+#endif
+#ifdef VM_MEMATTR_PREFETCHABLE
+ case VM_MEMATTR_PREFETCHABLE:
+ str = "PRE";
+ break;
+#endif
+ default:
+ str = "??";
+ break;
+ }
+ printf("%-3s ", str);
+ switch (kvo->kvo_type) {
+ case KVME_TYPE_NONE:
+ str = "--";
+ break;
+ case KVME_TYPE_DEFAULT:
+ str = "df";
+ break;
+ case KVME_TYPE_VNODE:
+ str = "vn";
+ break;
+ case KVME_TYPE_SWAP:
+ str = "sw";
+ break;
+ case KVME_TYPE_DEVICE:
+ str = "dv";
+ break;
+ case KVME_TYPE_PHYS:
+ str = "ph";
+ break;
+ case KVME_TYPE_DEAD:
+ str = "dd";
+ break;
+ case KVME_TYPE_SG:
+ str = "sg";
+ break;
+ case KVME_TYPE_UNKNOWN:
+ default:
+ str = "??";
+ break;
+ }
+ printf("%-2s ", str);
+ printf("%-s\n", kvo->kvo_path);
+}
+
+static void
+doobjstat(void)
+{
+ struct kinfo_vmobject *kvo;
+ int cnt, i;
+
+ kvo = kinfo_getvmobject(&cnt);
+ if (kvo == NULL) {
+ warn("Failed to fetch VM object list");
+ return;
+ }
+ printf("%5s %5s %5s %3s %3s %3s %2s %s\n", "RES", "ACT", "INACT",
+ "REF", "SHD", "CM", "TP", "PATH");
+ for (i = 0; i < cnt; i++)
+ display_object(&kvo[i]);
+ free(kvo);
+}
+
/*
* kread reads something from the kernel, given its nlist index.
*/
@@ -1354,7 +1482,7 @@ static void
usage(void)
{
(void)fprintf(stderr, "%s%s",
- "usage: vmstat [-afHhimPsz] [-c count] [-M core [-N system]] [-w wait]\n",
- " [-n devs] [-p type,if,pass] [disks]\n");
+ "usage: vmstat [-afHhimoPsz] [-M core [-N system]] [-c count] [-n devs]\n",
+ " [-p type,if,pass] [-w wait] [disks] [wait [count]]\n");
exit(1);
}
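The new doobjstat() is a thin consumer of kinfo_getvmobject(3), the libutil interface added elsewhere in this merge. A stripped-down sketch of the same call sequence (compile with -lutil; the fields printed are the ones the diff itself uses):

#include <sys/types.h>
#include <sys/user.h>

#include <err.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct kinfo_vmobject *kvo;
	int cnt, i;

	if ((kvo = kinfo_getvmobject(&cnt)) == NULL)
		err(1, "kinfo_getvmobject");
	for (i = 0; i < cnt; i++)
		printf("%5jd resident pages  %s\n",
		    (uintmax_t)kvo[i].kvo_resident, kvo[i].kvo_path);
	free(kvo);	/* one free for the whole array, as in doobjstat() */
	return (0);
}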
diff --git a/usr.bin/vtfontcvt/vtfontcvt.c b/usr.bin/vtfontcvt/vtfontcvt.c
index 21c519b..a0895aa 100644
--- a/usr.bin/vtfontcvt/vtfontcvt.c
+++ b/usr.bin/vtfontcvt/vtfontcvt.c
@@ -96,6 +96,16 @@ usage(void)
exit(1);
}
+static void *
+xmalloc(size_t size)
+{
+ void *m;
+
+ if ((m = malloc(size)) == NULL)
+ errx(1, "memory allocation failure");
+ return (m);
+}
+
static int
add_mapping(struct glyph *gl, unsigned int c, unsigned int map_idx)
{
@@ -104,7 +114,7 @@ add_mapping(struct glyph *gl, unsigned int c, unsigned int map_idx)
mapping_total++;
- mp = malloc(sizeof *mp);
+ mp = xmalloc(sizeof *mp);
mp->m_char = c;
mp->m_glyph = gl;
mp->m_length = 0;
@@ -163,8 +173,8 @@ add_glyph(const uint8_t *bytes, unsigned int map_idx, int fallback)
}
}
- gl = malloc(sizeof *gl);
- gl->g_data = malloc(wbytes * height);
+ gl = xmalloc(sizeof *gl);
+ gl->g_data = xmalloc(wbytes * height);
memcpy(gl->g_data, bytes, wbytes * height);
if (fallback)
TAILQ_INSERT_HEAD(&glyphs[map_idx], gl, g_list);
@@ -290,17 +300,26 @@ parse_hex(FILE *fp, unsigned int map_idx)
char *ln, *p;
char fmt_str[8];
size_t length;
- uint8_t bytes[wbytes * height], bytes_r[wbytes * height];
+ uint8_t *bytes = NULL, *bytes_r = NULL;
unsigned curchar = 0, i, line, chars_per_row, dwidth;
+ int rv = 0;
while ((ln = fgetln(fp, &length)) != NULL) {
ln[length - 1] = '\0';
if (strncmp(ln, "# Height: ", 10) == 0) {
+ if (bytes != NULL)
+ errx(1, "malformed input: Height tag after font data");
height = atoi(ln + 10);
} else if (strncmp(ln, "# Width: ", 9) == 0) {
+ if (bytes != NULL)
+ errx(1, "malformed input: Width tag after font data");
set_width(atoi(ln + 9));
} else if (sscanf(ln, "%4x:", &curchar)) {
+ if (bytes == NULL) {
+ bytes = xmalloc(wbytes * height);
+ bytes_r = xmalloc(wbytes * height);
+ }
p = ln + 5;
chars_per_row = strlen(p) / height;
dwidth = width;
@@ -313,16 +332,23 @@ parse_hex(FILE *fp, unsigned int map_idx)
sscanf(p, fmt_str, &line);
p += chars_per_row;
if (parse_bitmap_line(bytes + i * wbytes,
- bytes_r + i * wbytes, line, dwidth) != 0)
- return (1);
+ bytes_r + i * wbytes, line, dwidth) != 0) {
+ rv = 1;
+ goto out;
+ }
}
if (add_char(curchar, map_idx, bytes,
- dwidth == width * 2 ? bytes_r : NULL) != 0)
- return (1);
+ dwidth == width * 2 ? bytes_r : NULL) != 0) {
+ rv = 1;
+ goto out;
+ }
}
}
- return (0);
+out:
+ free(bytes);
+ free(bytes_r);
+ return (rv);
}
static int
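The parse_hex() rework above moves the bitmap buffers off the stack, since their size is computed from the untrusted "# Width:" and "# Height:" headers and a variable-length array could exhaust the stack, and it funnels every exit through a single cleanup label. The shape of that idiom, reduced to a hypothetical helper:

#include <stdlib.h>

static int
process(size_t wbytes, size_t height)
{
	unsigned char *bytes = NULL;
	int rv = 0;

	if ((bytes = malloc(wbytes * height)) == NULL) {
		rv = 1;
		goto out;
	}
	/* ... per-glyph work; any failure sets rv = 1 and jumps to out ... */
out:
	free(bytes);	/* free(NULL) is a harmless no-op */
	return (rv);
}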
diff --git a/usr.bin/w/Makefile b/usr.bin/w/Makefile
index 1515a87..09ab7dc 100644
--- a/usr.bin/w/Makefile
+++ b/usr.bin/w/Makefile
@@ -6,8 +6,6 @@ SRCS= fmt.c pr_time.c proc_compare.c w.c
MAN= w.1 uptime.1
DPADD= ${LIBKVM} ${LIBUTIL}
LDADD= -lkvm -lutil
-#BINGRP= kmem
-#BINMODE=2555
LINKS= ${BINDIR}/w ${BINDIR}/uptime
.PATH: ${.CURDIR}/../../bin/ps
diff --git a/usr.bin/w/w.c b/usr.bin/w/w.c
index 1b9af5f..19fd190 100644
--- a/usr.bin/w/w.c
+++ b/usr.bin/w/w.c
@@ -132,7 +132,7 @@ main(int argc, char *argv[])
struct kinfo_proc *dkp;
struct stat *stp;
time_t touched;
- int ch, i, nentries, nusers, wcmd, longidle, longattime, dropgid;
+ int ch, i, nentries, nusers, wcmd, longidle, longattime;
const char *memf, *nlistf, *p;
char *x_suffix;
char buf[MAXHOSTNAMELEN], errbuf[_POSIX2_LINE_MAX];
@@ -152,7 +152,6 @@ main(int argc, char *argv[])
p = "dhiflM:N:nsuw";
}
- dropgid = 0;
memf = _PATH_DEVNULL;
nlistf = NULL;
while ((ch = getopt(argc, argv, p)) != -1)
@@ -169,11 +168,9 @@ main(int argc, char *argv[])
case 'M':
header = 0;
memf = optarg;
- dropgid = 1;
break;
case 'N':
nlistf = optarg;
- dropgid = 1;
break;
case 'n':
nflag = 1;
@@ -193,13 +190,6 @@ main(int argc, char *argv[])
_res.retrans = 2; /* resolver timeout to 2 seconds per try */
_res.retry = 1; /* only try once.. */
- /*
- * Discard setgid privileges if not the running kernel so that bad
- * guys can't print interesting stuff from kernel memory.
- */
- if (dropgid)
- setgid(getgid());
-
if ((kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf)) == NULL)
errx(1, "%s", errbuf);
diff --git a/usr.bin/yacc/tests/Makefile b/usr.bin/yacc/tests/Makefile
index c8125a6..2e157ec 100644
--- a/usr.bin/yacc/tests/Makefile
+++ b/usr.bin/yacc/tests/Makefile
@@ -17,6 +17,8 @@ TEST_METADATA.yacc_tests+= required_user="unprivileged"
SCRIPTS= run_test
SCRIPTSDIR= ${TESTSDIR}
+CLEANFILES= run_test
+
FILESGROUPS= FILES FILEStest FILEStest_yacc
FILEStestDIR= ${TESTSDIR}
diff --git a/usr.sbin/acpi/acpiconf/acpiconf.8 b/usr.sbin/acpi/acpiconf/acpiconf.8
index 8a949d7..3830827 100644
--- a/usr.sbin/acpi/acpiconf/acpiconf.8
+++ b/usr.sbin/acpi/acpiconf/acpiconf.8
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 16, 2004
+.Dd September 22, 2015
.Dt ACPICONF 8
.Os
.Sh NAME
@@ -37,6 +37,7 @@
.Nm
.Op Fl h
.Op Fl i Ar batt
+.Op Fl k Ar ack
.Op Fl s Ar type
.Sh DESCRIPTION
The
@@ -49,6 +50,9 @@ The following command-line options are recognized:
Displays a summary of available options.
.It Fl i Ar batt
Get design information about the specified battery.
+.It Fl k Ar ack
+Ack or abort a pending suspend request using the argument provided.
+.Sy Most users should not use this option directly.
.It Fl s Ar type
Enters the specified sleep mode.
Recognized types are
@@ -58,11 +62,9 @@ Recognized types are
(not implemented on most systems but similar to S1),
.Cm 3
(the CPU context is lost and memory context is preserved),
-.Cm 4
-(the CPU context is lost and memory context is stored to disk)
and
-.Cm 5
-(soft off).
+.Cm 4
+(the CPU context is lost and memory context is stored to disk).
Sleep states may also be given as S1, S2, etc.
The supported states depend on BIOS implementation, including ACPI
byte code (AML).
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index a9dd1cc..57fe783 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -790,10 +790,10 @@ basl_open(struct basl_fio *bf, int suffix)
err = 0;
if (suffix) {
- strncpy(bf->f_name, basl_stemplate, MAXPATHLEN);
+ strlcpy(bf->f_name, basl_stemplate, MAXPATHLEN);
bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX));
} else {
- strncpy(bf->f_name, basl_template, MAXPATHLEN);
+ strlcpy(bf->f_name, basl_template, MAXPATHLEN);
bf->fd = mkstemp(bf->f_name);
}
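The basl_open() change above is the classic strncpy-to-strlcpy fix: strncpy() leaves the destination unterminated when the source fills it, while strlcpy() always NUL-terminates within the stated size. A short demonstration of the difference, showing BSD libc behavior rather than anything from this commit:

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[8];

	strlcpy(buf, "a path name longer than the buffer", sizeof(buf));
	printf("%s\n", buf);	/* prints "a path ": truncated but terminated */
	return (0);
}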
diff --git a/usr.sbin/ctladm/ctladm.8 b/usr.sbin/ctladm/ctladm.8
index 1d16539..7ff0c8d 100644
--- a/usr.sbin/ctladm/ctladm.8
+++ b/usr.sbin/ctladm/ctladm.8
@@ -1,5 +1,6 @@
.\"
.\" Copyright (c) 2003 Silicon Graphics International Corp.
+.\" Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
@@ -34,7 +35,7 @@
.\" $Id: //depot/users/kenm/FreeBSD-test2/usr.sbin/ctladm/ctladm.8#3 $
.\" $FreeBSD$
.\"
-.Dd May 22, 2015
+.Dd September 26, 2015
.Dt CTLADM 8
.Os
.Sh NAME
@@ -43,28 +44,28 @@
.Sh SYNOPSIS
.Nm
.Aq Ar command
-.Op target:lun
+.Op lun
.Op generic args
.Op command args
.Nm
.Ic tur
-.Aq target:lun
+.Aq lun
.Op general options
.Nm
.Ic inquiry
-.Aq target:lun
+.Aq lun
.Op general options
.Nm
.Ic reqsense
-.Aq target:lun
+.Aq lun
.Op general options
.Nm
.Ic reportluns
-.Aq target:lun
+.Aq lun
.Op general options
.Nm
.Ic read
-.Aq target:lun
+.Aq lun
.Op general options
.Aq Fl l Ar lba
.Aq Fl d Ar datalen
@@ -74,7 +75,7 @@
.Op Fl N
.Nm
.Ic write
-.Aq target:lun
+.Aq lun
.Op general options
.Aq Fl l Ar lba
.Aq Fl d Ar datalen
@@ -83,19 +84,13 @@
.Op Fl c Ar cdbsize
.Op Fl N
.Nm
-.Ic bbrread
-.Aq target:lun
-.Op general options
-.Aq Fl -l Ar lba
-.Aq Fl -d Ar datalen
-.Nm
.Ic readcap
-.Aq target:lun
+.Aq lun
.Op general options
.Op Fl c Ar cdbsize
.Nm
.Ic modesense
-.Aq target:lun
+.Aq lun
.Aq Fl m Ar page | Fl l
.Op Fl P Ar pc
.Op Fl d
@@ -103,19 +98,19 @@
.Op Fl c Ar size
.Nm
.Ic start
-.Aq target:lun
+.Aq lun
.Op general options
.Op Fl i
.Op Fl o
.Nm
.Ic stop
-.Aq target:lun
+.Aq lun
.Op general options
.Op Fl i
.Op Fl o
.Nm
.Ic synccache
-.Aq target:lun
+.Aq lun
.Op general options
.Op Fl l Ar lba
.Op Fl b Ar blockcount
@@ -123,33 +118,14 @@
.Op Fl i
.Op Fl c Ar cdbsize
.Nm
-.Ic shutdown
-.Op general options
-.Nm
-.Ic startup
-.Op general options
-.Nm
-.Ic hardstop
-.Nm
-.Ic hardstart
-.Nm
.Ic lunlist
.Nm
.Ic delay
-.Aq target:lun
+.Aq lun
.Aq Fl l Ar datamove|done
.Aq Fl t Ar secs
.Op Fl T Ar oneshot|cont
.Nm
-.Ic realsync Aq on|off|query
-.Nm
-.Ic setsync interval
-.Aq target:lun
-.Aq Fl i Ar interval
-.Nm
-.Ic getsync
-.Aq target:lun
-.Nm
.Ic inject
.Aq Fl i Ar action
.Aq Fl p Ar pattern
@@ -176,6 +152,7 @@
.Ic modify
.Aq Fl b Ar backend
.Aq Fl l Ar lun_id
+.Op Fl o Ar name=value
.Aq Fl s Ar size_bytes
.Nm
.Ic devlist
@@ -184,14 +161,11 @@
.Op Fl x
.Nm
.Ic port
-.Op Fl l
.Op Fl o Ar on|off
.Op Fl w Ar wwpn
.Op Fl W Ar wwnn
.Op Fl p Ar targ_port
.Op Fl t Ar fe_type
-.Op Fl q
-.Op Fl x
.Nm
.Ic portlist
.Op Fl f Ar frontend
@@ -245,8 +219,8 @@ utility has a number of primary functions, many of which require a device
identifier.
The device identifier takes the following form:
.Bl -tag -width 14n
-.It target:lun
-Specify the target (almost always 0) and LUN number to operate on.
+.It lun
+Specify the LUN number to operate on.
.El
Many of the primary functions of the
.Nm
@@ -364,34 +338,6 @@ to the kernel when doing a write, just execute the command without copying
data.
This is to be used for performance testing.
.El
-.It Ic bbrread
-Issue a SCSI READ command to the logical device to potentially force a bad
-block on a disk in the RAID set to be reconstructed from the other disks in
-the array. This command should only be used on an array that is in the
-normal state. If used on a critical array, it could cause the array to go
-offline if the bad block to be remapped is on one of the disks that is
-still active in the array.
-.Pp
-The data for this particular command will be discarded, and not returned to
-the user.
-.Pp
-In order to determine which LUN to read from, the user should first
-determine which LUN the disk with a bad block belongs to. Then he should
-map the bad disk block back to the logical block address for the array in
-order to determine which LBA to pass in to the
-.Ic bbrread
-command.
-.Pp
-This command is primarily intended for testing. In practice, bad block
-remapping will generally be triggered by the in-kernel Disk Aerobics and
-Disk Scrubbing code.
-.Bl -tag -width 10n
-.It Fl l Ar lba
-Specify the starting Logical Block Address.
-.It Fl d Ar datalen
-Specify the amount of data in bytes to read from the LUN. This must be a
-multiple of the LUN blocksize.
-.El
.It Ic readcap
Send the
.Tn SCSI
@@ -454,17 +400,6 @@ bit set.
Set the immediate bit in the CDB. Note that CTL does not support the
immediate bit, so this is primarily useful for making sure that CTL returns
the proper error.
-.It Fl o
-Set the Copan proprietary on/offline bit in the CDB. When this flag is
-used, the LUN will be marked online again (see the description of the
-.Ic shutdown
-and
-.Ic startup
-commands). When this flag is used with a
-start command, the LUN will NOT be spun up. You need to use a start
-command without the
-.Fl o
-flag to spin up the disks in the LUN.
.El
.It Ic stop
Send the
@@ -480,14 +415,6 @@ sends an ordered tag for completeness.)
Set the immediate bit in the CDB. Note that CTL does not support the
immediate bit, so this is primarily useful for making sure that CTL returns
the proper error.
-.It Fl o
-Set the Copan proprietary on/offline bit in the CDB. When this flag is
-used, the LUN will be spun down and taken offline ("Logical unit not ready,
-manual intervention required"). See the description of the
-.Ic shutdown
-and
-.Ic startup
-options.
.El
.It Ic synccache
Send the
@@ -519,48 +446,6 @@ support this bit.
.It Fl c Ar cdbsize
Specify the minimum CDB size. Valid values are 10 and 16 bytes.
.El
-.It Ic shutdown
-Issue a
-.Tn SCSI
-START STOP UNIT command with the start bit cleared and the on/offline bit
-set to all direct access LUNs. This will spin down all direct access LUNs,
-and mark them offline ("Logical unit not ready, manual intervention
-required"). Once marked offline, the state can only be cleared by sending
-a START STOP UNIT command with the start bit set and the on/offline bit
-set. The
-.Nm
-commands
-.Ic startup
-and
-.Ic start
-will accomplish this. Note that the
-on/offline bit is a non-standard Copan extension to the
-.Tn SCSI
-START STOP UNIT command, so merely sending a normal start command from an
-initiator will not clear the condition. (This is by design.)
-.It Ic startup
-Issue a
-.Tn SCSI
-START STOP UNIT command with the start bit set and the on/offline bit set
-to all direct access LUNs. This will mark all direct access LUNs "online"
-again. It will not cause any LUNs to start up. A separate start command
-without the on/offline bit set is necessary for that.
-.It Ic hardstop
-Use the kernel facility for stopping all direct access LUNs and setting the
-offline bit. Unlike the
-.Ic shutdown
-command above, this command allows shutting down LUNs with I/O active. It
-will also issue a LUN reset to any reserved LUNs to break the reservation
-so that the LUN can be stopped.
-.Ic shutdown
-command instead.
-.It Ic hardstart
-This command is functionally identical to the
-.Ic startup
-command described above. The primary difference is that the LUNs are
-enumerated and commands sent by the in-kernel Front End Target Driver
-instead of by
-.Nm .
.It Ic lunlist
List all LUNs registered with CTL.
Because this command uses the ioctl port, it will only work when the FETDs
@@ -601,39 +486,6 @@ the next command sent to the given LUN will be delayed and all subsequent
commands will be completed normally.
This is the default.
.El
-.It Ic realsync
-Query and control CTL's SYNCHRONIZE CACHE behavior. The
-.Sq query
-argument
-will show whether SYNCHRONIZE CACHE commands are being sent to the backend
-or not.
-The default is to send SYNCHRONIZE CACHE commands to the backend.
-The
-.Sq on
-argument will cause all SYNCHRONIZE CACHE commands sent to all LUNs to be
-sent to the backend.
-The
-.Sq off
-argument will cause all SYNCHRONIZE CACHE commands sent to all LUNs to be
-immediately returned to the initiator with successful status.
-.It Ic setsync
-For a given lun, only actually service every Nth SYNCHRONIZE CACHE command
-that is sent. This can be used for debugging the optimal time period for
-sending SYNCHRONIZE cache commands. An interval of 0 means that the cache
-will be flushed for this LUN every time a SYNCHRONIZE CACHE command is
-received.
-.Pp
-You must specify the target and LUN you want to modify.
-.It Ic getsync
-Get the interval at which we actually service the SYNCHRONIZE CACHE
-command, as set by the
-.Ic setsync
-command above.
-The reported number means that we will actually flush the cache on every
-Nth SYNCHRONIZE CACHE command. A value of 0 means that we will flush the
-cache every time.
-.Pp
-You must specify the target and LUN you want to query.
.It Ic inject
Inject the specified type of error for the LUN specified, when a command
that matches the given pattern is seen.
@@ -741,8 +593,6 @@ must be specified.
The WWNN and WWPN may both be specified at the same time, but cannot be
combined with enabling/disabling or listing ports.
.Bl -tag -width 12n
-.It Fl l
-List all CTL frontend ports or a specific port type or number.
.It Fl o Ar on|off
Turn the specified CTL frontend ports off or on.
If no port number or port type is specified, all ports are turned on or
@@ -750,8 +600,6 @@ off.
.It Fl p Ar targ_port
Specify the frontend port number.
The port numbers can be found in the frontend port list.
-.It Fl q
-Omit the header in the port list output.
.It Fl t Ar fe_type
Specify the frontend type.
Currently defined port types are
@@ -779,8 +627,6 @@ The
argument must be specified, since this is only possible to implement on a
single port.
As a general rule, the WWPN must be different for every port in the system.
-.It Fl x
-Output the port list in XML format.
.El
.It Ic portlist
List CTL frontend ports.
@@ -871,11 +717,10 @@ Specify the serial number to be used in the
INQUIRY VPD page 0x80 data.
.It Fl t Ar device_type
Specify the numeric SCSI device type to use when creating the LUN.
-For example, the Direct Access type is 0.
If this flag is not used, the type of LUN created is backend-specific.
Not all LUN types are supported.
-Currently CTL only supports Direct Access (type 0) and Processor (type 3)
-LUNs.
+Currently CTL supports Direct Access (type 0), Processor (type 3)
+and CD/DVD (type 5) LUNs.
The backend requested may or may not support all of the LUN types that CTL
supports.
.El
@@ -913,6 +758,12 @@ and
.Dq block .
.It Fl l Ar lun_id
Specify the LUN number to remove.
+.It Fl o Ar name=value
+Specify a backend-specific name/value pair.
+Multiple
+.Fl o
+arguments may be specified.
+Refer to the backend documentation for arguments that may be used.
.It Fl s Ar size_bytes
Specify the size of the LUN in bytes.
For the
@@ -1011,6 +862,9 @@ Specifies LUN NAA identifier.
Either EUI or NAA identifier should be set to UNIQUE value to allow
EXTENDED COPY command access the LUN.
Non-unique LUN identifiers may lead to data corruption.
+.It Va ha_role
+Setting to "primary" or "secondary" overrides default role of the node
+in HA cluster, set by kern.cam.ctl.ha_role sysctl.
.It Va insecure_tpc
Setting to "on" allows EXTENDED COPY command sent to this LUN access
other LUNs on this host, not accessible otherwise.
@@ -1021,6 +875,8 @@ Set to "off", disables read caching for the LUN, if supported by the backend.
.It Va readonly
Set to "on", blocks all media write operations to the LUN, reporting it
as write protected.
+.It Va removable
+Set to "on", makes LUN removable.
.It Va reordering
Set to "unrestricted", allows target to process commands with SIMPLE task
attribute in arbitrary order. Any data integrity exposures related to
@@ -1042,7 +898,6 @@ Specify physical block size and offset of the device.
.It Va ublockoffset
Specify UNMAP block size and offset of the device.
.It Va rpm
-.It Va rpm
Specifies medium rotation rate of the device: 0 -- not reported,
1 -- non-rotating (SSD), >1024 -- value in revolutions per minute.
.It Va formfactor
@@ -1071,34 +926,34 @@ Specifies file or device name to use for backing store.
Specifies number of backend threads to use for this LUN.
.El
.Sh EXAMPLES
-.Dl ctladm tur 0:1
+.Dl ctladm tur 1
.Pp
Send a
.Tn SCSI
TEST UNIT READY command to LUN 1.
.Pp
-.Dl ctladm modesense 0:1 -l
+.Dl ctladm modesense 1 -l
.Pp
Display the list of mode pages supported by LUN 1.
.Pp
-.Dl ctladm modesense 0:0 -m 10 -P 3 -d -c 10
+.Dl ctladm modesense 0 -m 10 -P 3 -d -c 10
.Pp
Display the saved version of the Control mode page (page 10) on LUN 0.
Disable fetching block descriptors, and use a 10 byte MODE SENSE command
instead of the default 6 byte command.
.Bd -literal
-ctladm read 0:2 -l 0 -d 1 -b 512 -f - > foo
+ctladm read 2 -l 0 -d 1 -b 512 -f - > foo
.Ed
.Pp
Read the first 512 byte block from LUN 2 and dump it to the file
.Pa foo .
.Bd -literal
-ctladm write 0:3 -l 0xff432140 -d 20 -b 512 -f /tmp/bar
+ctladm write 3 -l 0xff432140 -d 20 -b 512 -f /tmp/bar
.Ed
.Pp
Read 10240 bytes from the file
.Pa /tmp/bar
-and write it to target 0, LUN 3.
+and write it to LUN 3,
starting at LBA 0xff432140.
.Pp
.Dl ctladm create -b ramdisk -s 10485760000000000
@@ -1142,12 +997,12 @@ List all LUNs in the system, along with their inquiry data and device type.
This only works when the FETDs are enabled, since the commands go through the
ioctl port.
.Pp
-.Dl ctladm inject 0:6 -i mediumerr -p read -r 0,512 -c
+.Dl ctladm inject 6 -i mediumerr -p read -r 0,512 -c
.Pp
Inject a medium error on LUN 6 for every read that covers the first 512
blocks of the LUN.
.Bd -literal -offset indent
-ctladm inject 0:6 -i custom -p tur -s 18 "f0 0 02 s12 04 02"
+ctladm inject 6 -i custom -p tur -s 18 "f0 0 02 s12 04 02"
.Ed
.Pp
Inject a custom error on LUN 6 for the next TEST UNIT READY command only.
diff --git a/usr.sbin/ctladm/ctladm.c b/usr.sbin/ctladm/ctladm.c
index 886a26e..ec0a322 100644
--- a/usr.sbin/ctladm/ctladm.c
+++ b/usr.sbin/ctladm/ctladm.c
@@ -66,10 +66,8 @@ __FBSDID("$FreeBSD$");
#include <cam/scsi/scsi_message.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_io.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
-#include <cam/ctl/ctl_backend_block.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <camlib.h>
@@ -103,17 +101,9 @@ typedef enum {
CTLADM_CMD_START,
CTLADM_CMD_STOP,
CTLADM_CMD_SYNC_CACHE,
- CTLADM_CMD_SHUTDOWN,
- CTLADM_CMD_STARTUP,
CTLADM_CMD_LUNLIST,
- CTLADM_CMD_HARDSTOP,
- CTLADM_CMD_HARDSTART,
CTLADM_CMD_DELAY,
- CTLADM_CMD_REALSYNC,
- CTLADM_CMD_SETSYNC,
- CTLADM_CMD_GETSYNC,
CTLADM_CMD_ERR_INJECT,
- CTLADM_CMD_BBRREAD,
CTLADM_CMD_PRES_IN,
CTLADM_CMD_PRES_OUT,
CTLADM_CMD_INQ_VPD_DEVID,
@@ -168,20 +158,16 @@ typedef enum {
} ctladm_optret;
static const char rw_opts[] = "Nb:c:d:f:l:";
-static const char startstop_opts[] = "io";
+static const char startstop_opts[] = "i";
static struct ctladm_opts option_table[] = {
{"adddev", CTLADM_CMD_ADDDEV, CTLADM_ARG_NONE, NULL},
- {"bbrread", CTLADM_CMD_BBRREAD, CTLADM_ARG_NEED_TL, "d:l:"},
{"create", CTLADM_CMD_CREATE, CTLADM_ARG_NONE, "b:B:d:l:o:s:S:t:"},
{"delay", CTLADM_CMD_DELAY, CTLADM_ARG_NEED_TL, "T:l:t:"},
{"devid", CTLADM_CMD_INQ_VPD_DEVID, CTLADM_ARG_NEED_TL, NULL},
{"devlist", CTLADM_CMD_DEVLIST, CTLADM_ARG_NONE, "b:vx"},
{"dumpooa", CTLADM_CMD_DUMPOOA, CTLADM_ARG_NONE, NULL},
{"dumpstructs", CTLADM_CMD_DUMPSTRUCTS, CTLADM_ARG_NONE, NULL},
- {"getsync", CTLADM_CMD_GETSYNC, CTLADM_ARG_NEED_TL, NULL},
- {"hardstart", CTLADM_CMD_HARDSTART, CTLADM_ARG_NONE, NULL},
- {"hardstop", CTLADM_CMD_HARDSTOP, CTLADM_ARG_NONE, NULL},
{"help", CTLADM_CMD_HELP, CTLADM_ARG_NONE, NULL},
{"inject", CTLADM_CMD_ERR_INJECT, CTLADM_ARG_NEED_TL, "cd:i:p:r:s:"},
{"inquiry", CTLADM_CMD_INQUIRY, CTLADM_ARG_NEED_TL, NULL},
@@ -191,22 +177,18 @@ static struct ctladm_opts option_table[] = {
{"lunlist", CTLADM_CMD_LUNLIST, CTLADM_ARG_NONE, NULL},
{"lunmap", CTLADM_CMD_LUNMAP, CTLADM_ARG_NONE, "p:l:L:"},
{"modesense", CTLADM_CMD_MODESENSE, CTLADM_ARG_NEED_TL, "P:S:dlm:c:"},
- {"modify", CTLADM_CMD_MODIFY, CTLADM_ARG_NONE, "b:l:s:"},
+ {"modify", CTLADM_CMD_MODIFY, CTLADM_ARG_NONE, "b:l:o:s:"},
{"port", CTLADM_CMD_PORT, CTLADM_ARG_NONE, "lo:p:qt:w:W:x"},
{"portlist", CTLADM_CMD_PORTLIST, CTLADM_ARG_NONE, "f:ilp:qvx"},
{"prin", CTLADM_CMD_PRES_IN, CTLADM_ARG_NEED_TL, "a:"},
{"prout", CTLADM_CMD_PRES_OUT, CTLADM_ARG_NEED_TL, "a:k:r:s:"},
{"read", CTLADM_CMD_READ, CTLADM_ARG_NEED_TL, rw_opts},
{"readcapacity", CTLADM_CMD_READCAPACITY, CTLADM_ARG_NEED_TL, "c:"},
- {"realsync", CTLADM_CMD_REALSYNC, CTLADM_ARG_NONE, NULL},
{"remove", CTLADM_CMD_RM, CTLADM_ARG_NONE, "b:l:o:"},
{"reportluns", CTLADM_CMD_REPORT_LUNS, CTLADM_ARG_NEED_TL, NULL},
{"reqsense", CTLADM_CMD_REQ_SENSE, CTLADM_ARG_NEED_TL, NULL},
{"rtpg", CTLADM_CMD_RTPG, CTLADM_ARG_NEED_TL, NULL},
- {"setsync", CTLADM_CMD_SETSYNC, CTLADM_ARG_NEED_TL, "i:"},
- {"shutdown", CTLADM_CMD_SHUTDOWN, CTLADM_ARG_NONE, NULL},
{"start", CTLADM_CMD_START, CTLADM_ARG_NEED_TL, startstop_opts},
- {"startup", CTLADM_CMD_STARTUP, CTLADM_ARG_NONE, NULL},
{"stop", CTLADM_CMD_STOP, CTLADM_ARG_NEED_TL, startstop_opts},
{"synccache", CTLADM_CMD_SYNC_CACHE, CTLADM_ARG_NEED_TL, "b:c:il:r"},
{"tur", CTLADM_CMD_TUR, CTLADM_ARG_NEED_TL, NULL},
@@ -219,55 +201,45 @@ static struct ctladm_opts option_table[] = {
ctladm_optret getoption(struct ctladm_opts *table, char *arg, uint32_t *cmdnum,
ctladm_cmdargs *argnum, const char **subopt);
-static int cctl_parse_tl(char *str, int *target, int *lun);
static int cctl_dump_ooa(int fd, int argc, char **argv);
-static int cctl_port_dump(int fd, int quiet, int xml, int32_t fe_num,
- ctl_port_type port_type);
static int cctl_port(int fd, int argc, char **argv, char *combinedopt);
static int cctl_do_io(int fd, int retries, union ctl_io *io, const char *func);
-static int cctl_delay(int fd, int target, int lun, int argc, char **argv,
+static int cctl_delay(int fd, int lun, int argc, char **argv,
char *combinedopt);
static int cctl_lunlist(int fd);
-static void cctl_cfi_mt_statusstr(cfi_mt_status status, char *str, int str_len);
-static void cctl_cfi_bbr_statusstr(cfi_bbrread_status, char *str, int str_len);
-static int cctl_hardstopstart(int fd, ctladm_cmdfunction command);
-static int cctl_bbrread(int fd, int target, int lun, int iid, int argc,
- char **argv, char *combinedopt);
-static int cctl_startup_shutdown(int fd, int target, int lun, int iid,
- ctladm_cmdfunction command);
-static int cctl_sync_cache(int fd, int target, int lun, int iid, int retries,
+static int cctl_sync_cache(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt);
-static int cctl_start_stop(int fd, int target, int lun, int iid, int retries,
+static int cctl_start_stop(int fd, int lun, int iid, int retries,
int start, int argc, char **argv, char *combinedopt);
-static int cctl_mode_sense(int fd, int target, int lun, int iid, int retries,
+static int cctl_mode_sense(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt);
-static int cctl_read_capacity(int fd, int target, int lun, int iid,
+static int cctl_read_capacity(int fd, int lun, int iid,
int retries, int argc, char **argv,
char *combinedopt);
-static int cctl_read_write(int fd, int target, int lun, int iid, int retries,
+static int cctl_read_write(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt,
ctladm_cmdfunction command);
-static int cctl_get_luns(int fd, int target, int lun, int iid, int retries,
+static int cctl_get_luns(int fd, int lun, int iid, int retries,
struct scsi_report_luns_data **lun_data,
uint32_t *num_luns);
-static int cctl_report_luns(int fd, int target, int lun, int iid, int retries);
-static int cctl_tur(int fd, int target, int lun, int iid, int retries);
-static int cctl_get_inquiry(int fd, int target, int lun, int iid, int retries,
+static int cctl_report_luns(int fd, int lun, int iid, int retries);
+static int cctl_tur(int fd, int lun, int iid, int retries);
+static int cctl_get_inquiry(int fd, int lun, int iid, int retries,
char *path_str, int path_len,
struct scsi_inquiry_data *inq_data);
-static int cctl_inquiry(int fd, int target, int lun, int iid, int retries);
-static int cctl_req_sense(int fd, int target, int lun, int iid, int retries);
-static int cctl_persistent_reserve_in(int fd, int target, int lun,
+static int cctl_inquiry(int fd, int lun, int iid, int retries);
+static int cctl_req_sense(int fd, int lun, int iid, int retries);
+static int cctl_persistent_reserve_in(int fd, int lun,
int initiator, int argc, char **argv,
char *combinedopt, int retry_count);
-static int cctl_persistent_reserve_out(int fd, int target, int lun,
+static int cctl_persistent_reserve_out(int fd, int lun,
int initiator, int argc, char **argv,
char *combinedopt, int retry_count);
static int cctl_create_lun(int fd, int argc, char **argv, char *combinedopt);
-static int cctl_inquiry_vpd_devid(int fd, int target, int lun, int initiator);
-static int cctl_report_target_port_group(int fd, int target, int lun,
- int initiator);
+static int cctl_inquiry_vpd_devid(int fd, int lun, int initiator);
+static int cctl_report_target_port_group(int fd, int lun, int initiator);
static int cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt);
+static int cctl_portlist(int fd, int argc, char **argv, char *combinedopt);
ctladm_optret
getoption(struct ctladm_opts *table, char *arg, uint32_t *cmdnum,
@@ -297,68 +269,31 @@ getoption(struct ctladm_opts *table, char *arg, uint32_t *cmdnum,
return(CC_OR_NOT_FOUND);
}
-
-static int
-cctl_parse_tl(char *str, int *target, int *lun)
-{
- char *tmpstr;
- int retval;
-
- retval = 0;
-
- while (isspace(*str) && (*str != '\0'))
- str++;
-
- tmpstr = (char *)strtok(str, ":");
- if ((tmpstr != NULL) && (*tmpstr != '\0')) {
- *target = strtol(tmpstr, NULL, 0);
- tmpstr = (char *)strtok(NULL, ":");
- if ((tmpstr != NULL) && (*tmpstr != '\0')) {
- *lun = strtol(tmpstr, NULL, 0);
- } else
- retval = -1;
- } else
- retval = -1;
-
- return (retval);
-}
-
static int
cctl_dump_ooa(int fd, int argc, char **argv)
{
struct ctl_ooa ooa;
long double cmd_latency;
- int num_entries, len;
- int target = -1, lun = -1;
- int retval;
+ int num_entries, len, lun = -1, retval = 0;
unsigned int i;
num_entries = 104;
- if ((argc > 2)
- && (isdigit(argv[2][0]))) {
- retval = cctl_parse_tl(argv[2], &target, &lun);
- if (retval != 0)
- warnx("invalid target:lun argument %s", argv[2]);
- }
+ if ((argc > 2) && (isdigit(argv[2][0])))
+ lun = strtol(argv[2], NULL, 0);
retry:
len = num_entries * sizeof(struct ctl_ooa_entry);
-
bzero(&ooa, sizeof(ooa));
-
ooa.entries = malloc(len);
-
if (ooa.entries == NULL) {
warn("%s: error mallocing %d bytes", __func__, len);
return (1);
}
-
- if (argc > 2) {
+ if (lun >= 0) {
ooa.lun_num = lun;
} else
ooa.flags |= CTL_OOA_FLAG_ALL_LUNS;
-
ooa.alloc_len = len;
ooa.alloc_num = num_entries;
if (ioctl(fd, CTL_GET_OOA, &ooa) == -1) {
@@ -413,17 +348,10 @@ retry:
cmd_latency);
}
fprintf(stdout, "OOA queues dump done\n");
-#if 0
- if (ioctl(fd, CTL_DUMP_OOA) == -1) {
- warn("%s: CTL_DUMP_OOA ioctl failed", __func__);
- return (1);
- }
-#endif
bailout:
free(ooa.entries);
-
- return (0);
+ return (retval);
}
static int
@@ -436,152 +364,6 @@ cctl_dump_structs(int fd, ctladm_cmdargs cmdargs __unused)
return (0);
}
-static int
-cctl_port_dump(int fd, int quiet, int xml, int32_t targ_port,
- ctl_port_type port_type)
-{
- struct ctl_port_list port_list;
- struct ctl_port_entry *entries;
- struct sbuf *sb = NULL;
- int num_entries;
- int did_print = 0;
- unsigned int i;
-
- num_entries = 16;
-
-retry:
-
- entries = malloc(sizeof(*entries) * num_entries);
- bzero(&port_list, sizeof(port_list));
- port_list.entries = entries;
- port_list.alloc_num = num_entries;
- port_list.alloc_len = num_entries * sizeof(*entries);
- if (ioctl(fd, CTL_GET_PORT_LIST, &port_list) != 0) {
- warn("%s: CTL_GET_PORT_LIST ioctl failed", __func__);
- return (1);
- }
- if (port_list.status == CTL_PORT_LIST_NEED_MORE_SPACE) {
- printf("%s: allocated %d, need %d, retrying\n", __func__,
- num_entries, port_list.fill_num + port_list.dropped_num);
- free(entries);
- num_entries = port_list.fill_num + port_list.dropped_num;
- goto retry;
- }
-
- if ((quiet == 0)
- && (xml == 0))
- printf("Port Online Type Name pp vp %-18s %-18s\n",
- "WWNN", "WWPN");
-
- if (xml != 0) {
- sb = sbuf_new_auto();
- sbuf_printf(sb, "<ctlfelist>\n");
- }
- for (i = 0; i < port_list.fill_num; i++) {
- struct ctl_port_entry *entry;
- const char *type;
-
- entry = &entries[i];
-
- switch (entry->port_type) {
- case CTL_PORT_FC:
- type = "FC";
- break;
- case CTL_PORT_SCSI:
- type = "SCSI";
- break;
- case CTL_PORT_IOCTL:
- type = "IOCTL";
- break;
- case CTL_PORT_INTERNAL:
- type = "INTERNAL";
- break;
- case CTL_PORT_ISC:
- type = "ISC";
- break;
- case CTL_PORT_ISCSI:
- type = "ISCSI";
- break;
- case CTL_PORT_SAS:
- type = "SAS";
- break;
- default:
- type = "UNKNOWN";
- break;
- }
-
- /*
- * If the user specified a frontend number or a particular
- * frontend type, only print out that particular frontend
- * or frontend type.
- */
- if ((targ_port != -1)
- && (targ_port != entry->targ_port))
- continue;
- else if ((port_type != CTL_PORT_NONE)
- && ((port_type & entry->port_type) == 0))
- continue;
-
- did_print = 1;
-
-#if 0
- printf("Num: %ju Type: %s (%#x) Name: %s Physical Port: %d "
- "Virtual Port: %d\n", (uintmax_t)entry->fe_num, type,
- entry->port_type, entry->fe_name, entry->physical_port,
- entry->virtual_port);
- printf("WWNN %#jx WWPN %#jx Online: %s\n",
- (uintmax_t)entry->wwnn, (uintmax_t)entry->wwpn,
- (entry->online) ? "YES" : "NO" );
-#endif
- if (xml == 0) {
- printf("%-4d %-6s %-8s %-12s %-2d %-2d %#-18jx "
- "%#-18jx\n",
- entry->targ_port, (entry->online) ? "YES" : "NO",
- type, entry->port_name, entry->physical_port,
- entry->virtual_port, (uintmax_t)entry->wwnn,
- (uintmax_t)entry->wwpn);
- } else {
- sbuf_printf(sb, "<targ_port id=\"%d\">\n",
- entry->targ_port);
- sbuf_printf(sb, "<online>%s</online>\n",
- (entry->online) ? "YES" : "NO");
- sbuf_printf(sb, "<port_type>%s</port_type>\n", type);
- sbuf_printf(sb, "<port_name>%s</port_name>\n",
- entry->port_name);
- sbuf_printf(sb, "<physical_port>%d</physical_port>\n",
- entry->physical_port);
- sbuf_printf(sb, "<virtual_port>%d</virtual_port>\n",
- entry->virtual_port);
- sbuf_printf(sb, "<wwnn>%#jx</wwnn>\n",
- (uintmax_t)entry->wwnn);
- sbuf_printf(sb, "<wwpn>%#jx</wwpn>\n",
- (uintmax_t)entry->wwpn);
- sbuf_printf(sb, "</targ_port>\n");
- }
-
- }
- if (xml != 0) {
- sbuf_printf(sb, "</ctlfelist>\n");
- if (sbuf_finish(sb) != 0)
- err(1, "%s: sbuf_finish", __func__);
- printf("%s", sbuf_data(sb));
- sbuf_delete(sb);
- }
-
- /*
- * Give some indication that we didn't find the frontend or
- * frontend type requested by the user. We could print something
- * out, but it would probably be better to hide that behind a
- * verbose flag.
- */
- if ((did_print == 0)
- && ((targ_port != -1)
- || (port_type != CTL_PORT_NONE)))
- return (1);
- else
- return (0);
-}
-
typedef enum {
CCTL_PORT_MODE_NONE,
CCTL_PORT_MODE_LIST,
@@ -717,9 +499,22 @@ cctl_port(int fd, int argc, char **argv, char *combinedopt)
entry.targ_port = targ_port;
switch (port_mode) {
- case CCTL_PORT_MODE_LIST:
- cctl_port_dump(fd, quiet, xml, targ_port, port_type);
+ case CCTL_PORT_MODE_LIST: {
+ char opts[] = "xq";
+ char argx[] = "-x";
+ char argq[] = "-q";
+ char *argvx[2];
+ int argcx = 0;
+
+ optind = 0;
+ optreset = 1;
+ if (xml)
+ argvx[argcx++] = argx;
+ if (quiet)
+ argvx[argcx++] = argq;
+ cctl_portlist(fd, argcx, argvx, opts);
break;
+ }
case CCTL_PORT_MODE_SET:
if (targ_port == -1) {
warnx("%s: -w and -W require -n", __func__);
@@ -789,7 +584,7 @@ cctl_do_io(int fd, int retries, union ctl_io *io, const char *func)
}
static int
-cctl_delay(int fd, int target, int lun, int argc, char **argv,
+cctl_delay(int fd, int lun, int argc, char **argv,
char *combinedopt)
{
struct ctl_io_delay_info delay_info;
@@ -844,7 +639,6 @@ cctl_delay(int fd, int target, int lun, int argc, char **argv,
goto bailout;
}
- delay_info.target_id = target;
delay_info.lun_id = lun;
delay_info.delay_secs = delaytime;
@@ -885,137 +679,10 @@ cctl_delay(int fd, int target, int lun, int argc, char **argv,
retval = 1;
break;
}
-bailout:
-
- /* delayloc should never be NULL, but just in case...*/
- if (delayloc != NULL)
- free(delayloc);
-
- return (retval);
-}
-
-static int
-cctl_realsync(int fd, int argc, char **argv)
-{
- int syncstate;
- int retval;
- char *syncarg;
-
- retval = 0;
-
- if (argc != 3) {
- warnx("%s %s takes exactly one argument", argv[0], argv[1]);
- retval = 1;
- goto bailout;
- }
-
- syncarg = argv[2];
-
- if (strncasecmp(syncarg, "query", min(strlen(syncarg),
- strlen("query"))) == 0) {
- if (ioctl(fd, CTL_REALSYNC_GET, &syncstate) == -1) {
- warn("%s: CTL_REALSYNC_GET ioctl failed", __func__);
- retval = 1;
- goto bailout;
- }
- fprintf(stdout, "SYNCHRONIZE CACHE support is: ");
- switch (syncstate) {
- case 0:
- fprintf(stdout, "OFF\n");
- break;
- case 1:
- fprintf(stdout, "ON\n");
- break;
- default:
- fprintf(stdout, "unknown (%d)\n", syncstate);
- break;
- }
- goto bailout;
- } else if (strcasecmp(syncarg, "on") == 0) {
- syncstate = 1;
- } else if (strcasecmp(syncarg, "off") == 0) {
- syncstate = 0;
- } else {
- warnx("%s: invalid realsync argument %s", __func__, syncarg);
- retval = 1;
- goto bailout;
- }
-
- if (ioctl(fd, CTL_REALSYNC_SET, &syncstate) == -1) {
- warn("%s: CTL_REALSYNC_SET ioctl failed", __func__);
- retval = 1;
- goto bailout;
- }
-bailout:
- return (retval);
-}
-
-static int
-cctl_getsetsync(int fd, int target, int lun, ctladm_cmdfunction command,
- int argc, char **argv, char *combinedopt)
-{
- struct ctl_sync_info sync_info;
- uint32_t ioctl_cmd;
- int sync_interval = -1;
- int retval;
- int c;
-
- retval = 0;
-
- memset(&sync_info, 0, sizeof(sync_info));
- sync_info.target_id = target;
- sync_info.lun_id = lun;
-
- while ((c = getopt(argc, argv, combinedopt)) != -1) {
- switch (c) {
- case 'i':
- sync_interval = strtoul(optarg, NULL, 0);
- break;
- default:
- break;
- }
- }
- if (command == CTLADM_CMD_SETSYNC) {
- if (sync_interval == -1) {
- warnx("%s: you must specify the sync interval with -i",
- __func__);
- retval = 1;
- goto bailout;
- }
- sync_info.sync_interval = sync_interval;
- ioctl_cmd = CTL_SETSYNC;
- } else {
- ioctl_cmd = CTL_GETSYNC;
- }
-
- if (ioctl(fd, ioctl_cmd, &sync_info) == -1) {
- warn("%s: CTL_%sSYNC ioctl failed", __func__,
- (command == CTLADM_CMD_SETSYNC) ? "SET" : "GET");
- retval = 1;
- goto bailout;
- }
-
- switch (sync_info.status) {
- case CTL_GS_SYNC_OK:
- if (command == CTLADM_CMD_GETSYNC) {
- fprintf(stdout, "%d:%d: sync interval: %d\n",
- target, lun, sync_info.sync_interval);
- }
- break;
- case CTL_GS_SYNC_NO_LUN:
- warnx("%s: unknown target:LUN %d:%d", __func__, target, lun);
- retval = 1;
- break;
- case CTL_GS_SYNC_NONE:
- default:
- warnx("%s: unknown CTL_%sSYNC status %d", __func__,
- (command == CTLADM_CMD_SETSYNC) ? "SET" : "GET",
- sync_info.status);
- retval = 1;
- break;
- }
bailout:
+ free(delayloc);
+ free(delaytype);
return (retval);
}
@@ -1043,7 +710,7 @@ static struct ctladm_opts cctl_err_patterns[] = {
};
static int
-cctl_error_inject(int fd, uint32_t target, uint32_t lun, int argc, char **argv,
+cctl_error_inject(int fd, uint32_t lun, int argc, char **argv,
char *combinedopt)
{
int retval = 0;
@@ -1058,7 +725,6 @@ cctl_error_inject(int fd, uint32_t target, uint32_t lun, int argc, char **argv,
int c;
bzero(&err_desc, sizeof(err_desc));
- err_desc.target_id = target;
err_desc.lun_id = lun;
while ((c = getopt(argc, argv, combinedopt)) != -1) {
@@ -1269,22 +935,18 @@ cctl_lunlist(int fd)
struct scsi_report_luns_data *lun_data;
struct scsi_inquiry_data *inq_data;
uint32_t num_luns;
- int target;
int initid;
unsigned int i;
int retval;
- retval = 0;
inq_data = NULL;
-
- target = 6;
initid = 7;
/*
* XXX KDM assuming LUN 0 is fine, but we may need to change this
* if we ever acquire the ability to have multiple targets.
*/
- if ((retval = cctl_get_luns(fd, target, /*lun*/ 0, initid,
+ if ((retval = cctl_get_luns(fd, /*lun*/ 0, initid,
/*retries*/ 2, &lun_data, &num_luns)) != 0)
goto bailout;
@@ -1321,7 +983,7 @@ cctl_lunlist(int fd)
if (lun_val == -1)
continue;
- if ((retval = cctl_get_inquiry(fd, target, lun_val, initid,
+ if ((retval = cctl_get_inquiry(fd, lun_val, initid,
/*retries*/ 2, scsi_path,
sizeof(scsi_path),
inq_data)) != 0) {
@@ -1341,345 +1003,11 @@ bailout:
return (retval);
}
-static void
-cctl_cfi_mt_statusstr(cfi_mt_status status, char *str, int str_len)
-{
- switch (status) {
- case CFI_MT_PORT_OFFLINE:
- snprintf(str, str_len, "Port Offline");
- break;
- case CFI_MT_ERROR:
- snprintf(str, str_len, "Error");
- break;
- case CFI_MT_SUCCESS:
- snprintf(str, str_len, "Success");
- break;
- case CFI_MT_NONE:
- snprintf(str, str_len, "None??");
- break;
- default:
- snprintf(str, str_len, "Unknown status: %d", status);
- break;
- }
-}
-
-static void
-cctl_cfi_bbr_statusstr(cfi_bbrread_status status, char *str, int str_len)
-{
- switch (status) {
- case CFI_BBR_SUCCESS:
- snprintf(str, str_len, "Success");
- break;
- case CFI_BBR_LUN_UNCONFIG:
- snprintf(str, str_len, "LUN not configured");
- break;
- case CFI_BBR_NO_LUN:
- snprintf(str, str_len, "LUN does not exist");
- break;
- case CFI_BBR_NO_MEM:
- snprintf(str, str_len, "Memory allocation error");
- break;
- case CFI_BBR_BAD_LEN:
- snprintf(str, str_len, "Length is not a multiple of blocksize");
- break;
- case CFI_BBR_RESERV_CONFLICT:
- snprintf(str, str_len, "Reservation conflict");
- break;
- case CFI_BBR_LUN_STOPPED:
- snprintf(str, str_len, "LUN is powered off");
- break;
- case CFI_BBR_LUN_OFFLINE_CTL:
- snprintf(str, str_len, "LUN is offline");
- break;
- case CFI_BBR_LUN_OFFLINE_RC:
- snprintf(str, str_len, "RAIDCore array is offline (double "
- "failure?)");
- break;
- case CFI_BBR_SCSI_ERROR:
- snprintf(str, str_len, "SCSI Error");
- break;
- case CFI_BBR_ERROR:
- snprintf(str, str_len, "Error");
- break;
- default:
- snprintf(str, str_len, "Unknown status: %d", status);
- break;
- }
-}
-
-static int
-cctl_hardstopstart(int fd, ctladm_cmdfunction command)
-{
- struct ctl_hard_startstop_info hs_info;
- char error_str[256];
- int do_start;
- int retval;
-
- retval = 0;
-
- if (command == CTLADM_CMD_HARDSTART)
- do_start = 1;
- else
- do_start = 0;
-
- if (ioctl(fd, (do_start == 1) ? CTL_HARD_START : CTL_HARD_STOP,
- &hs_info) == -1) {
- warn("%s: CTL_HARD_%s ioctl failed", __func__,
- (do_start == 1) ? "START" : "STOP");
- retval = 1;
- goto bailout;
- }
-
- fprintf(stdout, "Hard %s Status: ", (command == CTLADM_CMD_HARDSTOP) ?
- "Stop" : "Start");
- cctl_cfi_mt_statusstr(hs_info.status, error_str, sizeof(error_str));
- fprintf(stdout, "%s\n", error_str);
- fprintf(stdout, "Total LUNs: %d\n", hs_info.total_luns);
- fprintf(stdout, "LUNs complete: %d\n", hs_info.luns_complete);
- fprintf(stdout, "LUNs failed: %d\n", hs_info.luns_failed);
-
-bailout:
- return (retval);
-}
-
-static int
-cctl_bbrread(int fd, int target __unused, int lun, int iid __unused,
- int argc, char **argv, char *combinedopt)
-{
- struct ctl_bbrread_info bbr_info;
- char error_str[256];
- int datalen = -1;
- uint64_t lba = 0;
- int lba_set = 0;
- int retval;
- int c;
-
- retval = 0;
-
- while ((c = getopt(argc, argv, combinedopt)) != -1) {
- switch (c) {
- case 'd':
- datalen = strtoul(optarg, NULL, 0);
- break;
- case 'l':
- lba = strtoull(optarg, NULL, 0);
- lba_set = 1;
- break;
- default:
- break;
- }
- }
-
- if (lba_set == 0) {
- warnx("%s: you must specify an LBA with -l", __func__);
- retval = 1;
- goto bailout;
- }
-
- if (datalen == -1) {
- warnx("%s: you must specify a length with -d", __func__);
- retval = 1;
- goto bailout;
- }
-
- bbr_info.lun_num = lun;
- bbr_info.lba = lba;
- /*
- * XXX KDM get the blocksize first??
- */
- if ((datalen % 512) != 0) {
- warnx("%s: data length %d is not a multiple of 512 bytes",
- __func__, datalen);
- retval = 1;
- goto bailout;
- }
- bbr_info.len = datalen;
-
- if (ioctl(fd, CTL_BBRREAD, &bbr_info) == -1) {
- warn("%s: CTL_BBRREAD ioctl failed", __func__);
- retval = 1;
- goto bailout;
- }
- cctl_cfi_mt_statusstr(bbr_info.status, error_str, sizeof(error_str));
- fprintf(stdout, "BBR Read Overall Status: %s\n", error_str);
- cctl_cfi_bbr_statusstr(bbr_info.bbr_status, error_str,
- sizeof(error_str));
- fprintf(stdout, "BBR Read Status: %s\n", error_str);
- /*
- * XXX KDM should we bother printing out SCSI status if we get
- * CFI_BBR_SCSI_ERROR back?
- *
- * Return non-zero if this fails?
- */
-bailout:
- return (retval);
-}
-
-static int
-cctl_startup_shutdown(int fd, int target, int lun, int iid,
- ctladm_cmdfunction command)
-{
- union ctl_io *io;
- struct ctl_id id;
- struct scsi_report_luns_data *lun_data;
- struct scsi_inquiry_data *inq_data;
- uint32_t num_luns;
- unsigned int i;
- int retval;
-
- retval = 0;
- inq_data = NULL;
-
- /*
- * - report luns
- * - step through each lun, do an inquiry
- * - check OOA queue on direct access luns
- * - send stop with offline bit to each direct access device with a
- * clear OOA queue
- * - if we get a reservation conflict, reset the LUN to clear it
- * and reissue the stop with the offline bit set
- */
-
- id.id = iid;
-
- io = ctl_scsi_alloc_io(id);
- if (io == NULL) {
- warnx("%s: can't allocate memory", __func__);
- return (1);
- }
-
- if ((retval = cctl_get_luns(fd, target, lun, iid, /*retries*/ 2,
- &lun_data, &num_luns)) != 0)
- goto bailout;
-
- inq_data = malloc(sizeof(*inq_data));
- if (inq_data == NULL) {
- warn("%s: couldn't allocate memory for inquiry data\n",
- __func__);
- retval = 1;
- goto bailout;
- }
- for (i = 0; i < num_luns; i++) {
- char scsi_path[40];
- int lun_val;
-
- /*
- * XXX KDM figure out a way to share this code with
- * cctl_lunlist()?
- */
- switch (lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK) {
- case RPL_LUNDATA_ATYP_PERIPH:
- lun_val = lun_data->luns[i].lundata[1];
- break;
- case RPL_LUNDATA_ATYP_FLAT:
- lun_val = (lun_data->luns[i].lundata[0] &
- RPL_LUNDATA_FLAT_LUN_MASK) |
- (lun_data->luns[i].lundata[1] <<
- RPL_LUNDATA_FLAT_LUN_BITS);
- break;
- case RPL_LUNDATA_ATYP_LUN:
- case RPL_LUNDATA_ATYP_EXTLUN:
- default:
- fprintf(stdout, "Unsupported LUN format %d\n",
- lun_data->luns[i].lundata[0] &
- RPL_LUNDATA_ATYP_MASK);
- lun_val = -1;
- break;
- }
- if (lun_val == -1)
- continue;
-
- if ((retval = cctl_get_inquiry(fd, target, lun_val, iid,
- /*retries*/ 2, scsi_path,
- sizeof(scsi_path),
- inq_data)) != 0) {
- goto bailout;
- }
- printf("%s", scsi_path);
- scsi_print_inquiry(inq_data);
- /*
- * We only want to shutdown direct access devices.
- */
- if (SID_TYPE(inq_data) != T_DIRECT) {
- printf("%s LUN is not direct access, skipped\n",
- scsi_path);
- continue;
- }
-
- if (command == CTLADM_CMD_SHUTDOWN) {
- struct ctl_ooa_info ooa_info;
-
- ooa_info.target_id = target;
- ooa_info.lun_id = lun_val;
-
- if (ioctl(fd, CTL_CHECK_OOA, &ooa_info) == -1) {
- printf("%s CTL_CHECK_OOA ioctl failed\n",
- scsi_path);
- continue;
- }
-
- if (ooa_info.status != CTL_OOA_SUCCESS) {
- printf("%s CTL_CHECK_OOA returned status %d\n",
- scsi_path, ooa_info.status);
- continue;
- }
- if (ooa_info.num_entries != 0) {
- printf("%s %d entr%s in the OOA queue, "
- "skipping shutdown\n", scsi_path,
- ooa_info.num_entries,
- (ooa_info.num_entries > 1)?"ies" : "y" );
- continue;
- }
- }
-
- ctl_scsi_start_stop(/*io*/ io,
- /*start*/(command == CTLADM_CMD_STARTUP) ?
- 1 : 0,
- /*load_eject*/ 0,
- /*immediate*/ 0,
- /*power_conditions*/ SSS_PC_START_VALID,
- /*onoffline*/ 1,
- /*ctl_tag_type*/
- (command == CTLADM_CMD_STARTUP) ?
- CTL_TAG_SIMPLE :CTL_TAG_ORDERED,
- /*control*/ 0);
-
- io->io_hdr.nexus.targ_target.id = target;
- io->io_hdr.nexus.targ_lun = lun_val;
- io->io_hdr.nexus.initid = id;
-
- if (cctl_do_io(fd, /*retries*/ 3, io, __func__) != 0) {
- retval = 1;
- goto bailout;
- }
-
- if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)
- ctl_io_error_print(io, inq_data, stderr);
- else {
- printf("%s LUN is now %s\n", scsi_path,
- (command == CTLADM_CMD_STARTUP) ? "online" :
- "offline");
- }
- }
-bailout:
- if (lun_data != NULL)
- free(lun_data);
-
- if (inq_data != NULL)
- free(inq_data);
-
- if (io != NULL)
- ctl_scsi_free_io(io);
-
- return (retval);
-}
-
static int
-cctl_sync_cache(int fd, int target, int lun, int iid, int retries,
+cctl_sync_cache(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt)
{
union ctl_io *io;
- struct ctl_id id;
int cdb_size = -1;
int retval;
uint64_t our_lba = 0;
@@ -1687,10 +1015,9 @@ cctl_sync_cache(int fd, int target, int lun, int iid, int retries,
int reladr = 0, immed = 0;
int c;
- id.id = iid;
retval = 0;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warnx("%s: can't allocate memory", __func__);
return (1);
@@ -1742,9 +1069,8 @@ cctl_sync_cache(int fd, int target, int lun, int iid, int retries,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -1762,19 +1088,17 @@ bailout:
}
static int
-cctl_start_stop(int fd, int target, int lun, int iid, int retries, int start,
+cctl_start_stop(int fd, int lun, int iid, int retries, int start,
int argc, char **argv, char *combinedopt)
{
union ctl_io *io;
- struct ctl_id id;
char scsi_path[40];
- int immed = 0, onoffline = 0;
+ int immed = 0;
int retval, c;
- id.id = iid;
retval = 0;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warnx("%s: can't allocate memory", __func__);
return (1);
@@ -1785,9 +1109,6 @@ cctl_start_stop(int fd, int target, int lun, int iid, int retries, int start,
case 'i':
immed = 1;
break;
- case 'o':
- onoffline = 1;
- break;
default:
break;
}
@@ -1804,14 +1125,12 @@ cctl_start_stop(int fd, int target, int lun, int iid, int retries, int start,
/*load_eject*/ 0,
/*immediate*/ immed,
/*power_conditions*/ SSS_PC_START_VALID,
- /*onoffline*/ onoffline,
/*ctl_tag_type*/ start ? CTL_TAG_SIMPLE :
CTL_TAG_ORDERED,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -1832,11 +1151,10 @@ bailout:
}
static int
-cctl_mode_sense(int fd, int target, int lun, int iid, int retries,
+cctl_mode_sense(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t datalen;
uint8_t *dataptr;
int pc = -1, cdbsize, retval, dbd = 0, subpage = -1;
@@ -1844,12 +1162,11 @@ cctl_mode_sense(int fd, int target, int lun, int iid, int retries,
int page_code = -1;
int c;
- id.id = iid;
cdbsize = 0;
retval = 0;
dataptr = NULL;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory", __func__);
return (1);
@@ -1977,9 +1294,8 @@ cctl_mode_sense(int fd, int target, int lun, int iid, int retries,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -2050,11 +1366,10 @@ bailout:
}
static int
-cctl_read_capacity(int fd, int target, int lun, int iid, int retries,
+cctl_read_capacity(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt)
{
union ctl_io *io;
- struct ctl_id id;
struct scsi_read_capacity_data *data;
struct scsi_read_capacity_data_long *longdata;
int cdbsize = -1, retval;
@@ -2064,9 +1379,8 @@ cctl_read_capacity(int fd, int target, int lun, int iid, int retries,
cdbsize = 10;
dataptr = NULL;
retval = 0;
- id.id = iid;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory\n", __func__);
return (1);
@@ -2130,8 +1444,7 @@ retry:
break;
}
- io->io_hdr.nexus.initid = id;
- io->io_hdr.nexus.targ_target.id = target;
+ io->io_hdr.nexus.initid = iid;
io->io_hdr.nexus.targ_lun = lun;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
@@ -2176,12 +1489,11 @@ bailout:
}
static int
-cctl_read_write(int fd, int target, int lun, int iid, int retries,
+cctl_read_write(int fd, int lun, int iid, int retries,
int argc, char **argv, char *combinedopt,
ctladm_cmdfunction command)
{
union ctl_io *io;
- struct ctl_id id;
int file_fd, do_stdio;
int cdbsize = -1, databytes;
uint8_t *dataptr;
@@ -2196,9 +1508,8 @@ cctl_read_write(int fd, int target, int lun, int iid, int retries,
do_stdio = 0;
dataptr = NULL;
file_fd = -1;
- id.id = iid;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory\n", __func__);
return (1);
@@ -2322,9 +1633,8 @@ cctl_read_write(int fd, int target, int lun, int iid, int retries,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -2359,19 +1669,17 @@ bailout:
}
static int
-cctl_get_luns(int fd, int target, int lun, int iid, int retries, struct
+cctl_get_luns(int fd, int lun, int iid, int retries, struct
scsi_report_luns_data **lun_data, uint32_t *num_luns)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t nluns;
int lun_datalen;
int retval;
retval = 0;
- id.id = iid;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warnx("%s: can't allocate memory", __func__);
return (1);
@@ -2400,8 +1708,7 @@ retry:
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.initid = id;
- io->io_hdr.nexus.targ_target.id = target;
+ io->io_hdr.nexus.initid = iid;
io->io_hdr.nexus.targ_lun = lun;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
@@ -2432,7 +1739,7 @@ bailout:
}
static int
-cctl_report_luns(int fd, int target, int lun, int iid, int retries)
+cctl_report_luns(int fd, int lun, int iid, int retries)
{
struct scsi_report_luns_data *lun_data;
uint32_t num_luns, i;
@@ -2440,7 +1747,7 @@ cctl_report_luns(int fd, int target, int lun, int iid, int retries)
lun_data = NULL;
- if ((retval = cctl_get_luns(fd, target, lun, iid, retries, &lun_data,
+ if ((retval = cctl_get_luns(fd, lun, iid, retries, &lun_data,
&num_luns)) != 0)
goto bailout;
@@ -2485,14 +1792,11 @@ bailout:
}
static int
-cctl_tur(int fd, int target, int lun, int iid, int retries)
+cctl_tur(int fd, int lun, int iid, int retries)
{
union ctl_io *io;
- struct ctl_id id;
-
- id.id = iid;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
fprintf(stderr, "can't allocate memory\n");
return (1);
@@ -2502,9 +1806,8 @@ cctl_tur(int fd, int target, int lun, int iid, int retries)
/* tag_type */ CTL_TAG_SIMPLE,
/* control */ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
ctl_scsi_free_io(io);
@@ -2520,19 +1823,16 @@ cctl_tur(int fd, int target, int lun, int iid, int retries)
}
static int
-cctl_get_inquiry(int fd, int target, int lun, int iid, int retries,
+cctl_get_inquiry(int fd, int lun, int iid, int retries,
char *path_str, int path_len,
struct scsi_inquiry_data *inq_data)
{
union ctl_io *io;
- struct ctl_id id;
int retval;
retval = 0;
- id.id = iid;
-
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warnx("cctl_inquiry: can't allocate memory\n");
return (1);
@@ -2546,9 +1846,8 @@ cctl_get_inquiry(int fd, int target, int lun, int iid, int retries,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -2568,14 +1867,12 @@ bailout:
}
static int
-cctl_inquiry(int fd, int target, int lun, int iid, int retries)
+cctl_inquiry(int fd, int lun, int iid, int retries)
{
struct scsi_inquiry_data *inq_data;
char scsi_path[40];
int retval;
- retval = 0;
-
inq_data = malloc(sizeof(*inq_data));
if (inq_data == NULL) {
warnx("%s: can't allocate inquiry data", __func__);
@@ -2583,7 +1880,7 @@ cctl_inquiry(int fd, int target, int lun, int iid, int retries)
goto bailout;
}
- if ((retval = cctl_get_inquiry(fd, target, lun, iid, retries, scsi_path,
+ if ((retval = cctl_get_inquiry(fd, lun, iid, retries, scsi_path,
sizeof(scsi_path), inq_data)) != 0)
goto bailout;
@@ -2598,18 +1895,15 @@ bailout:
}
static int
-cctl_req_sense(int fd, int target, int lun, int iid, int retries)
+cctl_req_sense(int fd, int lun, int iid, int retries)
{
union ctl_io *io;
struct scsi_sense_data *sense_data;
- struct ctl_id id;
int retval;
retval = 0;
- id.id = iid;
-
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warnx("cctl_req_sense: can't allocate memory\n");
return (1);
@@ -2624,9 +1918,8 @@ cctl_req_sense(int fd, int target, int lun, int iid, int retries)
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retries, io, __func__) != 0) {
retval = 1;
@@ -2649,19 +1942,17 @@ bailout:
}
static int
-cctl_report_target_port_group(int fd, int target, int lun, int initiator)
+cctl_report_target_port_group(int fd, int lun, int iid)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t datalen;
uint8_t *dataptr;
int retval;
- id.id = initiator;
dataptr = NULL;
retval = 0;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory", __func__);
return (1);
@@ -2684,9 +1975,8 @@ cctl_report_target_port_group(int fd, int target, int lun, int initiator)
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, 0, io, __func__) != 0) {
retval = 1;
@@ -2717,19 +2007,17 @@ bailout:
}
static int
-cctl_inquiry_vpd_devid(int fd, int target, int lun, int initiator)
+cctl_inquiry_vpd_devid(int fd, int lun, int iid)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t datalen;
uint8_t *dataptr;
int retval;
- id.id = initiator;
retval = 0;
dataptr = NULL;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory", __func__);
return (1);
@@ -2753,9 +2041,8 @@ cctl_inquiry_vpd_devid(int fd, int target, int lun, int initiator)
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, 0, io, __func__) != 0) {
retval = 1;
@@ -2786,23 +2073,21 @@ bailout:
}
static int
-cctl_persistent_reserve_in(int fd, int target, int lun, int initiator,
+cctl_persistent_reserve_in(int fd, int lun, int iid,
int argc, char **argv, char *combinedopt,
int retry_count)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t datalen;
uint8_t *dataptr;
int action = -1;
int retval;
int c;
- id.id = initiator;
retval = 0;
dataptr = NULL;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory", __func__);
return (1);
@@ -2842,9 +2127,8 @@ cctl_persistent_reserve_in(int fd, int target, int lun, int initiator,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retry_count, io, __func__) != 0) {
retval = 1;
@@ -2854,8 +2138,6 @@ cctl_persistent_reserve_in(int fd, int target, int lun, int initiator,
if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) {
int returned_len, used_len;
- returned_len = 0;
-
switch (action) {
case 0:
returned_len = scsi_4btoul(&dataptr[4]) + 8;
@@ -2892,12 +2174,11 @@ bailout:
}
static int
-cctl_persistent_reserve_out(int fd, int target, int lun, int initiator,
+cctl_persistent_reserve_out(int fd, int lun, int iid,
int argc, char **argv, char *combinedopt,
int retry_count)
{
union ctl_io *io;
- struct ctl_id id;
uint32_t datalen;
uint64_t key = 0, sa_key = 0;
int action = -1, restype = -1;
@@ -2905,11 +2186,10 @@ cctl_persistent_reserve_out(int fd, int target, int lun, int initiator,
int retval;
int c;
- id.id = initiator;
retval = 0;
dataptr = NULL;
- io = ctl_scsi_alloc_io(id);
+ io = ctl_scsi_alloc_io(iid);
if (io == NULL) {
warn("%s: can't allocate memory", __func__);
return (1);
@@ -2967,9 +2247,8 @@ cctl_persistent_reserve_out(int fd, int target, int lun, int initiator,
/*tag_type*/ CTL_TAG_SIMPLE,
/*control*/ 0);
- io->io_hdr.nexus.targ_target.id = target;
io->io_hdr.nexus.targ_lun = lun;
- io->io_hdr.nexus.initid = id;
+ io->io_hdr.nexus.initid = iid;
if (cctl_do_io(fd, retry_count, io, __func__) != 0) {
retval = 1;
@@ -3356,8 +2635,11 @@ cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt)
uint32_t lun_id = 0;
int lun_id_set = 0, lun_size_set = 0;
char *backend_name = NULL;
+ STAILQ_HEAD(, cctl_req_option) option_list;
+ int num_options = 0;
int retval = 0, c;
+ STAILQ_INIT(&option_list);
while ((c = getopt(argc, argv, combinedopt)) != -1) {
switch (c) {
case 'b':
@@ -3367,6 +2649,43 @@ cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt)
lun_id = strtoul(optarg, NULL, 0);
lun_id_set = 1;
break;
+ case 'o': {
+ struct cctl_req_option *option;
+ char *tmpstr;
+ char *name, *value;
+
+ tmpstr = strdup(optarg);
+ name = strsep(&tmpstr, "=");
+ if (name == NULL) {
+ warnx("%s: option -o takes \"name=value\""
+ "argument", __func__);
+ retval = 1;
+ goto bailout;
+ }
+ value = strsep(&tmpstr, "=");
+ if (value == NULL) {
+ warnx("%s: option -o takes \"name=value\""
+ "argument", __func__);
+ retval = 1;
+ goto bailout;
+ }
+ option = malloc(sizeof(*option));
+ if (option == NULL) {
+ warn("%s: error allocating %zd bytes",
+ __func__, sizeof(*option));
+ retval = 1;
+ goto bailout;
+ }
+ option->name = strdup(name);
+ option->namelen = strlen(name) + 1;
+ option->value = strdup(value);
+ option->vallen = strlen(value) + 1;
+ free(name); /* frees the strdup()ed buffer; tmpstr may point mid-string */
+
+ STAILQ_INSERT_TAIL(&option_list, option, links);
+ num_options++;
+ break;
+ }
case 's':
if (strcasecmp(optarg, "auto") != 0) {
retval = expand_number(optarg, &lun_size);
@@ -3390,8 +2709,9 @@ cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt)
if (lun_id_set == 0)
errx(1, "%s: LUN id (-l) must be specified", __func__);
- if (lun_size_set == 0)
- errx(1, "%s: size (-s) must be specified", __func__);
+ if (lun_size_set == 0 && num_options == 0)
+ errx(1, "%s: size (-s) or options (-o) must be specified",
+ __func__);
bzero(&req, sizeof(req));
@@ -3401,6 +2721,42 @@ cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt)
req.reqdata.modify.lun_id = lun_id;
req.reqdata.modify.lun_size_bytes = lun_size;
+ req.num_be_args = num_options;
+ if (num_options > 0) {
+ struct cctl_req_option *option, *next_option;
+ int i;
+
+ req.be_args = malloc(num_options * sizeof(*req.be_args));
+ if (req.be_args == NULL) {
+ warn("%s: error allocating %zd bytes", __func__,
+ num_options * sizeof(*req.be_args));
+ retval = 1;
+ goto bailout;
+ }
+
+ for (i = 0, option = STAILQ_FIRST(&option_list);
+ i < num_options; i++, option = next_option) {
+ next_option = STAILQ_NEXT(option, links);
+
+ req.be_args[i].namelen = option->namelen;
+ req.be_args[i].name = strdup(option->name);
+ req.be_args[i].vallen = option->vallen;
+ req.be_args[i].value = strdup(option->value);
+ /*
+ * XXX KDM do we want a way to specify a writeable
+ * flag of some sort? Do we want a way to specify
+ * binary data?
+ */
+ req.be_args[i].flags = CTL_BEARG_ASCII | CTL_BEARG_RD;
+
+ STAILQ_REMOVE(&option_list, option, cctl_req_option,
+ links);
+ free(option->name);
+ free(option->value);
+ free(option);
+ }
+ }
+
if (ioctl(fd, CTL_LUN_REQ, &req) == -1) {
warn("%s: error issuing CTL_LUN_REQ ioctl", __func__);
retval = 1;
@@ -4492,22 +3848,14 @@ usage(int error)
" ctladm remove <-b backend> <-l lun_id> [-o name=value]\n"
" ctladm modify <-b backend> <-l lun_id> <-s size_bytes>\n"
" ctladm devlist [-b backend] [-v] [-x]\n"
-" ctladm shutdown\n"
-" ctladm startup\n"
-" ctladm hardstop\n"
-" ctladm hardstart\n"
" ctladm lunlist\n"
" ctladm lunmap -p targ_port [-l pLUN] [-L cLUN]\n"
-" ctladm bbrread [dev_id] <-l lba> <-d datalen>\n"
" ctladm delay [dev_id] <-l datamove|done> [-T oneshot|cont]\n"
" [-t secs]\n"
-" ctladm realsync <on|off|query>\n"
-" ctladm setsync [dev_id] <-i interval>\n"
-" ctladm getsync [dev_id]\n"
" ctladm inject [dev_id] <-i action> <-p pattern> [-r lba,len]\n"
" [-s len fmt [args]] [-c] [-d delete_id]\n"
-" ctladm port <-l | -o <on|off> | [-w wwnn][-W wwpn]>\n"
-" [-p targ_port] [-t port_type] [-q] [-x]\n"
+" ctladm port <-o <on|off> | [-w wwnn][-W wwpn]>\n"
+" [-p targ_port] [-t port_type]\n"
" ctladm portlist [-f frontend] [-i] [-p targ_port] [-q] [-v] [-x]\n"
" ctladm islist [-v | -x]\n"
" ctladm islogout <-a | -c connection-id | -i name | -p portal>\n"
@@ -4605,10 +3953,7 @@ usage(int error)
"lunmap options:\n"
"-p targ_port : specify target port number\n"
"-L pLUN : specify port-visible LUN\n"
-"-L cLUN : specify CTL LUN\n"
-"bbrread options:\n"
-"-l lba : starting LBA\n"
-"-d datalen : length, in bytes, to read\n",
+"-L cLUN : specify CTL LUN\n",
CTL_DEFAULT_DEV);
}
@@ -4623,7 +3968,7 @@ main(int argc, char **argv)
const char *mainopt = "C:D:I:";
const char *subopt = NULL;
char combinedopt[256];
- int target, lun;
+ int lun;
int optstart = 2;
int retval, fd;
int retries;
@@ -4636,7 +3981,6 @@ main(int argc, char **argv)
device = NULL;
fd = -1;
retries = 0;
- target = 0;
lun = 0;
initid = 7;
@@ -4662,16 +4006,13 @@ main(int argc, char **argv)
}
if (cmdargs & CTLADM_ARG_NEED_TL) {
- if ((argc < 3)
- || (!isdigit(argv[2][0]))) {
- warnx("option %s requires a target:lun argument",
+ if ((argc < 3) || (!isdigit(argv[2][0]))) {
+ warnx("option %s requires a lun argument",
argv[1]);
usage(0);
exit(1);
}
- retval = cctl_parse_tl(argv[2], &target, &lun);
- if (retval != 0)
- errx(1, "invalid target:lun argument %s", argv[2]);
+ lun = strtol(argv[2], NULL, 0);
cmdargs |= CTLADM_ARG_TARG_LUN;
optstart++;
@@ -4802,16 +4143,16 @@ main(int argc, char **argv)
switch (command) {
case CTLADM_CMD_TUR:
- retval = cctl_tur(fd, target, lun, initid, retries);
+ retval = cctl_tur(fd, lun, initid, retries);
break;
case CTLADM_CMD_INQUIRY:
- retval = cctl_inquiry(fd, target, lun, initid, retries);
+ retval = cctl_inquiry(fd, lun, initid, retries);
break;
case CTLADM_CMD_REQ_SENSE:
- retval = cctl_req_sense(fd, target, lun, initid, retries);
+ retval = cctl_req_sense(fd, lun, initid, retries);
break;
case CTLADM_CMD_REPORT_LUNS:
- retval = cctl_report_luns(fd, target, lun, initid, retries);
+ retval = cctl_report_luns(fd, lun, initid, retries);
break;
case CTLADM_CMD_CREATE:
retval = cctl_create_lun(fd, argc, argv, combinedopt);
@@ -4824,7 +4165,7 @@ main(int argc, char **argv)
break;
case CTLADM_CMD_READ:
case CTLADM_CMD_WRITE:
- retval = cctl_read_write(fd, target, lun, initid, retries,
+ retval = cctl_read_write(fd, lun, initid, retries,
argc, argv, combinedopt, command);
break;
case CTLADM_CMD_PORT:
@@ -4837,52 +4178,31 @@ main(int argc, char **argv)
retval = cctl_lunmap(fd, argc, argv, combinedopt);
break;
case CTLADM_CMD_READCAPACITY:
- retval = cctl_read_capacity(fd, target, lun, initid, retries,
+ retval = cctl_read_capacity(fd, lun, initid, retries,
argc, argv, combinedopt);
break;
case CTLADM_CMD_MODESENSE:
- retval = cctl_mode_sense(fd, target, lun, initid, retries,
+ retval = cctl_mode_sense(fd, lun, initid, retries,
argc, argv, combinedopt);
break;
case CTLADM_CMD_START:
case CTLADM_CMD_STOP:
- retval = cctl_start_stop(fd, target, lun, initid, retries,
+ retval = cctl_start_stop(fd, lun, initid, retries,
(command == CTLADM_CMD_START) ? 1 : 0,
argc, argv, combinedopt);
break;
case CTLADM_CMD_SYNC_CACHE:
- retval = cctl_sync_cache(fd, target, lun, initid, retries,
+ retval = cctl_sync_cache(fd, lun, initid, retries,
argc, argv, combinedopt);
break;
- case CTLADM_CMD_SHUTDOWN:
- case CTLADM_CMD_STARTUP:
- retval = cctl_startup_shutdown(fd, target, lun, initid,
- command);
- break;
- case CTLADM_CMD_HARDSTOP:
- case CTLADM_CMD_HARDSTART:
- retval = cctl_hardstopstart(fd, command);
- break;
- case CTLADM_CMD_BBRREAD:
- retval = cctl_bbrread(fd, target, lun, initid, argc, argv,
- combinedopt);
- break;
case CTLADM_CMD_LUNLIST:
retval = cctl_lunlist(fd);
break;
case CTLADM_CMD_DELAY:
- retval = cctl_delay(fd, target, lun, argc, argv, combinedopt);
- break;
- case CTLADM_CMD_REALSYNC:
- retval = cctl_realsync(fd, argc, argv);
- break;
- case CTLADM_CMD_SETSYNC:
- case CTLADM_CMD_GETSYNC:
- retval = cctl_getsetsync(fd, target, lun, command,
- argc, argv, combinedopt);
+ retval = cctl_delay(fd, lun, argc, argv, combinedopt);
break;
case CTLADM_CMD_ERR_INJECT:
- retval = cctl_error_inject(fd, target, lun, argc, argv,
+ retval = cctl_error_inject(fd, lun, argc, argv,
combinedopt);
break;
case CTLADM_CMD_DUMPOOA:
@@ -4892,20 +4212,20 @@ main(int argc, char **argv)
retval = cctl_dump_structs(fd, cmdargs);
break;
case CTLADM_CMD_PRES_IN:
- retval = cctl_persistent_reserve_in(fd, target, lun, initid,
+ retval = cctl_persistent_reserve_in(fd, lun, initid,
argc, argv, combinedopt,
retries);
break;
case CTLADM_CMD_PRES_OUT:
- retval = cctl_persistent_reserve_out(fd, target, lun, initid,
+ retval = cctl_persistent_reserve_out(fd, lun, initid,
argc, argv, combinedopt,
retries);
break;
case CTLADM_CMD_INQ_VPD_DEVID:
- retval = cctl_inquiry_vpd_devid(fd, target, lun, initid);
+ retval = cctl_inquiry_vpd_devid(fd, lun, initid);
break;
case CTLADM_CMD_RTPG:
- retval = cctl_report_target_port_group(fd, target, lun, initid);
+ retval = cctl_report_target_port_group(fd, lun, initid);
break;
case CTLADM_CMD_MODIFY:
retval = cctl_modify_lun(fd, argc, argv, combinedopt);
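Note: the ctladm.c hunks above all follow one mechanical pattern. With multiple-target support removed from CTL, the struct ctl_id wrapper and the target parameter go away, and the initiator ID is passed to ctl_scsi_alloc_io() and stored in the nexus as a plain int. A minimal before/after sketch of the recurring pattern (identifiers as in the diff; error handling elided):

	/* Before: target:lun addressing through struct ctl_id. */
	struct ctl_id id;
	id.id = iid;
	io = ctl_scsi_alloc_io(id);
	io->io_hdr.nexus.targ_target.id = target;
	io->io_hdr.nexus.targ_lun = lun;
	io->io_hdr.nexus.initid = id;

	/* After: the initiator ID is passed and stored directly. */
	io = ctl_scsi_alloc_io(iid);
	io->io_hdr.nexus.targ_lun = lun;
	io->io_hdr.nexus.initid = iid;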
diff --git a/usr.sbin/ctld/ctl.conf.5 b/usr.sbin/ctld/ctl.conf.5
index af50c0a..2fcc908 100644
--- a/usr.sbin/ctld/ctl.conf.5
+++ b/usr.sbin/ctld/ctl.conf.5
@@ -1,4 +1,5 @@
.\" Copyright (c) 2012 The FreeBSD Foundation
+.\" Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
.\" All rights reserved.
.\"
.\" This software was developed by Edward Tomasz Napierala under sponsorship
@@ -27,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd June 24, 2015
+.Dd September 27, 2015
.Dt CTL.CONF 5
.Os
.Sh NAME
@@ -239,6 +240,15 @@ Redirection happens before authentication and any
or
.Sy initiator-portal
checks are skipped.
+.It Ic tag Ar value
+Unique 16-bit tag value of this
+.Sy portal-group .
+If not specified, the value is generated automatically.
+.It Ic foreign
+Specifies that this
+.Sy portal-group
+is handled by some other host.
+This host will announce it during the discovery stage, but will not
+listen on it.
.El
.Ss target Context
.Bl -tag -width indent
@@ -354,9 +364,17 @@ testing.
The default backend is block.
.It Ic blocksize Ar size
The blocksize visible to the initiator.
-The default blocksize is 512.
+The default blocksize is 512 for disks, and 2048 for CD/DVDs.
+.It Ic ctl-lun Ar lun_id
+Global numeric identifier to use for a given LUN inside CTL.
+By default CTL allocates those IDs dynamically, but explicit specification
+may be needed for consistency in HA configurations.
.It Ic device-id Ar string
The SCSI Device Identification string presented to the initiator.
+.It Ic device-type Ar type
+Specify the SCSI device type to use when creating the LUN.
+Currently CTL supports Direct Access (type 0), Processor (type 3)
+and CD/DVD (type 5) LUNs.
.It Ic option Ar name Ar value
The CTL-specific options passed to the kernel.
All CTL-specific options are documented in the
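Note: taken together, the new ctl.conf(5) keywords let a configuration pin a portal group's tag, mark a portal group as handled by another host (foreign), and fix a LUN's CTL id and SCSI device type. A hypothetical fragment exercising them (names, addresses, and values are illustrative only, not from this commit):

	portal-group pg0 {
		tag 0x0101
		listen 192.0.2.1
	}

	target iqn.2012-06.com.example:target0 {
		portal-group pg0
		lun 0 {
			path /dev/zvol/tank/cd0
			# SCSI type 5; blocksize now defaults to 2048
			device-type cdrom
			# pin the CTL LUN id, e.g. for HA consistency
			ctl-lun 12
		}
	}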
diff --git a/usr.sbin/ctld/ctld.c b/usr.sbin/ctld/ctld.c
index 24a1d41..90c7db8 100644
--- a/usr.sbin/ctld/ctld.c
+++ b/usr.sbin/ctld/ctld.c
@@ -59,7 +59,7 @@ static volatile bool sigterm_received = false;
static volatile bool sigalrm_received = false;
static int nchildren = 0;
-static uint16_t last_portal_group_tag = 0;
+static uint16_t last_portal_group_tag = 0xff;
static void
usage(void)
@@ -1132,7 +1132,7 @@ valid_iscsi_name(const char *name)
}
} else {
log_warnx("invalid target name \"%s\"; should start with "
- "either \".iqn\", \"eui.\", or \"naa.\"",
+ "either \"iqn.\", \"eui.\", or \"naa.\"",
name);
}
return (true);
@@ -1212,6 +1212,7 @@ port_new(struct conf *conf, struct target *target, struct portal_group *pg)
port->p_target = target;
TAILQ_INSERT_TAIL(&pg->pg_ports, port, p_pgs);
port->p_portal_group = pg;
+ port->p_foreign = pg->pg_foreign;
return (port);
}
@@ -1379,6 +1380,7 @@ lun_new(struct conf *conf, const char *name)
lun->l_name = checked_strdup(name);
TAILQ_INIT(&lun->l_options);
TAILQ_INSERT_TAIL(&conf->conf_luns, lun, l_next);
+ lun->l_ctl_lun = -1;
return (lun);
}
@@ -1437,6 +1439,13 @@ lun_set_blocksize(struct lun *lun, size_t value)
}
void
+lun_set_device_type(struct lun *lun, uint8_t value)
+{
+
+ lun->l_device_type = value;
+}
+
+void
lun_set_device_id(struct lun *lun, const char *value)
{
free(lun->l_device_id);
@@ -1635,7 +1644,10 @@ conf_verify_lun(struct lun *lun)
}
}
if (lun->l_blocksize == 0) {
- lun_set_blocksize(lun, DEFAULT_BLOCKSIZE);
+ if (lun->l_device_type == 5)
+ lun_set_blocksize(lun, DEFAULT_CD_BLOCKSIZE);
+ else
+ lun_set_blocksize(lun, DEFAULT_BLOCKSIZE);
} else if (lun->l_blocksize < 0) {
log_warnx("invalid blocksize for lun \"%s\"; "
"must be larger than 0", lun->l_name);
@@ -1717,14 +1729,16 @@ conf_verify(struct conf *conf)
if (pg->pg_discovery_filter == PG_FILTER_UNKNOWN)
pg->pg_discovery_filter = PG_FILTER_NONE;
- if (!TAILQ_EMPTY(&pg->pg_ports)) {
- if (pg->pg_redirection != NULL) {
+ if (pg->pg_redirection != NULL) {
+ if (!TAILQ_EMPTY(&pg->pg_ports)) {
log_debugx("portal-group \"%s\" assigned "
"to target, but configured "
"for redirection",
pg->pg_name);
}
pg->pg_unassigned = false;
+ } else if (!TAILQ_EMPTY(&pg->pg_ports)) {
+ pg->pg_unassigned = false;
} else {
if (strcmp(pg->pg_name, "default") != 0)
log_warnx("portal-group \"%s\" not assigned "
@@ -1818,6 +1832,8 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
* Go through the new portal groups, assigning tags or preserving old.
*/
TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) {
+ if (newpg->pg_tag != 0)
+ continue;
oldpg = portal_group_find(oldconf, newpg->pg_name);
if (oldpg != NULL)
newpg->pg_tag = oldpg->pg_tag;
@@ -1847,8 +1863,10 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
* and missing in the new one.
*/
TAILQ_FOREACH_SAFE(oldport, &oldconf->conf_ports, p_next, tmpport) {
+ if (oldport->p_foreign)
+ continue;
newport = port_find(newconf, oldport->p_name);
- if (newport != NULL)
+ if (newport != NULL && !newport->p_foreign)
continue;
log_debugx("removing port \"%s\"", oldport->p_name);
error = kernel_port_remove(oldport);
@@ -1944,18 +1962,14 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
TAILQ_FOREACH_SAFE(newlun, &newconf->conf_luns, l_next, tmplun) {
oldlun = lun_find(oldconf, newlun->l_name);
if (oldlun != NULL) {
- if (newlun->l_size != oldlun->l_size ||
- newlun->l_size == 0) {
- log_debugx("resizing lun \"%s\", CTL lun %d",
+ log_debugx("modifying lun \"%s\", CTL lun %d",
+ newlun->l_name, newlun->l_ctl_lun);
+ error = kernel_lun_modify(newlun);
+ if (error != 0) {
+ log_warnx("failed to "
+ "modify lun \"%s\", CTL lun %d",
newlun->l_name, newlun->l_ctl_lun);
- error = kernel_lun_resize(newlun);
- if (error != 0) {
- log_warnx("failed to "
- "resize lun \"%s\", CTL lun %d",
- newlun->l_name,
- newlun->l_ctl_lun);
- cumulated_error++;
- }
+ cumulated_error++;
}
continue;
}
@@ -1972,15 +1986,17 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
* Now add new ports or modify existing ones.
*/
TAILQ_FOREACH(newport, &newconf->conf_ports, p_next) {
+ if (newport->p_foreign)
+ continue;
oldport = port_find(oldconf, newport->p_name);
- if (oldport == NULL) {
+ if (oldport == NULL || oldport->p_foreign) {
log_debugx("adding port \"%s\"", newport->p_name);
error = kernel_port_add(newport);
} else {
log_debugx("updating port \"%s\"", newport->p_name);
newport->p_ctl_port = oldport->p_ctl_port;
- error = kernel_port_update(newport);
+ error = kernel_port_update(newport, oldport);
}
if (error != 0) {
log_warnx("failed to %s port %s",
@@ -1998,6 +2014,8 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
* Go through the new portals, opening the sockets as neccessary.
*/
TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) {
+ if (newpg->pg_foreign)
+ continue;
if (newpg->pg_unassigned) {
log_debugx("not listening on portal-group \"%s\", "
"not assigned to any target",
diff --git a/usr.sbin/ctld/ctld.h b/usr.sbin/ctld/ctld.h
index f6db0cc..ef85e43 100644
--- a/usr.sbin/ctld/ctld.h
+++ b/usr.sbin/ctld/ctld.h
@@ -43,6 +43,7 @@
#define DEFAULT_CONFIG_PATH "/etc/ctl.conf"
#define DEFAULT_PIDFILE "/var/run/ctld.pid"
#define DEFAULT_BLOCKSIZE 512
+#define DEFAULT_CD_BLOCKSIZE 2048
#define MAX_LUNS 1024
#define MAX_NAME_LEN 223
@@ -116,6 +117,7 @@ struct portal_group {
char *pg_name;
struct auth_group *pg_discovery_auth_group;
int pg_discovery_filter;
+ int pg_foreign;
bool pg_unassigned;
TAILQ_HEAD(, portal) pg_portals;
TAILQ_HEAD(, port) pg_ports;
@@ -144,6 +146,7 @@ struct port {
struct portal_group *p_portal_group;
struct pport *p_pport;
struct target *p_target;
+ int p_foreign;
uint32_t p_ctl_port;
};
@@ -161,6 +164,7 @@ struct lun {
TAILQ_HEAD(, lun_option) l_options;
char *l_name;
char *l_backend;
+ uint8_t l_device_type;
int l_blocksize;
char *l_device_id;
char *l_path;
@@ -369,6 +373,7 @@ struct lun *lun_new(struct conf *conf, const char *name);
void lun_delete(struct lun *lun);
struct lun *lun_find(const struct conf *conf, const char *name);
void lun_set_backend(struct lun *lun, const char *value);
+void lun_set_device_type(struct lun *lun, uint8_t value);
void lun_set_blocksize(struct lun *lun, size_t value);
void lun_set_device_id(struct lun *lun, const char *value);
void lun_set_path(struct lun *lun, const char *value);
@@ -387,11 +392,11 @@ void lun_option_set(struct lun_option *clo,
void kernel_init(void);
int kernel_lun_add(struct lun *lun);
-int kernel_lun_resize(struct lun *lun);
+int kernel_lun_modify(struct lun *lun);
int kernel_lun_remove(struct lun *lun);
void kernel_handoff(struct connection *conn);
int kernel_port_add(struct port *port);
-int kernel_port_update(struct port *port);
+int kernel_port_update(struct port *port, struct port *old);
int kernel_port_remove(struct port *port);
void kernel_capsicate(void);
@@ -410,7 +415,6 @@ void keys_delete(struct keys *keys);
void keys_load(struct keys *keys, const struct pdu *pdu);
void keys_save(struct keys *keys, struct pdu *pdu);
const char *keys_find(struct keys *keys, const char *name);
-int keys_find_int(struct keys *keys, const char *name);
void keys_add(struct keys *keys,
const char *name, const char *value);
void keys_add_int(struct keys *keys,
diff --git a/usr.sbin/ctld/discovery.c b/usr.sbin/ctld/discovery.c
index 15eaa76..d7d843e 100644
--- a/usr.sbin/ctld/discovery.c
+++ b/usr.sbin/ctld/discovery.c
@@ -32,7 +32,6 @@
__FBSDID("$FreeBSD$");
#include <assert.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/usr.sbin/ctld/isns.c b/usr.sbin/ctld/isns.c
index 11eef3e0..f7381a1 100644
--- a/usr.sbin/ctld/isns.c
+++ b/usr.sbin/ctld/isns.c
@@ -35,14 +35,8 @@ __FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
#include <netdb.h>
-#include <signal.h>
#include <stdbool.h>
-#include <stdio.h>
-#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
diff --git a/usr.sbin/ctld/kernel.c b/usr.sbin/ctld/kernel.c
index cde4486..dea1917 100644
--- a/usr.sbin/ctld/kernel.c
+++ b/usr.sbin/ctld/kernel.c
@@ -60,10 +60,8 @@ __FBSDID("$FreeBSD$");
#include <cam/scsi/scsi_message.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_io.h>
-#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
-#include <cam/ctl/ctl_backend_block.h>
#include <cam/ctl/ctl_util.h>
#include <cam/ctl/ctl_scsi_all.h>
@@ -110,6 +108,7 @@ struct cctl_lun_nv {
struct cctl_lun {
uint64_t lun_id;
char *backend_type;
+ uint8_t device_type;
uint64_t size_blocks;
uint32_t blocksize;
char *serial_number;
@@ -121,6 +120,7 @@ struct cctl_lun {
struct cctl_port {
uint32_t port_id;
+ char *port_frontend;
char *port_name;
int pp;
int vp;
@@ -222,6 +222,8 @@ cctl_end_element(void *user_data, const char *name)
if (strcmp(name, "backend_type") == 0) {
cur_lun->backend_type = str;
str = NULL;
+ } else if (strcmp(name, "lun_type") == 0) {
+ cur_lun->device_type = strtoull(str, NULL, 0);
} else if (strcmp(name, "size") == 0) {
cur_lun->size_blocks = strtoull(str, NULL, 0);
} else if (strcmp(name, "blocksize") == 0) {
@@ -333,7 +335,10 @@ cctl_end_pelement(void *user_data, const char *name)
devlist->cur_sb[devlist->level] = NULL;
devlist->level--;
- if (strcmp(name, "port_name") == 0) {
+ if (strcmp(name, "frontend_type") == 0) {
+ cur_port->port_frontend = str;
+ str = NULL;
+ } else if (strcmp(name, "port_name") == 0) {
cur_port->port_name = str;
str = NULL;
} else if (strcmp(name, "physical_port") == 0) {
@@ -473,7 +478,7 @@ retry_port:
return (NULL);
}
- if (list.status == CTL_PORT_LIST_ERROR) {
+ if (list.status == CTL_LUN_LIST_ERROR) {
log_warnx("error returned from CTL_PORT_LIST ioctl: %s",
list.error_str);
free(str);
@@ -508,6 +513,8 @@ retry_port:
name = NULL;
STAILQ_FOREACH(port, &devlist.port_list, links) {
+ if (strcmp(port->port_frontend, "ha") == 0)
+ continue;
if (name)
free(name);
if (port->pp == 0 && port->vp == 0)
@@ -606,6 +613,7 @@ retry_port:
continue;
}
lun_set_backend(cl, lun->backend_type);
+ lun_set_device_type(cl, lun->device_type);
lun_set_blocksize(cl, lun->blocksize);
lun_set_device_id(cl, lun->device_id);
lun_set_serial(cl, lun->serial_number);
@@ -658,8 +666,13 @@ kernel_lun_add(struct lun *lun)
if (lun->l_size != 0)
req.reqdata.create.lun_size_bytes = lun->l_size;
+ if (lun->l_ctl_lun >= 0) {
+ req.reqdata.create.req_lun_id = lun->l_ctl_lun;
+ req.reqdata.create.flags |= CTL_LUN_FLAG_ID_REQ;
+ }
+
req.reqdata.create.flags |= CTL_LUN_FLAG_DEV_TYPE;
- req.reqdata.create.device_type = T_DIRECT;
+ req.reqdata.create.device_type = lun->l_device_type;
if (lun->l_serial != NULL) {
strncpy(req.reqdata.create.serial_num, lun->l_serial,
@@ -745,9 +758,11 @@ kernel_lun_add(struct lun *lun)
}
int
-kernel_lun_resize(struct lun *lun)
+kernel_lun_modify(struct lun *lun)
{
+ struct lun_option *lo;
struct ctl_lun_req req;
+ int error, i, num_options;
bzero(&req, sizeof(req));
@@ -757,7 +772,30 @@ kernel_lun_resize(struct lun *lun)
req.reqdata.modify.lun_id = lun->l_ctl_lun;
req.reqdata.modify.lun_size_bytes = lun->l_size;
- if (ioctl(ctl_fd, CTL_LUN_REQ, &req) == -1) {
+ num_options = 0;
+ TAILQ_FOREACH(lo, &lun->l_options, lo_next)
+ num_options++;
+
+ req.num_be_args = num_options;
+ if (num_options > 0) {
+ req.be_args = malloc(num_options * sizeof(*req.be_args));
+ if (req.be_args == NULL) {
+ log_warn("error allocating %zd bytes",
+ num_options * sizeof(*req.be_args));
+ return (1);
+ }
+
+ i = 0;
+ TAILQ_FOREACH(lo, &lun->l_options, lo_next) {
+ str_arg(&req.be_args[i], lo->lo_name, lo->lo_value);
+ i++;
+ }
+ assert(i == num_options);
+ }
+
+ error = ioctl(ctl_fd, CTL_LUN_REQ, &req);
+ free(req.be_args);
+ if (error != 0) {
log_warn("error issuing CTL_LUN_REQ ioctl");
return (1);
}
@@ -963,11 +1001,13 @@ kernel_port_add(struct port *port)
}
int
-kernel_port_update(struct port *port)
+kernel_port_update(struct port *port, struct port *oport)
{
struct ctl_lun_map lm;
struct target *targ = port->p_target;
+ struct target *otarg = oport->p_target;
int error, i;
+ uint32_t olun;
/* Map configured LUNs and unmap others */
for (i = 0; i < MAX_LUNS; i++) {
@@ -977,6 +1017,12 @@ kernel_port_update(struct port *port)
lm.lun = UINT32_MAX;
else
lm.lun = targ->t_luns[i]->l_ctl_lun;
+ if (otarg->t_luns[i] == NULL)
+ olun = UINT32_MAX;
+ else
+ olun = otarg->t_luns[i]->l_ctl_lun;
+ if (lm.lun == olun)
+ continue;
error = ioctl(ctl_fd, CTL_LUN_MAP, &lm);
if (error != 0)
log_warn("CTL_LUN_MAP ioctl failed");
diff --git a/usr.sbin/ctld/keys.c b/usr.sbin/ctld/keys.c
index 6a9ad02..f339a10 100644
--- a/usr.sbin/ctld/keys.c
+++ b/usr.sbin/ctld/keys.c
@@ -32,7 +32,6 @@
__FBSDID("$FreeBSD$");
#include <assert.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -161,26 +160,6 @@ keys_find(struct keys *keys, const char *name)
return (NULL);
}
-int
-keys_find_int(struct keys *keys, const char *name)
-{
- const char *str;
- char *endptr;
- int num;
-
- str = keys_find(keys, name);
- if (str == NULL)
- return (-1);
-
- num = strtoul(str, &endptr, 10);
- if (*endptr != '\0') {
- log_debugx("invalid numeric value \"%s\"", str);
- return (-1);
- }
-
- return (num);
-}
-
void
keys_add(struct keys *keys, const char *name, const char *value)
{
diff --git a/usr.sbin/ctld/login.c b/usr.sbin/ctld/login.c
index 50be604..6ef0e61 100644
--- a/usr.sbin/ctld/login.c
+++ b/usr.sbin/ctld/login.c
@@ -33,8 +33,6 @@ __FBSDID("$FreeBSD$");
#include <assert.h>
#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -603,6 +601,11 @@ login_negotiate_key(struct pdu *request, const char *name,
keys_add(response_keys, name, "No");
} else if (strcmp(name, "IFMarker") == 0) {
keys_add(response_keys, name, "No");
+ } else if (strcmp(name, "iSCSIProtocolLevel") == 0) {
+ tmp = strtoul(value, NULL, 10);
+ if (tmp > 2)
+ tmp = 2;
+ keys_add_int(response_keys, name, tmp);
} else {
log_debugx("unknown key \"%s\"; responding "
"with NotUnderstood", name);
diff --git a/usr.sbin/ctld/parse.y b/usr.sbin/ctld/parse.y
index af481fe..1e40dd8 100644
--- a/usr.sbin/ctld/parse.y
+++ b/usr.sbin/ctld/parse.y
@@ -35,7 +35,6 @@
#include <sys/stat.h>
#include <assert.h>
#include <stdio.h>
-#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@@ -58,11 +57,12 @@ extern void yyrestart(FILE *);
%}
%token ALIAS AUTH_GROUP AUTH_TYPE BACKEND BLOCKSIZE CHAP CHAP_MUTUAL
-%token CLOSING_BRACKET DEBUG DEVICE_ID DISCOVERY_AUTH_GROUP DISCOVERY_FILTER
+%token CLOSING_BRACKET CTL_LUN DEBUG DEVICE_ID DEVICE_TYPE
+%token DISCOVERY_AUTH_GROUP DISCOVERY_FILTER FOREIGN
%token INITIATOR_NAME INITIATOR_PORTAL ISNS_SERVER ISNS_PERIOD ISNS_TIMEOUT
%token LISTEN LISTEN_ISER LUN MAXPROC OPENING_BRACKET OPTION
%token PATH PIDFILE PORT PORTAL_GROUP REDIRECT SEMICOLON SERIAL SIZE STR
-%token TARGET TIMEOUT
+%token TAG TARGET TIMEOUT
%union
{
@@ -338,11 +338,15 @@ portal_group_entry:
|
portal_group_discovery_filter
|
+ portal_group_foreign
+ |
portal_group_listen
|
portal_group_listen_iser
|
portal_group_redirect
+ |
+ portal_group_tag
;
portal_group_discovery_auth_group: DISCOVERY_AUTH_GROUP STR
@@ -376,6 +380,13 @@ portal_group_discovery_filter: DISCOVERY_FILTER STR
}
;
+portal_group_foreign: FOREIGN
+ {
+
+ portal_group->pg_foreign = 1;
+ }
+ ;
+
portal_group_listen: LISTEN STR
{
int error;
@@ -409,6 +420,20 @@ portal_group_redirect: REDIRECT STR
}
;
+portal_group_tag: TAG STR
+ {
+ uint64_t tmp;
+
+ if (expand_number($2, &tmp) != 0) {
+ yyerror("invalid numeric value");
+ free($2);
+ return (1);
+ }
+
+ portal_group->pg_tag = tmp;
+ }
+ ;
+
lun: LUN lun_name
OPENING_BRACKET lun_entries CLOSING_BRACKET
{
@@ -761,6 +786,7 @@ target_lun: LUN lun_number
lun_number: STR
{
uint64_t tmp;
+ int ret;
char *name;
if (expand_number($1, &tmp) != 0) {
@@ -769,7 +795,9 @@ lun_number: STR
return (1);
}
- asprintf(&name, "%s,lun,%ju", target->t_name, tmp);
+ ret = asprintf(&name, "%s,lun,%ju", target->t_name, tmp);
+ if (ret <= 0)
+ log_err(1, "asprintf");
lun = lun_new(conf, name);
if (lun == NULL)
return (1);
@@ -814,6 +842,10 @@ lun_entry:
|
lun_device_id
|
+ lun_device_type
+ |
+ lun_ctl_lun
+ |
lun_option
|
lun_path
@@ -871,6 +903,51 @@ lun_device_id: DEVICE_ID STR
}
;
+lun_device_type: DEVICE_TYPE STR
+ {
+ uint64_t tmp;
+
+ if (strcasecmp($2, "disk") == 0 ||
+ strcasecmp($2, "direct") == 0)
+ tmp = 0;
+ else if (strcasecmp($2, "processor") == 0)
+ tmp = 3;
+ else if (strcasecmp($2, "cd") == 0 ||
+ strcasecmp($2, "cdrom") == 0 ||
+ strcasecmp($2, "dvd") == 0 ||
+ strcasecmp($2, "dvdrom") == 0)
+ tmp = 5;
+ else if (expand_number($2, &tmp) != 0 ||
+ tmp > 15) {
+ yyerror("invalid numeric value");
+ free($2);
+ return (1);
+ }
+
+ lun_set_device_type(lun, tmp);
+ }
+ ;
+
+lun_ctl_lun: CTL_LUN STR
+ {
+ uint64_t tmp;
+
+ if (expand_number($2, &tmp) != 0) {
+ yyerror("invalid numeric value");
+ free($2);
+ return (1);
+ }
+
+ if (lun->l_ctl_lun >= 0) {
+ log_warnx("ctl_lun for lun \"%s\" "
+ "specified more than once",
+ lun->l_name);
+ return (1);
+ }
+ lun_set_ctl_lun(lun, tmp);
+ }
+ ;
+
lun_option: OPTION STR STR
{
struct lun_option *clo;
diff --git a/usr.sbin/ctld/pdu.c b/usr.sbin/ctld/pdu.c
index c3181ac..be3598e 100644
--- a/usr.sbin/ctld/pdu.c
+++ b/usr.sbin/ctld/pdu.c
@@ -34,8 +34,6 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/uio.h>
#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
diff --git a/usr.sbin/ctld/token.l b/usr.sbin/ctld/token.l
index 6437d7b..e23385f 100644
--- a/usr.sbin/ctld/token.l
+++ b/usr.sbin/ctld/token.l
@@ -34,7 +34,6 @@
#include <stdint.h>
#include <string.h>
-#include "ctld.h"
#include "y.tab.h"
int lineno;
@@ -55,10 +54,13 @@ backend { return BACKEND; }
blocksize { return BLOCKSIZE; }
chap { return CHAP; }
chap-mutual { return CHAP_MUTUAL; }
+ctl-lun { return CTL_LUN; }
debug { return DEBUG; }
device-id { return DEVICE_ID; }
+device-type { return DEVICE_TYPE; }
discovery-auth-group { return DISCOVERY_AUTH_GROUP; }
discovery-filter { return DISCOVERY_FILTER; }
+foreign { return FOREIGN; }
initiator-name { return INITIATOR_NAME; }
initiator-portal { return INITIATOR_PORTAL; }
listen { return LISTEN; }
@@ -76,6 +78,7 @@ portal-group { return PORTAL_GROUP; }
redirect { return REDIRECT; }
serial { return SERIAL; }
size { return SIZE; }
+tag { return TAG; }
target { return TARGET; }
timeout { return TIMEOUT; }
\"[^"]+\" { yylval.str = strndup(yytext + 1,
diff --git a/usr.sbin/etcupdate/etcupdate.8 b/usr.sbin/etcupdate/etcupdate.8
index 104f379..0aff331 100644
--- a/usr.sbin/etcupdate/etcupdate.8
+++ b/usr.sbin/etcupdate/etcupdate.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 29, 2014
+.Dd September 29, 2015
.Dt ETCUPDATE 8
.Os
.Sh NAME
@@ -233,6 +233,16 @@ is changed,
is invoked if
.Pa /etc/mail/aliases
is changed,
+.Xr services_mkdb 8
+is invoked if
+.Pa /etc/services
+is changed,
+.Xr tzsetup 8
+is invoked if
+.Pa /etc/localtime
+is changed and if
+.Pa /var/db/zoneinfo
+exists,
and
.Pa /etc/rc.d/motd
is invoked if
@@ -843,7 +853,9 @@ but it has been removed in the destination directory.
.Xr make 1 ,
.Xr newaliases 1 ,
.Xr sh 1 ,
-.Xr pwd_mkdb 8
+.Xr pwd_mkdb 8 ,
+.Xr services_mkdb 8 ,
+.Xr tzsetup 8
.Sh HISTORY
The
.Nm
diff --git a/usr.sbin/gssd/gssd.c b/usr.sbin/gssd/gssd.c
index 9548b8c..11a633b 100644
--- a/usr.sbin/gssd/gssd.c
+++ b/usr.sbin/gssd/gssd.c
@@ -750,8 +750,8 @@ gssd_pname_to_uid_1_svc(pname_to_uid_args *argp, pname_to_uid_res *result, struc
buflen_hint = buflen;
}
if (pw) {
- int len = NGRPS;
- int groups[NGRPS];
+ int len = NGROUPS;
+ int groups[NGROUPS];
result->gid = pw->pw_gid;
getgrouplist(pw->pw_name, pw->pw_gid,
groups, &len);
diff --git a/usr.sbin/gstat/gstat.c b/usr.sbin/gstat/gstat.c
index d83ef79..8be3775 100644
--- a/usr.sbin/gstat/gstat.c
+++ b/usr.sbin/gstat/gstat.c
@@ -124,7 +124,7 @@ main(int argc, char **argv)
if (regcomp(&f_re, optarg, REG_EXTENDED) != 0)
errx(EX_USAGE,
"Invalid filter - see re_format(7)");
- strncpy(f_s, optarg, sizeof(f_s));
+ strlcpy(f_s, optarg, sizeof(f_s));
break;
case 'o':
flag_o = 1;
@@ -216,7 +216,7 @@ main(int argc, char **argv)
getyx(stdscr, cury, curx);
getmaxyx(stdscr, maxy, maxx);
}
- strncpy(pf_s, f_s, sizeof(pf_s));
+ strlcpy(pf_s, f_s, sizeof(pf_s));
max_flen = maxx - curx - 1;
if ((int)strlen(f_s) > max_flen && max_flen >= 0) {
if (max_flen > 3)
@@ -406,7 +406,7 @@ main(int argc, char **argv)
err(1, "el_gets");
if (line_len > 1)
history(hist, &hist_ev, H_ENTER, line);
- strncpy(tmp_f_s, line, sizeof(f_s));
+ strlcpy(tmp_f_s, line, sizeof(f_s));
if ((p = strchr(tmp_f_s, '\n')) != NULL)
*p = '\0';
/*
@@ -423,7 +423,7 @@ main(int argc, char **argv)
refresh();
sleep(1);
} else {
- strncpy(f_s, tmp_f_s, sizeof(f_s));
+ strlcpy(f_s, tmp_f_s, sizeof(f_s));
f_re = tmp_f_re;
}
break;
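Note: the strncpy() to strlcpy() conversions above close a truncation hazard: strncpy() does not NUL-terminate when the source is at least as long as the buffer, whereas strlcpy() always terminates (and truncates). A self-contained illustration, not from the sources:

	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		char a[4], b[4];

		strncpy(a, "abcdef", sizeof(a));	/* "abcd", no NUL */
		strlcpy(b, "abcdef", sizeof(b));	/* "abc" plus NUL */
		printf("%.4s %s\n", a, b);	/* a needs a bounded format */
		return (0);
	}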
diff --git a/usr.sbin/iscsid/iscsid.h b/usr.sbin/iscsid/iscsid.h
index 0ce1075..64a0340 100644
--- a/usr.sbin/iscsid/iscsid.h
+++ b/usr.sbin/iscsid/iscsid.h
@@ -115,7 +115,6 @@ void keys_delete(struct keys *key);
void keys_load(struct keys *keys, const struct pdu *pdu);
void keys_save(struct keys *keys, struct pdu *pdu);
const char *keys_find(struct keys *keys, const char *name);
-int keys_find_int(struct keys *keys, const char *name);
void keys_add(struct keys *keys,
const char *name, const char *value);
void keys_add_int(struct keys *keys,
diff --git a/usr.sbin/iscsid/keys.c b/usr.sbin/iscsid/keys.c
index bab1ac9..c4b478b 100644
--- a/usr.sbin/iscsid/keys.c
+++ b/usr.sbin/iscsid/keys.c
@@ -162,26 +162,6 @@ keys_find(struct keys *keys, const char *name)
return (NULL);
}
-int
-keys_find_int(struct keys *keys, const char *name)
-{
- const char *str;
- char *endptr;
- int num;
-
- str = keys_find(keys, name);
- if (str == NULL)
- return (-1);
-
- num = strtoul(str, &endptr, 10);
- if (*endptr != '\0') {
- log_debugx("invalid numeric value \"%s\"", str);
- return (-1);
- }
-
- return (num);
-}
-
void
keys_add(struct keys *keys, const char *name, const char *value)
{
diff --git a/usr.sbin/mergemaster/mergemaster.8 b/usr.sbin/mergemaster/mergemaster.8
index 1cff984..d939c41 100644
--- a/usr.sbin/mergemaster/mergemaster.8
+++ b/usr.sbin/mergemaster/mergemaster.8
@@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 1, 2011
+.Dd September 29, 2015
.Dt MERGEMASTER 8
.Os
.Sh NAME
@@ -257,14 +257,13 @@ Specify the path to the directory where you want to do the
.Xr make 1 .
(In other words, where your sources are, but -s was already
taken.)
-In previous versions of
+In older versions of
.Nm
-you needed to specify the path all the way to
-.Pa src/etc .
-Starting with r186678 you only need to specify the path to
-.Pa src .
+the path to
+.Pa src/etc
+was required.
.Nm
-will convert the path for you if you use the old method.
+will convert the path if this older method is used.
.It Fl t Ar /path/to/temp/root
Create the temporary root environment in
.Pa /path/to/temp/root
diff --git a/usr.sbin/ndiscvt/inf.c b/usr.sbin/ndiscvt/inf.c
index fe4db6a..4b30da0 100644
--- a/usr.sbin/ndiscvt/inf.c
+++ b/usr.sbin/ndiscvt/inf.c
@@ -887,6 +887,12 @@ regkey_add (const char *r)
void
push_word (const char *w)
{
+
+ if (idx == W_MAX) {
+ fprintf(stderr, "too many words; try bumping W_MAX in inf.h\n");
+ exit(1);
+ }
+
if (w && strlen(w))
words[idx++] = w;
else
diff --git a/usr.sbin/ndiscvt/inf.h b/usr.sbin/ndiscvt/inf.h
index 8d0b0c1..ba08d67 100644
--- a/usr.sbin/ndiscvt/inf.h
+++ b/usr.sbin/ndiscvt/inf.h
@@ -4,7 +4,7 @@
* $FreeBSD$
*/
-#define W_MAX 16
+#define W_MAX 32
struct section {
const char * name;
diff --git a/usr.sbin/ntp/ntpdc/Makefile b/usr.sbin/ntp/ntpdc/Makefile
index 88c03d6..051d697 100644
--- a/usr.sbin/ntp/ntpdc/Makefile
+++ b/usr.sbin/ntp/ntpdc/Makefile
@@ -37,7 +37,4 @@ CLEANFILES+= .version version.c
version.c:
sh -e ${.CURDIR}/../scripts/mkver ntpdc
-afterinstall:
- rm -f ${DESTDIR}/usr/sbin/xntpdc
-
.include <bsd.prog.mk>
diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8
index af54cea..7de335d 100644
--- a/usr.sbin/pmcstat/pmcstat.8
+++ b/usr.sbin/pmcstat/pmcstat.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd May 8, 2015
+.Dd May 27, 2015
.Dt PMCSTAT 8
.Os
.Sh NAME
@@ -236,7 +236,7 @@ This option requires the
.Fl R
option to read in samples that were previously collected and
saved with the
-.Fl o
+.Fl O
option.
.It Fl c Ar cpu-spec
Set the cpus for subsequent system mode PMCs specified on the
@@ -246,8 +246,8 @@ Argument
.Ar cpu-spec
is a comma separated list of CPU numbers, or the literal
.Sq *
-denoting all unhalted CPUs.
-The default is to allocate system mode PMCs on all unhalted
+denoting all available CPUs.
+The default is to allocate system mode PMCs on all available
CPUs.
.It Fl d
Toggle between process mode PMCs measuring events for the target
@@ -302,6 +302,12 @@ is a
this information is sent to the output file specified by the
.Fl o
option.
+This option requires the
+.Fl R
+option to read in samples that were previously collected and
+saved with the
+.Fl O
+option.
.It Fl n Ar rate
Set the default sampling rate for subsequent sampling mode
PMCs specified on the command line.
diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c
index 05dffaa..5ae53aa 100644
--- a/usr.sbin/pmcstat/pmcstat.c
+++ b/usr.sbin/pmcstat/pmcstat.c
@@ -116,11 +116,10 @@ struct pmcstat_args args;
static void
pmcstat_clone_event_descriptor(struct pmcstat_ev *ev, const cpuset_t *cpumask)
{
- int cpu, mcpu;
+ int cpu;
struct pmcstat_ev *ev_clone;
- mcpu = sizeof(*cpumask) * NBBY;
- for (cpu = 0; cpu < mcpu; cpu++) {
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
if (!CPU_ISSET(cpu, cpumask))
continue;
@@ -161,6 +160,7 @@ pmcstat_get_cpumask(const char *cpuspec, cpuset_t *cpumask)
CPU_SET(cpu, cpumask);
s = end + strspn(end, ", \t");
} while (*s);
+ assert(!CPU_EMPTY(cpumask));
}
void
@@ -550,10 +550,10 @@ pmcstat_topexit(void)
int
main(int argc, char **argv)
{
- cpuset_t cpumask;
+ cpuset_t cpumask, rootmask;
double interval;
double duration;
- int hcpu, option, npmc, ncpu;
+ int option, npmc;
int c, check_driver_stats, current_sampling_count;
int do_callchain, do_descendants, do_logproccsw, do_logprocexit;
int do_print, do_read;
@@ -618,14 +618,13 @@ main(int argc, char **argv)
err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
/*
- * The initial CPU mask specifies all non-halted CPUS in the
- * system.
+ * The initial CPU mask specifies the root mask of this process
+ * which is usually all CPUs in the system.
*/
- len = sizeof(int);
- if (sysctlbyname("hw.ncpu", &ncpu, &len, NULL, 0) < 0)
- err(EX_OSERR, "ERROR: Cannot determine the number of CPUs");
- for (hcpu = 0; hcpu < ncpu; hcpu++)
- CPU_SET(hcpu, &cpumask);
+ if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+ sizeof(rootmask), &rootmask) == -1)
+ err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
+ CPU_COPY(&rootmask, &cpumask);
while ((option = getopt(argc, argv,
"CD:EF:G:M:NO:P:R:S:TWa:c:df:gk:l:m:n:o:p:qr:s:t:vw:z:")) != -1)
@@ -642,11 +641,9 @@ main(int argc, char **argv)
break;
case 'c': /* CPU */
-
- if (optarg[0] == '*' && optarg[1] == '\0') {
- for (hcpu = 0; hcpu < ncpu; hcpu++)
- CPU_SET(hcpu, &cpumask);
- } else
+ if (optarg[0] == '*' && optarg[1] == '\0')
+ CPU_COPY(&rootmask, &cpumask);
+ else
pmcstat_get_cpumask(optarg, &cpumask);
args.pa_flags |= FLAGS_HAS_CPUMASK;
@@ -771,13 +768,9 @@ main(int argc, char **argv)
else
ev->ev_count = -1;
- if (option == 'S' || option == 's') {
- hcpu = sizeof(cpumask) * NBBY;
- for (hcpu--; hcpu >= 0; hcpu--)
- if (CPU_ISSET(hcpu, &cpumask))
- break;
- ev->ev_cpu = hcpu;
- } else
+ if (option == 'S' || option == 's')
+ ev->ev_cpu = CPU_FFS(&cpumask) - 1;
+ else
ev->ev_cpu = PMC_CPU_ANY;
ev->ev_flags = 0;
@@ -804,11 +797,9 @@ main(int argc, char **argv)
STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next);
if (option == 's' || option == 'S') {
- hcpu = CPU_ISSET(ev->ev_cpu, &cpumask);
CPU_CLR(ev->ev_cpu, &cpumask);
pmcstat_clone_event_descriptor(ev, &cpumask);
- if (hcpu != 0)
- CPU_SET(ev->ev_cpu, &cpumask);
+ CPU_SET(ev->ev_cpu, &cpumask);
}
break;
@@ -947,7 +938,7 @@ main(int argc, char **argv)
errx(EX_USAGE, "ERROR: options -T and -l are mutually "
"exclusive.");
- /* -m option is allowed with -R only. */
+ /* -a and -m require -R */
if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_inputpath == NULL)
errx(EX_USAGE, "ERROR: option %s requires an input file",
args.pa_plugin == PMCSTAT_PL_ANNOTATE ? "-m" : "-a");
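Note: pmcstat previously sized its CPU mask from hw.ncpu, which assumed CPUs are numbered densely from zero; the new code asks the kernel for the root cpuset of the process instead, and picks a default sampling CPU with CPU_FFS(), which returns the 1-based index of the lowest set CPU (0 when the set is empty). A minimal sketch of the call as used above:

	#include <sys/param.h>
	#include <sys/cpuset.h>
	#include <err.h>

	cpuset_t rootmask;

	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    sizeof(rootmask), &rootmask) == -1)
		err(1, "cannot determine the root set of CPUs");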
diff --git a/usr.sbin/rpcbind/rpcb_svc_com.c b/usr.sbin/rpcbind/rpcb_svc_com.c
index f90dc59..72fea28 100644
--- a/usr.sbin/rpcbind/rpcb_svc_com.c
+++ b/usr.sbin/rpcbind/rpcb_svc_com.c
@@ -48,6 +48,7 @@
#include <rpc/rpc.h>
#include <rpc/rpcb_prot.h>
#include <rpc/svc_dg.h>
+#include <assert.h>
#include <netconfig.h>
#include <errno.h>
#include <syslog.h>
@@ -1048,19 +1049,34 @@ netbufcmp(struct netbuf *n1, struct netbuf *n2)
return ((n1->len != n2->len) || memcmp(n1->buf, n2->buf, n1->len));
}
+static bool_t
+netbuf_copybuf(struct netbuf *dst, const struct netbuf *src)
+{
+
+ if (dst->len != src->len || dst->buf == NULL) {
+ if (dst->buf != NULL)
+ free(dst->buf);
+ if ((dst->buf = malloc(src->len)) == NULL)
+ return (FALSE);
+
+ dst->maxlen = dst->len = src->len;
+ }
+
+ memcpy(dst->buf, src->buf, src->len);
+ return (TRUE);
+}
+
static struct netbuf *
netbufdup(struct netbuf *ap)
{
struct netbuf *np;
- if ((np = malloc(sizeof(struct netbuf))) == NULL)
+ if ((np = calloc(1, sizeof(struct netbuf))) == NULL)
return (NULL);
- if ((np->buf = malloc(ap->len)) == NULL) {
+ if (netbuf_copybuf(np, ap) == FALSE) {
free(np);
return (NULL);
}
- np->maxlen = np->len = ap->len;
- memcpy(np->buf, ap->buf, ap->len);
return (np);
}
@@ -1068,6 +1084,7 @@ static void
netbuffree(struct netbuf *ap)
{
free(ap->buf);
+ ap->buf = NULL;
free(ap);
}
@@ -1185,7 +1202,7 @@ xprt_set_caller(SVCXPRT *xprt, struct finfo *fi)
{
u_int32_t *xidp;
- *(svc_getrpccaller(xprt)) = *(fi->caller_addr);
+ netbuf_copybuf(svc_getrpccaller(xprt), fi->caller_addr);
xidp = __rpcb_get_dg_xidp(xprt);
*xidp = fi->caller_xid;
}
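Note: the rpcbind change is more than a refactor: the old xprt_set_caller() copied struct netbuf by assignment, so the transport's caller address and fi->caller_addr shared one buf allocation, and freeing either side left the other dangling. netbuf_copybuf() gives the destination its own buffer. A sketch of the aliasing the struct copy produced (illustrative, not from the sources):

	struct netbuf a, b;

	/* assume a.buf was malloc()ed and a.len set */
	b = a;		/* struct copy: b.buf == a.buf, two owners */
	free(b.buf);	/* a.buf now dangles; netbuf_copybuf() avoids this */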
diff --git a/usr.sbin/sesutil/Makefile b/usr.sbin/sesutil/Makefile
new file mode 100644
index 0000000..39ca86b
--- /dev/null
+++ b/usr.sbin/sesutil/Makefile
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+PROG= sesutil
+MAN= sesutil.8
+
+.include <bsd.prog.mk>
diff --git a/usr.sbin/sesutil/sesutil.8 b/usr.sbin/sesutil/sesutil.8
new file mode 100644
index 0000000..8c64922
--- /dev/null
+++ b/usr.sbin/sesutil/sesutil.8
@@ -0,0 +1,73 @@
+.\" Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 1, 2015
+.Dt SESUTIL 8
+.Os
+.Sh NAME
+.Nm sesutil
+.Nd Utility for managing SCSI Enclosure Services (SES) devices
+.Sh SYNOPSIS
+.Nm
+.Cm locate Ar disk Bq on|off
+.Sh DESCRIPTION
+The
+.Nm
+utility can be used to modify various parameters of SCSI Enclosure
+Services (SES) devices.
+.Pp
+List of supported commands:
+.Bl -tag -width indent
+.It Cm locate Ar disk Bq on|off
+Change the state of the external LED associated with
+.Ar disk .
+.Ar disk
+can be the device name of the disk, like
+.Cm da12 ,
+or
+.Cm all
+to indicate all disks attached to SES controllers.
+.El
+.Sh EXAMPLES
+Turn off all external LEDs:
+.Pp
+.Dl Nm Cm locate all off
+.Pp
+Turn on the external LED of drive
+.Pa da15 :
+.Pp
+.Dl Nm Cm locate da15 on
+.Sh SEE ALSO
+.Xr ses 4
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 11.0 .
+.Sh AUTHORS
+The
+.Nm
+utility was written by
+.An Baptiste Daroussin Aq Mt bapt@FreeBSD.org .
diff --git a/usr.sbin/sesutil/sesutil.c b/usr.sbin/sesutil/sesutil.c
new file mode 100644
index 0000000..1b38143
--- /dev/null
+++ b/usr.sbin/sesutil/sesutil.c
@@ -0,0 +1,224 @@
+/*-
+ * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_enc.h>
+
+static int locate(int argc, char **argv);
+
+static struct command {
+ const char *name;
+ const char *desc;
+ int (*exec)(int argc, char **argv);
+} cmds[] = {
+ { "locate", "Change the state of the external LED associated with a"
+	    " disk", locate },
+};
+
+static const int nbcmds = nitems(cmds);
+
+static void
+do_locate(int fd, unsigned int idx, bool onoff)
+{
+ encioc_elm_status_t o;
+
+ o.elm_idx = idx;
+ if (ioctl(fd, ENCIOC_GETELMSTAT, (caddr_t) &o) < 0) {
+ close(fd);
+ err(EXIT_FAILURE, "ENCIOC_GETELMSTAT");
+ }
+ o.cstat[0] |= 0x80;
+ if (onoff)
+ o.cstat[2] |= 0x02;
+ else
+ o.cstat[2] &= 0xfd;
+
+ if (ioctl(fd, ENCIOC_SETELMSTAT, (caddr_t) &o) < 0) {
+ close(fd);
+ err(EXIT_FAILURE, "ENCIOC_SETELMSTAT");
+ }
+}
+
+static bool
+disk_match(const char *devnames, const char *disk, size_t len)
+{
+ const char *dname;
+
+ dname = devnames;
+ while ((dname = strstr(dname, disk)) != NULL) {
+ if (dname[len] == '\0' || dname[len] == ',')
+ return (true);
+ dname++;
+ }
+ return (false);
+}
+
+static int
+locate(int argc, char **argv)
+{
+ encioc_elm_devnames_t objdn;
+ encioc_element_t *objp;
+ glob_t g;
+ char *disk;
+ size_t len, i;
+ int fd, nobj, j;
+ bool all = false;
+ bool onoff;
+
+ if (argc != 2) {
+		errx(EXIT_FAILURE, "usage: %s locate disk [on|off]",
+ getprogname());
+ }
+
+ disk = argv[0];
+
+ if (strcmp(argv[1], "on") == 0) {
+ onoff = true;
+ } else if (strcmp(argv[1], "off") == 0) {
+ onoff = false;
+ } else {
+		errx(EXIT_FAILURE, "usage: %s locate disk [on|off]",
+ getprogname());
+ }
+
+ if (strcmp(disk, "all") == 0) {
+ all = true;
+ }
+ len = strlen(disk);
+
+ /* Get the list of ses devices */
+ if (glob("/dev/ses[0-9]*", 0, NULL, &g) == GLOB_NOMATCH) {
+ globfree(&g);
+ errx(EXIT_FAILURE, "No SES devices found");
+ }
+ for (i = 0; i < g.gl_pathc; i++) {
+ /* ensure we only got numbers after ses */
+ if (strspn(g.gl_pathv[i] + 8, "0123456789") !=
+ strlen(g.gl_pathv[i] + 8))
+ continue;
+ if ((fd = open(g.gl_pathv[i], O_RDWR)) < 0) {
+ if (errno == EACCES)
+				err(EXIT_FAILURE, "unable to access SES device");
+ break;
+ }
+
+ if (ioctl(fd, ENCIOC_GETNELM, (caddr_t) &nobj) < 0)
+ err(EXIT_FAILURE, "ENCIOC_GETNELM");
+
+ objp = calloc(nobj, sizeof(encioc_element_t));
+ if (objp == NULL)
+ err(EXIT_FAILURE, "calloc()");
+
+ if (ioctl(fd, ENCIOC_GETELMMAP, (caddr_t) objp) < 0)
+ err(EXIT_FAILURE, "ENCIOC_GETELMMAP");
+
+ for (j = 0; j < nobj; j++) {
+ memset(&objdn, 0, sizeof(objdn));
+ objdn.elm_idx = objp[j].elm_idx;
+ objdn.elm_names_size = 128;
+ objdn.elm_devnames = calloc(128, sizeof(char));
+ if (objdn.elm_devnames == NULL)
+ err(EXIT_FAILURE, "calloc()");
+ if (ioctl(fd, ENCIOC_GETELMDEVNAMES,
+			    (caddr_t) &objdn) < 0)
+ continue;
+ if (objdn.elm_names_len > 0) {
+ if (all) {
+ do_locate(fd, objdn.elm_idx, onoff);
+ continue;
+ }
+ if (disk_match(objdn.elm_devnames, disk, len)) {
+ do_locate(fd, objdn.elm_idx, onoff);
+ break;
+ }
+ }
+ }
+ close(fd);
+ }
+ globfree(&g);
+
+ return (EXIT_SUCCESS);
+}
+
+static void
+usage(FILE *out)
+{
+ int i;
+
+ fprintf(out, "Usage: %s [command] [options]\n", getprogname());
+ fprintf(out, "Commands supported:\n");
+ for (i = 0; i < nbcmds; i++)
+ fprintf(out, "\t%-15s%s\n", cmds[i].name, cmds[i].desc);
+}
+
+int
+main(int argc, char **argv)
+{
+ int i;
+ struct command *cmd = NULL;
+
+ if (argc < 2) {
+ warnx("Missing command");
+ usage(stderr);
+ return (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < nbcmds; i++) {
+ if (strcmp(argv[1], cmds[i].name) == 0) {
+ cmd = &cmds[i];
+ break;
+ }
+ }
+
+ if (cmd == NULL) {
+ warnx("unknown command %s", argv[1]);
+ usage(stderr);
+ return (EXIT_FAILURE);
+ }
+
+	argc -= 2;
+	argv += 2;
+
+ return (cmd->exec(argc, argv));
+}
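
A note on the magic numbers in do_locate() above: cstat[] holds the SES
control-element bytes, where bit 0x80 in byte 0 is SELECT (apply this
control) and bit 0x02 in byte 2 requests the IDENT (locate) LED of a
device slot.  A standalone sketch with the bits named (the constant
names are illustrative, not taken from <cam/scsi/scsi_enc.h>):

#include <stdio.h>

#define SES_CSEL	0x80	/* byte 0: SELECT, apply this control */
#define SES_RQST_IDENT	0x02	/* byte 2: request the locate LED */

static void
set_ident(unsigned char cstat[4], int onoff)
{

	cstat[0] |= SES_CSEL;
	if (onoff)
		cstat[2] |= SES_RQST_IDENT;
	else
		cstat[2] &= ~SES_RQST_IDENT;	/* same as &= 0xfd */
}

int
main(void)
{
	unsigned char cstat[4] = { 0, 0, 0, 0 };

	set_ident(cstat, 1);
	printf("on:  cstat[0]=%#x cstat[2]=%#x\n", cstat[0], cstat[2]);
	set_ident(cstat, 0);
	printf("off: cstat[0]=%#x cstat[2]=%#x\n", cstat[0], cstat[2]);
	return (0);
}
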
diff --git a/usr.sbin/sysrc/Makefile b/usr.sbin/sysrc/Makefile
index ca3dc56..1ace38a 100644
--- a/usr.sbin/sysrc/Makefile
+++ b/usr.sbin/sysrc/Makefile
@@ -4,8 +4,4 @@ SCRIPTS= sysrc
MAN= sysrc.8
-beforeinstall:
- mkdir -p ${DESTDIR}${SCRIPTSDIR}
- mkdir -p ${DESTDIR}${MANDIR}8
-
.include <bsd.prog.mk>