summaryrefslogtreecommitdiffstats
path: root/sbin/raidctl
diff options
context:
space:
mode:
authorscottl <scottl@FreeBSD.org>2002-10-20 08:17:39 +0000
committerscottl <scottl@FreeBSD.org>2002-10-20 08:17:39 +0000
commit710948de69ddeae56bda663219319f6d859aea1f (patch)
tree71c65823ba2e8591de708d5cb2e990a75135ee11 /sbin/raidctl
parent63bd46464d6d4587c20c1ca62fb6a6e3be132db9 (diff)
downloadFreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.zip
FreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.tar.gz
After much delay and anticipation, welcome RAIDFrame into the FreeBSD
world. This should be considered highly experimental. Approved-by: re
Diffstat (limited to 'sbin/raidctl')
-rw-r--r--sbin/raidctl/Makefile14
-rw-r--r--sbin/raidctl/raidctl.81325
-rw-r--r--sbin/raidctl/raidctl.c1110
-rw-r--r--sbin/raidctl/rf_configure.c583
4 files changed, 3032 insertions, 0 deletions
diff --git a/sbin/raidctl/Makefile b/sbin/raidctl/Makefile
new file mode 100644
index 0000000..0705eab
--- /dev/null
+++ b/sbin/raidctl/Makefile
@@ -0,0 +1,14 @@
+# $FreeBSD$
+# $NetBSD: Makefile,v 1.7 2000/05/23 00:46:53 thorpej Exp $
+PROG= raidctl
+SRCS= rf_configure.c raidctl.c
+MAN8= raidctl.8
+
+LOOKHERE = ${.CURDIR}/../../sys
+
+CFLAGS+= -DRF_UTILITY=1 -I${LOOKHERE}
+
+DPADD= ${LIBUTIL}
+LDADD= -lutil
+
+.include <bsd.prog.mk>
diff --git a/sbin/raidctl/raidctl.8 b/sbin/raidctl/raidctl.8
new file mode 100644
index 0000000..9aef14f
--- /dev/null
+++ b/sbin/raidctl/raidctl.8
@@ -0,0 +1,1325 @@
+.\" $FreeBSD$
+.\" $NetBSD: raidctl.8,v 1.21 2000/08/10 15:14:14 oster Exp $
+.\"
+.\" Copyright (c) 1998 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Greg Oster
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the NetBSD
+.\" Foundation, Inc. and its contributors.
+.\" 4. Neither the name of The NetBSD Foundation nor the names of its
+.\" contributors may be used to endorse or promote products derived
+.\" from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\"
+.\" Copyright (c) 1995 Carnegie-Mellon University.
+.\" All rights reserved.
+.\"
+.\" Author: Mark Holland
+.\"
+.\" Permission to use, copy, modify and distribute this software and
+.\" its documentation is hereby granted, provided that both the copyright
+.\" notice and this permission notice appear in all copies of the
+.\" software, derivative works or modified versions, and any portions
+.\" thereof, and that both notices appear in supporting documentation.
+.\"
+.\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+.\" CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+.\" FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+.\"
+.\" Carnegie Mellon requests users of this software to return to
+.\"
+.\" Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+.\" School of Computer Science
+.\" Carnegie Mellon University
+.\" Pittsburgh PA 15213-3890
+.\"
+.\" any improvements or extensions that they make and grant Carnegie the
+.\" rights to redistribute these changes.
+.\"
+.Dd November 6, 1998
+.Dt RAIDCTL 8
+.Os FreeBSD
+.Sh NAME
+.Nm raidctl
+.Nd configuration utility for the RAIDframe disk driver
+.Sh SYNOPSIS
+.Nm
+.Op Fl v
+.Fl a Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl A Op yes | no | root
+.Ar dev
+.Nm
+.Op Fl v
+.Fl B Ar dev
+.Nm
+.Op Fl v
+.Fl c Ar config_file
+.Nm
+.Op Fl v
+.Fl C Ar config_file
+.Nm
+.Op Fl v
+.Fl f Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl F Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl g Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl i Ar dev
+.Nm
+.Op Fl v
+.Fl I Ar serial_number Ar dev
+.Nm
+.Op Fl v
+.Fl p Ar dev
+.Nm
+.Op Fl v
+.Fl P Ar dev
+.Nm
+.Op Fl v
+.Fl r Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl R Ar component Ar dev
+.Nm
+.Op Fl v
+.Fl s Ar dev
+.Nm
+.Op Fl v
+.Fl S Ar dev
+.Nm
+.Op Fl v
+.Fl u Ar dev
+.Sh DESCRIPTION
+.Nm
+is the user-land control program for
+.Xr raid 4 ,
+the RAIDframe disk device.
+.Nm
+is primarily used to dynamically configure and unconfigure RAIDframe disk
+devices. For more information about the RAIDframe disk device, see
+.Xr raid 4 .
+.Pp
+This document assumes the reader has at least rudimentary knowledge of
+RAID and RAID concepts.
+.Pp
+The command-line options for
+.Nm
+are as follows:
+.Bl -tag -width indent
+.It Fl a Ar component Ar dev
+Add
+.Ar component
+as a hot spare for the device
+.Ar dev .
+.It Fl A Ic yes Ar dev
+Make the RAID set auto-configurable. The RAID set will be
+automatically configured at boot
+.Ar before
+the root filesystem is
+mounted. Note that all components of the set must be of type RAID in the
+disklabel.
+.It Fl A Ic no Ar dev
+Turn off auto-configuration for the RAID set.
+.It Fl A Ic root Ar dev
+Make the RAID set auto-configurable, and also mark the set as being
+eligible to be the root partition. A RAID set configured this way
+will
+.Ar override
+the use of the boot disk as the root device. All components of the
+set must be of type RAID in the disklabel. Note that the kernel being
+booted must currently reside on a non-RAID set.
+.It Fl B Ar dev
+Initiate a copyback of reconstructed data from a spare disk to
+its original disk. This is performed after a component has failed,
+and the failed drive has been reconstructed onto a spare drive.
+.It Fl c Ar config_file
+Configure a RAIDframe device
+according to the configuration given in
+.Ar config_file .
+A description of the contents of
+.Ar config_file
+is given later.
+.It Fl C Ar config_file
+As for
+.Fl c ,
+but forces the configuration to take place. This is required the
+first time a RAID set is configured.
+.It Fl f Ar component Ar dev
+This marks the specified
+.Ar component
+as having failed, but does not initiate a reconstruction of that
+component.
+.It Fl F Ar component Ar dev
+Fails the specified
+.Ar component
+of the device, and immediately begins a reconstruction of the failed
+disk onto an available hot spare. This is one of the mechanisms used to start
+the reconstruction process if a component does have a hardware failure.
+.It Fl g Ar component Ar dev
+Get the component label for the specified component.
+.It Fl i Ar dev
+Initialize the RAID device. In particular, (re-write) the parity on
+the selected device. This
+.Ar MUST
+be done for
+.Ar all
+RAID sets before the RAID device is labeled and before
+filesystems are created on the RAID device.
+.It Fl I Ar serial_number Ar dev
+Initialize the component labels on each component of the device.
+.Ar serial_number
+is used as one of the keys in determining whether a
+particular set of components belongs to the same RAID set. While not
+strictly enforced, different serial numbers should be used for
+different RAID sets. This step
+.Ar MUST
+be performed when a new RAID set is created.
+.It Fl p Ar dev
+Check the status of the parity on the RAID set. Displays a status
+message, and returns successfully if the parity is up-to-date.
+.It Fl P Ar dev
+Check the status of the parity on the RAID set, and initialize
+(re-write) the parity if the parity is not known to be up-to-date.
+This is normally used after a system crash (and before a
+.Xr fsck 8 )
+to ensure the integrity of the parity.
+.It Fl r Ar component Ar dev
+Remove the spare disk specified by
+.Ar component
+from the set of available spare components.
+.It Fl R Ar component Ar dev
+Fails the specified
+.Ar component ,
+if necessary, and immediately begins a reconstruction back to
+.Ar component .
+This is useful for reconstructing back onto a component after
+it has been replaced following a failure.
+.It Fl s Ar dev
+Display the status of the RAIDframe device for each of the components
+and spares.
+.It Fl S Ar dev
+Check the status of parity re-writing, component reconstruction, and
+component copyback. The output indicates the amount of progress
+achieved in each of these areas.
+.It Fl u Ar dev
+Unconfigure the RAIDframe device.
+.It Fl v
+Be more verbose. For operations such as reconstructions, parity
+re-writing, and copybacks, provide a progress indicator.
+.El
+.Pp
+The device used by
+.Nm
+is specified by
+.Ar dev .
+.Ar dev
+may be either the full name of the device, e.g. /dev/rraid0d,
+for the i386 architecture, and /dev/rraid0c
+for all others, or just simply raid0 (for /dev/rraid0d).
+.Pp
+The format of the configuration file is complex, and
+only an abbreviated treatment is given here. In the configuration
+files, a
+.Sq #
+indicates the beginning of a comment.
+.Pp
+There are 4 required sections of a configuration file, and 2
+optional sections. Each section begins with a
+.Sq START ,
+followed by
+the section name, and the configuration parameters associated with that
+section. The first section is the
+.Sq array
+section, and it specifies
+the number of rows, columns, and spare disks in the RAID set. For
+example:
+.Bd -unfilled -offset indent
+START array
+1 3 0
+.Ed
+.Pp
+indicates an array with 1 row, 3 columns, and 0 spare disks. Note
+that although multi-dimensional arrays may be specified, they are
+.Ar NOT
+supported in the driver.
+.Pp
+The second section, the
+.Sq disks
+section, specifies the actual
+components of the device. For example:
+.Bd -unfilled -offset indent
+START disks
+/dev/da0s1e
+/dev/da1s1e
+/dev/da2s1e
+.Ed
+.Pp
+specifies the three component disks to be used in the RAID device. If
+any of the specified drives cannot be found when the RAID device is
+configured, then they will be marked as
+.Sq failed ,
+and the system will
+operate in degraded mode. Note that it is
+.Ar imperative
+that the order of the components in the configuration file does not
+change between configurations of a RAID device. Changing the order
+of the components will result in data loss if the set is configured
+with the
+.Fl C
+option. In normal circumstances, the RAID set will not configure if
+only
+.Fl c
+is specified, and the components are out-of-order.
+.Pp
+The next section, which is the
+.Sq spare
+section, is optional, and, if
+present, specifies the devices to be used as
+.Sq hot spares
+-- devices
+which are on-line, but are not actively used by the RAID driver unless
+one of the main components fails. A simple
+.Sq spare
+section might be:
+.Bd -unfilled -offset indent
+START spare
+/dev/da3s1e
+.Ed
+.Pp
+for a configuration with a single spare component. If no spare drives
+are to be used in the configuration, then the
+.Sq spare
+section may be omitted.
+.Pp
+The next section is the
+.Sq layout
+section. This section describes the
+general layout parameters for the RAID device, and provides such
+information as sectors per stripe unit, stripe units per parity unit,
+stripe units per reconstruction unit, and the parity configuration to
+use. This section might look like:
+.Bd -unfilled -offset indent
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level
+32 1 1 5
+.Ed
+.Pp
+The sectors per stripe unit specifies, in blocks, the interleave
+factor; i.e. the number of contiguous sectors to be written to each
+component for a single stripe. Appropriate selection of this value
+(32 in this example) is the subject of much research in RAID
+architectures. The stripe units per parity unit and
+stripe units per reconstruction unit are normally each set to 1.
+While certain values above 1 are permitted, a discussion of valid
+values and the consequences of using anything other than 1 is outside
+the scope of this document. The last value in this section (5 in this
+example) indicates the parity configuration desired. Valid entries
+include:
+.Bl -tag -width inde
+.It 0
+RAID level 0. No parity, only simple striping.
+.It 1
+RAID level 1. Mirroring. The parity is the mirror.
+.It 4
+RAID level 4. Striping across components, with parity stored on the
+last component.
+.It 5
+RAID level 5. Striping across components, parity distributed across
+all components.
+.El
+.Pp
+There are other valid entries here, including those for Even-Odd
+parity, RAID level 5 with rotated sparing, Chained declustering,
+and Interleaved declustering, but as of this writing the code for
+those parity operations has not been tested with
+.Fx .
+.Pp
+The next required section is the
+.Sq queue
+section. This is most often
+specified as:
+.Bd -unfilled -offset indent
+START queue
+fifo 100
+.Ed
+.Pp
+where the queuing method is specified as fifo (first-in, first-out),
+and the size of the per-component queue is limited to 100 requests.
+Other queuing methods may also be specified, but a discussion of them
+is beyond the scope of this document.
+.Pp
+The final section, the
+.Sq debug
+section, is optional. For more details
+on this the reader is referred to the RAIDframe documentation
+discussed in the
+.Sx HISTORY
+section.
+
+See
+.Sx EXAMPLES
+for a more complete configuration file example.
+
+.Sh EXAMPLES
+
+It is highly recommended that before using the RAID driver for real
+filesystems that the system administrator(s) become quite familiar
+with the use of
+.Nm ,
+and that they understand how the component reconstruction process
+works. The examples in this section will focus on configuring a
+number of different RAID sets of varying degrees of redundancy.
+By working through these examples, administrators should be able to
+develop a good feel for how to configure a RAID set, and how to
+initiate reconstruction of failed components.
+.Pp
+In the following examples
+.Sq raid0
+will be used to denote the RAID device. Depending on the
+architecture,
+.Sq /dev/rraid0c
+or
+.Sq /dev/rraid0d
+may be used in place of
+.Sq raid0 .
+.Pp
+.Ss Initialization and Configuration
+The initial step in configuring a RAID set is to identify the components
+that will be used in the RAID set. All components should be the same
+size. Each component should have a disklabel type of
+.Dv FS_RAID ,
+and a typical disklabel entry for a RAID component
+might look like:
+.Bd -unfilled -offset indent
+f: 1800000 200495 RAID # (Cyl. 405*- 4041*)
+.Ed
+.Pp
+While
+.Dv FS_BSDFFS
+will also work as the component type, the type
+.Dv FS_RAID
+is preferred for RAIDframe use, as it is required for features such as
+auto-configuration. As part of the initial configuration of each RAID
+set, each component will be given a
+.Sq component label .
+A
+.Sq component label
+contains important information about the component, including a
+user-specified serial number, the row and column of that component in
+the RAID set, the redundancy level of the RAID set, a 'modification
+counter', and whether the parity information (if any) on that
+component is known to be correct. Component labels are an integral
+part of the RAID set, since they are used to ensure that components
+are configured in the correct order, and used to keep track of other
+vital information about the RAID set. Component labels are also
+required for the auto-detection and auto-configuration of RAID sets at
+boot time. For a component label to be considered valid, that
+particular component label must be in agreement with the other
+component labels in the set. For example, the serial number,
+.Sq modification counter ,
+number of rows and number of columns must all
+be in agreement. If any of these are different, then the component is
+not considered to be part of the set. See
+.Xr raid 4
+for more information about component labels.
+.Pp
+Once the components have been identified, and the disks have
+appropriate labels,
+.Nm
+is then used to configure the
+.Xr raid 4
+device. To configure the device, a configuration
+file which looks something like:
+.Bd -unfilled -offset indent
+START array
+# numRow numCol numSpare
+1 3 1
+
+START disks
+/dev/da1s1e
+/dev/da2s1e
+/dev/da3s1e
+
+START spare
+/dev/da4s1e
+
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_5
+32 1 1 5
+
+START queue
+fifo 100
+.Ed
+.Pp
+is created in a file. The above configuration file specifies a RAID 5
+set consisting of the components /dev/da1s1e, /dev/da2s1e, and /dev/da3s1e,
+with /dev/da4s1e available as a
+.Sq hot spare
+in case one of
+the three main drives should fail. A RAID 0 set would be specified in
+a similar way:
+.Bd -unfilled -offset indent
+START array
+# numRow numCol numSpare
+1 4 0
+
+START disks
+/dev/da1s10e
+/dev/da1s11e
+/dev/da1s12e
+/dev/da1s13e
+
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_0
+64 1 1 0
+
+START queue
+fifo 100
+.Ed
+.Pp
+In this case, devices /dev/da1s10e, /dev/da1s11e, /dev/da1s12e, and /dev/da1s13e
+are the components that make up this RAID set. Note that there are no
+hot spares for a RAID 0 set, since there is no way to recover data if
+any of the components fail.
+.Pp
+For a RAID 1 (mirror) set, the following configuration might be used:
+.Bd -unfilled -offset indent
+START array
+# numRow numCol numSpare
+1 2 0
+
+START disks
+/dev/da2s10e
+/dev/da2s11e
+
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_1
+128 1 1 1
+
+START queue
+fifo 100
+.Ed
+.Pp
+In this case, /dev/da2s10e and /dev/da2s11e are the two components of the
+mirror set. While no hot spares have been specified in this
+configuration, they easily could be, just as they were specified in
+the RAID 5 case above. Note as well that RAID 1 sets are currently
+limited to only 2 components. At present, n-way mirroring is not
+possible.
+.Pp
+The first time a RAID set is configured, the
+.Fl C
+option must be used:
+.Bd -unfilled -offset indent
+raidctl -C raid0.conf
+.Ed
+.Pp
+where
+.Sq raid0.conf
+is the name of the RAID configuration file. The
+.Fl C
+forces the configuration to succeed, even if any of the component
+labels are incorrect. The
+.Fl C
+option should not be used lightly in
+situations other than initial configurations, as if
+the system is refusing to configure a RAID set, there is probably a
+very good reason for it. After the initial configuration is done (and
+appropriate component labels are added with the
+.Fl I
+option) then raid0 can be configured normally with:
+.Bd -unfilled -offset indent
+raidctl -c raid0.conf
+.Ed
+.Pp
+When the RAID set is configured for the first time, it is
+necessary to initialize the component labels, and to initialize the
+parity on the RAID set. Initializing the component labels is done with:
+.Bd -unfilled -offset indent
+raidctl -I 112341 raid0
+.Ed
+.Pp
+where
+.Sq 112341
+is a user-specified serial number for the RAID set. This
+initialization step is
+.Ar required
+for all RAID sets. As well, using different
+serial numbers between RAID sets is
+.Ar strongly encouraged ,
+as using the same serial number for all RAID sets will only serve to
+decrease the usefulness of the component label checking.
+.Pp
+Initializing the RAID set is done via the
+.Fl i
+option. This initialization
+.Ar MUST
+be done for
+.Ar all
+RAID sets, since among other things it verifies that the parity (if
+any) on the RAID set is correct. Since this initialization may be
+quite time-consuming, the
+.Fl v
+option may be also used in conjunction with
+.Fl i :
+.Bd -unfilled -offset indent
+raidctl -iv raid0
+.Ed
+.Pp
+This will give more verbose output on the
+status of the initialization:
+.Bd -unfilled -offset indent
+Initiating re-write of parity
+Parity Re-write status:
+ 10% |**** | ETA: 06:03 /
+.Ed
+.Pp
+The output provides a
+.Sq Percent Complete
+in both a numeric and graphical format, as well as an estimated time
+to completion of the operation.
+.Pp
+Since it is the parity that provides the
+.Sq redundancy
+part of RAID, it is critical that the parity is correct
+as much as possible. If the parity is not correct, then there is no
+guarantee that data will not be lost if a component fails.
+.Pp
+Once the parity is known to be correct,
+it is then safe to perform
+.Xr disklabel 8 ,
+.Xr newfs 8 ,
+or
+.Xr fsck 8
+on the device or its filesystems, and then to mount the filesystems
+for use.
+.Pp
+Under certain circumstances (e.g. the additional component has not
+arrived, or data is being migrated off of a disk destined to become a
+component) it may be desirable to configure a RAID 1 set with only
+a single component. This can be achieved by configuring the set with
+a physically existing component (as either the first or second
+component) and with a
+.Sq fake
+component. In the following:
+.Bd -unfilled -offset indent
+START array
+# numRow numCol numSpare
+1 2 0
+
+START disks
+/dev/da6s1e
+/dev/da0s1e
+
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_1
+128 1 1 1
+
+START queue
+fifo 100
+.Ed
+.Pp
+/dev/da0s1e is the real component, and will be the second disk of a RAID 1
+set. The component /dev/da6s1e, which must exist, but have no physical
+device associated with it, is simply used as a placeholder.
+Configuration (using
+.Fl C
+and
+.Fl I Ar 12345
+as above) proceeds normally, but initialization of the RAID set will
+have to wait until all physical components are present. After
+configuration, this set can be used normally, but will be operating
+in degraded mode. Once a second physical component is obtained, it
+can be hot-added, the existing data mirrored, and normal operation
+resumed.
+.Pp
+.Ss Maintenance of the RAID set
+After the parity has been initialized for the first time, the command:
+.Bd -unfilled -offset indent
+raidctl -p raid0
+.Ed
+.Pp
+can be used to check the current status of the parity. To check the
+parity and rebuild it if necessary (for example, after an unclean
+shutdown) the command:
+.Bd -unfilled -offset indent
+raidctl -P raid0
+.Ed
+.Pp
+is used. Note that re-writing the parity can be done while
+other operations on the RAID set are taking place (e.g. while doing a
+.Xr fsck 8
+on a filesystem on the RAID set). However, for maximum effectiveness
+of the RAID set, the parity should be known to be correct before any
+data on the set is modified.
+.Pp
+To see how the RAID set is doing, the following command can be used to
+show the RAID set's status:
+.Bd -unfilled -offset indent
+raidctl -s raid0
+.Ed
+.Pp
+The output will look something like:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: optimal
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: spare
+Component label for /dev/da1s1e:
+ Row: 0 Column: 0 Num Rows: 1 Num Columns: 3
+ Version: 2 Serial Number: 13432 Mod Counter: 65
+ Clean: No Status: 0
+ sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1
+ RAID Level: 5 blocksize: 512 numBlocks: 1799936
+ Autoconfig: No
+ Last configured as: raid0
+Component label for /dev/da2s1e:
+ Row: 0 Column: 1 Num Rows: 1 Num Columns: 3
+ Version: 2 Serial Number: 13432 Mod Counter: 65
+ Clean: No Status: 0
+ sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1
+ RAID Level: 5 blocksize: 512 numBlocks: 1799936
+ Autoconfig: No
+ Last configured as: raid0
+Component label for /dev/da3s1e:
+ Row: 0 Column: 2 Num Rows: 1 Num Columns: 3
+ Version: 2 Serial Number: 13432 Mod Counter: 65
+ Clean: No Status: 0
+ sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1
+ RAID Level: 5 blocksize: 512 numBlocks: 1799936
+ Autoconfig: No
+ Last configured as: raid0
+Parity status: clean
+Reconstruction is 100% complete.
+Parity Re-write is 100% complete.
+Copyback is 100% complete.
+.Ed
+.Pp
+This indicates that all is well with the RAID set. Of importance here
+are the component lines which read
+.Sq optimal ,
+and the
+.Sq Parity status
+line which indicates that the parity is up-to-date. Note that if
+there are filesystems open on the RAID set, the individual components
+will not be
+.Sq clean
+but the set as a whole can still be clean.
+.Pp
+To check the component label of /dev/da1s1e, the following is used:
+.Bd -unfilled -offset indent
+raidctl -g /dev/da1s1e raid0
+.Ed
+.Pp
+The output of this command will look something like:
+.Bd -unfilled -offset indent
+Component label for /dev/da1s1e:
+ Row: 0 Column: 0 Num Rows: 1 Num Columns: 3
+ Version: 2 Serial Number: 13432 Mod Counter: 65
+ Clean: No Status: 0
+ sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1
+ RAID Level: 5 blocksize: 512 numBlocks: 1799936
+ Autoconfig: No
+ Last configured as: raid0
+.Ed
+.Pp
+.Ss Dealing with Component Failures
+If for some reason
+(perhaps to test reconstruction) it is necessary to pretend a drive
+has failed, the following will perform that function:
+.Bd -unfilled -offset indent
+raidctl -f /dev/da2s1e raid0
+.Ed
+.Pp
+The system will then be performing all operations in degraded mode,
+where missing data is re-computed from existing data and the parity.
+In this case, obtaining the status of raid0 will return (in part):
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: failed
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: spare
+.Ed
+.Pp
+Note that with the use of
+.Fl f
+a reconstruction has not been started. To both fail the disk and
+start a reconstruction, the
+.Fl F
+option must be used:
+.Bd -unfilled -offset indent
+raidctl -F /dev/da2s1e raid0
+.Ed
+.Pp
+The
+.Fl f
+option may be used first, and then the
+.Fl F
+option used later, on the same disk, if desired.
+Immediately after the reconstruction is started, the status will report:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: reconstructing
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: used_spare
+[...]
+Parity status: clean
+Reconstruction is 10% complete.
+Parity Re-write is 100% complete.
+Copyback is 100% complete.
+.Ed
+.Pp
+This indicates that a reconstruction is in progress. To find out how
+the reconstruction is progressing the
+.Fl S
+option may be used. This will indicate the progress in terms of the
+percentage of the reconstruction that is completed. When the
+reconstruction is finished the
+.Fl s
+option will show:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: spared
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: used_spare
+[...]
+Parity status: clean
+Reconstruction is 100% complete.
+Parity Re-write is 100% complete.
+Copyback is 100% complete.
+.Ed
+.Pp
+At this point there are at least two options. First, if /dev/da2s1e is
+known to be good (i.e. the failure was either caused by
+.Fl f
+or
+.Fl F ,
+or the failed disk was replaced), then a copyback of the data can
+be initiated with the
+.Fl B
+option. In this example, this would copy the entire contents of
+/dev/da4s1e to /dev/da2s1e. Once the copyback procedure is complete, the
+status of the device would be (in part):
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: optimal
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: spare
+.Ed
+.Pp
+and the system is back to normal operation.
+.Pp
+The second option after the reconstruction is to simply use /dev/da4s1e
+in place of /dev/da2s1e in the configuration file. For example, the
+configuration file (in part) might now look like:
+.Bd -unfilled -offset indent
+START array
+1 3 0
+
+START disks
+/dev/da1s1e
+/dev/da4s1e
+/dev/da3s1e
+.Ed
+.Pp
+This can be done as /dev/da4s1e is completely interchangeable with
+/dev/da2s1e at this point. Note that extreme care must be taken when
+changing the order of the drives in a configuration. This is one of
+the few instances where the devices and/or their orderings can be
+changed without loss of data! In general, the ordering of components
+in a configuration file should
+.Ar never
+be changed.
+.Pp
+If a component fails and there are no hot spares
+available on-line, the status of the RAID set might (in part) look like:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: failed
+ /dev/da3s1e: optimal
+No spares.
+.Ed
+.Pp
+In this case there are a number of options. The first option is to add a hot
+spare using:
+.Bd -unfilled -offset indent
+raidctl -a /dev/da4s1e raid0
+.Ed
+.Pp
+After the hot add, the status would then be:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: failed
+ /dev/da3s1e: optimal
+Spares:
+ /dev/da4s1e: spare
+.Ed
+.Pp
+Reconstruction could then take place using
+.Fl F
+as described above.
+.Pp
+A second option is to rebuild directly onto /dev/da2s1e. Once the disk
+containing /dev/da2s1e has been replaced, one can simply use:
+.Bd -unfilled -offset indent
+raidctl -R /dev/da2s1e raid0
+.Ed
+.Pp
+to rebuild the /dev/da2s1e component. As the rebuilding is in progress,
+the status will be:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: reconstructing
+ /dev/da3s1e: optimal
+No spares.
+.Ed
+.Pp
+and when completed, will be:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da1s1e: optimal
+ /dev/da2s1e: optimal
+ /dev/da3s1e: optimal
+No spares.
+.Ed
+.Pp
+In circumstances where a particular component is completely
+unavailable after a reboot, a special component name will be used to
+indicate the missing component. For example:
+.Bd -unfilled -offset indent
+Components:
+ /dev/da2s1e: optimal
+ component1: failed
+No spares.
+.Ed
+.Pp
+indicates that the second component of this RAID set was not detected
+at all by the auto-configuration code. The name
+.Sq component1
+can be used anywhere a normal component name would be used. For
+example, to add a hot spare to the above set, and rebuild to that hot
+spare, the following could be done:
+.Bd -unfilled -offset indent
+raidctl -a /dev/da3s1e raid0
+raidctl -F component1 raid0
+.Ed
+.Pp
+at which point the data missing from
+.Sq component1
+would be reconstructed onto /dev/da3s1e.
+.Pp
+.Ss RAID on RAID
+RAID sets can be layered to create more complex and much larger RAID
+sets. A RAID 0 set, for example, could be constructed from four RAID
+5 sets. The following configuration file shows such a setup:
+.Bd -unfilled -offset indent
+START array
+# numRow numCol numSpare
+1 4 0
+
+START disks
+/dev/raid1e
+/dev/raid2e
+/dev/raid3e
+/dev/raid4e
+
+START layout
+# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_0
+128 1 1 0
+
+START queue
+fifo 100
+.Ed
+.Pp
+A similar configuration file might be used for a RAID 0 set
+constructed from components on RAID 1 sets. In such a configuration,
+the mirroring provides a high degree of redundancy, while the striping
+provides additional speed benefits.
+.Pp
+.Ss Auto-configuration and Root on RAID
+RAID sets can also be auto-configured at boot. To make a set
+auto-configurable, simply prepare the RAID set as above, and then do
+a:
+.Bd -unfilled -offset indent
+raidctl -A yes raid0
+.Ed
+.Pp
+to turn on auto-configuration for that set. To turn off
+auto-configuration, use:
+.Bd -unfilled -offset indent
+raidctl -A no raid0
+.Ed
+.Pp
+RAID sets which are auto-configurable will be configured before the
+root filesystem is mounted. These RAID sets are thus available for
+use as a root filesystem, or for any other filesystem. A primary
+advantage of using the auto-configuration is that RAID components
+become more independent of the disks they reside on. For example,
+SCSI IDs can change, but auto-configured sets will always be
+configured correctly, even if the SCSI IDs of the component disks
+have become scrambled.
+.Pp
+Having a system's root filesystem (/) on a RAID set is also allowed,
+with the
+.Sq a
+partition of such a RAID set being used for /.
+To use raid0a as the root filesystem, simply use:
+.Bd -unfilled -offset indent
+raidctl -A root raid0
+.Ed
+.Pp
+To return raid0a to be just an auto-configuring set simply use the
+.Fl A Ar yes
+arguments.
+.Pp
+Note that kernels can only be directly read from RAID 1 components on
+alpha and pmax architectures. On those architectures, the
+.Dv FS_RAID
+filesystem is recognized by the bootblocks, and will properly load the
+kernel directly from a RAID 1 component. For other architectures, or
+to support the root filesystem on other RAID sets, some other
+mechanism must be used to get a kernel booting. For example, a small
+partition containing only the secondary boot-blocks and an alternate
+kernel (or two) could be used. Once a kernel is booting however, and
+an auto-configuring RAID set is found that is eligible to be root,
+then that RAID set will be auto-configured and used as the root
+device. If two or more RAID sets claim to be root devices, then the
+user will be prompted to select the root device. At this time, RAID
+0, 1, 4, and 5 sets are all supported as root devices.
+.Pp
+A typical RAID 1 setup with root on RAID might be as follows:
+.Bl -enum
+.It
+wd0a - a small partition, which contains a complete, bootable, basic
+NetBSD installation.
+.It
+wd1a - also contains a complete, bootable, basic NetBSD installation.
+.It
+wd0e and wd1e - a RAID 1 set, raid0, used for the root filesystem.
+.It
+wd0f and wd1f - a RAID 1 set, raid1, which will be used only for
+swap space.
+.It
+wd0g and wd1g - a RAID 1 set, raid2, used for /usr, /home, or other
+data, if desired.
+.It
+wd0h and wd1h - a RAID 1 set, raid3, if desired.
+.El
+.Pp
+RAID sets raid0, raid1, and raid2 are all marked as
+auto-configurable. raid0 is marked as being a root filesystem.
+When new kernels are installed, the kernel is not only copied to /,
+but also to wd0a and wd1a. The kernel on wd0a is required, since that
+is the kernel the system boots from. The kernel on wd1a is also
+required, since that will be the kernel used should wd0 fail. The
+important point here is to have redundant copies of the kernel
+available, in the event that one of the drives fails.
+.Pp
+There is no requirement that the root filesystem be on the same disk
+as the kernel. For example, obtaining the kernel from wd0a, and using
+da0s1e and da1s1e for raid0, and the root filesystem, is fine. It
+.Ar is
+critical, however, that there be multiple kernels available, in the
+event of media failure.
+.Pp
+Multi-layered RAID devices (such as a RAID 0 set made
+up of RAID 1 sets) are
+.Ar not
+supported as root devices or auto-configurable devices at this point.
+(Multi-layered RAID devices
+.Ar are
+supported in general, however, as mentioned earlier.) Note that in
+order to enable component auto-detection and auto-configuration of
+RAID devices, the line:
+.Bd -unfilled -offset indent
+options RAID_AUTOCONFIG
+.Ed
+.Pp
+must be in the kernel configuration file. See
+.Xr raid 4
+for more details.
+.Pp
+.Ss Unconfiguration
+The final operation performed by
+.Nm
+is to unconfigure a
+.Xr raid 4
+device. This is accomplished via a simple:
+.Bd -unfilled -offset indent
+raidctl -u raid0
+.Ed
+.Pp
+at which point the device is ready to be reconfigured.
+.Pp
+.Ss Performance Tuning
+Selection of the various parameter values which result in the best
+performance can be quite tricky, and often requires a bit of
+trial-and-error to get those values most appropriate for a given system.
+A whole range of factors come into play, including:
+.Bl -enum
+.It
+Types of components (e.g. SCSI vs. IDE) and their bandwidth
+.It
+Types of controller cards and their bandwidth
+.It
+Distribution of components among controllers
+.It
+IO bandwidth
+.It
+Filesystem access patterns
+.It
+CPU speed
+.El
+.Pp
+As with most performance tuning, benchmarking under real-life loads
+may be the only way to measure expected performance. Understanding
+some of the underlying technology is also useful in tuning. The goal
+of this section is to provide pointers to those parameters which may
+make significant differences in performance.
+.Pp
+For a RAID 1 set, a SectPerSU value of 64 or 128 is typically
+sufficient. Since data in a RAID 1 set is arranged in a linear
+fashion on each component, selecting an appropriate stripe size is
+somewhat less critical than it is for a RAID 5 set. However, a stripe
+size that is too small will cause large IO's to be broken up into a
+number of smaller ones, hurting performance. At the same time, a
+large stripe size may cause problems with concurrent accesses to
+stripes, which may also affect performance. Thus values in the range
+of 32 to 128 are often the most effective.
+.Pp
+Tuning RAID 5 sets is trickier. In the best case, IO is presented to
+the RAID set one stripe at a time. Since the entire stripe is
+available at the beginning of the IO, the parity of that stripe can
+be calculated before the stripe is written, and then the stripe data
+and parity can be written in parallel. When the amount of data being
+written is less than a full stripe worth, the
+.Sq small write
+problem occurs. Since a
+.Sq small write
+means only a portion of the stripe on the components is going to
+change, the data (and parity) on the components must be updated
+slightly differently. First, the
+.Sq old parity
+and
+.Sq old data
+must be read from the components. Then the new parity is constructed,
+using the new data to be written, and the old data and old parity.
+Finally, the new data and new parity are written. All this extra data
+shuffling results in a serious loss of performance, and is typically 2
+to 4 times slower than a full stripe write (or read). To combat this
+problem in the real world, it may be useful to ensure that stripe
+sizes are small enough that a
+.Sq large IO
+from the system will use exactly one large stripe write. As is seen
+later, there are some filesystem dependencies which may come into play
+here as well.
+.Pp
+Since the size of a
+.Sq large IO
+is often (currently) only 32K or 64K, on a 5-drive RAID 5 set it may
+be desirable to select a SectPerSU value of 16 blocks (8K) or 32
+blocks (16K). Since there are 4 data stripe units per stripe, the maximum
+data per stripe is 64 blocks (32K) or 128 blocks (64K). Again,
+empirical measurement will provide the best indicators of which
+values will yield better performance.
+.Pp
+The parameters used for the filesystem are also critical to good
+performance. For
+.Xr newfs 8 ,
+for example, increasing the block size to 32K or 64K may improve
+performance dramatically. As well, changing the cylinders-per-group
+parameter from 16 to 32 or higher is often not only necessary for
+larger filesystems, but may also have positive performance
+implications.
+.Pp
+.Ss Summary
+Despite the length of this man-page, configuring a RAID set is a
+relatively straight-forward process. All that needs to be done is the
+following steps:
+.Bl -enum
+.It
+Use
+.Xr disklabel 8
+to create the components (of type RAID).
+.It
+Construct a RAID configuration file: e.g.
+.Sq raid0.conf
+.It
+Configure the RAID set with:
+.Bd -unfilled -offset indent
+raidctl -C raid0.conf
+.Ed
+.Pp
+.It
+Initialize the component labels with:
+.Bd -unfilled -offset indent
+raidctl -I 123456 raid0
+.Ed
+.Pp
+.It
+Initialize other important parts of the set with:
+.Bd -unfilled -offset indent
+raidctl -i raid0
+.Ed
+.Pp
+.It
+Get the default label for the RAID set:
+.Bd -unfilled -offset indent
+disklabel raid0 > /tmp/label
+.Ed
+.Pp
+.It
+Edit the label:
+.Bd -unfilled -offset indent
+vi /tmp/label
+.Ed
+.Pp
+.It
+Put the new label on the RAID set:
+.Bd -unfilled -offset indent
+disklabel -R -r raid0 /tmp/label
+.Ed
+.Pp
+.It
+Create the filesystem:
+.Bd -unfilled -offset indent
+newfs /dev/rraid0e
+.Ed
+.Pp
+.It
+Mount the filesystem:
+.Bd -unfilled -offset indent
+mount /dev/raid0e /mnt
+.Ed
+.Pp
+.It
+Use:
+.Bd -unfilled -offset indent
+raidctl -c raid0.conf
+.Ed
+.Pp
+To re-configure the RAID set the next time it is needed, or put
+raid0.conf into /etc where it will automatically be started by
+the /etc/rc scripts.
+.El
+.Pp
+.Sh WARNINGS
+Certain RAID levels (1, 4, 5, 6, and others) can protect against some
+data loss due to component failure. However, the loss of two
+components of a RAID 4 or 5 system, or the loss of a single component
+of a RAID 0 system will result in the entire filesystem being lost.
+RAID is
+.Ar NOT
+a substitute for good backup practices.
+.Pp
+Recomputation of parity
+.Ar MUST
+be performed whenever there is a chance that it may have been
+compromised. This includes after system crashes, or before a RAID
+device has been used for the first time. Failure to keep parity
+correct will be catastrophic should a component ever fail -- it is
+better to use RAID 0 and get the additional space and speed, than it
+is to use parity, but not keep the parity correct. At least with RAID
+0 there is no perception of increased data security.
+.Pp
+.Sh FILES
+.Bl -tag -width /dev/XXrXraidX -compact
+.It Pa /dev/{,r}raid*
+.Cm raid
+device special files.
+.El
+.Pp
+.Sh SEE ALSO
+.Xr raid 4 ,
+.Xr ccd 4 ,
+.Xr rc 8
+.Sh BUGS
+Hot-spare removal is currently not available.
+.Sh HISTORY
+RAIDframe is a framework for rapid prototyping of RAID structures
+developed by the folks at the Parallel Data Laboratory at Carnegie
+Mellon University (CMU).
+A more complete description of the internals and functionality of
+RAIDframe is found in the paper "RAIDframe: A Rapid Prototyping Tool
+for RAID Systems", by William V. Courtright II, Garth Gibson, Mark
+Holland, LeAnn Neal Reilly, and Jim Zelenka, and published by the
+Parallel Data Laboratory of Carnegie Mellon University.
+.Pp
+The
+.Nm
+command first appeared as a program in CMU's RAIDframe v1.1 distribution. This
+version of
+.Nm
+is a complete re-write, and first appeared in
+.Fx 4.4 .
+.Sh COPYRIGHT
+.Bd -unfilled
+The RAIDframe Copyright is as follows:
+
+Copyright (c) 1994-1996 Carnegie-Mellon University.
+All rights reserved.
+
+Permission to use, copy, modify and distribute this software and
+its documentation is hereby granted, provided that both the copyright
+notice and this permission notice appear in all copies of the
+software, derivative works or modified versions, and any portions
+thereof, and that both notices appear in supporting documentation.
+
+CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+
+Carnegie Mellon requests users of this software to return to
+
+ Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ School of Computer Science
+ Carnegie Mellon University
+ Pittsburgh PA 15213-3890
+
+any improvements or extensions that they make and grant Carnegie the
+rights to redistribute these changes.
+.Ed
diff --git a/sbin/raidctl/raidctl.c b/sbin/raidctl/raidctl.c
new file mode 100644
index 0000000..4b7d27d
--- /dev/null
+++ b/sbin/raidctl/raidctl.c
@@ -0,0 +1,1110 @@
+/*-
+ * Copyright (c) 2002 Scott Long <scottl@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* $NetBSD: raidctl.c,v 1.25 2000/10/31 14:18:39 lukem Exp $ */
+/*-
+ * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Greg Oster
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This program is a re-write of the original rf_ctrl program
+ * distributed by CMU with RAIDframe 1.1.
+ *
+ * This program is the user-land interface to the RAIDframe kernel
+ * driver in NetBSD.
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/disklabel.h>
+#if defined(__FreeBSD__)
+#include <sys/linker.h>
+#include <sys/module.h>
+#endif
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#ifdef __FreeBSD__
+#include <paths.h>
+#endif
+#if defined(__NetBSD__)
+#include <util.h>
+#endif
+
+#include <dev/raidframe/rf_raidframe.h>
+
+/*
+ * Forward declarations for the functions defined in this file.  The
+ * definitions below use old-style (K&R) parameter lists, so these
+ * prototypes are what give callers argument type checking.
+ */
+int main(int, char *[]);
+void do_ioctl(int, u_long, void *, const char *);
+static void rf_configure(int, char*, int);
+static const char *device_status(RF_DiskStatus_t);
+static void rf_get_device_status(int);
+static void get_component_number(int, char *, int *, int *);
+static void rf_fail_disk(int, char *, int);
+static void usage(void);
+static void get_component_label(int, char *);
+static void set_component_label(int, char *);
+static void init_component_labels(int, int);
+static void set_autoconfig(int, char *, char *);
+static void add_hot_spare(int, char *);
+static void remove_hot_spare(int, char *);
+static void rebuild_in_place(int, char *);
+static void check_status(int,int);
+static void check_parity(int,int, char *);
+static void do_meter(int, u_long);
+static void get_bar(char *, double, int);
+static void get_time_string(char *, int);
+#if defined(__FreeBSD__)
+/* NOTE(review): presumably verifies the RAIDframe driver is available -- confirm against its definition. */
+static void check_driver(void);
+
+/* PROGNAME is the program name used as the prefix of error messages. */
+extern char *__progname;
+#define PROGNAME __progname
+
+/* Control device main() opens on FreeBSD to configure (-c/-C) and shut down (-u) a set. */
+#define RAIDCTLDEV "/dev/raidctl"
+#elif defined(__NetBSD__)
+#define PROGNAME getprogname()
+#endif
+
+/* Set to 1 by the -v option; when non-zero, long-running actions show a progress meter. */
+int verbose;
+
+/*
+ * raidctl entry point.
+ *
+ * Translates the option letters into a single RAIDframe action, opens the
+ * RAID device named by the first non-option argument and dispatches the
+ * action to the matching helper.  At most one action option may be given;
+ * -v only sets the global verbose flag and is not counted.  On FreeBSD the
+ * -c/-C options are carried out immediately during option parsing through
+ * the RAIDCTLDEV control device and the program exits right afterwards.
+ *
+ * The process always terminates through exit(): status 0 on success,
+ * status 1 when the device cannot be opened or is not a block/character
+ * device (helpers exit with status 1 on ioctl failure).
+ */
+int
+main(argc,argv)
+	int argc;
+	char *argv[];
+{
+	int ch;
+	int num_options;
+	unsigned long action;
+	char config_filename[PATH_MAX];
+	char dev_name[PATH_MAX];
+	char name[PATH_MAX];
+	char component[PATH_MAX];
+	char autoconf[10];
+	int do_recon;
+	int do_rewrite;
+	int serial_number;
+	struct stat st;
+	int fd;
+	int force;
+#if defined(__FreeBSD__)
+	int raidID;
+#endif
+
+	num_options = 0;
+	action = 0;
+	do_recon = 0;
+	do_rewrite = 0;
+	force = 0;
+	serial_number = 0;
+
+	/*
+	 * All option arguments are copied with strlcpy() so that the
+	 * destination is always NUL-terminated, even when the argument is
+	 * longer than the buffer (it is then truncated).
+	 */
+	while ((ch = getopt(argc, argv, "a:A:Bc:C:f:F:g:iI:l:r:R:sSpPuv"))
+	       != -1)
+		switch(ch) {
+		case 'a':
+			action = RAIDFRAME_ADD_HOT_SPARE;
+			strlcpy(component, optarg, sizeof(component));
+			num_options++;
+			break;
+		case 'A':
+			action = RAIDFRAME_SET_AUTOCONFIG;
+			strlcpy(autoconf, optarg, sizeof(autoconf));
+			num_options++;
+			break;
+		case 'B':
+			action = RAIDFRAME_COPYBACK;
+			num_options++;
+			break;
+		case 'c':
+		case 'C':
+			strlcpy(config_filename, optarg,
+			    sizeof(config_filename));
+			action = RAIDFRAME_CONFIGURE;
+			/* -C forces the configuration, -c does not. */
+			force = (ch == 'c') ? 0 : 1;
+#if defined(__FreeBSD__)
+			/*
+			 * On FreeBSD configuration goes through the control
+			 * device rather than a raid device, so it is done
+			 * right here and the program exits.
+			 */
+			check_driver();
+			fd = open(RAIDCTLDEV, O_RDWR);
+			if (fd < 0) {
+				fprintf(stderr, "%s: unable to open raid "
+				    "control device %s\n", PROGNAME,
+				    RAIDCTLDEV);
+				fprintf(stderr, "Error: %s\n", strerror(errno));
+				exit(1);
+			}
+			rf_configure(fd, config_filename, force);
+			close(fd);
+			exit(0);
+#elif defined(__NetBSD__)
+			num_options++;
+			break;
+#endif
+		case 'f':
+			action = RAIDFRAME_FAIL_DISK;
+			strlcpy(component, optarg, sizeof(component));
+			do_recon = 0;
+			num_options++;
+			break;
+		case 'F':
+			action = RAIDFRAME_FAIL_DISK;
+			strlcpy(component, optarg, sizeof(component));
+			do_recon = 1;
+			num_options++;
+			break;
+		case 'g':
+			action = RAIDFRAME_GET_COMPONENT_LABEL;
+			strlcpy(component, optarg, sizeof(component));
+			num_options++;
+			break;
+		case 'i':
+			action = RAIDFRAME_REWRITEPARITY;
+			num_options++;
+			break;
+		case 'I':
+			action = RAIDFRAME_INIT_LABELS;
+			serial_number = atoi(optarg);
+			num_options++;
+			break;
+		case 'l':
+			action = RAIDFRAME_SET_COMPONENT_LABEL;
+			strlcpy(component, optarg, sizeof(component));
+			num_options++;
+			break;
+		case 'r':
+			action = RAIDFRAME_REMOVE_HOT_SPARE;
+			strlcpy(component, optarg, sizeof(component));
+			num_options++;
+			break;
+		case 'R':
+			strlcpy(component, optarg, sizeof(component));
+			action = RAIDFRAME_REBUILD_IN_PLACE;
+			num_options++;
+			break;
+		case 's':
+			action = RAIDFRAME_GET_INFO;
+			num_options++;
+			break;
+		case 'S':
+			action = RAIDFRAME_CHECK_RECON_STATUS_EXT;
+			num_options++;
+			break;
+		case 'p':
+			action = RAIDFRAME_CHECK_PARITY;
+			num_options++;
+			break;
+		case 'P':
+			action = RAIDFRAME_CHECK_PARITY;
+			do_rewrite = 1;
+			num_options++;
+			break;
+		case 'u':
+			action = RAIDFRAME_SHUTDOWN;
+			num_options++;
+			break;
+		case 'v':
+			verbose = 1;
+			/* Don't bump num_options, as '-v' is not
+			   an option like the others */
+			break;
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Exactly one device argument must remain and at most one action
+	 * may have been requested.
+	 * NOTE(review): usage() is assumed not to return -- confirm.
+	 */
+	if ((num_options > 1) || (argc < 1))
+		usage();
+
+	strlcpy(name, argv[0], sizeof(name));
+#if defined(__NetBSD__)
+	fd = opendisk(name, O_RDWR, dev_name, sizeof(dev_name), 1);
+#elif defined(__FreeBSD__)
+	check_driver();
+
+	/* A name without a leading '/' is taken relative to _PATH_DEV. */
+	if (name[0] != '/') {
+		char name1[PATH_MAX];
+		snprintf(name1, sizeof(name1), "%s%s", _PATH_DEV, name);
+		strlcpy(name, name1, sizeof(name));
+	}
+	/*
+	 * Nothing else fills in dev_name on FreeBSD, yet it is used in the
+	 * diagnostics below and handed to check_parity().
+	 */
+	strlcpy(dev_name, name, sizeof(dev_name));
+	fd = open(name, O_RDWR);
+#endif
+	if (fd == -1) {
+		fprintf(stderr, "%s: unable to open device file: %s\n",
+			PROGNAME, name);
+		exit(1);
+	}
+	if (fstat(fd, &st) != 0) {
+		fprintf(stderr,"%s: stat failure on: %s\n",
+			PROGNAME, dev_name);
+		exit(1);
+	}
+	if (!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) {
+		fprintf(stderr,"%s: invalid device: %s\n",
+			PROGNAME, dev_name);
+		exit(1);
+	}
+
+	switch(action) {
+	case RAIDFRAME_ADD_HOT_SPARE:
+		add_hot_spare(fd, component);
+		break;
+	case RAIDFRAME_REMOVE_HOT_SPARE:
+		remove_hot_spare(fd, component);
+		break;
+#if defined(__NetBSD__)
+	case RAIDFRAME_CONFIGURE:
+		rf_configure(fd, config_filename, force);
+		break;
+#endif
+	case RAIDFRAME_SET_AUTOCONFIG:
+		set_autoconfig(fd, name, autoconf);
+		break;
+	case RAIDFRAME_COPYBACK:
+		printf("Copyback.\n");
+		do_ioctl(fd, RAIDFRAME_COPYBACK, NULL, "RAIDFRAME_COPYBACK");
+		if (verbose) {
+			sleep(3); /* XXX give the copyback a chance to start */
+			printf("Copyback status:\n");
+			do_meter(fd,RAIDFRAME_CHECK_COPYBACK_STATUS_EXT);
+		}
+		break;
+	case RAIDFRAME_FAIL_DISK:
+		rf_fail_disk(fd, component, do_recon);
+		break;
+	case RAIDFRAME_SET_COMPONENT_LABEL:
+		set_component_label(fd, component);
+		break;
+	case RAIDFRAME_GET_COMPONENT_LABEL:
+		get_component_label(fd, component);
+		break;
+	case RAIDFRAME_INIT_LABELS:
+		init_component_labels(fd, serial_number);
+		break;
+	case RAIDFRAME_REWRITEPARITY:
+		printf("Initiating re-write of parity\n");
+		do_ioctl(fd, RAIDFRAME_REWRITEPARITY, NULL,
+			 "RAIDFRAME_REWRITEPARITY");
+		if (verbose) {
+			sleep(3); /* XXX give it time to get started */
+			printf("Parity Re-write status:\n");
+			do_meter(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT);
+		}
+		break;
+	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
+		check_status(fd,1);
+		break;
+	case RAIDFRAME_GET_INFO:
+		rf_get_device_status(fd);
+		break;
+	case RAIDFRAME_REBUILD_IN_PLACE:
+		rebuild_in_place(fd, component);
+		break;
+	case RAIDFRAME_CHECK_PARITY:
+		check_parity(fd, do_rewrite, dev_name);
+		break;
+	case RAIDFRAME_SHUTDOWN:
+#if defined(__NetBSD__)
+		do_ioctl(fd, RAIDFRAME_SHUTDOWN, NULL, "RAIDFRAME_SHUTDOWN");
+#elif defined(__FreeBSD__)
+		/* Find out the unit number of the raid device */
+		do_ioctl(fd, RAIDFRAME_GET_UNIT, &raidID, "RAIDFRAME_GET_UNIT");
+		close(fd);
+
+		/* The shutdown request itself goes to the control device. */
+		fd = open(RAIDCTLDEV, O_RDWR);
+		if (fd < 0) {
+			fprintf(stderr, "%s: unable to open raid control "
+			    "device %s\n", PROGNAME, RAIDCTLDEV);
+			fprintf(stderr, "Error: %s\n", strerror(errno));
+			exit(1);
+		}
+		do_ioctl(fd, RAIDFRAME_SHUTDOWN, &raidID, "RAIDFRAME_SHUTDOWN");
+		/* fd is closed exactly once, by the common close() below. */
+#endif
+		break;
+	default:
+		break;
+	}
+
+	close(fd);
+	exit(0);
+}
+
+/*
+ * Issue an ioctl and treat any failure as fatal.
+ *
+ * fd         - descriptor the request is sent to
+ * command    - ioctl request code
+ * arg        - request argument, passed through unchanged
+ * ioctl_name - request name shown in the diagnostic
+ *
+ * Returns only when ioctl() succeeds; otherwise prints a message that
+ * includes the errno text and exits with status 1.
+ */
+void
+do_ioctl(fd, command, arg, ioctl_name)
+	int fd;
+	unsigned long command;
+	void *arg;
+	const char *ioctl_name;
+{
+	if (ioctl(fd, command, arg) >= 0)
+		return;
+
+	/* err() prints the same diagnostic as warn() and then exits. */
+	err(1, "ioctl (%s) failed", ioctl_name);
+}
+
+
+/*
+ * Build a RAIDframe configuration from a file and pass it to the driver
+ * with the RAIDFRAME_CONFIGURE ioctl.
+ *
+ * fd          - open descriptor the ioctl is issued on
+ * config_file - path handed to rf_MakeConfig() to fill in the structure
+ * force       - stored in cfg.force before the ioctl is issued
+ *
+ * Exits with status 1 when rf_MakeConfig() returns non-zero or when the
+ * ioctl fails.
+ */
+static void
+rf_configure(fd,config_file,force)
+	int fd;
+	char *config_file;
+	int force;
+{
+	void *generic;
+	RF_Config_t cfg;
+
+	if (rf_MakeConfig( config_file, &cfg ) != 0) {
+		/* A single trailing newline; the message used to end in two. */
+		fprintf(stderr,
+			"%s: unable to create RAIDframe configuration structure\n",
+			PROGNAME);
+		exit(1);
+	}
+
+	cfg.force = force;
+
+	/*
+	 * Note the extra level of indirection needed here, since
+	 * what we really want to pass in is a pointer to the pointer to
+	 * the configuration structure.
+	 */
+
+	generic = (void *) &cfg;
+	do_ioctl(fd, RAIDFRAME_CONFIGURE, &generic, "RAIDFRAME_CONFIGURE");
+}
+
+/*
+ * Map a component status code to the string shown to the user.
+ * Codes not listed here yield "UNKNOWN".  The returned pointer refers to
+ * a string literal and must not be modified or freed.
+ */
+static const char *
+device_status(status)
+	RF_DiskStatus_t status;
+{
+	const char *text = "UNKNOWN";
+
+	switch (status) {
+	case rf_ds_optimal:
+		text = "optimal";
+		break;
+	case rf_ds_failed:
+		text = "failed";
+		break;
+	case rf_ds_reconstructing:
+		text = "reconstructing";
+		break;
+	case rf_ds_dist_spared:
+		text = "dist_spared";
+		break;
+	case rf_ds_spared:
+		text = "spared";
+		break;
+	case rf_ds_spare:
+		text = "spare";
+		break;
+	case rf_ds_used_spare:
+		text = "used_spare";
+		break;
+	default:
+		break;
+	}
+
+	return (text);
+}
+
+/*
+ * Print the status of a RAID set (the -s action).
+ *
+ * Queries the set's configuration with RAIDFRAME_GET_INFO and prints, in
+ * order: the status of every component, the status of every spare (or
+ * "No spares."), the component label of each component that is optimal and
+ * of each spare that is optimal or in use, the parity status, and finally
+ * the progress figures reported by check_status().
+ *
+ * fd - open descriptor for the RAID device
+ *
+ * Exits with status 1 if any ioctl fails (see do_ioctl()).
+ */
+static void
+rf_get_device_status(fd)
+	int fd;
+{
+	RF_DeviceConfig_t device_config;
+	void *cfg_ptr;
+	int is_clean;
+	int i;
+
+	/*
+	 * The ioctl takes a pointer to a pointer to the structure.  The
+	 * leftover "Address=" debugging line that used to be printed here
+	 * has been dropped so the output starts with "Components:".
+	 */
+	cfg_ptr = &device_config;
+	do_ioctl(fd, RAIDFRAME_GET_INFO, &cfg_ptr, "RAIDFRAME_GET_INFO");
+
+	printf("Components:\n");
+	for(i=0; i < device_config.ndevs; i++) {
+		printf("%20s: %s\n", device_config.devs[i].devname,
+		       device_status(device_config.devs[i].status));
+	}
+	if (device_config.nspares > 0) {
+		printf("Spares:\n");
+		for(i=0; i < device_config.nspares; i++) {
+			printf("%20s: %s\n",
+			       device_config.spares[i].devname,
+			       device_status(device_config.spares[i].status));
+		}
+	} else {
+		printf("No spares.\n");
+	}
+
+	/* Labels are only fetched from components that are optimal. */
+	for(i=0; i < device_config.ndevs; i++) {
+		if (device_config.devs[i].status == rf_ds_optimal) {
+			get_component_label(fd, device_config.devs[i].devname);
+		} else {
+			printf("%s status is: %s. Skipping label.\n",
+			       device_config.devs[i].devname,
+			       device_status(device_config.devs[i].status));
+		}
+	}
+
+	/* For spares, "optimal" and "used_spare" both qualify. */
+	if (device_config.nspares > 0) {
+		for(i=0; i < device_config.nspares; i++) {
+			if ((device_config.spares[i].status ==
+			     rf_ds_optimal) ||
+			    (device_config.spares[i].status ==
+			     rf_ds_used_spare)) {
+				get_component_label(fd,
+				    device_config.spares[i].devname);
+			} else {
+				printf("%s status is: %s. Skipping label.\n",
+				       device_config.spares[i].devname,
+				       device_status(device_config.spares[i].status));
+			}
+		}
+	}
+
+	do_ioctl(fd, RAIDFRAME_CHECK_PARITY, &is_clean,
+		 "RAIDFRAME_CHECK_PARITY");
+	if (is_clean) {
+		printf("Parity status: clean\n");
+	} else {
+		printf("Parity status: DIRTY\n");
+	}
+	check_status(fd,0);
+}
+
+/*
+ * Look up a component by name in the configuration of the RAID set.
+ *
+ * fd               - open descriptor for the RAID device
+ * component_name   - name compared against each device name
+ * component_number - out: index of the component; a spare is reported as
+ *                    its spare index plus the number of regular components
+ * num_columns      - out: column count of the set; when the match is a
+ *                    spare, the number of spares is added to it
+ *
+ * Regular components are searched first and spares only if none of them
+ * matched.  Exits with status 1 when the name matches nothing.
+ */
+static void
+get_component_number(fd, component_name, component_number, num_columns)
+	int fd;
+	char *component_name;
+	int *component_number;
+	int *num_columns;
+{
+	RF_DeviceConfig_t info;
+	void *info_ptr = &info;
+	int idx;
+	int matched = 0;
+
+	*component_number = -1;
+
+	/* Assuming a full path spec... */
+	do_ioctl(fd, RAIDFRAME_GET_INFO, &info_ptr, "RAIDFRAME_GET_INFO");
+
+	*num_columns = info.cols;
+
+	/* Every entry is examined, so the last matching one is reported. */
+	for (idx = 0; idx < info.ndevs; idx++) {
+		if (strncmp(component_name, info.devs[idx].devname,
+			    PATH_MAX) != 0)
+			continue;
+		matched = 1;
+		*component_number = idx;
+	}
+	if (matched)
+		return;
+
+	/* maybe it's a spare? */
+	for (idx = 0; idx < info.nspares; idx++) {
+		if (strncmp(component_name, info.spares[idx].devname,
+			    PATH_MAX) != 0)
+			continue;
+		matched = 1;
+		*component_number = idx + info.ndevs;
+		/* the way spares are done should really change... */
+		*num_columns = info.cols + info.nspares;
+	}
+	if (matched)
+		return;
+
+	fprintf(stderr,"%s: %s is not a component %s", PROGNAME,
+		component_name, "of this device\n");
+	exit(1);
+}
+
+/*
+ * Mark a component as failed (the -f and -F actions).
+ *
+ * fd                - open descriptor for the RAID device
+ * component_to_fail - name of the component to fail
+ * do_recon          - non-zero to request reconstruction as well
+ *
+ * When reconstruction was requested and the global verbose flag is set, a
+ * progress meter is displayed afterwards.  Exits with status 1 if the
+ * component is unknown or an ioctl fails.
+ */
+static void
+rf_fail_disk(fd, component_to_fail, do_recon)
+	int fd;
+	char *component_to_fail;
+	int do_recon;
+{
+	struct rf_recon_req recon_request;
+	int component_num;
+	int num_cols;
+
+	get_component_number(fd, component_to_fail, &component_num, &num_cols);
+
+	/*
+	 * Clear the whole request first so that the fields not assigned
+	 * below are not handed to the driver as stack garbage.
+	 */
+	memset(&recon_request, 0, sizeof(recon_request));
+	recon_request.row = component_num / num_cols;
+	recon_request.col = component_num % num_cols;
+	if (do_recon) {
+		recon_request.flags = RF_FDFLAGS_RECON;
+	} else {
+		recon_request.flags = RF_FDFLAGS_NONE;
+	}
+	do_ioctl(fd, RAIDFRAME_FAIL_DISK, &recon_request,
+		 "RAIDFRAME_FAIL_DISK");
+	if (do_recon && verbose) {
+		printf("Reconstruction status:\n");
+		sleep(3); /* XXX give reconstruction a chance to start */
+		do_meter(fd,RAIDFRAME_CHECK_RECON_STATUS_EXT);
+	}
+}
+
+/*
+ * Fetch and print the component label of one component (the -g action).
+ *
+ * fd        - open descriptor for the RAID device
+ * component - name of the component whose label is wanted
+ *
+ * The component's row and column are derived from its index and sent in a
+ * zeroed label structure, which the RAIDFRAME_GET_COMPONENT_LABEL ioctl
+ * then fills in.  Exits with status 1 if the component is unknown or the
+ * ioctl fails.
+ */
+static void
+get_component_label(fd, component)
+	int fd;
+	char *component;
+{
+	RF_ComponentLabel_t label;
+	int index;
+	int columns;
+
+	get_component_number(fd, component, &index, &columns);
+
+	memset(&label, 0, sizeof(RF_ComponentLabel_t));
+	label.row = index / columns;
+	label.column = index % columns;
+
+	do_ioctl(fd, RAIDFRAME_GET_COMPONENT_LABEL, &label,
+		 "RAIDFRAME_GET_COMPONENT_LABEL");
+
+	printf("Component label for %s:\n", component);
+
+	printf(" Row: %d, Column: %d, Num Rows: %d, Num Columns: %d\n",
+	       label.row, label.column, label.num_rows, label.num_columns);
+	printf(" Version: %d, Serial Number: %d, Mod Counter: %d\n",
+	       label.version, label.serial_number, label.mod_counter);
+	printf(" Clean: %s, Status: %d\n",
+	       label.clean ? "Yes" : "No", label.status);
+	printf(" sectPerSU: %d, SUsPerPU: %d, SUsPerRU: %d\n",
+	       label.sectPerSU, label.SUsPerPU, label.SUsPerRU);
+	printf(" Queue size: %d, blocksize: %d, numBlocks: %d\n",
+	       label.maxOutstanding, label.blockSize, label.numBlocks);
+	printf(" RAID Level: %c\n", (char) label.parityConfig);
+	printf(" Autoconfig: %s\n", label.autoconfigure ? "Yes" : "No" );
+	printf(" Root partition: %s\n", label.root_partition ? "Yes" : "No" );
+	printf(" Last configured as: raid%d\n", label.last_unit );
+}
+
+/*
+ * Write a fixed test label to one component (the -l action).
+ *
+ * fd        - open descriptor for the RAID device
+ * component - name of the component to label
+ *
+ * Only the row and column are derived from the component; every other
+ * value is a hard-coded test value.  Exits with status 1 if the component
+ * is unknown or the ioctl fails.
+ */
+static void
+set_component_label(fd, component)
+	int fd;
+	char *component;
+{
+	RF_ComponentLabel_t component_label;
+	int component_num;
+	int num_cols;
+
+	get_component_number(fd, component, &component_num, &num_cols);
+
+	/*
+	 * Start from an all-zero label, as get_component_label() does, so
+	 * the fields not assigned below do not carry stack garbage into
+	 * the driver.
+	 */
+	memset(&component_label, 0, sizeof(component_label));
+
+	/* XXX This is currently here for testing, and future expandability */
+
+	component_label.version = 1;
+	component_label.serial_number = 123456;
+	component_label.mod_counter = 0;
+	component_label.row = component_num / num_cols;
+	component_label.column = component_num % num_cols;
+	component_label.num_rows = 0;
+	component_label.num_columns = 5;
+	component_label.clean = 0;
+	component_label.status = 1;
+
+	do_ioctl( fd, RAIDFRAME_SET_COMPONENT_LABEL, &component_label,
+		  "RAIDFRAME_SET_COMPONENT_LABEL");
+}
+
+
+/*
+ * Initialise the component labels of a RAID set (the -I action).
+ *
+ * fd            - open descriptor for the RAID device
+ * serial_number - serial number placed in the label passed to the driver
+ *
+ * Every other label field is sent as zero.  Exits with status 1 if the
+ * ioctl fails.
+ */
+static void
+init_component_labels(fd, serial_number)
+	int fd;
+	int serial_number;
+{
+	RF_ComponentLabel_t component_label;
+
+	/* Zero the whole structure, not just the fields named below. */
+	memset(&component_label, 0, sizeof(component_label));
+
+	component_label.version = 0;
+	component_label.serial_number = serial_number;
+	component_label.mod_counter = 0;
+	component_label.row = 0;
+	component_label.column = 0;
+	component_label.num_rows = 0;
+	component_label.num_columns = 0;
+	component_label.clean = 0;
+	component_label.status = 0;
+
+	/* Report failures under the name of the ioctl actually issued. */
+	do_ioctl( fd, RAIDFRAME_INIT_LABELS, &component_label,
+		  "RAIDFRAME_INIT_LABELS");
+}
+
+/*
+ * Set the auto-configuration and root flags of a RAID set (the -A action).
+ *
+ * fd       - open descriptor for the RAID device
+ * name     - device name used as the prefix of the printed lines
+ * autoconf - option argument; a value starting with "root" enables both
+ *            flags, one starting with "yes" enables auto-configuration
+ *            only, anything else disables both (case is ignored)
+ *
+ * Both ioctls are always issued, so the root flag is cleared when it is
+ * not requested.  Exits with status 1 if an ioctl fails.
+ */
+static void
+set_autoconfig(fd, name, autoconf)
+	int fd;
+	char *name;
+	char *autoconf;
+{
+	int auto_config;
+	int root_config;
+
+	auto_config = 0;
+	root_config = 0;
+
+	if (strncasecmp(autoconf,"root", 4) == 0) {
+		root_config = 1;
+	}
+
+	/* Being a root set implies being auto-configured. */
+	if ((strncasecmp(autoconf,"yes", 3) == 0) ||
+	    root_config == 1) {
+		auto_config = 1;
+	}
+
+	do_ioctl(fd, RAIDFRAME_SET_AUTOCONFIG, &auto_config,
+		 "RAIDFRAME_SET_AUTOCONFIG");
+
+	do_ioctl(fd, RAIDFRAME_SET_ROOT, &root_config,
+		 "RAIDFRAME_SET_ROOT");
+
+	printf("%s: Autoconfigure: %s\n", name,
+	       auto_config ? "Yes" : "No");
+
+	if (root_config == 1) {
+		/* Report the root flag itself, not the autoconfig flag. */
+		printf("%s: Root: %s\n", name,
+		       root_config ? "Yes" : "No");
+	}
+}
+
+/*
+ * Add a hot spare to a RAID set (the -a action).
+ *
+ * fd        - open descriptor for the RAID device
+ * component - name of the device to add as a spare
+ *
+ * Row and column are sent as zero.  A name longer than the field in the
+ * request structure is truncated.  Exits with status 1 if the ioctl fails.
+ */
+static void
+add_hot_spare(fd, component)
+	int fd;
+	char *component;
+{
+	RF_SingleComponent_t hot_spare;
+
+	/* Zero the request so no uninitialised bytes reach the driver. */
+	memset(&hot_spare, 0, sizeof(hot_spare));
+	hot_spare.row = 0;
+	hot_spare.column = 0;
+	/* strlcpy() always NUL-terminates, unlike strncpy(). */
+	strlcpy(hot_spare.component_name, component,
+		sizeof(hot_spare.component_name));
+
+	do_ioctl( fd, RAIDFRAME_ADD_HOT_SPARE, &hot_spare,
+		  "RAIDFRAME_ADD_HOT_SPARE");
+}
+
+/*
+ * Remove a hot spare from a RAID set (the -r action).
+ *
+ * fd        - open descriptor for the RAID device
+ * component - name of the spare to remove
+ *
+ * The row and column sent to the driver are derived from the index that
+ * get_component_number() reports.  A name longer than the field in the
+ * request structure is truncated.  Exits with status 1 if the component is
+ * unknown or the ioctl fails.
+ */
+static void
+remove_hot_spare(fd, component)
+	int fd;
+	char *component;
+{
+	RF_SingleComponent_t hot_spare;
+	int component_num;
+	int num_cols;
+
+	get_component_number(fd, component, &component_num, &num_cols);
+
+	/* Zero the request so no uninitialised bytes reach the driver. */
+	memset(&hot_spare, 0, sizeof(hot_spare));
+	hot_spare.row = component_num / num_cols;
+	hot_spare.column = component_num % num_cols;
+
+	/* strlcpy() always NUL-terminates, unlike strncpy(). */
+	strlcpy(hot_spare.component_name, component,
+		sizeof(hot_spare.component_name));
+
+	do_ioctl( fd, RAIDFRAME_REMOVE_HOT_SPARE, &hot_spare,
+		  "RAIDFRAME_REMOVE_HOT_SPARE");
+}
+
+/*
+ * Rebuild a component in place (the -R action).
+ *
+ * fd        - open descriptor for the RAID device
+ * component - name of the component to rebuild
+ *
+ * The request always uses row 0 and the component's index as the column.
+ * A name longer than the field in the request structure is truncated.
+ * With the global verbose flag set, a progress meter is displayed after
+ * the request.  Exits with status 1 if the component is unknown or the
+ * ioctl fails.
+ */
+static void
+rebuild_in_place( fd, component )
+	int fd;
+	char *component;
+{
+	RF_SingleComponent_t comp;
+	int component_num;
+	int num_cols;
+
+	get_component_number(fd, component, &component_num, &num_cols);
+
+	/* Zero the request so no uninitialised bytes reach the driver. */
+	memset(&comp, 0, sizeof(comp));
+	comp.row = 0;
+	comp.column = component_num;
+	/* strlcpy() always NUL-terminates, unlike strncpy(). */
+	strlcpy(comp.component_name, component, sizeof(comp.component_name));
+
+	do_ioctl( fd, RAIDFRAME_REBUILD_IN_PLACE, &comp,
+		  "RAIDFRAME_REBUILD_IN_PLACE");
+
+	if (verbose) {
+		printf("Reconstruction status:\n");
+		sleep(3); /* XXX give reconstruction a chance to start */
+		do_meter(fd,RAIDFRAME_CHECK_RECON_STATUS_EXT);
+	}
+
+}
+
+/*
+ * check_parity -- report the parity state of `dev_name' and optionally
+ * repair it.
+ *
+ * Clean parity is simply reported.  Dirty parity is reported and then,
+ * if `do_rewrite' is set, a rewrite is started and waited for (with a
+ * progress meter in verbose mode, by polling every 3 seconds otherwise).
+ * If the parity is dirty and no rewrite was requested the program exits
+ * with status 1.
+ */
+static void
+check_parity(int fd, int do_rewrite, char *dev_name)
+{
+	int parity_ok = 0;
+	int pct = 0;
+
+	do_ioctl(fd, RAIDFRAME_CHECK_PARITY, &parity_ok,
+	    "RAIDFRAME_CHECK_PARITY");
+	if (parity_ok) {
+		printf("%s: Parity status: clean\n", dev_name);
+		return;
+	}
+
+	printf("%s: Parity status: DIRTY\n", dev_name);
+	if (!do_rewrite) {
+		/* parity is wrong, and is not being fixed: fail */
+		exit(1);
+	}
+
+	printf("%s: Initiating re-write of parity\n", dev_name);
+	do_ioctl(fd, RAIDFRAME_REWRITEPARITY, NULL,
+	    "RAIDFRAME_REWRITEPARITY");
+	sleep(3); /* XXX give it time to get started. */
+
+	if (verbose) {
+		printf("Parity Re-write status:\n");
+		do_meter(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT);
+	} else {
+		/* poll until the driver reports completion */
+		for (;;) {
+			do_ioctl(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS,
+			    &pct, "RAIDFRAME_CHECK_PARITYREWRITE_STATUS");
+			if (pct >= 100)
+				break;
+			sleep(3); /* wait a bit... */
+		}
+	}
+	printf("%s: Parity Re-write complete\n", dev_name);
+}
+
+
+/*
+ * check_status -- print the completion percentage of reconstruction,
+ * parity re-write and copyback, in that order.
+ *
+ * When `meter' is non-zero, a live progress meter is then shown for the
+ * first of the three operations that is still below 100%.
+ */
+static void
+check_status(int fd, int meter)
+{
+	int recon_pct = 0;
+	int parity_pct = 0;
+	int copyback_pct = 0;
+
+	do_ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS, &recon_pct,
+	    "RAIDFRAME_CHECK_RECON_STATUS");
+	printf("Reconstruction is %d%% complete.\n", recon_pct);
+
+	do_ioctl(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS, &parity_pct,
+	    "RAIDFRAME_CHECK_PARITYREWRITE_STATUS");
+	printf("Parity Re-write is %d%% complete.\n", parity_pct);
+
+	do_ioctl(fd, RAIDFRAME_CHECK_COPYBACK_STATUS, &copyback_pct,
+	    "RAIDFRAME_CHECK_COPYBACK_STATUS");
+	printf("Copyback is %d%% complete.\n", copyback_pct);
+
+	if (!meter)
+		return;
+
+	/* These 3 should be mutually exclusive at this point */
+	if (recon_pct < 100) {
+		printf("Reconstruction status:\n");
+		do_meter(fd, RAIDFRAME_CHECK_RECON_STATUS_EXT);
+		return;
+	}
+	if (parity_pct < 100) {
+		printf("Parity Re-write status:\n");
+		do_meter(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT);
+		return;
+	}
+	if (copyback_pct < 100) {
+		printf("Copyback status:\n");
+		do_meter(fd, RAIDFRAME_CHECK_COPYBACK_STATUS_EXT);
+	}
+}
+
+/* Characters cycled through to animate the activity indicator. */
+const char *tbits = "|/-\\";
+
+/*
+ * do_meter -- draw a live progress line for a long-running operation.
+ *
+ * `option' is the ioctl used to fetch an RF_ProgressInfo_t.  The driver
+ * is polled every 2 seconds until `completed' reaches `total'; each
+ * pass rewrites one line holding the percentage, a bar, an ETA and a
+ * spinner.  The ETA is derived from the average rate since this
+ * function was entered; while no rate is available yet the previous
+ * estimate is reused, or a placeholder if there has not been one.
+ * Exits the program if gettimeofday() fails.
+ */
+static void
+do_meter(int fd, u_long option)
+{
+	int percent_done;
+	int start_value;
+	RF_ProgressInfo_t progressInfo;
+	struct timeval start_time;
+	struct timeval current_time;
+	double elapsed;
+	int elapsed_sec;
+	int elapsed_usec;
+	int simple_eta, last_eta;
+	double rate;
+	int amount;
+	int tbit_value;
+	char buffer[1024];
+	char bar_buffer[1024];
+	char eta_buffer[1024];
+
+	if (gettimeofday(&start_time, NULL)) {
+		fprintf(stderr, "%s: gettimeofday failed!?!?\n", PROGNAME);
+		exit(errno);
+	}
+	memset(&progressInfo, 0, sizeof(RF_ProgressInfo_t));
+
+	percent_done = 0;
+	do_ioctl(fd, option, &progressInfo, "");
+	start_value = progressInfo.completed;
+	current_time = start_time;
+
+	/*
+	 * No estimate exists yet; a negative value makes get_time_string()
+	 * print its placeholder instead of reading an indeterminate value.
+	 */
+	last_eta = -1;
+	tbit_value = 0;
+	while (progressInfo.completed < progressInfo.total) {
+
+		percent_done = (progressInfo.completed * 100) /
+		    progressInfo.total;
+
+		get_bar(bar_buffer, percent_done, 40);
+
+		elapsed_sec = current_time.tv_sec - start_time.tv_sec;
+		elapsed_usec = current_time.tv_usec - start_time.tv_usec;
+		if (elapsed_usec < 0) {
+			/* borrow one second to keep the usec part positive */
+			elapsed_usec += 1000000;
+			elapsed_sec--;
+		}
+
+		elapsed = (double) elapsed_sec +
+		    (double) elapsed_usec / 1000000.0;
+
+		amount = progressInfo.completed - start_value;
+		if (amount < 0)	/* we don't do negatives (yet?) */
+			amount = 0;
+
+		if (elapsed == 0)
+			rate = 0.0;
+		else
+			rate = amount / elapsed;
+
+		if (rate > 0.0) {
+			simple_eta = (int) (((double) progressInfo.total -
+			    (double) progressInfo.completed) / rate);
+		} else {
+			simple_eta = -1;
+		}
+
+		/* fall back to the last usable estimate when there is none */
+		if (simple_eta <= 0) {
+			simple_eta = last_eta;
+		} else {
+			last_eta = simple_eta;
+		}
+
+		get_time_string(eta_buffer, simple_eta);
+
+		snprintf(buffer, sizeof(buffer), "\r%3d%% |%s| ETA: %s %c",
+		    percent_done, bar_buffer, eta_buffer, tbits[tbit_value]);
+
+		/*
+		 * Go through stdio so the line cannot overtake text that
+		 * earlier printf() calls still hold in the stdout buffer.
+		 */
+		fputs(buffer, stdout);
+		fflush(stdout);
+
+		if (++tbit_value > 3)
+			tbit_value = 0;
+
+		sleep(2);
+
+		if (gettimeofday(&current_time, NULL)) {
+			fprintf(stderr, "%s: gettimeofday failed!?!?\n",
+			    PROGNAME);
+			exit(errno);
+		}
+
+		do_ioctl(fd, option, &progressInfo, "");
+	}
+	printf("\n");
+}
+/*
+ * Bar source text: rows of '*' followed by rows of blanks.
+ * STAR_MIDPOINT is the offset of the first blank, i.e. the number of
+ * '*' characters (4 rows of 40).  A bar is produced by copying from a
+ * position a little before that midpoint.
+ * If you've got a screen wider than 160 characters, "tough".
+ * NOTE(review): the blank rows are meant to be 40 characters each; this
+ * rendering may have collapsed them -- verify against the repository.
+ */
+
+/* parenthesised so the macro is safe inside any larger expression */
+#define STAR_MIDPOINT (4*40)
+const char stars[] = "****************************************"
+ "****************************************"
+ "****************************************"
+ "****************************************"
+ " "
+ " "
+ " "
+ " "
+ " ";
+
+/*
+ * get_bar -- render a progress bar for `percent' into `string'.
+ *
+ * The bar is a window onto stars[]: it starts far enough before the
+ * star/blank boundary that the requested share of the width is '*' and
+ * the rest is blank.  `max_strlen' is capped at STAR_MIDPOINT and is
+ * passed to snprintf() as the buffer size, so at most max_strlen - 1
+ * characters plus the terminator are written.
+ */
+static void
+get_bar(char *string, double percent, int max_strlen)
+{
+	int width;
+	int first;
+
+	width = (max_strlen > STAR_MIDPOINT) ? STAR_MIDPOINT : max_strlen;
+
+	first = STAR_MIDPOINT - (int) ((percent * width) / 100);
+	if (first < 0)
+		first = 0;
+
+	snprintf(string, width, "%s", stars + first);
+}
+
+static void
+get_time_string(string,simple_time) /* format a duration in seconds as [HH:]MM:SS */
+ char *string; /* output; written with a 1024-byte limit, so it must be at least that large */
+ int simple_time; /* seconds; a negative value selects the "--:--" placeholder */
+{
+ int minutes, seconds, hours;
+ char hours_buffer[5]; /* "HH:" or blank padding; longer hour counts are truncated by snprintf */
+ char minutes_buffer[5];
+ char seconds_buffer[5];
+
+ if (simple_time >= 0) {
+
+ minutes = (int) simple_time / 60;
+ seconds = ((int)simple_time - 60*minutes);
+ hours = minutes / 60;
+ minutes = minutes - 60*hours;
+
+ if (hours > 0) {
+ snprintf(hours_buffer,5,"%02d:",hours);
+ } else {
+ snprintf(hours_buffer,5," "); /* padding in place of the hours field */
+ }
+
+ snprintf(minutes_buffer,5,"%02d:",minutes);
+ snprintf(seconds_buffer,5,"%02d",seconds);
+ snprintf(string,1024,"%s%s%s",
+ hours_buffer, minutes_buffer, seconds_buffer);
+ } else {
+ snprintf(string,1024," --:--"); /* no estimate available */
+ }
+
+}
+
+static void
+usage() /* print the command synopsis on stderr and exit with status 1 */
+{
+ const char *progname = PROGNAME;
+
+ fprintf(stderr, "usage: %s [-v] -a component dev\n", progname);
+ fprintf(stderr, " %s [-v] -A yes | no | root dev\n", progname);
+ fprintf(stderr, " %s [-v] -B dev\n", progname);
+ fprintf(stderr, " %s [-v] -c config_file dev\n", progname);
+ fprintf(stderr, " %s [-v] -C config_file dev\n", progname);
+ fprintf(stderr, " %s [-v] -f component dev\n", progname);
+ fprintf(stderr, " %s [-v] -F component dev\n", progname);
+ fprintf(stderr, " %s [-v] -g component dev\n", progname);
+ fprintf(stderr, " %s [-v] -i dev\n", progname);
+ fprintf(stderr, " %s [-v] -I serial_number dev\n", progname);
+ fprintf(stderr, " %s [-v] -r component dev\n", progname);
+ fprintf(stderr, " %s [-v] -R component dev\n", progname);
+ fprintf(stderr, " %s [-v] -s dev\n", progname);
+ fprintf(stderr, " %s [-v] -S dev\n", progname);
+ fprintf(stderr, " %s [-v] -u dev\n", progname);
+#if 0 /* condensed form of the synopsis; disabled, never compiled */
+ fprintf(stderr, "usage: %s %s\n", progname,
+ "-a | -f | -F | -g | -r | -R component dev");
+ fprintf(stderr, " %s -B | -i | -s | -S -u dev\n", progname);
+ fprintf(stderr, " %s -c | -C config_file dev\n", progname);
+ fprintf(stderr, " %s -I serial_number dev\n", progname);
+#endif
+ exit(1);
+ /* NOTREACHED */
+}
+
+#if defined(__FreeBSD__)
+/*
+ * check_driver -- make sure the "raidframe" kernel module is available.
+ *
+ * Nothing is done if the module is already present; otherwise a load is
+ * attempted.  If that fails too, an error is printed and the program
+ * exits with status 1.
+ */
+static void
+check_driver(void)
+{
+	if (modfind("raidframe") != -1)
+		return;		/* already loaded */
+	if (kldload("raidframe") != -1)
+		return;		/* loaded on demand */
+
+	printf("Error: Cannot load RAIDframe driver.\n");
+	exit(1);
+}
+#endif
+
diff --git a/sbin/raidctl/rf_configure.c b/sbin/raidctl/rf_configure.c
new file mode 100644
index 0000000..8df7889
--- /dev/null
+++ b/sbin/raidctl/rf_configure.c
@@ -0,0 +1,583 @@
+/* $FreeBSD$ */
+/* $NetBSD: rf_configure.c,v 1.13 2001/01/27 19:32:47 oster Exp $ */
+
+/*
+ * Copyright (c) 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Mark Holland
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/***************************************************************
+ *
+ * rf_configure.c -- code related to configuring the raidframe system
+ *
+ * configuration is complicated by the fact that we want the same
+ * driver to work both in the kernel and at user level. In the
+ * kernel, we can't read the configuration file, so we configure
+ * by running a user-level program that reads the config file,
+ * creates a data structure describing the configuration and
+ * passes it into the kernel via an ioctl. Since we want the config
+ * code to be common between the two versions of the driver, we
+ * configure using the same two-step process when running at
+ * user level. Of course, at user level, the config structure is
+ * passed directly to the config routine, rather than via ioctl.
+ *
+ * This file is not compiled into the kernel, so we have no
+ * need for KERNEL ifdefs.
+ *
+ **************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dev/raidframe/rf_raid.h>
+#include <dev/raidframe/rf_raidframe.h>
+#include <dev/raidframe/rf_general.h>
+#include <dev/raidframe/rf_decluster.h>
+#include <dev/raidframe/rf_configure.h>
+
+/*
+ * XXX we include this here so we don't need to drag rf_debugMem.c into
+ * the picture... This is userland, after all...
+ */
+
+/*
+ * XXX sucky hack to override the defn. of RF_Malloc as given in
+ * rf_debugMem.c... but I *really* don't want (nor need) to link with
+ * that file here in userland.. GO
+ */
+
+#undef RF_Malloc
+#define RF_Malloc(_p_, _size_, _cast_) \
+ { \
+ _p_ = _cast_ malloc((u_long)_size_); \
+ bzero((char *)_p_, _size_); \
+ }
+
+/*
+ * Argument values handed to rf_MakeLayoutSpecificDeclustered() through
+ * the table below: whether the layout uses distributed sparing.
+ */
+int distSpareYes = 1;
+int distSpareNo = 0;
+
+/*
+ * The mapsw[] table below contains all the various RAID types that
+ * might be supported by the kernel.  The actual supported types are
+ * found in sys/dev/raidframe/rf_layout.c.
+ *
+ * Each entry gives the parity configuration character, a descriptive
+ * name, the routine that builds the layout-specific data and the
+ * argument passed to that routine.  A '\0' configuration character ends
+ * the list.
+ */
+static RF_LayoutSW_t mapsw[] = {
+	{'T', "Parity declustering",		/* parity declustering */
+	    rf_MakeLayoutSpecificDeclustered, &distSpareNo},
+	{'D', "Distributed sparing parity declustering", /* with dist. sparing */
+	    rf_MakeLayoutSpecificDeclustered, &distSpareYes},
+	{'Q', "Declustered P+Q",		/* declustered P+Q */
+	    rf_MakeLayoutSpecificDeclustered, &distSpareNo},
+	{'R', "RAID Level 5 rotated sparing",	/* RAID 5 with rotated sparing */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'C', "Chained Declustering",		/* Chained Declustering */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'I', "Interleaved Declustering",	/* Interleaved Declustering */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'0', "RAID Level 0",			/* RAID level 0 */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'1', "RAID Level 1",			/* RAID level 1 */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'4', "RAID Level 4",			/* RAID level 4 */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'5', "RAID Level 5",			/* RAID level 5 */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'E', "EvenOdd",			/* Evenodd */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'e', "Declustered EvenOdd",		/* Declustered Evenodd */
+	    rf_MakeLayoutSpecificDeclustered, &distSpareNo},
+	{'L', "Parity logging",			/* parity logging */
+	    rf_MakeLayoutSpecificNULL, NULL},
+	{'\0', NULL, NULL, NULL}		/* end-of-list marker */
+};
+/*
+ * rf_GetLayout -- return the mapsw[] entry whose configuration character
+ * equals `parityConfig', or NULL if the table has no such entry.
+ */
+RF_LayoutSW_t *
+rf_GetLayout(RF_ParityConfig_t parityConfig)
+{
+	RF_LayoutSW_t *entry = mapsw;
+
+	/* walk until a match or the '\0' end-of-list marker */
+	while (entry->parityConfig && entry->parityConfig != parityConfig)
+		entry++;
+
+	if (!entry->parityConfig)
+		return (NULL);
+	RF_ASSERT(entry->parityConfig == parityConfig);
+	return (entry);
+}
+
+static int rf_search_file_for_start_of(const char *string, char *buf,
+ int len, FILE * fp); /* 0 when a "START <string>" line is found, -1 at end of file */
+static int rf_get_next_nonblank_line(char *buf, int len, FILE * fp,
+ const char *errmsg); /* 0 when a non-blank, non-comment line was read, 1 at end of file */
+
+/*
+ * rf_MakeConfig -- read the configuration file `configname' and fill in
+ * the configuration control structure *cfgPtr.  This is used in the
+ * user-level version of the driver, and in the user-level program that
+ * configures the system via ioctl.
+ *
+ * Every section is located by its "START <name>" line, searching from
+ * the top of the file each time: "array", "debug" (optional), "queue"
+ * (optional; the default is "fifo" with one outstanding request),
+ * "disks", "spare" (optional) and "layout".  Each device named in the
+ * "disks" section is opened read/write once to check that it exists.
+ *
+ * Names are copied into the fixed-size fields of RF_Config_t with
+ * truncation and are always NUL-terminated; row, column and spare
+ * counts that do not fit those tables are rejected.
+ *
+ * Returns 0 on success.  On failure returns EINVAL, or whatever
+ * non-zero value the layout-specific parser returned.  errno is set to
+ * the value returned.
+ */
+int
+rf_MakeConfig(char *configname, RF_Config_t *cfgPtr)
+{
+	int numscanned, val, r, c, retcode, aa, bb, cc;
+	int maxRows, maxCols, maxSpares;
+	char buf[256], buf1[256], *cp;
+	RF_LayoutSW_t *lp;
+	FILE *fp;
+
+	bzero((char *) cfgPtr, sizeof(RF_Config_t));
+
+	/*
+	 * Capacity of the name tables.  Assumes devnames and spare_names
+	 * are true arrays inside RF_Config_t, as the indexing used on
+	 * them implies -- TODO confirm against rf_configure.h.
+	 */
+	maxRows = (int) (sizeof(cfgPtr->devnames) /
+	    sizeof(cfgPtr->devnames[0]));
+	maxCols = (int) (sizeof(cfgPtr->devnames[0]) /
+	    sizeof(cfgPtr->devnames[0][0]));
+	maxSpares = (int) (sizeof(cfgPtr->spare_names) /
+	    sizeof(cfgPtr->spare_names[0]));
+
+	fp = fopen(configname, "r");
+	if (!fp) {
+		RF_ERRORMSG1("Can't open config file %s\n", configname);
+		return (-1);
+	}
+	rewind(fp);
+	if (rf_search_file_for_start_of("array", buf, 256, fp)) {
+		RF_ERRORMSG1("Unable to find start of \"array\" params in config file %s\n", configname);
+		retcode = -1;
+		goto out;
+	}
+	if (rf_get_next_nonblank_line(buf, 256, fp, "Config file error (\"array\" section): unable to get numRow and numCol\n")) {
+		retcode = -1;
+		goto out;
+	}
+
+	/*
+	 * wackiness with aa, bb, cc to get around size problems on
+	 * different platforms
+	 */
+	numscanned = sscanf(buf, "%d %d %d", &aa, &bb, &cc);
+	if (numscanned != 3) {
+		RF_ERRORMSG("Config file error (\"array\" section): unable to get numRow, numCol, numSpare\n");
+		retcode = -1;
+		goto out;
+	}
+	cfgPtr->numRow = (RF_RowCol_t) aa;
+	cfgPtr->numCol = (RF_RowCol_t) bb;
+	cfgPtr->numSpare = (RF_RowCol_t) cc;
+
+	/* debug section is optional */
+	for (c = 0; c < RF_MAXDBGV; c++)
+		cfgPtr->debugVars[c][0] = '\0';
+	rewind(fp);
+	if (!rf_search_file_for_start_of("debug", buf, 256, fp)) {
+		for (c = 0; c < RF_MAXDBGV; c++) {
+			if (rf_get_next_nonblank_line(buf, 256, fp, NULL))
+				break;
+			cp = rf_find_non_white(buf);
+			if (!strncmp(cp, "START", strlen("START")))
+				break;
+			/* bounded: a line may be longer than one slot */
+			strncpy(cfgPtr->debugVars[c], cp,
+			    sizeof(cfgPtr->debugVars[c]) - 1);
+			cfgPtr->debugVars[c][sizeof(cfgPtr->debugVars[c]) - 1] =
+			    '\0';
+		}
+	}
+
+	/*
+	 * Disk queue section.  The queue specifier line contains two
+	 * entries: the first word names the queue to be used, the second
+	 * is the max number of requests that can be outstanding on the
+	 * disk itself (typically 1).  The line is only parsed when one was
+	 * actually read; otherwise the defaults set here are kept.
+	 */
+	rewind(fp);
+	strcpy(cfgPtr->diskQueueType, "fifo");
+	cfgPtr->maxOutstandingDiskReqs = 1;
+	if (rf_search_file_for_start_of("queue", buf, 256, fp) ||
+	    rf_get_next_nonblank_line(buf, 256, fp, NULL)) {
+		RF_ERRORMSG2("[No disk queue discipline specified in config file %s. Using %s.]\n", configname, cfgPtr->diskQueueType);
+	} else if (sscanf(buf, "%255s %d", buf1, &val) != 2) {
+		RF_ERRORMSG1("Can't determine queue type and/or max outstanding reqs from line: %s", buf);
+		RF_ERRORMSG2("Using %s-%d\n", cfgPtr->diskQueueType, cfgPtr->maxOutstandingDiskReqs);
+	} else {
+		strncpy(cfgPtr->diskQueueType, buf1,
+		    sizeof(cfgPtr->diskQueueType) - 1);
+		cfgPtr->diskQueueType[sizeof(cfgPtr->diskQueueType) - 1] =
+		    '\0';
+		cfgPtr->maxOutstandingDiskReqs = val;
+	}
+
+	rewind(fp);
+
+	if (rf_search_file_for_start_of("disks", buf, 256, fp)) {
+		RF_ERRORMSG1("Can't find \"disks\" section in config file %s\n", configname);
+		retcode = -1;
+		goto out;
+	}
+	if (cfgPtr->numRow > maxRows || cfgPtr->numCol > maxCols) {
+		RF_ERRORMSG2("Config file error: %d rows by %d columns exceeds the supported array size\n", (int) cfgPtr->numRow, (int) cfgPtr->numCol);
+		retcode = -1;
+		goto out;
+	}
+	for (r = 0; r < cfgPtr->numRow; r++) {
+		for (c = 0; c < cfgPtr->numCol; c++) {
+			int devfd;
+			char bfr[256], *bfr1;
+
+			if (rf_get_next_nonblank_line(&bfr[0], 256, fp, NULL)) {
+				RF_ERRORMSG2("Config file error: unable to get device file for disk at row %d col %d\n", r, c);
+				retcode = -1;
+				goto out;
+			}
+			/* Get rid of the newline at the end of the string */
+			if ((bfr1 = strchr(&bfr[0], '\n')) != NULL)
+				*bfr1 = '\0';
+			/* Make sure the device exists */
+			if ((devfd = open(&bfr[0], O_RDWR)) < 0) {
+				RF_ERRORMSG2(
+				    "Config file error: device %s, %s\n",
+				    &bfr[0], strerror(errno));
+				retcode = -1;
+				goto out;
+			}
+			close(devfd);
+			strncpy(cfgPtr->devnames[r][c], &bfr[0],
+			    sizeof(cfgPtr->devnames[r][c]) - 1);
+			cfgPtr->devnames[r][c][sizeof(cfgPtr->devnames[r][c]) - 1] =
+			    '\0';
+		}
+	}
+
+	/* "spare" section is optional */
+	rewind(fp);
+	if (rf_search_file_for_start_of("spare", buf, 256, fp))
+		cfgPtr->numSpare = 0;
+	if (cfgPtr->numSpare > maxSpares) {
+		RF_ERRORMSG1("Config file error: %d spare disks exceeds the supported number\n", (int) cfgPtr->numSpare);
+		retcode = -1;
+		goto out;
+	}
+	for (c = 0; c < cfgPtr->numSpare; c++) {
+		/*
+		 * Read into the full-size line buffer first: a spare_names
+		 * slot is smaller than the longest line that can be read.
+		 */
+		if (rf_get_next_nonblank_line(buf, 256, fp, NULL)) {
+			RF_ERRORMSG1("Config file error: unable to get device file for spare disk %d\n", c);
+			retcode = -1;
+			goto out;
+		}
+		/* drop the newline, as is done for the "disks" entries */
+		if ((cp = strchr(buf, '\n')) != NULL)
+			*cp = '\0';
+		strncpy(cfgPtr->spare_names[c], buf,
+		    sizeof(cfgPtr->spare_names[c]) - 1);
+		cfgPtr->spare_names[c][sizeof(cfgPtr->spare_names[c]) - 1] =
+		    '\0';
+	}
+
+	/* scan the file for the block related to layout */
+	rewind(fp);
+	if (rf_search_file_for_start_of("layout", buf, 256, fp)) {
+		RF_ERRORMSG1("Can't find \"layout\" section in configuration file %s\n", configname);
+		retcode = -1;
+		goto out;
+	}
+	if (rf_get_next_nonblank_line(buf, 256, fp, NULL)) {
+		RF_ERRORMSG("Config file error (\"layout\" section): unable to find common layout param line\n");
+		retcode = -1;
+		goto out;
+	}
+	c = sscanf(buf, "%d %d %d %c", &aa, &bb, &cc, &cfgPtr->parityConfig);
+	if (c != 4) {
+		RF_ERRORMSG("Unable to scan common layout line\n");
+		retcode = -1;
+		goto out;
+	}
+	cfgPtr->sectPerSU = (RF_SectorNum_t) aa;
+	cfgPtr->SUsPerPU = (RF_StripeNum_t) bb;
+	cfgPtr->SUsPerRU = (RF_StripeNum_t) cc;
+
+	lp = rf_GetLayout(cfgPtr->parityConfig);
+	if (lp == NULL) {
+		RF_ERRORMSG1("Unknown parity config '%c'\n",
+		    cfgPtr->parityConfig);
+		retcode = -1;
+		goto out;
+	}
+
+	retcode = lp->MakeLayoutSpecific(fp, cfgPtr, lp->makeLayoutSpecificArg);
+out:
+	fclose(fp);
+	if (retcode < 0)
+		retcode = errno = EINVAL;
+	else
+		errno = retcode;
+	return (retcode);
+}
+
+
+/*
+ * rf_MakeLayoutSpecificNULL -- layout hook for architectures such as
+ * RAID0 that pass no layout-specific information to the configuration
+ * code.  Records an empty layout-specific area in *cfgPtr and returns 0;
+ * `fp' and `ignored' are not used.
+ */
+int
+rf_MakeLayoutSpecificNULL(FILE *fp, RF_Config_t *cfgPtr, void *ignored)
+{
+	cfgPtr->layoutSpecific = NULL;
+	cfgPtr->layoutSpecificSize = 0;
+	return (0);
+}
+
+/*
+ * rf_MakeLayoutSpecificDeclustered -- build the layout-specific data for
+ * the declustered layouts.
+ *
+ * The next non-blank line of the config file (`configfp') names a block
+ * design file.  Its first line holds "b v k r lambda [norotate]" and is
+ * followed by b*k table entries.  When *(int *)arg is non-zero
+ * (distributed sparing) one more config line names the sparemap file.
+ *
+ * The resulting buffer is laid out as: RF_SPAREMAP_NAME_LEN bytes of
+ * zero-padded sparemap name, six ints (b, v, k, r, lambda, norotate),
+ * then b*k one-byte table entries.  It is allocated here and recorded in
+ * cfgPtr->layoutSpecific / layoutSpecificSize on success only.
+ *
+ * Returns 0 on success and EINVAL on any error.  The block design file
+ * is closed and the buffer released on every failure path; a table that
+ * is shorter or longer than b*k entries is reported as a size mismatch
+ * without ever writing outside the buffer.
+ */
+int
+rf_MakeLayoutSpecificDeclustered(FILE *configfp, RF_Config_t *cfgPtr, void *arg)
+{
+	int b, v, k, r, lambda, norotate, i, val, distSpare;
+	int params[6];
+	size_t nameLen, tableBytes, totalBytes, numRead;
+	char *cfgBuf, *bdfile, *smname, *tbl;
+	char buf[256], hdrbuf[256], smbuf[256];
+	FILE *fp;
+
+	distSpare = *((int *) arg);
+
+	/* get the block design file name */
+	if (rf_get_next_nonblank_line(buf, 256, configfp,
+	    "Can't find block design file name in config file\n"))
+		return (EINVAL);
+	bdfile = rf_find_non_white(buf);
+	bdfile[strcspn(bdfile, "\n")] = '\0';	/* strip newline char */
+
+	/* open bd file, check validity of configuration */
+	if ((fp = fopen(bdfile, "r")) == NULL) {
+		RF_ERRORMSG1("RAID: config error: Can't open layout table file %s\n", bdfile);
+		return (EINVAL);
+	}
+	/* separate buffer: bdfile still points into buf */
+	if (fgets(hdrbuf, 256, fp) == NULL) {
+		RF_ERRORMSG1("RAID: config error: Can't read layout from layout table file %s\n", bdfile);
+		goto fail;
+	}
+	i = sscanf(hdrbuf, "%d %d %d %d %d %d", &b, &v, &k, &r, &lambda,
+	    &norotate);
+	if (i == 5)
+		norotate = 0;	/* no-rotate flag is optional */
+	else if (i != 6) {
+		RF_ERRORMSG("Unable to parse header line in block design file\n");
+		goto fail;
+	}
+	/* b and k size the table; neither may be negative or overflow */
+	if (b < 0 || k < 0 ||
+	    (k != 0 && (size_t) b >
+	    ((size_t) -1 - RF_SPAREMAP_NAME_LEN - sizeof(params)) /
+	    (size_t) k)) {
+		RF_ERRORMSG("Unable to parse header line in block design file\n");
+		goto fail;
+	}
+
+	/* set the sparemap directory. In the in-kernel version, there's a
+	 * daemon that's responsible for finding the sparemaps */
+	if (distSpare) {
+		if (rf_get_next_nonblank_line(smbuf, 256, configfp,
+		    "Can't find sparemap file name in config file\n"))
+			goto fail;
+		smname = rf_find_non_white(smbuf);
+		smname[strcspn(smname, "\n")] = '\0';	/* strip newline */
+	} else {
+		smbuf[0] = '\0';
+		smname = smbuf;
+	}
+	nameLen = strlen(smname);
+	if (nameLen >= RF_SPAREMAP_NAME_LEN) {
+		/* must fit the name field together with its terminator */
+		RF_ERRORMSG1("RAID: config error: sparemap file name %s is too long\n", smname);
+		goto fail;
+	}
+
+	/* allocate a zeroed buffer to hold the configuration info */
+	tableBytes = (size_t) b * (size_t) k;
+	totalBytes = RF_SPAREMAP_NAME_LEN + sizeof(params) + tableBytes;
+	/* can't use RF_Malloc here b/c debugMem module not yet init'd */
+	cfgBuf = calloc(1, totalBytes);
+	if (cfgBuf == NULL) {
+		RF_ERRORMSG("RAID: config error: out of memory for layout specific data\n");
+		goto fail;
+	}
+
+	/* install name of sparemap file; the rest of the field is zero */
+	memcpy(cfgBuf, smname, nameLen);
+
+	/*
+	 * fill in the buffer with the block design parameters
+	 * and then the block design itself
+	 */
+	params[0] = b;
+	params[1] = v;
+	params[2] = k;
+	params[3] = r;
+	params[4] = lambda;
+	params[5] = norotate;
+	memcpy(cfgBuf + RF_SPAREMAP_NAME_LEN, params, sizeof(params));
+
+	tbl = cfgBuf + RF_SPAREMAP_NAME_LEN + sizeof(params);
+	numRead = 0;
+	while (fscanf(fp, "%d", &val) == 1) {
+		/* keep counting past the end, but never store there */
+		if (numRead < tableBytes)
+			tbl[numRead] = (char) val;
+		numRead++;
+	}
+	fclose(fp);
+
+	if (numRead != tableBytes) {
+		RF_ERRORMSG2("Size mismatch creating layout specific data: is %d sb %d bytes\n", (int) (RF_SPAREMAP_NAME_LEN + sizeof(params) + numRead), (int) totalBytes);
+		free(cfgBuf);
+		return (EINVAL);
+	}
+
+	cfgPtr->layoutSpecificSize = totalBytes;
+	cfgPtr->layoutSpecific = (void *) cfgBuf;
+	return (0);
+
+fail:
+	fclose(fp);
+	return (EINVAL);
+}
+
+/****************************************************************************
+ *
+ * utilities
+ *
+ ***************************************************************************/
+
+/*
+ * rf_find_non_white -- skip leading blanks and tabs.
+ * Returns a pointer to the first character of `p' that is neither a
+ * space nor a tab; this is the terminating '\0' if there is none.
+ */
+char *
+rf_find_non_white(char *p)
+{
+	return (p + strspn(p, " \t"));
+}
+
+/*
+ * rf_find_white -- skip to the next blank or tab.
+ * Returns a pointer to the first space or tab in `p', or to the
+ * terminating '\0' if the string contains neither.
+ */
+char *
+rf_find_white(char *p)
+{
+	return (p + strcspn(p, " \t"));
+}
+
+/*
+ * rf_search_file_for_start_of -- read lines from `fp' (at most `len'
+ * bytes each, into `buf') until one says "START string".
+ *
+ * Leading blanks are ignored and both words are compared as prefixes.
+ * Returns 0 with the stream positioned just after the matching line, or
+ * -1 if the end of the file is reached first.
+ */
+static int
+rf_search_file_for_start_of(const char *string, char *buf, int len, FILE *fp)
+{
+	const size_t keywordLen = strlen("START");
+	const size_t nameLen = strlen(string);
+	char *cursor;
+
+	while (fgets(buf, len, fp) != NULL) {
+		cursor = rf_find_non_white(buf);
+		if (strncmp(cursor, "START", keywordLen) != 0)
+			continue;
+		/* step over the keyword and the blanks that follow it */
+		cursor = rf_find_non_white(rf_find_white(cursor));
+		if (strncmp(cursor, string, nameLen) == 0)
+			return (0);
+	}
+	return (-1);
+}
+
+/*
+ * rf_get_next_nonblank_line -- read from `fp' into `buf' until an
+ * interesting line is found.
+ *
+ * Lines that are empty, contain only blanks or tabs, or whose first
+ * non-blank character is '#' are skipped.  At most `len' bytes,
+ * terminator included, are stored in `buf', so `len' must be the real
+ * size of the caller's buffer.
+ *
+ * Returns 0 with the line (newline included) in `buf', or 1 at end of
+ * file, in which case `errmsg' is reported unless it is NULL.
+ */
+static int
+rf_get_next_nonblank_line(char *buf, int len, FILE *fp, const char *errmsg)
+{
+	char *p;
+
+	/* honour the caller's size rather than assuming 256 bytes */
+	while (fgets(buf, len, fp) != NULL) {
+		p = rf_find_non_white(buf);
+		if (*p == '\n' || *p == '\0' || *p == '#')
+			continue;
+		return (0);
+	}
+	if (errmsg)
+		RF_ERRORMSG1("%s", errmsg);
+	return (1);
+}
+
+/* Release a spare table of `nrows' rows; unallocated rows may be NULL. */
+static void
+rf_free_spare_table(RF_SpareTableEntry_t **table, int nrows)
+{
+	int i;
+
+	if (table == NULL)
+		return;
+	for (i = 0; i < nrows; i++)
+		free(table[i]);
+	free(table);
+}
+
+/*
+ * Allocates an array for the spare table, and initializes it from a file.
+ * In the user-level version, this is called when recon is initiated.
+ * When/if I move recon into the kernel, there'll be a daemon that does
+ * an ioctl into raidframe which will block until a spare table is needed.
+ * When it returns, it will read a spare table from the file system,
+ * pass it into the kernel via a different ioctl, and then block again
+ * on the original ioctl.
+ *
+ * This is specific to the declustered layout, but doesn't belong in
+ * rf_decluster.c because it uses stuff that can't be compiled into
+ * the kernel, and it needs to be compiled into the user-level sparemap daemon.
+ *
+ * `req' gives the table dimensions and the failed column; `fname' is
+ * the sparemap file.  After a header line, the file must contain a
+ * "fdisk <fcol>" line followed by TablesPerSpareRegion * TableDepthInPUs
+ * tuples "table tuple spareDisk spareBlockOffset".
+ *
+ * Returns the table (an array of TablesPerSpareRegion rows of
+ * BlocksPerTable entries, unset entries holding -1), or NULL on any
+ * error.  On error the file is closed and the table released.  Tuples
+ * are range-checked explicitly before use instead of relying on
+ * RF_ASSERT, which may not be active in utility builds (TODO confirm),
+ * and a missing "fdisk" line ends the search at end of file.
+ */
+void *
+rf_ReadSpareTable(RF_SparetWait_t *req, char *fname)
+{
+	int i, j, numFound, linecount, tableNum, tupleNum,
+	    spareDisk, spareBlkOffset;
+	int numTables;
+	char buf[1024], targString[100], errString[100];
+	RF_SpareTableEntry_t **table;
+	FILE *fp = NULL;
+
+	/* allocate and initialize the table (calloc zero-fills it) */
+	numTables = req->TablesPerSpareRegion;
+	table = calloc(numTables > 0 ? (size_t) numTables : 1,
+	    sizeof(*table));
+	if (table == NULL) {
+		fprintf(stderr,
+		    "rf_ReadSpareTable: out of memory for spare table\n");
+		return (NULL);
+	}
+	for (i = 0; i < numTables; i++) {
+		table[i] = calloc(req->BlocksPerTable > 0 ?
+		    (size_t) req->BlocksPerTable : 1, sizeof(**table));
+		if (table[i] == NULL) {
+			fprintf(stderr,
+			    "rf_ReadSpareTable: out of memory for spare table\n");
+			goto fail;
+		}
+		for (j = 0; j < req->BlocksPerTable; j++)
+			table[i][j].spareDisk =
+			    table[i][j].spareBlockOffsetInSUs = -1;
+	}
+
+	/* 2. open sparemap file, sanity check */
+	if ((fp = fopen(fname, "r")) == NULL) {
+		fprintf(stderr,
+		    "rf_ReadSpareTable: Can't open sparemap file %s\n", fname);
+		goto fail;
+	}
+	if (rf_get_next_nonblank_line(buf, 1024, fp,
+	    "Invalid sparemap file: can't find header line\n"))
+		goto fail;
+	if (buf[strlen(buf) - 1] == '\n')
+		buf[strlen(buf) - 1] = '\0';
+
+	snprintf(targString, sizeof(targString), "fdisk %d\n",
+	    (int) req->fcol);
+	snprintf(errString, sizeof(errString),
+	    "Invalid sparemap file: can't find \"fdisk %d\" line\n",
+	    (int) req->fcol);
+	do {
+		/* end of file means the section is missing: give up */
+		if (rf_get_next_nonblank_line(buf, 1024, fp, errString))
+			goto fail;
+	} while (strncmp(buf, targString, strlen(targString)) != 0);
+
+	/* no more blank lines or comments allowed now */
+	linecount = req->TablesPerSpareRegion * req->TableDepthInPUs;
+	for (i = 0; i < linecount; i++) {
+		numFound = fscanf(fp, " %d %d %d %d", &tableNum, &tupleNum,
+		    &spareDisk, &spareBlkOffset);
+		if (numFound != 4) {
+			fprintf(stderr, "Sparemap file prematurely exhausted after %d of %d lines\n", i, linecount);
+			goto fail;
+		}
+		/* the indices come from a file: never trust them blindly */
+		if (tableNum < 0 || tableNum >= req->TablesPerSpareRegion ||
+		    tupleNum < 0 || tupleNum >= req->BlocksPerTable ||
+		    spareDisk < 0 || spareDisk >= req->C ||
+		    spareBlkOffset < 0 || spareBlkOffset >=
+		    req->SpareSpaceDepthPerRegionInSUs / req->SUsPerPU) {
+			fprintf(stderr, "Invalid sparemap file: entry %d of %d is out of range\n", i, linecount);
+			goto fail;
+		}
+
+		table[tableNum][tupleNum].spareDisk = spareDisk;
+		table[tableNum][tupleNum].spareBlockOffsetInSUs =
+		    spareBlkOffset * req->SUsPerPU;
+	}
+
+	fclose(fp);
+	return ((void *) table);
+
+fail:
+	if (fp != NULL)
+		fclose(fp);
+	rf_free_spare_table(table, numTables);
+	return (NULL);
+}
OpenPOWER on IntegriCloud