diff options
author | scottl <scottl@FreeBSD.org> | 2002-10-20 08:17:39 +0000 |
---|---|---|
committer | scottl <scottl@FreeBSD.org> | 2002-10-20 08:17:39 +0000 |
commit | 710948de69ddeae56bda663219319f6d859aea1f (patch) | |
tree | 71c65823ba2e8591de708d5cb2e990a75135ee11 /sbin/raidctl | |
parent | 63bd46464d6d4587c20c1ca62fb6a6e3be132db9 (diff) | |
download | FreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.zip FreeBSD-src-710948de69ddeae56bda663219319f6d859aea1f.tar.gz |
After much delay and anticipation, welcome RAIDFrame into the FreeBSD
world. This should be considered highly experimental.
Approved-by: re
Diffstat (limited to 'sbin/raidctl')
-rw-r--r-- | sbin/raidctl/Makefile | 14 | ||||
-rw-r--r-- | sbin/raidctl/raidctl.8 | 1325 | ||||
-rw-r--r-- | sbin/raidctl/raidctl.c | 1110 | ||||
-rw-r--r-- | sbin/raidctl/rf_configure.c | 583 |
4 files changed, 3032 insertions, 0 deletions
diff --git a/sbin/raidctl/Makefile b/sbin/raidctl/Makefile new file mode 100644 index 0000000..0705eab --- /dev/null +++ b/sbin/raidctl/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ +# $NetBSD: Makefile,v 1.7 2000/05/23 00:46:53 thorpej Exp $ +PROG= raidctl +SRCS= rf_configure.c raidctl.c +MAN8= raidctl.8 + +LOOKHERE = ${.CURDIR}/../../sys + +CFLAGS+= -DRF_UTILITY=1 -I${LOOKHERE} + +DPADD= ${LIBUTIL} +LDADD= -lutil + +.include <bsd.prog.mk> diff --git a/sbin/raidctl/raidctl.8 b/sbin/raidctl/raidctl.8 new file mode 100644 index 0000000..9aef14f --- /dev/null +++ b/sbin/raidctl/raidctl.8 @@ -0,0 +1,1325 @@ +.\" $FreeBSD$ +.\" $NetBSD: raidctl.8,v 1.21 2000/08/10 15:14:14 oster Exp $ +.\" +.\" Copyright (c) 1998 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Greg Oster +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the NetBSD +.\" Foundation, Inc. and its contributors. +.\" 4. Neither the name of The NetBSD Foundation nor the names of its +.\" contributors may be used to endorse or promote products derived +.\" from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" +.\" Copyright (c) 1995 Carnegie-Mellon University. +.\" All rights reserved. +.\" +.\" Author: Mark Holland +.\" +.\" Permission to use, copy, modify and distribute this software and +.\" its documentation is hereby granted, provided that both the copyright +.\" notice and this permission notice appear in all copies of the +.\" software, derivative works or modified versions, and any portions +.\" thereof, and that both notices appear in supporting documentation. +.\" +.\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" +.\" CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND +.\" FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. +.\" +.\" Carnegie Mellon requests users of this software to return to +.\" +.\" Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU +.\" School of Computer Science +.\" Carnegie Mellon University +.\" Pittsburgh PA 15213-3890 +.\" +.\" any improvements or extensions that they make and grant Carnegie the +.\" rights to redistribute these changes. 
+.\" +.Dd November 6, 1998 +.Dt RAIDCTL 8 +.Os FreeBSD +.Sh NAME +.Nm raidctl +.Nd configuration utility for the RAIDframe disk driver +.Sh SYNOPSIS +.Nm +.Op Fl v +.Fl a Ar component Ar dev +.Nm +.Op Fl v +.Fl A Op yes | no | root +.Ar dev +.Nm +.Op Fl v +.Fl B Ar dev +.Nm +.Op Fl v +.Fl c Ar config_file +.Nm +.Op Fl v +.Fl C Ar config_file +.Nm +.Op Fl v +.Fl f Ar component Ar dev +.Nm +.Op Fl v +.Fl F Ar component Ar dev +.Nm +.Op Fl v +.Fl g Ar component Ar dev +.Nm +.Op Fl v +.Fl i Ar dev +.Nm +.Op Fl v +.Fl I Ar serial_number Ar dev +.Nm +.Op Fl v +.Fl p Ar dev +.Nm +.Op Fl v +.Fl P Ar dev +.Nm +.Op Fl v +.Fl r Ar component Ar dev +.Nm +.Op Fl v +.Fl R Ar component Ar dev +.Nm +.Op Fl v +.Fl s Ar dev +.Nm +.Op Fl v +.Fl S Ar dev +.Nm +.Op Fl v +.Fl u Ar dev +.Sh DESCRIPTION +.Nm +is the user-land control program for +.Xr raid 4 , +the RAIDframe disk device. +.Nm +is primarily used to dynamically configure and unconfigure RAIDframe disk +devices. For more information about the RAIDframe disk device, see +.Xr raid 4 . +.Pp +This document assumes the reader has at least rudimentary knowledge of +RAID and RAID concepts. +.Pp +The command-line options for +.Nm +are as follows: +.Bl -tag -width indent +.It Fl a Ar component Ar dev +Add +.Ar component +as a hot spare for the device +.Ar dev . +.It Fl A Ic yes Ar dev +Make the RAID set auto-configurable. The RAID set will be +automatically configured at boot +.Ar before +the root filesystem is +mounted. Note that all components of the set must be of type RAID in the +disklabel. +.It Fl A Ic no Ar dev +Turn off auto-configuration for the RAID set. +.It Fl A Ic root Ar dev +Make the RAID set auto-configurable, and also mark the set as being +eligible to be the root partition. A RAID set configured this way +will +.Ar override +the use of the boot disk as the root device. All components of the +set must be of type RAID in the disklabel. Note that the kernel being +booted must currently reside on a non-RAID set. 
+.It Fl B Ar dev +Initiate a copyback of reconstructed data from a spare disk to +its original disk. This is performed after a component has failed, +and the failed drive has been reconstructed onto a spare drive. +.It Fl c Ar config_file +Configure a RAIDframe device +according to the configuration given in +.Ar config_file . +A description of the contents of +.Ar config_file +is given later. +.It Fl C Ar config_file +As for +.Ar -c , +but forces the configuration to take place. This is required the +first time a RAID set is configured. +.It Fl f Ar component Ar dev +This marks the specified +.Ar component +as having failed, but does not initiate a reconstruction of that +component. +.It Fl F Ar component Ar dev +Fails the specified +.Ar component +of the device, and immediately begins a reconstruction of the failed +disk onto an available hot spare. This is one of the mechanisms used to start +the reconstruction process if a component does have a hardware failure. +.It Fl g Ar component Ar dev +Get the component label for the specified component. +.It Fl i Ar dev +Initialize the RAID device. In particular, (re-write) the parity on +the selected device. This +.Ar MUST +be done for +.Ar all +RAID sets before the RAID device is labeled and before +filesystems are created on the RAID device. +.It Fl I Ar serial_number Ar dev +Initialize the component labels on each component of the device. +.Ar serial_number +is used as one of the keys in determining whether a +particular set of components belong to the same RAID set. While not +strictly enforced, different serial numbers should be used for +different RAID sets. This step +.Ar MUST +be performed when a new RAID set is created. +.It Fl p Ar dev +Check the status of the parity on the RAID set. Displays a status +message, and returns successfully if the parity is up-to-date. +.It Fl P Ar dev +Check the status of the parity on the RAID set, and initialize +(re-write) the parity if the parity is not known to be up-to-date. 
+This is normally used after a system crash (and before a +.Xr fsck 8 ) +to ensure the integrity of the parity. +.It Fl r Ar component Ar dev +Remove the spare disk specified by +.Ar component +from the set of available spare components. +.It Fl R Ar component Ar dev +Fails the specified +.Ar component , +if necessary, and immediately begins a reconstruction back to +.Ar component . +This is useful for reconstructing back onto a component after +it has been replaced following a failure. +.It Fl s Ar dev +Display the status of the RAIDframe device for each of the components +and spares. +.It Fl S Ar dev +Check the status of parity re-writing, component reconstruction, and +component copyback. The output indicates the amount of progress +achieved in each of these areas. +.It Fl u Ar dev +Unconfigure the RAIDframe device. +.It Fl v +Be more verbose. For operations such as reconstructions, parity +re-writing, and copybacks, provide a progress indicator. +.El +.Pp +The device used by +.Nm +is specified by +.Ar dev . +.Ar dev +may be either the full name of the device, e.g. /dev/rraid0d, +for the i386 architecture, and /dev/rraid0c +for all others, or just simply raid0 (for /dev/rraid0d). +.Pp +The format of the configuration file is complex, and +only an abbreviated treatment is given here. In the configuration +files, a +.Sq # +indicates the beginning of a comment. +.Pp +There are 4 required sections of a configuration file, and 2 +optional sections. Each section begins with a +.Sq START , +followed by +the section name, and the configuration parameters associated with that +section. The first section is the +.Sq array +section, and it specifies +the number of rows, columns, and spare disks in the RAID set. For +example: +.Bd -unfilled -offset indent +START array +1 3 0 +.Ed +.Pp +indicates an array with 1 row, 3 columns, and 0 spare disks. Note +that although multi-dimensional arrays may be specified, they are +.Ar NOT +supported in the driver. 
+.Pp +The second section, the +.Sq disks +section, specifies the actual +components of the device. For example: +.Bd -unfilled -offset indent +START disks +/dev/da0s1e +/dev/da1s1e +/dev/da2s1e +.Ed +.Pp +specifies the three component disks to be used in the RAID device. If +any of the specified drives cannot be found when the RAID device is +configured, then they will be marked as +.Sq failed , +and the system will +operate in degraded mode. Note that it is +.Ar imperative +that the order of the components in the configuration file does not +change between configurations of a RAID device. Changing the order +of the components will result in data loss if the set is configured +with the +.Fl C +option. In normal circumstances, the RAID set will not configure if +only +.Fl c +is specified, and the components are out-of-order. +.Pp +The next section, which is the +.Sq spare +section, is optional, and, if +present, specifies the devices to be used as +.Sq hot spares +-- devices +which are on-line, but are not actively used by the RAID driver unless +one of the main components fail. A simple +.Sq spare +section might be: +.Bd -unfilled -offset indent +START spare +/dev/da3s1e +.Ed +.Pp +for a configuration with a single spare component. If no spare drives +are to be used in the configuration, then the +.Sq spare +section may be omitted. +.Pp +The next section is the +.Sq layout +section. This section describes the +general layout parameters for the RAID device, and provides such +information as sectors per stripe unit, stripe units per parity unit, +stripe units per reconstruction unit, and the parity configuration to +use. This section might look like: +.Bd -unfilled -offset indent +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level +32 1 1 5 +.Ed +.Pp +The sectors per stripe unit specifies, in blocks, the interleave +factor; i.e. the number of contiguous sectors to be written to each +component for a single stripe. 
Appropriate selection of this value +(32 in this example) is the subject of much research in RAID +architectures. The stripe units per parity unit and +stripe units per reconstruction unit are normally each set to 1. +While certain values above 1 are permitted, a discussion of valid +values and the consequences of using anything other than 1 are outside +the scope of this document. The last value in this section (5 in this +example) indicates the parity configuration desired. Valid entries +include: +.Bl -tag -width inde +.It 0 +RAID level 0. No parity, only simple striping. +.It 1 +RAID level 1. Mirroring. The parity is the mirror. +.It 4 +RAID level 4. Striping across components, with parity stored on the +last component. +.It 5 +RAID level 5. Striping across components, parity distributed across +all components. +.El +.Pp +There are other valid entries here, including those for Even-Odd +parity, RAID level 5 with rotated sparing, Chained declustering, +and Interleaved declustering, but as of this writing the code for +those parity operations has not been tested with +.Fx . +.Pp +The next required section is the +.Sq queue +section. This is most often +specified as: +.Bd -unfilled -offset indent +START queue +fifo 100 +.Ed +.Pp +where the queuing method is specified as fifo (first-in, first-out), +and the size of the per-component queue is limited to 100 requests. +Other queuing methods may also be specified, but a discussion of them +is beyond the scope of this document. +.Pp +The final section, the +.Sq debug +section, is optional. For more details +on this the reader is referred to the RAIDframe documentation +discussed in the +.Sx HISTORY +section. + +See +.Sx EXAMPLES +for a more complete configuration file example. 
+ +.Sh EXAMPLES + +It is highly recommended that before using the RAID driver for real +filesystems that the system administrator(s) become quite familiar +with the use of +.Nm , +and that they understand how the component reconstruction process +works. The examples in this section will focus on configuring a +number of different RAID sets of varying degrees of redundancy. +By working through these examples, administrators should be able to +develop a good feel for how to configure a RAID set, and how to +initiate reconstruction of failed components. +.Pp +In the following examples +.Sq raid0 +will be used to denote the RAID device. Depending on the +architecture, +.Sq /dev/rraid0c +or +.Sq /dev/rraid0d +may be used in place of +.Sq raid0 . +.Pp +.Ss Initialization and Configuration +The initial step in configuring a RAID set is to identify the components +that will be used in the RAID set. All components should be the same +size. Each component should have a disklabel type of +.Dv FS_RAID , +and a typical disklabel entry for a RAID component +might look like: +.Bd -unfilled -offset indent +f: 1800000 200495 RAID # (Cyl. 405*- 4041*) +.Ed +.Pp +While +.Dv FS_BSDFFS +will also work as the component type, the type +.Dv FS_RAID +is preferred for RAIDframe use, as it is required for features such as +auto-configuration. As part of the initial configuration of each RAID +set, each component will be given a +.Sq component label . +A +.Sq component label +contains important information about the component, including a +user-specified serial number, the row and column of that component in +the RAID set, the redundancy level of the RAID set, a 'modification +counter', and whether the parity information (if any) on that +component is known to be correct. Component labels are an integral +part of the RAID set, since they are used to ensure that components +are configured in the correct order, and used to keep track of other +vital information about the RAID set. 
Component labels are also +required for the auto-detection and auto-configuration of RAID sets at +boot time. For a component label to be considered valid, that +particular component label must be in agreement with the other +component labels in the set. For example, the serial number, +.Sq modification counter , +number of rows and number of columns must all +be in agreement. If any of these are different, then the component is +not considered to be part of the set. See +.Xr raid 4 +for more information about component labels. +.Pp +Once the components have been identified, and the disks have +appropriate labels, +.Nm +is then used to configure the +.Xr raid 4 +device. To configure the device, a configuration +file which looks something like: +.Bd -unfilled -offset indent +START array +# numRow numCol numSpare +1 3 1 + +START disks +/dev/da1s1e +/dev/da2s1e +/dev/da3s1e + +START spare +/dev/da4s1e + +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_5 +32 1 1 5 + +START queue +fifo 100 +.Ed +.Pp +is created in a file. The above configuration file specifies a RAID 5 +set consisting of the components /dev/da1s1e, /dev/da2s1e, and /dev/da3s1e, +with /dev/da4s1e available as a +.Sq hot spare +in case one of +the three main drives should fail. A RAID 0 set would be specified in +a similar way: +.Bd -unfilled -offset indent +START array +# numRow numCol numSpare +1 4 0 + +START disks +/dev/da1s10e +/dev/da1s11e +/dev/da1s12e +/dev/da1s13e + +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_0 +64 1 1 0 + +START queue +fifo 100 +.Ed +.Pp +In this case, devices /dev/da1s10e, /dev/da1s11e, /dev/da1s12e, and /dev/da1s13e +are the components that make up this RAID set. Note that there are no +hot spares for a RAID 0 set, since there is no way to recover data if +any of the components fail. 
+.Pp +For a RAID 1 (mirror) set, the following configuration might be used: +.Bd -unfilled -offset indent +START array +# numRow numCol numSpare +1 2 0 + +START disks +/dev/da2s10e +/dev/da2s11e + +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_1 +128 1 1 1 + +START queue +fifo 100 +.Ed +.Pp +In this case, /dev/da2s10e and /dev/da2s11e are the two components of the +mirror set. While no hot spares have been specified in this +configuration, they easily could be, just as they were specified in +the RAID 5 case above. Note as well that RAID 1 sets are currently +limited to only 2 components. At present, n-way mirroring is not +possible. +.Pp +The first time a RAID set is configured, the +.Fl C +option must be used: +.Bd -unfilled -offset indent +raidctl -C raid0.conf +.Ed +.Pp +where +.Sq raid0.conf +is the name of the RAID configuration file. The +.Fl C +forces the configuration to succeed, even if any of the component +labels are incorrect. The +.Fl C +option should not be used lightly in +situations other than initial configurations, as if +the system is refusing to configure a RAID set, there is probably a +very good reason for it. After the initial configuration is done (and +appropriate component labels are added with the +.Fl I +option) then raid0 can be configured normally with: +.Bd -unfilled -offset indent +raidctl -c raid0.conf +.Ed +.Pp +When the RAID set is configured for the first time, it is +necessary to initialize the component labels, and to initialize the +parity on the RAID set. Initializing the component labels is done with: +.Bd -unfilled -offset indent +raidctl -I 112341 raid0 +.Ed +.Pp +where +.Sq 112341 +is a user-specified serial number for the RAID set. This +initialization step is +.Ar required +for all RAID sets. 
As well, using different +serial numbers between RAID sets is +.Ar strongly encouraged , +as using the same serial number for all RAID sets will only serve to +decrease the usefulness of the component label checking. +.Pp +Initializing the RAID set is done via the +.Fl i +option. This initialization +.Ar MUST +be done for +.Ar all +RAID sets, since among other things it verifies that the parity (if +any) on the RAID set is correct. Since this initialization may be +quite time-consuming, the +.Fl v +option may be also used in conjunction with +.Fl i : +.Bd -unfilled -offset indent +raidctl -iv raid0 +.Ed +.Pp +This will give more verbose output on the +status of the initialization: +.Bd -unfilled -offset indent +Initiating re-write of parity +Parity Re-write status: + 10% |**** | ETA: 06:03 / +.Ed +.Pp +The output provides a +.Sq Percent Complete +in both a numeric and graphical format, as well as an estimated time +to completion of the operation. +.Pp +Since it is the parity that provides the +.Sq redundancy +part of RAID, it is critical that the parity is correct +as much as possible. If the parity is not correct, then there is no +guarantee that data will not be lost if a component fails. +.Pp +Once the parity is known to be correct, +it is then safe to perform +.Xr disklabel 8 , +.Xr newfs 8 , +or +.Xr fsck 8 +on the device or its filesystems, and then to mount the filesystems +for use. +.Pp +Under certain circumstances (e.g. the additional component has not +arrived, or data is being migrated off of a disk destined to become a +component) it may be desirable to configure a RAID 1 set with only +a single component. This can be achieved by configuring the set with +a physically existing component (as either the first or second +component) and with a +.Sq fake +component. 
In the following: +.Bd -unfilled -offset indent +START array +# numRow numCol numSpare +1 2 0 + +START disks +/dev/da6s1e +/dev/da0s1e + +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_1 +128 1 1 1 + +START queue +fifo 100 +.Ed +.Pp +/dev/da0s1e is the real component, and will be the second disk of a RAID 1 +set. The component /dev/da6s1e, which must exist, but have no physical +device associated with it, is simply used as a placeholder. +Configuration (using +.Fl C +and +.Fl I Ar 12345 +as above) proceeds normally, but initialization of the RAID set will +have to wait until all physical components are present. After +configuration, this set can be used normally, but will be operating +in degraded mode. Once a second physical component is obtained, it +can be hot-added, the existing data mirrored, and normal operation +resumed. +.Pp +.Ss Maintenance of the RAID set +After the parity has been initialized for the first time, the command: +.Bd -unfilled -offset indent +raidctl -p raid0 +.Ed +.Pp +can be used to check the current status of the parity. To check the +parity and rebuild it if necessary (for example, after an unclean +shutdown) the command: +.Bd -unfilled -offset indent +raidctl -P raid0 +.Ed +.Pp +is used. Note that re-writing the parity can be done while +other operations on the RAID set are taking place (e.g. while doing a +.Xr fsck 8 +on a filesystem on the RAID set). However: for maximum effectiveness +of the RAID set, the parity should be known to be correct before any +data on the set is modified. 
+.Pp +To see how the RAID set is doing, the following command can be used to +show the RAID set's status: +.Bd -unfilled -offset indent +raidctl -s raid0 +.Ed +.Pp +The output will look something like: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: optimal + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: spare +Component label for /dev/da1s1e: + Row: 0 Column: 0 Num Rows: 1 Num Columns: 3 + Version: 2 Serial Number: 13432 Mod Counter: 65 + Clean: No Status: 0 + sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1 + RAID Level: 5 blocksize: 512 numBlocks: 1799936 + Autoconfig: No + Last configured as: raid0 +Component label for /dev/da2s1e: + Row: 0 Column: 1 Num Rows: 1 Num Columns: 3 + Version: 2 Serial Number: 13432 Mod Counter: 65 + Clean: No Status: 0 + sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1 + RAID Level: 5 blocksize: 512 numBlocks: 1799936 + Autoconfig: No + Last configured as: raid0 +Component label for /dev/da3s1e: + Row: 0 Column: 2 Num Rows: 1 Num Columns: 3 + Version: 2 Serial Number: 13432 Mod Counter: 65 + Clean: No Status: 0 + sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1 + RAID Level: 5 blocksize: 512 numBlocks: 1799936 + Autoconfig: No + Last configured as: raid0 +Parity status: clean +Reconstruction is 100% complete. +Parity Re-write is 100% complete. +Copyback is 100% complete. +.Ed +.Pp +This indicates that all is well with the RAID set. Of importance here +are the component lines which read +.Sq optimal , +and the +.Sq Parity status +line which indicates that the parity is up-to-date. Note that if +there are filesystems open on the RAID set, the individual components +will not be +.Sq clean +but the set as a whole can still be clean. 
+.Pp +To check the component label of /dev/da1s1e, the following is used: +.Bd -unfilled -offset indent +raidctl -g /dev/da1s1e raid0 +.Ed +.Pp +The output of this command will look something like: +.Bd -unfilled -offset indent +Component label for /dev/da1s1e: + Row: 0 Column: 0 Num Rows: 1 Num Columns: 3 + Version: 2 Serial Number: 13432 Mod Counter: 65 + Clean: No Status: 0 + sectPerSU: 32 SUsPerPU: 1 SUsPerRU: 1 + RAID Level: 5 blocksize: 512 numBlocks: 1799936 + Autoconfig: No + Last configured as: raid0 +.Ed +.Pp +.Ss Dealing with Component Failures +If for some reason +(perhaps to test reconstruction) it is necessary to pretend a drive +has failed, the following will perform that function: +.Bd -unfilled -offset indent +raidctl -f /dev/da2s1e raid0 +.Ed +.Pp +The system will then be performing all operations in degraded mode, +where missing data is re-computed from existing data and the parity. +In this case, obtaining the status of raid0 will return (in part): +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: failed + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: spare +.Ed +.Pp +Note that with the use of +.Fl f +a reconstruction has not been started. To both fail the disk and +start a reconstruction, the +.Fl F +option must be used: +.Bd -unfilled -offset indent +raidctl -F /dev/da2s1e raid0 +.Ed +.Pp +The +.Fl f +option may be used first, and then the +.Fl F +option used later, on the same disk, if desired. +Immediately after the reconstruction is started, the status will report: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: reconstructing + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: used_spare +[...] +Parity status: clean +Reconstruction is 10% complete. +Parity Re-write is 100% complete. +Copyback is 100% complete. +.Ed +.Pp +This indicates that a reconstruction is in progress. To find out how +the reconstruction is progressing the +.Fl S +option may be used. 
This will indicate the progress in terms of the +percentage of the reconstruction that is completed. When the +reconstruction is finished the +.Fl s +option will show: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: spared + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: used_spare +[...] +Parity status: clean +Reconstruction is 100% complete. +Parity Re-write is 100% complete. +Copyback is 100% complete. +.Ed +.Pp +At this point there are at least two options. First, if /dev/da2s1e is +known to be good (i.e. the failure was either caused by +.Fl f +or +.Fl F , +or the failed disk was replaced), then a copyback of the data can +be initiated with the +.Fl B +option. In this example, this would copy the entire contents of +/dev/da4s1e to /dev/da2s1e. Once the copyback procedure is complete, the +status of the device would be (in part): +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: optimal + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: spare +.Ed +.Pp +and the system is back to normal operation. +.Pp +The second option after the reconstruction is to simply use /dev/da4s1e +in place of /dev/da2s1e in the configuration file. For example, the +configuration file (in part) might now look like: +.Bd -unfilled -offset indent +START array +1 3 0 + +START disks +/dev/da1s1e +/dev/da4s1e +/dev/da3s1e +.Ed +.Pp +This can be done as /dev/da4s1e is completely interchangeable with +/dev/da2s1e at this point. Note that extreme care must be taken when +changing the order of the drives in a configuration. This is one of +the few instances where the devices and/or their orderings can be +changed without loss of data! In general, the ordering of components +in a configuration file should +.Ar never +be changed. 
+.Pp +If a component fails and there are no hot spares +available on-line, the status of the RAID set might (in part) look like: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: failed + /dev/da3s1e: optimal +No spares. +.Ed +.Pp +In this case there are a number of options. The first option is to add a hot +spare using: +.Bd -unfilled -offset indent +raidctl -a /dev/da4s1e raid0 +.Ed +.Pp +After the hot add, the status would then be: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: failed + /dev/da3s1e: optimal +Spares: + /dev/da4s1e: spare +.Ed +.Pp +Reconstruction could then take place using +.Fl F +as described above. +.Pp +A second option is to rebuild directly onto /dev/da2s1e. Once the disk +containing /dev/da2s1e has been replaced, one can simply use: +.Bd -unfilled -offset indent +raidctl -R /dev/da2s1e raid0 +.Ed +.Pp +to rebuild the /dev/da2s1e component. As the rebuilding is in progress, +the status will be: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: reconstructing + /dev/da3s1e: optimal +No spares. +.Ed +.Pp +and when completed, will be: +.Bd -unfilled -offset indent +Components: + /dev/da1s1e: optimal + /dev/da2s1e: optimal + /dev/da3s1e: optimal +No spares. +.Ed +.Pp +In circumstances where a particular component is completely +unavailable after a reboot, a special component name will be used to +indicate the missing component. For example: +.Bd -unfilled -offset indent +Components: + /dev/da2s1e: optimal + component1: failed +No spares. +.Ed +.Pp +indicates that the second component of this RAID set was not detected +at all by the auto-configuration code. The name +.Sq component1 +can be used anywhere a normal component name would be used. 
For +example, to add a hot spare to the above set, and rebuild to that hot +spare, the following could be done: +.Bd -unfilled -offset indent +raidctl -a /dev/da3s1e raid0 +raidctl -F component1 raid0 +.Ed +.Pp +at which point the data missing from +.Sq component1 +would be reconstructed onto /dev/da3s1e. +.Pp +.Ss RAID on RAID +RAID sets can be layered to create more complex and much larger RAID +sets. A RAID 0 set, for example, could be constructed from four RAID +5 sets. The following configuration file shows such a setup: +.Bd -unfilled -offset indent +START array +# numRow numCol numSpare +1 4 0 + +START disks +/dev/raid1e +/dev/raid2e +/dev/raid3e +/dev/raid4e + +START layout +# sectPerSU SUsPerParityUnit SUsPerReconUnit RAID_level_0 +128 1 1 0 + +START queue +fifo 100 +.Ed +.Pp +A similar configuration file might be used for a RAID 0 set +constructed from components on RAID 1 sets. In such a configuration, +the mirroring provides a high degree of redundancy, while the striping +provides additional speed benefits. +.Pp +.Ss Auto-configuration and Root on RAID +RAID sets can also be auto-configured at boot. To make a set +auto-configurable, simply prepare the RAID set as above, and then do +a: +.Bd -unfilled -offset indent +raidctl -A yes raid0 +.Ed +.Pp +to turn on auto-configuration for that set. To turn off +auto-configuration, use: +.Bd -unfilled -offset indent +raidctl -A no raid0 +.Ed +.Pp +RAID sets which are auto-configurable will be configured before the +root filesystem is mounted. These RAID sets are thus available for +use as a root filesystem, or for any other filesystem. A primary +advantage of using the auto-configuration is that RAID components +become more independent of the disks they reside on. For example, +SCSI ID's can change, but auto-configured sets will always be +configured correctly, even if the SCSI ID's of the component disks +have become scrambled. 
+.Pp +Having a system's root filesystem (/) on a RAID set is also allowed, +with the +.Sq a +partition of such a RAID set being used for /. +To use raid0a as the root filesystem, simply use: +.Bd -unfilled -offset indent +raidctl -A root raid0 +.Ed +.Pp +To return raid0a to be just an auto-configuring set simply use the +.Fl A Ar yes +arguments. +.Pp +Note that kernels can only be directly read from RAID 1 components on +alpha and pmax architectures. On those architectures, the +.Dv FS_RAID +filesystem is recognized by the bootblocks, and will properly load the +kernel directly from a RAID 1 component. For other architectures, or +to support the root filesystem on other RAID sets, some other +mechanism must be used to get a kernel booting. For example, a small +partition containing only the secondary boot-blocks and an alternate +kernel (or two) could be used. Once a kernel is booting however, and +an auto-configuring RAID set is found that is eligible to be root, +then that RAID set will be auto-configured and used as the root +device. If two or more RAID sets claim to be root devices, then the +user will be prompted to select the root device. At this time, RAID +0, 1, 4, and 5 sets are all supported as root devices. +.Pp +A typical RAID 1 setup with root on RAID might be as follows: +.Bl -enum +.It +wd0a - a small partition, which contains a complete, bootable, basic +NetBSD installation. +.It +wd1a - also contains a complete, bootable, basic NetBSD installation. +.It +wd0e and wd1e - a RAID 1 set, raid0, used for the root filesystem. +.It +wd0f and wd1f - a RAID 1 set, raid1, which will be used only for +swap space. +.It +wd0g and wd1g - a RAID 1 set, raid2, used for /usr, /home, or other +data, if desired. +.It +wd0h and wd1h - a RAID 1 set, raid3, if desired. +.El +.Pp +RAID sets raid0, raid1, and raid2 are all marked as +auto-configurable. raid0 is marked as being a root filesystem. 
+When new kernels are installed, the kernel is not only copied to /, +but also to wd0a and wd1a. The kernel on wd0a is required, since that +is the kernel the system boots from. The kernel on wd1a is also +required, since that will be the kernel used should wd0 fail. The +important point here is to have redundant copies of the kernel +available, in the event that one of the drives fails. +.Pp +There is no requirement that the root filesystem be on the same disk +as the kernel. For example, obtaining the kernel from wd0a, and using +da0s1e and da1s1e for raid0, and the root filesystem, is fine. It +.Ar is +critical, however, that there be multiple kernels available, in the +event of media failure. +.Pp +Multi-layered RAID devices (such as a RAID 0 set made +up of RAID 1 sets) are +.Ar not +supported as root devices or auto-configurable devices at this point. +(Multi-layered RAID devices +.Ar are +supported in general, however, as mentioned earlier.) Note that in +order to enable component auto-detection and auto-configuration of +RAID devices, the line: +.Bd -unfilled -offset indent +options RAID_AUTOCONFIG +.Ed +.Pp +must be in the kernel configuration file. See +.Xr raid 4 +for more details. +.Pp +.Ss Unconfiguration +The final operation performed by +.Nm +is to unconfigure a +.Xr raid 4 +device. This is accomplished via a simple: +.Bd -unfilled -offset indent +raidctl -u raid0 +.Ed +.Pp +at which point the device is ready to be reconfigured. +.Pp +.Ss Performance Tuning +Selection of the various parameter values which result in the best +performance can be quite tricky, and often requires a bit of +trial-and-error to get those values most appropriate for a given system. +A whole range of factors come into play, including: +.Bl -enum +.It +Types of components (e.g. SCSI vs. 
IDE) and their bandwidth +.It +Types of controller cards and their bandwidth +.It +Distribution of components among controllers +.It +IO bandwidth +.It +Filesystem access patterns +.It +CPU speed +.El +.Pp +As with most performance tuning, benchmarking under real-life loads +may be the only way to measure expected performance. Understanding +some of the underlying technology is also useful in tuning. The goal +of this section is to provide pointers to those parameters which may +make significant differences in performance. +.Pp +For a RAID 1 set, a SectPerSU value of 64 or 128 is typically +sufficient. Since data in a RAID 1 set is arranged in a linear +fashion on each component, selecting an appropriate stripe size is +somewhat less critical than it is for a RAID 5 set. However: a stripe +size that is too small will cause large IO's to be broken up into a +number of smaller ones, hurting performance. At the same time, a +large stripe size may cause problems with concurrent accesses to +stripes, which may also affect performance. Thus values in the range +of 32 to 128 are often the most effective. +.Pp +Tuning RAID 5 sets is trickier. In the best case, IO is presented to +the RAID set one stripe at a time. Since the entire stripe is +available at the beginning of the IO, the parity of that stripe can +be calculated before the stripe is written, and then the stripe data +and parity can be written in parallel. When the amount of data being +written is less than a full stripe worth, the +.Sq small write +problem occurs. Since a +.Sq small write +means only a portion of the stripe on the components is going to +change, the data (and parity) on the components must be updated +slightly differently. First, the +.Sq old parity +and +.Sq old data +must be read from the components. Then the new parity is constructed, +using the new data to be written, and the old data and old parity. +Finally, the new data and new parity are written. 
All this extra data +shuffling results in a serious loss of performance, and is typically 2 +to 4 times slower than a full stripe write (or read). To combat this +problem in the real world, it may be useful to ensure that stripe +sizes are small enough that a +.Sq large IO +from the system will use exactly one large stripe write. As is seen +later, there are some filesystem dependencies which may come into play +here as well. +.Pp +Since the size of a +.Sq large IO +is often (currently) only 32K or 64K, on a 5-drive RAID 5 set it may +be desirable to select a SectPerSU value of 16 blocks (8K) or 32 +blocks (16K). Since there are 4 data sectors per stripe, the maximum +data per stripe is 64 blocks (32K) or 128 blocks (64K). Again, +empirical measurement will provide the best indicators of which +values will yield better performance. +.Pp +The parameters used for the filesystem are also critical to good +performance. For +.Xr newfs 8 , +for example, increasing the block size to 32K or 64K may improve +performance dramatically. As well, changing the cylinders-per-group +parameter from 16 to 32 or higher is often not only necessary for +larger filesystems, but may also have positive performance +implications. +.Pp +.Ss Summary +Despite the length of this man-page, configuring a RAID set is a +relatively straight-forward process. All that needs to be done is the +following steps: +.Bl -enum +.It +Use +.Xr disklabel 8 +to create the components (of type RAID). +.It +Construct a RAID configuration file: e.g. 
+.Sq raid0.conf +.It +Configure the RAID set with: +.Bd -unfilled -offset indent +raidctl -C raid0.conf +.Ed +.Pp +.It +Initialize the component labels with: +.Bd -unfilled -offset indent +raidctl -I 123456 raid0 +.Ed +.Pp +.It +Initialize other important parts of the set with: +.Bd -unfilled -offset indent +raidctl -i raid0 +.Ed +.Pp +.It +Get the default label for the RAID set: +.Bd -unfilled -offset indent +disklabel raid0 > /tmp/label +.Ed +.Pp +.It +Edit the label: +.Bd -unfilled -offset indent +vi /tmp/label +.Ed +.Pp +.It +Put the new label on the RAID set: +.Bd -unfilled -offset indent +disklabel -R -r raid0 /tmp/label +.Ed +.Pp +.It +Create the filesystem: +.Bd -unfilled -offset indent +newfs /dev/rraid0e +.Ed +.Pp +.It +Mount the filesystem: +.Bd -unfilled -offset indent +mount /dev/raid0e /mnt +.Ed +.Pp +.It +Use: +.Bd -unfilled -offset indent +raidctl -c raid0.conf +.Ed +.Pp +To re-configure the RAID set the next time it is needed, or put +raid0.conf into /etc where it will automatically be started by +the /etc/rc scripts. +.El +.Pp +.Sh WARNINGS +Certain RAID levels (1, 4, 5, 6, and others) can protect against some +data loss due to component failure. However the loss of two +components of a RAID 4 or 5 system, or the loss of a single component +of a RAID 0 system will result in the entire filesystem being lost. +RAID is +.Ar NOT +a substitute for good backup practices. +.Pp +Recomputation of parity +.Ar MUST +be performed whenever there is a chance that it may have been +compromised. This includes after system crashes, or before a RAID +device has been used for the first time. Failure to keep parity +correct will be catastrophic should a component ever fail -- it is +better to use RAID 0 and get the additional space and speed, than it +is to use parity, but not keep the parity correct. At least with RAID +0 there is no perception of increased data security. 
+.Pp +.Sh FILES +.Bl -tag -width /dev/XXrXraidX -compact +.It Pa /dev/{,r}raid* +.Cm raid +device special files. +.El +.Pp +.Sh SEE ALSO +.Xr raid 4 , +.Xr ccd 4 , +.Xr rc 8 +.Sh BUGS +Hot-spare removal is currently not available. +.Sh HISTORY +RAIDframe is a framework for rapid prototyping of RAID structures +developed by the folks at the Parallel Data Laboratory at Carnegie +Mellon University (CMU). +A more complete description of the internals and functionality of +RAIDframe is found in the paper "RAIDframe: A Rapid Prototyping Tool +for RAID Systems", by William V. Courtright II, Garth Gibson, Mark +Holland, LeAnn Neal Reilly, and Jim Zelenka, and published by the +Parallel Data Laboratory of Carnegie Mellon University. +.Pp +The +.Nm +command first appeared as a program in CMU's RAIDframe v1.1 distribution. This +version of +.Nm +is a complete re-write, and first appeared in +.Fx 4.4 . +.Sh COPYRIGHT +.Bd -unfilled +The RAIDframe Copyright is as follows: + +Copyright (c) 1994-1996 Carnegie-Mellon University. +All rights reserved. + +Permission to use, copy, modify and distribute this software and +its documentation is hereby granted, provided that both the copyright +notice and this permission notice appear in all copies of the +software, derivative works or modified versions, and any portions +thereof, and that both notices appear in supporting documentation. + +CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" +CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND +FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + +Carnegie Mellon requests users of this software to return to + + Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + School of Computer Science + Carnegie Mellon University + Pittsburgh PA 15213-3890 + +any improvements or extensions that they make and grant Carnegie the +rights to redistribute these changes. 
+.Ed diff --git a/sbin/raidctl/raidctl.c b/sbin/raidctl/raidctl.c new file mode 100644 index 0000000..4b7d27d --- /dev/null +++ b/sbin/raidctl/raidctl.c @@ -0,0 +1,1110 @@ +/*- + * Copyright (c) 2002 Scott Long <scottl@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* $NetBSD: raidctl.c,v 1.25 2000/10/31 14:18:39 lukem Exp $ */ +/*- + * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Greg Oster + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This program is a re-write of the original rf_ctrl program + * distributed by CMU with RAIDframe 1.1. 
+ * + * This program is the user-land interface to the RAIDframe kernel + * driver in NetBSD. + */ + +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/disklabel.h> +#if defined(__FreeBSD__) +#include <sys/linker.h> +#include <sys/module.h> +#endif + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef __FreeBSD__ +#include <paths.h> +#endif +#if defined(__NetBSD__) +#include <util.h> +#endif + +#include <dev/raidframe/rf_raidframe.h> + +int main(int, char *[]); +void do_ioctl(int, u_long, void *, const char *); +static void rf_configure(int, char*, int); +static const char *device_status(RF_DiskStatus_t); +static void rf_get_device_status(int); +static void get_component_number(int, char *, int *, int *); +static void rf_fail_disk(int, char *, int); +static void usage(void); +static void get_component_label(int, char *); +static void set_component_label(int, char *); +static void init_component_labels(int, int); +static void set_autoconfig(int, char *, char *); +static void add_hot_spare(int, char *); +static void remove_hot_spare(int, char *); +static void rebuild_in_place(int, char *); +static void check_status(int,int); +static void check_parity(int,int, char *); +static void do_meter(int, u_long); +static void get_bar(char *, double, int); +static void get_time_string(char *, int); +#if defined(__FreeBSD__) +static void check_driver(void); + +extern char *__progname; +#define PROGNAME __progname + +#define RAIDCTLDEV "/dev/raidctl" +#elif defined(__NetBSD__) +#define PROGNAME getprogname() +#endif + +int verbose; + +int +main(argc,argv) + int argc; + char *argv[]; +{ + int ch; + int num_options; + unsigned long action; + char config_filename[PATH_MAX]; + char dev_name[PATH_MAX]; + char name[PATH_MAX]; + char component[PATH_MAX]; + char autoconf[10]; + int do_recon; + int do_rewrite; + int is_clean; + int 
serial_number; + struct stat st; + int fd; + int force; + int raidID; + + num_options = 0; + action = 0; + do_recon = 0; + do_rewrite = 0; + is_clean = 0; + force = 0; + + while ((ch = getopt(argc, argv, "a:A:Bc:C:f:F:g:iI:l:r:R:sSpPuv")) + != -1) + switch(ch) { + case 'a': + action = RAIDFRAME_ADD_HOT_SPARE; + strncpy(component, optarg, PATH_MAX); + num_options++; + break; + case 'A': + action = RAIDFRAME_SET_AUTOCONFIG; + strncpy(autoconf, optarg, 10); + num_options++; + break; + case 'B': + action = RAIDFRAME_COPYBACK; + num_options++; + break; + case 'c': + case 'C': + strncpy(config_filename,optarg,PATH_MAX); + action = RAIDFRAME_CONFIGURE; + force = (ch == 'c') ? 0 : 1; +#if defined(__FreeBSD__) + check_driver(); + fd = open(RAIDCTLDEV, O_RDWR); + if (fd < 0) { + fprintf(stderr, "%s: unable to open raid " + "control device %s\n", PROGNAME, + RAIDCTLDEV); + fprintf(stderr, "Error: %s\n", strerror(errno)); + exit(1); + } + rf_configure(fd, config_filename, force); + close(fd); + exit(0); +#elif defined(__NetBSD__) + num_options++; + break; +#endif + case 'f': + action = RAIDFRAME_FAIL_DISK; + strncpy(component, optarg, PATH_MAX); + do_recon = 0; + num_options++; + break; + case 'F': + action = RAIDFRAME_FAIL_DISK; + strncpy(component, optarg, PATH_MAX); + do_recon = 1; + num_options++; + break; + case 'g': + action = RAIDFRAME_GET_COMPONENT_LABEL; + strncpy(component, optarg, PATH_MAX); + num_options++; + break; + case 'i': + action = RAIDFRAME_REWRITEPARITY; + num_options++; + break; + case 'I': + action = RAIDFRAME_INIT_LABELS; + serial_number = atoi(optarg); + num_options++; + break; + case 'l': + action = RAIDFRAME_SET_COMPONENT_LABEL; + strncpy(component, optarg, PATH_MAX); + num_options++; + break; + case 'r': + action = RAIDFRAME_REMOVE_HOT_SPARE; + strncpy(component, optarg, PATH_MAX); + num_options++; + break; + case 'R': + strncpy(component,optarg,PATH_MAX); + action = RAIDFRAME_REBUILD_IN_PLACE; + num_options++; + break; + case 's': + action = 
RAIDFRAME_GET_INFO; + num_options++; + break; + case 'S': + action = RAIDFRAME_CHECK_RECON_STATUS_EXT; + num_options++; + break; + case 'p': + action = RAIDFRAME_CHECK_PARITY; + num_options++; + break; + case 'P': + action = RAIDFRAME_CHECK_PARITY; + do_rewrite = 1; + num_options++; + break; + case 'u': + action = RAIDFRAME_SHUTDOWN; + num_options++; + break; + case 'v': + verbose = 1; + /* Don't bump num_options, as '-v' is not + an option like the others */ + /* num_options++; */ + break; + default: + usage(); + } + argc -= optind; + argv += optind; + + if ((num_options > 1) || (argc == NULL)) + usage(); + + strncpy(name,argv[0],PATH_MAX); +#if defined(__NetBSD__) + fd = opendisk(name, O_RDWR, dev_name, sizeof(dev_name), 1); +#elif defined(__FreeBSD__) + check_driver(); + + if (name[0] != '/') { + char name1[PATH_MAX]; + snprintf(name1, PATH_MAX, "%s%s", _PATH_DEV, name); + strncpy(name, name1, PATH_MAX); + } + fd = open(name, O_RDWR); +#endif + if (fd == -1) { + fprintf(stderr, "%s: unable to open device file: %s\n", + PROGNAME, name); + exit(1); + } + if (fstat(fd, &st) != 0) { + fprintf(stderr,"%s: stat failure on: %s\n", + PROGNAME, dev_name); + exit(1); + } + if (!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) { + fprintf(stderr,"%s: invalid device: %s\n", + PROGNAME, dev_name); + exit(1); + } + + switch(action) { + case RAIDFRAME_ADD_HOT_SPARE: + add_hot_spare(fd, component); + break; + case RAIDFRAME_REMOVE_HOT_SPARE: + remove_hot_spare(fd, component); + break; +#if defined(__NetBSD__) + case RAIDFRAME_CONFIGURE: + rf_configure(fd, config_filename, force); + break; +#endif + case RAIDFRAME_SET_AUTOCONFIG: + set_autoconfig(fd, name, autoconf); + break; + case RAIDFRAME_COPYBACK: + printf("Copyback.\n"); + do_ioctl(fd, RAIDFRAME_COPYBACK, NULL, "RAIDFRAME_COPYBACK"); + if (verbose) { + sleep(3); /* XXX give the copyback a chance to start */ + printf("Copyback status:\n"); + do_meter(fd,RAIDFRAME_CHECK_COPYBACK_STATUS_EXT); + } + break; + case 
RAIDFRAME_FAIL_DISK: + rf_fail_disk(fd, component, do_recon); + break; + case RAIDFRAME_SET_COMPONENT_LABEL: + set_component_label(fd, component); + break; + case RAIDFRAME_GET_COMPONENT_LABEL: + get_component_label(fd, component); + break; + case RAIDFRAME_INIT_LABELS: + init_component_labels(fd, serial_number); + break; + case RAIDFRAME_REWRITEPARITY: + printf("Initiating re-write of parity\n"); + do_ioctl(fd, RAIDFRAME_REWRITEPARITY, NULL, + "RAIDFRAME_REWRITEPARITY"); + if (verbose) { + sleep(3); /* XXX give it time to get started */ + printf("Parity Re-write status:\n"); + do_meter(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT); + } + break; + case RAIDFRAME_CHECK_RECON_STATUS_EXT: + check_status(fd,1); + break; + case RAIDFRAME_GET_INFO: + rf_get_device_status(fd); + break; + case RAIDFRAME_REBUILD_IN_PLACE: + rebuild_in_place(fd, component); + break; + case RAIDFRAME_CHECK_PARITY: + check_parity(fd, do_rewrite, dev_name); + break; + case RAIDFRAME_SHUTDOWN: +#if defined(__NetBSD__) + do_ioctl(fd, RAIDFRAME_SHUTDOWN, NULL, "RAIDFRAME_SHUTDOWN"); +#elif defined(__FreeBSD__) + /* Find out the unit number of the raid device */ + do_ioctl(fd, RAIDFRAME_GET_UNIT, &raidID, "RAIDFRAME_GET_UNIT"); + close (fd); + + fd = open(RAIDCTLDEV, O_RDWR); + if (fd < 0) { + fprintf(stderr, "%s: unable to open raid control " + "device %s\n", PROGNAME, RAIDCTLDEV); + fprintf(stderr, "Error: %s\n", strerror(errno)); + exit(1); + } + do_ioctl(fd, RAIDFRAME_SHUTDOWN, &raidID, "RAIDFRAME_SHUTDOWN"); + close(fd); +#endif + break; + default: + break; + } + + close(fd); + exit(0); +} + +void +do_ioctl(fd, command, arg, ioctl_name) + int fd; + unsigned long command; + void *arg; + const char *ioctl_name; +{ + if (ioctl(fd, command, arg) < 0) { + warn("ioctl (%s) failed", ioctl_name); + exit(1); + } +} + + +static void +rf_configure(fd,config_file,force) + int fd; + char *config_file; + int force; +{ + void *generic; + RF_Config_t cfg; + + if (rf_MakeConfig( config_file, &cfg ) != 0) { + 
fprintf(stderr,"%s: unable to create RAIDframe %s\n", + PROGNAME, "configuration structure\n"); + exit(1); + } + + cfg.force = force; + + /* + * Note the extra level of redirection needed here, since + * what we really want to pass in is a pointer to the pointer to + * the configuration structure. + */ + + generic = (void *) &cfg; + do_ioctl(fd, RAIDFRAME_CONFIGURE, &generic, "RAIDFRAME_CONFIGURE"); +} + +static const char * +device_status(status) + RF_DiskStatus_t status; +{ + + switch (status) { + case rf_ds_optimal: + return ("optimal"); + break; + case rf_ds_failed: + return ("failed"); + break; + case rf_ds_reconstructing: + return ("reconstructing"); + break; + case rf_ds_dist_spared: + return ("dist_spared"); + break; + case rf_ds_spared: + return ("spared"); + break; + case rf_ds_spare: + return ("spare"); + break; + case rf_ds_used_spare: + return ("used_spare"); + break; + default: + return ("UNKNOWN"); + } + /* NOTREACHED */ +} + +static void +rf_get_device_status(fd) + int fd; +{ + RF_DeviceConfig_t device_config; + void *cfg_ptr; + int is_clean; + int i; + + cfg_ptr = &device_config; + printf("Address= %p\n", &cfg_ptr); + do_ioctl(fd, RAIDFRAME_GET_INFO, &cfg_ptr, "RAIDFRAME_GET_INFO"); + + printf("Components:\n"); + for(i=0; i < device_config.ndevs; i++) { + printf("%20s: %s\n", device_config.devs[i].devname, + device_status(device_config.devs[i].status)); + } + if (device_config.nspares > 0) { + printf("Spares:\n"); + for(i=0; i < device_config.nspares; i++) { + printf("%20s: %s\n", + device_config.spares[i].devname, + device_status(device_config.spares[i].status)); + } + } else { + printf("No spares.\n"); + } + for(i=0; i < device_config.ndevs; i++) { + if (device_config.devs[i].status == rf_ds_optimal) { + get_component_label(fd, device_config.devs[i].devname); + } else { + printf("%s status is: %s. 
Skipping label.\n", + device_config.devs[i].devname, + device_status(device_config.devs[i].status)); + } + } + + if (device_config.nspares > 0) { + for(i=0; i < device_config.nspares; i++) { + if ((device_config.spares[i].status == + rf_ds_optimal) || + (device_config.spares[i].status == + rf_ds_used_spare)) { + get_component_label(fd, + device_config.spares[i].devname); + } else { + printf("%s status is: %s. Skipping label.\n", + device_config.spares[i].devname, + device_status(device_config.spares[i].status)); + } + } + } + + do_ioctl(fd, RAIDFRAME_CHECK_PARITY, &is_clean, + "RAIDFRAME_CHECK_PARITY"); + if (is_clean) { + printf("Parity status: clean\n"); + } else { + printf("Parity status: DIRTY\n"); + } + check_status(fd,0); +} + +static void +get_component_number(fd, component_name, component_number, num_columns) + int fd; + char *component_name; + int *component_number; + int *num_columns; +{ + RF_DeviceConfig_t device_config; + void *cfg_ptr; + int i; + int found; + + *component_number = -1; + + /* Assuming a full path spec... */ + cfg_ptr = &device_config; + do_ioctl(fd, RAIDFRAME_GET_INFO, &cfg_ptr, "RAIDFRAME_GET_INFO"); + + *num_columns = device_config.cols; + + found = 0; + for(i=0; i < device_config.ndevs; i++) { + if (strncmp(component_name, device_config.devs[i].devname, + PATH_MAX)==0) { + found = 1; + *component_number = i; + } + } + if (!found) { /* maybe it's a spare? */ + for(i=0; i < device_config.nspares; i++) { + if (strncmp(component_name, + device_config.spares[i].devname, + PATH_MAX)==0) { + found = 1; + *component_number = i + device_config.ndevs; + /* the way spares are done should + really change... 
*/ + *num_columns = device_config.cols + + device_config.nspares; + } + } + } + + if (!found) { + fprintf(stderr,"%s: %s is not a component %s", PROGNAME, + component_name, "of this device\n"); + exit(1); + } +} + +static void +rf_fail_disk(fd, component_to_fail, do_recon) + int fd; + char *component_to_fail; + int do_recon; +{ + struct rf_recon_req recon_request; + int component_num; + int num_cols; + + get_component_number(fd, component_to_fail, &component_num, &num_cols); + + recon_request.row = component_num / num_cols; + recon_request.col = component_num % num_cols; + if (do_recon) { + recon_request.flags = RF_FDFLAGS_RECON; + } else { + recon_request.flags = RF_FDFLAGS_NONE; + } + do_ioctl(fd, RAIDFRAME_FAIL_DISK, &recon_request, + "RAIDFRAME_FAIL_DISK"); + if (do_recon && verbose) { + printf("Reconstruction status:\n"); + sleep(3); /* XXX give reconstruction a chance to start */ + do_meter(fd,RAIDFRAME_CHECK_RECON_STATUS_EXT); + } +} + +static void +get_component_label(fd, component) + int fd; + char *component; +{ + RF_ComponentLabel_t component_label; + int component_num; + int num_cols; + + get_component_number(fd, component, &component_num, &num_cols); + + memset( &component_label, 0, sizeof(RF_ComponentLabel_t)); + component_label.row = component_num / num_cols; + component_label.column = component_num % num_cols; + + do_ioctl( fd, RAIDFRAME_GET_COMPONENT_LABEL, &component_label, + "RAIDFRAME_GET_COMPONENT_LABEL"); + + printf("Component label for %s:\n",component); + + printf(" Row: %d, Column: %d, Num Rows: %d, Num Columns: %d\n", + component_label.row, component_label.column, + component_label.num_rows, component_label.num_columns); + printf(" Version: %d, Serial Number: %d, Mod Counter: %d\n", + component_label.version, component_label.serial_number, + component_label.mod_counter); + printf(" Clean: %s, Status: %d\n", + component_label.clean ? 
"Yes" : "No", + component_label.status ); + printf(" sectPerSU: %d, SUsPerPU: %d, SUsPerRU: %d\n", + component_label.sectPerSU, component_label.SUsPerPU, + component_label.SUsPerRU); + printf(" Queue size: %d, blocksize: %d, numBlocks: %d\n", + component_label.maxOutstanding, component_label.blockSize, + component_label.numBlocks); + printf(" RAID Level: %c\n", (char) component_label.parityConfig); + printf(" Autoconfig: %s\n", + component_label.autoconfigure ? "Yes" : "No" ); + printf(" Root partition: %s\n", + component_label.root_partition ? "Yes" : "No" ); + printf(" Last configured as: raid%d\n", component_label.last_unit ); +} + +static void +set_component_label(fd, component) + int fd; + char *component; +{ + RF_ComponentLabel_t component_label; + int component_num; + int num_cols; + + get_component_number(fd, component, &component_num, &num_cols); + + /* XXX This is currently here for testing, and future expandability */ + + component_label.version = 1; + component_label.serial_number = 123456; + component_label.mod_counter = 0; + component_label.row = component_num / num_cols; + component_label.column = component_num % num_cols; + component_label.num_rows = 0; + component_label.num_columns = 5; + component_label.clean = 0; + component_label.status = 1; + + do_ioctl( fd, RAIDFRAME_SET_COMPONENT_LABEL, &component_label, + "RAIDFRAME_SET_COMPONENT_LABEL"); +} + + +static void +init_component_labels(fd, serial_number) + int fd; + int serial_number; +{ + RF_ComponentLabel_t component_label; + + component_label.version = 0; + component_label.serial_number = serial_number; + component_label.mod_counter = 0; + component_label.row = 0; + component_label.column = 0; + component_label.num_rows = 0; + component_label.num_columns = 0; + component_label.clean = 0; + component_label.status = 0; + + do_ioctl( fd, RAIDFRAME_INIT_LABELS, &component_label, + "RAIDFRAME_SET_COMPONENT_LABEL"); +} + +static void +set_autoconfig(fd, name, autoconf) + int fd; + char *name; + char 
*autoconf; +{ + int auto_config; + int root_config; + + auto_config = 0; + root_config = 0; + + if (strncasecmp(autoconf,"root", 4) == 0) { + root_config = 1; + } + + if ((strncasecmp(autoconf,"yes", 3) == 0) || + root_config == 1) { + auto_config = 1; + } + + do_ioctl(fd, RAIDFRAME_SET_AUTOCONFIG, &auto_config, + "RAIDFRAME_SET_AUTOCONFIG"); + + do_ioctl(fd, RAIDFRAME_SET_ROOT, &root_config, + "RAIDFRAME_SET_ROOT"); + + printf("%s: Autoconfigure: %s\n", name, + auto_config ? "Yes" : "No"); + + if (root_config == 1) { + printf("%s: Root: %s\n", name, + auto_config ? "Yes" : "No"); + } +} + +static void +add_hot_spare(fd, component) + int fd; + char *component; +{ + RF_SingleComponent_t hot_spare; + + hot_spare.row = 0; + hot_spare.column = 0; + strncpy(hot_spare.component_name, component, + sizeof(hot_spare.component_name)); + + do_ioctl( fd, RAIDFRAME_ADD_HOT_SPARE, &hot_spare, + "RAIDFRAME_ADD_HOT_SPARE"); +} + +static void +remove_hot_spare(fd, component) + int fd; + char *component; +{ + RF_SingleComponent_t hot_spare; + int component_num; + int num_cols; + + get_component_number(fd, component, &component_num, &num_cols); + + hot_spare.row = component_num / num_cols; + hot_spare.column = component_num % num_cols; + + strncpy(hot_spare.component_name, component, + sizeof(hot_spare.component_name)); + + do_ioctl( fd, RAIDFRAME_REMOVE_HOT_SPARE, &hot_spare, + "RAIDFRAME_REMOVE_HOT_SPARE"); +} + +static void +rebuild_in_place( fd, component ) + int fd; + char *component; +{ + RF_SingleComponent_t comp; + int component_num; + int num_cols; + + get_component_number(fd, component, &component_num, &num_cols); + + comp.row = 0; + comp.column = component_num; + strncpy(comp.component_name, component, sizeof(comp.component_name)); + + do_ioctl( fd, RAIDFRAME_REBUILD_IN_PLACE, &comp, + "RAIDFRAME_REBUILD_IN_PLACE"); + + if (verbose) { + printf("Reconstruction status:\n"); + sleep(3); /* XXX give reconstruction a chance to start */ + 
do_meter(fd,RAIDFRAME_CHECK_RECON_STATUS_EXT); + } + +} + +static void +check_parity( fd, do_rewrite, dev_name ) + int fd; + int do_rewrite; + char *dev_name; +{ + int is_clean; + int percent_done; + + is_clean = 0; + percent_done = 0; + do_ioctl(fd, RAIDFRAME_CHECK_PARITY, &is_clean, + "RAIDFRAME_CHECK_PARITY"); + if (is_clean) { + printf("%s: Parity status: clean\n",dev_name); + } else { + printf("%s: Parity status: DIRTY\n",dev_name); + if (do_rewrite) { + printf("%s: Initiating re-write of parity\n", + dev_name); + do_ioctl(fd, RAIDFRAME_REWRITEPARITY, NULL, + "RAIDFRAME_REWRITEPARITY"); + sleep(3); /* XXX give it time to + get started. */ + if (verbose) { + printf("Parity Re-write status:\n"); + do_meter(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT); + } else { + do_ioctl(fd, + RAIDFRAME_CHECK_PARITYREWRITE_STATUS, + &percent_done, + "RAIDFRAME_CHECK_PARITYREWRITE_STATUS" + ); + while( percent_done < 100 ) { + sleep(3); /* wait a bit... */ + do_ioctl(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS, + &percent_done, "RAIDFRAME_CHECK_PARITYREWRITE_STATUS"); + } + + } + printf("%s: Parity Re-write complete\n", + dev_name); + } else { + /* parity is wrong, and is not being fixed. + Exit w/ an error. 
*/ + exit(1); + } + } +} + + +static void +check_status( fd, meter ) + int fd; + int meter; +{ + int recon_percent_done = 0; + int parity_percent_done = 0; + int copyback_percent_done = 0; + + do_ioctl(fd, RAIDFRAME_CHECK_RECON_STATUS, &recon_percent_done, + "RAIDFRAME_CHECK_RECON_STATUS"); + printf("Reconstruction is %d%% complete.\n", recon_percent_done); + do_ioctl(fd, RAIDFRAME_CHECK_PARITYREWRITE_STATUS, + &parity_percent_done, + "RAIDFRAME_CHECK_PARITYREWRITE_STATUS"); + printf("Parity Re-write is %d%% complete.\n", parity_percent_done); + do_ioctl(fd, RAIDFRAME_CHECK_COPYBACK_STATUS, ©back_percent_done, + "RAIDFRAME_CHECK_COPYBACK_STATUS"); + printf("Copyback is %d%% complete.\n", copyback_percent_done); + + if (meter) { + /* These 3 should be mutually exclusive at this point */ + if (recon_percent_done < 100) { + printf("Reconstruction status:\n"); + do_meter(fd,RAIDFRAME_CHECK_RECON_STATUS_EXT); + } else if (parity_percent_done < 100) { + printf("Parity Re-write status:\n"); + do_meter(fd,RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT); + } else if (copyback_percent_done < 100) { + printf("Copyback status:\n"); + do_meter(fd,RAIDFRAME_CHECK_COPYBACK_STATUS_EXT); + } + } +} + +const char *tbits = "|/-\\"; + +static void +do_meter(fd, option) + int fd; + u_long option; +{ + int percent_done; + int last_value; + int start_value; + RF_ProgressInfo_t progressInfo; + struct timeval start_time; + struct timeval last_time; + struct timeval current_time; + double elapsed; + int elapsed_sec; + int elapsed_usec; + int simple_eta,last_eta; + double rate; + int amount; + int tbit_value; + int wait_for_more_data; + char buffer[1024]; + char bar_buffer[1024]; + char eta_buffer[1024]; + + if (gettimeofday(&start_time,NULL)) { + fprintf(stderr,"%s: gettimeofday failed!?!?\n", PROGNAME); + exit(errno); + } + memset(&progressInfo, 0, sizeof(RF_ProgressInfo_t)); + + percent_done = 0; + do_ioctl(fd, option, &progressInfo, ""); + last_value = progressInfo.completed; + start_value = 
last_value; + last_time = start_time; + current_time = start_time; + + wait_for_more_data = 0; + tbit_value = 0; + while(progressInfo.completed < progressInfo.total) { + + percent_done = (progressInfo.completed * 100) / + progressInfo.total; + + get_bar(bar_buffer, percent_done, 40); + + elapsed_sec = current_time.tv_sec - start_time.tv_sec; + elapsed_usec = current_time.tv_usec - start_time.tv_usec; + if (elapsed_usec < 0) { + elapsed_usec-=1000000; + elapsed_sec++; + } + + elapsed = (double) elapsed_sec + + (double) elapsed_usec / 1000000.0; + + amount = progressInfo.completed - start_value; + + if (amount <= 0) { /* we don't do negatives (yet?) */ + amount = 0; + wait_for_more_data = 1; + } else { + wait_for_more_data = 0; + } + + if (elapsed == 0) + rate = 0.0; + else + rate = amount / elapsed; + + if (rate > 0.0) { + simple_eta = (int) (((double)progressInfo.total - + (double) progressInfo.completed) + / rate); + } else { + simple_eta = -1; + } + + if (simple_eta <=0) { + simple_eta = last_eta; + } else { + last_eta = simple_eta; + } + + get_time_string(eta_buffer, simple_eta); + + snprintf(buffer,1024,"\r%3d%% |%s| ETA: %s %c", + percent_done,bar_buffer,eta_buffer,tbits[tbit_value]); + + write(fileno(stdout),buffer,strlen(buffer)); + fflush(stdout); + + /* resolution wasn't high enough... wait until we get another + timestamp and perhaps more "work" done. */ + + if (!wait_for_more_data) { + last_time = current_time; + last_value = progressInfo.completed; + } + + if (++tbit_value>3) + tbit_value = 0; + + sleep(2); + + if (gettimeofday(¤t_time,NULL)) { + fprintf(stderr,"%s: gettimeofday failed!?!?\n", + PROGNAME); + exit(errno); + } + + do_ioctl( fd, option, &progressInfo, ""); + + + } + printf("\n"); +} +/* 40 '*''s per line, then 40 ' ''s line. 
*/ +/* If you've got a screen wider than 160 characters, "tough" */ + +#define STAR_MIDPOINT 4*40 +const char stars[] = "****************************************" + "****************************************" + "****************************************" + "****************************************" + " " + " " + " " + " " + " "; + +static void +get_bar(string,percent,max_strlen) + char *string; + double percent; + int max_strlen; +{ + int offset; + + if (max_strlen > STAR_MIDPOINT) { + max_strlen = STAR_MIDPOINT; + } + offset = STAR_MIDPOINT - + (int)((percent * max_strlen)/ 100); + if (offset < 0) + offset = 0; + snprintf(string,max_strlen,"%s",&stars[offset]); +} + +static void +get_time_string(string,simple_time) + char *string; + int simple_time; +{ + int minutes, seconds, hours; + char hours_buffer[5]; + char minutes_buffer[5]; + char seconds_buffer[5]; + + if (simple_time >= 0) { + + minutes = (int) simple_time / 60; + seconds = ((int)simple_time - 60*minutes); + hours = minutes / 60; + minutes = minutes - 60*hours; + + if (hours > 0) { + snprintf(hours_buffer,5,"%02d:",hours); + } else { + snprintf(hours_buffer,5," "); + } + + snprintf(minutes_buffer,5,"%02d:",minutes); + snprintf(seconds_buffer,5,"%02d",seconds); + snprintf(string,1024,"%s%s%s", + hours_buffer, minutes_buffer, seconds_buffer); + } else { + snprintf(string,1024," --:--"); + } + +} + +static void +usage() +{ + const char *progname = PROGNAME; + + fprintf(stderr, "usage: %s [-v] -a component dev\n", progname); + fprintf(stderr, " %s [-v] -A yes | no | root dev\n", progname); + fprintf(stderr, " %s [-v] -B dev\n", progname); + fprintf(stderr, " %s [-v] -c config_file dev\n", progname); + fprintf(stderr, " %s [-v] -C config_file dev\n", progname); + fprintf(stderr, " %s [-v] -f component dev\n", progname); + fprintf(stderr, " %s [-v] -F component dev\n", progname); + fprintf(stderr, " %s [-v] -g component dev\n", progname); + fprintf(stderr, " %s [-v] -i dev\n", progname); + fprintf(stderr, " %s 
[-v] -I serial_number dev\n", progname); + fprintf(stderr, " %s [-v] -r component dev\n", progname); + fprintf(stderr, " %s [-v] -R component dev\n", progname); + fprintf(stderr, " %s [-v] -s dev\n", progname); + fprintf(stderr, " %s [-v] -S dev\n", progname); + fprintf(stderr, " %s [-v] -u dev\n", progname); +#if 0 + fprintf(stderr, "usage: %s %s\n", progname, + "-a | -f | -F | -g | -r | -R component dev"); + fprintf(stderr, " %s -B | -i | -s | -S -u dev\n", progname); + fprintf(stderr, " %s -c | -C config_file dev\n", progname); + fprintf(stderr, " %s -I serial_number dev\n", progname); +#endif + exit(1); + /* NOTREACHED */ +} + +#if defined(__FreeBSD__) +static void +check_driver(void) +{ + if (modfind("raidframe") == -1 && kldload("raidframe") == -1) { + printf("Error: Cannot load RAIDframe driver.\n"); + exit(1); + } +} +#endif + diff --git a/sbin/raidctl/rf_configure.c b/sbin/raidctl/rf_configure.c new file mode 100644 index 0000000..8df7889 --- /dev/null +++ b/sbin/raidctl/rf_configure.c @@ -0,0 +1,583 @@ +/* $FreeBSD$ */ +/* $NetBSD: rf_configure.c,v 1.13 2001/01/27 19:32:47 oster Exp $ */ + +/* + * Copyright (c) 1995 Carnegie-Mellon University. + * All rights reserved. + * + * Author: Mark Holland + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/*************************************************************** + * + * rf_configure.c -- code related to configuring the raidframe system + * + * configuration is complicated by the fact that we want the same + * driver to work both in the kernel and at user level. In the + * kernel, we can't read the configuration file, so we configure + * by running a user-level program that reads the config file, + * creates a data structure describing the configuration and + * passes it into the kernel via an ioctl. Since we want the config + * code to be common between the two versions of the driver, we + * configure using the same two-step process when running at + * user level. Of course, at user level, the config structure is + * passed directly to the config routine, rather than via ioctl. + * + * This file is not compiled into the kernel, so we have no + * need for KERNEL ifdefs. + * + **************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <dev/raidframe/rf_raid.h> +#include <dev/raidframe/rf_raidframe.h> +#include <dev/raidframe/rf_general.h> +#include <dev/raidframe/rf_decluster.h> +#include <dev/raidframe/rf_configure.h> + +/* + * XXX we include this here so we don't need to drag rf_debugMem.c into + * the picture... This is userland, afterall... + */ + +/* + * XXX sucky hack to override the defn. of RF_Malloc as given in + * rf_debugMem.c... 
but I *really* don't want (nor need) to link with + * that file here in userland.. GO + */ + +#undef RF_Malloc +#define RF_Malloc(_p_, _size_, _cast_) \ + { \ + _p_ = _cast_ malloc((u_long)_size_); \ + bzero((char *)_p_, _size_); \ + } + +int distSpareYes = 1; +int distSpareNo = 0; + +/* The mapsw[] table below contains all the various RAID types that might +be supported by the kernel. The actual supported types are found +in sys/dev/raidframe/rf_layout.c. */ + +static RF_LayoutSW_t mapsw[] = { + /* parity declustering */ + {'T', "Parity declustering", + rf_MakeLayoutSpecificDeclustered, &distSpareNo}, + /* parity declustering with distributed sparing */ + {'D', "Distributed sparing parity declustering", + rf_MakeLayoutSpecificDeclustered, &distSpareYes}, + /* declustered P+Q */ + {'Q', "Declustered P+Q", + rf_MakeLayoutSpecificDeclustered, &distSpareNo}, + /* RAID 5 with rotated sparing */ + {'R', "RAID Level 5 rotated sparing", rf_MakeLayoutSpecificNULL, NULL}, + /* Chained Declustering */ + {'C', "Chained Declustering", rf_MakeLayoutSpecificNULL, NULL}, + /* Interleaved Declustering */ + {'I', "Interleaved Declustering", rf_MakeLayoutSpecificNULL, NULL}, + /* RAID level 0 */ + {'0', "RAID Level 0", rf_MakeLayoutSpecificNULL, NULL}, + /* RAID level 1 */ + {'1', "RAID Level 1", rf_MakeLayoutSpecificNULL, NULL}, + /* RAID level 4 */ + {'4', "RAID Level 4", rf_MakeLayoutSpecificNULL, NULL}, + /* RAID level 5 */ + {'5', "RAID Level 5", rf_MakeLayoutSpecificNULL, NULL}, + /* Evenodd */ + {'E', "EvenOdd", rf_MakeLayoutSpecificNULL, NULL}, + /* Declustered Evenodd */ + {'e', "Declustered EvenOdd", + rf_MakeLayoutSpecificDeclustered, &distSpareNo}, + /* parity logging */ + {'L', "Parity logging", rf_MakeLayoutSpecificNULL, NULL}, + /* end-of-list marker */ + {'\0', NULL, NULL, NULL} +}; +RF_LayoutSW_t * +rf_GetLayout(RF_ParityConfig_t parityConfig) +{ + RF_LayoutSW_t *p; + + /* look up the specific layout */ + for (p = &mapsw[0]; p->parityConfig; p++) + if 
(p->parityConfig == parityConfig) + break; + if (!p->parityConfig) + return (NULL); + RF_ASSERT(p->parityConfig == parityConfig); + return (p); +} + +static int rf_search_file_for_start_of(const char *string, char *buf, + int len, FILE * fp); +static int rf_get_next_nonblank_line(char *buf, int len, FILE * fp, + const char *errmsg); + +/* + * called from user level to read the configuration file and create + * a configuration control structure. This is used in the user-level + * version of the driver, and in the user-level program that configures + * the system via ioctl. + */ +int +rf_MakeConfig(configname, cfgPtr) + char *configname; + RF_Config_t *cfgPtr; +{ + int numscanned, val, r, c, retcode, aa, bb, cc; + char buf[256], buf1[256], *cp; + RF_LayoutSW_t *lp; + FILE *fp; + + bzero((char *) cfgPtr, sizeof(RF_Config_t)); + + fp = fopen(configname, "r"); + if (!fp) { + RF_ERRORMSG1("Can't open config file %s\n", configname); + return (-1); + } + rewind(fp); + if (rf_search_file_for_start_of("array", buf, 256, fp)) { + RF_ERRORMSG1("Unable to find start of \"array\" params in config file %s\n", configname); + retcode = -1; + goto out; + } + rf_get_next_nonblank_line(buf, 256, fp, "Config file error (\"array\" section): unable to get numRow and numCol\n"); + + /* + * wackiness with aa, bb, cc to get around size problems on + * different platforms + */ + numscanned = sscanf(buf, "%d %d %d", &aa, &bb, &cc); + if (numscanned != 3) { + RF_ERRORMSG("Config file error (\"array\" section): unable to get numRow, numCol, numSpare\n"); + retcode = -1; + goto out; + } + cfgPtr->numRow = (RF_RowCol_t) aa; + cfgPtr->numCol = (RF_RowCol_t) bb; + cfgPtr->numSpare = (RF_RowCol_t) cc; + + /* debug section is optional */ + for (c = 0; c < RF_MAXDBGV; c++) + cfgPtr->debugVars[c][0] = '\0'; + rewind(fp); + if (!rf_search_file_for_start_of("debug", buf, 256, fp)) { + for (c = 0; c < RF_MAXDBGV; c++) { + if (rf_get_next_nonblank_line(buf, 256, fp, NULL)) + break; + cp = 
rf_find_non_white(buf); + if (!strncmp(cp, "START", strlen("START"))) + break; + (void) strcpy(&cfgPtr->debugVars[c][0], cp); + } + } + rewind(fp); + strcpy(cfgPtr->diskQueueType, "fifo"); + cfgPtr->maxOutstandingDiskReqs = 1; + /* scan the file for the block related to disk queues */ + if (rf_search_file_for_start_of("queue", buf, 256, fp)) { + RF_ERRORMSG2("[No disk queue discipline specified in config file %s. Using %s.]\n", configname, cfgPtr->diskQueueType); + } else { + if (rf_get_next_nonblank_line(buf, 256, fp, NULL)) { + RF_ERRORMSG2("[No disk queue discipline specified in config file %s. Using %s.]\n", configname, cfgPtr->diskQueueType); + } + } + + /* the queue specifier line contains two entries: 1st char of first + * word specifies queue to be used 2nd word specifies max num reqs + * that can be outstanding on the disk itself (typically 1) */ + if (sscanf(buf, "%s %d", buf1, &val) != 2) { + RF_ERRORMSG1("Can't determine queue type and/or max outstanding reqs from line: %s", buf); + RF_ERRORMSG2("Using %s-%d\n", cfgPtr->diskQueueType, cfgPtr->maxOutstandingDiskReqs); + } else { + char *ch; + bcopy(buf1, cfgPtr->diskQueueType, + RF_MIN(sizeof(cfgPtr->diskQueueType), strlen(buf1) + 1)); + for (ch = buf1; *ch; ch++) { + if (*ch == ' ') { + *ch = '\0'; + break; + } + } + cfgPtr->maxOutstandingDiskReqs = val; + } + + rewind(fp); + + if (rf_search_file_for_start_of("disks", buf, 256, fp)) { + RF_ERRORMSG1("Can't find \"disks\" section in config file %s\n", configname); + retcode = -1; + goto out; + } + for (r = 0; r < cfgPtr->numRow; r++) { + for (c = 0; c < cfgPtr->numCol; c++) { + int devfd; + char bfr[256], *bfr1; + if (rf_get_next_nonblank_line(&bfr[0], 256, fp, NULL)) { + RF_ERRORMSG2("Config file error: unable to get device file for disk at row %d col %d\n", r, c); + retcode = -1; + goto out; + } + /* Get rid of the newline at the end of the string */ + if ((bfr1 = strchr(&bfr[0], '\n')) != NULL) + *bfr1 = NULL; + /* Make sure the device exists */ + if 
((devfd = open(&bfr[0], O_RDWR)) < 0) { + RF_ERRORMSG2( + "Config file error: device %s, %s\n", + &bfr[0], strerror(errno)); + retcode = -1; + goto out; + } + close(devfd); + strncpy(&cfgPtr->devnames[r][c][0], &bfr[0], 50); + } + } + + /* "spare" section is optional */ + rewind(fp); + if (rf_search_file_for_start_of("spare", buf, 256, fp)) + cfgPtr->numSpare = 0; + for (c = 0; c < cfgPtr->numSpare; c++) { + if (rf_get_next_nonblank_line(&cfgPtr->spare_names[c][0], + 256, fp, NULL)) { + RF_ERRORMSG1("Config file error: unable to get device file for spare disk %d\n", c); + retcode = -1; + goto out; + } + } + + /* scan the file for the block related to layout */ + rewind(fp); + if (rf_search_file_for_start_of("layout", buf, 256, fp)) { + RF_ERRORMSG1("Can't find \"layout\" section in configuration file %s\n", configname); + retcode = -1; + goto out; + } + if (rf_get_next_nonblank_line(buf, 256, fp, NULL)) { + RF_ERRORMSG("Config file error (\"layout\" section): unable to find common layout param line\n"); + retcode = -1; + goto out; + } + c = sscanf(buf, "%d %d %d %c", &aa, &bb, &cc, &cfgPtr->parityConfig); + cfgPtr->sectPerSU = (RF_SectorNum_t) aa; + cfgPtr->SUsPerPU = (RF_StripeNum_t) bb; + cfgPtr->SUsPerRU = (RF_StripeNum_t) cc; + if (c != 4) { + RF_ERRORMSG("Unable to scan common layout line\n"); + retcode = -1; + goto out; + } + lp = rf_GetLayout(cfgPtr->parityConfig); + if (lp == NULL) { + RF_ERRORMSG1("Unknown parity config '%c'\n", + cfgPtr->parityConfig); + retcode = -1; + goto out; + } + + retcode = lp->MakeLayoutSpecific(fp, cfgPtr, lp->makeLayoutSpecificArg); +out: + fclose(fp); + if (retcode < 0) + retcode = errno = EINVAL; + else + errno = retcode; + return (retcode); +} + + +/* used in architectures such as RAID0 where there is no layout-specific + * information to be passed into the configuration code. 
+ */ +int +rf_MakeLayoutSpecificNULL(fp, cfgPtr, ignored) + FILE *fp; + RF_Config_t *cfgPtr; + void *ignored; +{ + cfgPtr->layoutSpecificSize = 0; + cfgPtr->layoutSpecific = NULL; + return (0); +} + +int +rf_MakeLayoutSpecificDeclustered(configfp, cfgPtr, arg) + FILE *configfp; + RF_Config_t *cfgPtr; + void *arg; +{ + int b, v, k, r, lambda, norotate, i, val, distSpare; + char *cfgBuf, *bdfile, *p, *smname; + char buf[256], smbuf[256]; + FILE *fp; + + distSpare = *((int *) arg); + + /* get the block design file name */ + if (rf_get_next_nonblank_line(buf, 256, configfp, + "Can't find block design file name in config file\n")) + return (EINVAL); + bdfile = rf_find_non_white(buf); + if (bdfile[strlen(bdfile) - 1] == '\n') { + /* strip newline char */ + bdfile[strlen(bdfile) - 1] = '\0'; + } + /* open bd file, check validity of configuration */ + if ((fp = fopen(bdfile, "r")) == NULL) { + RF_ERRORMSG1("RAID: config error: Can't open layout table file %s\n", bdfile); + return (EINVAL); + } + if (fgets(buf, 256, fp) == NULL) { + RF_ERRORMSG1("RAID: config error: Can't read layout from layout table file %s\n", bdfile); + return (EINVAL); + } + i = sscanf(buf, "%u %u %u %u %u %u", &b, &v, &k, &r, &lambda, &norotate); + if (i == 5) + norotate = 0; /* no-rotate flag is optional */ + else if (i != 6) { + RF_ERRORMSG("Unable to parse header line in block design file\n"); + return (EINVAL); + } + /* set the sparemap directory. 
In the in-kernel version, there's a + * daemon that's responsible for finding the sparemaps */ + if (distSpare) { + if (rf_get_next_nonblank_line(smbuf, 256, configfp, + "Can't find sparemap file name in config file\n")) + return (EINVAL); + smname = rf_find_non_white(smbuf); + if (smname[strlen(smname) - 1] == '\n') { + /* strip newline char */ + smname[strlen(smname) - 1] = '\0'; + } + } else { + smbuf[0] = '\0'; + smname = smbuf; + } + + /* allocate a buffer to hold the configuration info */ + cfgPtr->layoutSpecificSize = RF_SPAREMAP_NAME_LEN + + 6 * sizeof(int) + b * k; + /* can't use RF_Malloc here b/c debugMem module not yet init'd */ + cfgBuf = (char *) malloc(cfgPtr->layoutSpecificSize); + cfgPtr->layoutSpecific = (void *) cfgBuf; + p = cfgBuf; + + /* install name of sparemap file */ + for (i = 0; smname[i]; i++) + *p++ = smname[i]; + /* pad with zeros */ + while (i < RF_SPAREMAP_NAME_LEN) { + *p++ = '\0'; + i++; + } + + /* + * fill in the buffer with the block design parameters + * and then the block design itself + */ + *((int *) p) = b; + p += sizeof(int); + *((int *) p) = v; + p += sizeof(int); + *((int *) p) = k; + p += sizeof(int); + *((int *) p) = r; + p += sizeof(int); + *((int *) p) = lambda; + p += sizeof(int); + *((int *) p) = norotate; + p += sizeof(int); + + while (fscanf(fp, "%d", &val) == 1) + *p++ = (char) val; + fclose(fp); + if (p - cfgBuf != cfgPtr->layoutSpecificSize) { + RF_ERRORMSG2("Size mismatch creating layout specific data: is %d sb %d bytes\n", (int) (p - cfgBuf), (int) (6 * sizeof(int) + b * k)); + return (EINVAL); + } + return (0); +} + +/**************************************************************************** + * + * utilities + * + ***************************************************************************/ + +/* finds a non-white character in the line */ +char * +rf_find_non_white(char *p) +{ + for (; *p != '\0' && (*p == ' ' || *p == '\t'); p++); + return (p); +} + +/* finds a white character in the line */ +char * 
+rf_find_white(char *p) +{ + for (; *p != '\0' && (*p != ' ' && *p != '\t'); p++); + return (p); +} + +/* + * searches a file for a line that says "START string", where string is + * specified as a parameter + */ +static int +rf_search_file_for_start_of(string, buf, len, fp) + const char *string; + char *buf; + int len; + FILE *fp; +{ + char *p; + + while (1) { + if (fgets(buf, len, fp) == NULL) + return (-1); + p = rf_find_non_white(buf); + if (!strncmp(p, "START", strlen("START"))) { + p = rf_find_white(p); + p = rf_find_non_white(p); + if (!strncmp(p, string, strlen(string))) + return (0); + } + } +} + +/* reads from file fp into buf until it finds an interesting line */ +int +rf_get_next_nonblank_line(buf, len, fp, errmsg) + char *buf; + int len; + FILE *fp; + const char *errmsg; +{ + char *p; + + while (fgets(buf, 256, fp) != NULL) { + p = rf_find_non_white(buf); + if (*p == '\n' || *p == '\0' || *p == '#') + continue; + return (0); + } + if (errmsg) + RF_ERRORMSG1("%s", errmsg); + return (1); +} + +/* + * Allocates an array for the spare table, and initializes it from a file. + * In the user-level version, this is called when recon is initiated. + * When/if I move recon into the kernel, there'll be a daemon that does + * an ioctl into raidframe which will block until a spare table is needed. + * When it returns, it will read a spare table from the file system, + * pass it into the kernel via a different ioctl, and then block again + * on the original ioctl. + * + * This is specific to the declustered layout, but doesn't belong in + * rf_decluster.c because it uses stuff that can't be compiled into + * the kernel, and it needs to be compiled into the user-level sparemap daemon. 
+ * + */ +void * +rf_ReadSpareTable(req, fname) + RF_SparetWait_t *req; + char *fname; +{ + int i, j, numFound, linecount, tableNum, tupleNum, + spareDisk, spareBlkOffset; + char buf[1024], targString[100], errString[100]; + RF_SpareTableEntry_t **table; + FILE *fp; + + /* allocate and initialize the table */ + RF_Malloc(table, + req->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), + (RF_SpareTableEntry_t **)); + for (i = 0; i < req->TablesPerSpareRegion; i++) { + RF_Malloc(table[i], + req->BlocksPerTable * sizeof(RF_SpareTableEntry_t), + (RF_SpareTableEntry_t *)); + for (j = 0; j < req->BlocksPerTable; j++) + table[i][j].spareDisk = + table[i][j].spareBlockOffsetInSUs = -1; + } + + /* 2. open sparemap file, sanity check */ + if ((fp = fopen(fname, "r")) == NULL) { + fprintf(stderr, + "rf_ReadSpareTable: Can't open sparemap file %s\n", fname); + return (NULL); + } + if (rf_get_next_nonblank_line(buf, 1024, fp, + "Invalid sparemap file: can't find header line\n")) + return (NULL); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + + sprintf(targString, "fdisk %d\n", req->fcol); + sprintf(errString, + "Invalid sparemap file: can't find \"fdisk %d\" line\n", + req->fcol); + while (1) { + rf_get_next_nonblank_line(buf, 1024, fp, errString); + if (!strncmp(buf, targString, strlen(targString))) + break; + } + + /* no more blank lines or comments allowed now */ + linecount = req->TablesPerSpareRegion * req->TableDepthInPUs; + for (i = 0; i < linecount; i++) { + numFound = fscanf(fp, " %d %d %d %d", &tableNum, &tupleNum, + &spareDisk, &spareBlkOffset); + if (numFound != 4) { + fprintf(stderr, "Sparemap file prematurely exhausted after %d of %d lines\n", i, linecount); + return (NULL); + } + RF_ASSERT(tableNum >= 0 && + tableNum < req->TablesPerSpareRegion); + RF_ASSERT(tupleNum >= 0 && tupleNum < req->BlocksPerTable); + RF_ASSERT(spareDisk >= 0 && spareDisk < req->C); + RF_ASSERT(spareBlkOffset >= 0 && spareBlkOffset < + 
req->SpareSpaceDepthPerRegionInSUs / req->SUsPerPU); + + table[tableNum][tupleNum].spareDisk = spareDisk; + table[tableNum][tupleNum].spareBlockOffsetInSUs = + spareBlkOffset * req->SUsPerPU; + } + + fclose(fp); + return ((void *) table); +} |