summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author phk <phk@FreeBSD.org> 2004-11-04 09:57:21 +0000
committer phk <phk@FreeBSD.org> 2004-11-04 09:57:21 +0000
commit 27e302a86b65c2d82a3601b7d1f56add77a4316b (patch)
tree 624c21e78226c47c48f9ebeceb36ef79e0336468
parent e5715b2cc150e2463e9765903b2ba151e4540913 (diff)
download FreeBSD-src-27e302a86b65c2d82a3601b7d1f56add77a4316b.zip
FreeBSD-src-27e302a86b65c2d82a3601b7d1f56add77a4316b.tar.gz
Remove unused vinum files.
-rw-r--r-- sys/dev/vinum/COPYRIGHT 37
-rwxr-xr-x sys/dev/vinum/makestatetext 78
-rw-r--r-- sys/dev/vinum/request.h 273
-rw-r--r-- sys/dev/vinum/statetexts.h 91
-rw-r--r-- sys/dev/vinum/vinum.c 542
-rw-r--r-- sys/dev/vinum/vinumconfig.c 2166
-rw-r--r-- sys/dev/vinum/vinumdaemon.c 283
-rw-r--r-- sys/dev/vinum/vinumext.h 261
-rw-r--r-- sys/dev/vinum/vinumhdr.h 81
-rw-r--r-- sys/dev/vinum/vinuminterrupt.c 473
-rw-r--r-- sys/dev/vinum/vinumio.c 918
-rw-r--r-- sys/dev/vinum/vinumio.h 154
-rw-r--r-- sys/dev/vinum/vinumioctl.c 960
-rw-r--r-- sys/dev/vinum/vinumkw.h 152
-rw-r--r-- sys/dev/vinum/vinumlock.c 266
-rw-r--r-- sys/dev/vinum/vinummemory.c 290
-rw-r--r-- sys/dev/vinum/vinumobj.h 321
-rw-r--r-- sys/dev/vinum/vinumparser.c 236
-rw-r--r-- sys/dev/vinum/vinumraid5.c 700
-rw-r--r-- sys/dev/vinum/vinumrequest.c 1125
-rw-r--r-- sys/dev/vinum/vinumrevive.c 620
-rw-r--r-- sys/dev/vinum/vinumstate.c 1095
-rw-r--r-- sys/dev/vinum/vinumstate.h 257
-rw-r--r-- sys/dev/vinum/vinumutil.c 311
-rw-r--r-- sys/dev/vinum/vinumutil.h 54
-rw-r--r-- sys/dev/vinum/vinumvar.h 395
26 files changed, 0 insertions, 12139 deletions
diff --git a/sys/dev/vinum/COPYRIGHT b/sys/dev/vinum/COPYRIGHT
deleted file mode 100644
index f0295e6..0000000
--- a/sys/dev/vinum/COPYRIGHT
+++ /dev/null
@@ -1,37 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $FreeBSD$
- */
diff --git a/sys/dev/vinum/makestatetext b/sys/dev/vinum/makestatetext
deleted file mode 100755
index c5a7da2..0000000
--- a/sys/dev/vinum/makestatetext
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/bin/sh
-# Make statetexts.h from vinumstate.h
-# $FreeBSD$
-# $Id: makestatetext,v 1.7 1999/12/29 07:24:54 grog Exp grog $
-infile=vinumstate.h
-ofile=statetexts.h
-echo >$ofile "/* Created by $0 on" `date`. "Do not edit */"
-echo >>$ofile
-cat >> $ofile <<FOO
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called \`\`Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided \`\`as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- */
-
-FOO
-
-echo >>$ofile "/* Drive state texts */"
-echo >>$ofile "char *drivestatetext [] =
- { "
-egrep -e 'drive_[A-z0-9]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile
-cat <<FOO >> $ofile
- };
-
-/* Subdisk state texts */
-char *sdstatetext [] =
- {
-FOO
-egrep -e 'sd_[A-z0-9]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile
-cat <<FOO >> $ofile
- };
-
-/* Plex state texts */
-char *plexstatetext [] =
- {
-FOO
-egrep -e 'plex_[A-z0-9]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile
-cat <<FOO >> $ofile
- };
-
-/* Volume state texts */
-char *volstatetext [] =
- {
-FOO
-egrep -e 'volume_[A-z0-9]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile
-cat <<FOO >> $ofile
- };
-FOO
diff --git a/sys/dev/vinum/request.h b/sys/dev/vinum/request.h
deleted file mode 100644
index 600130f..0000000
--- a/sys/dev/vinum/request.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: request.h,v 1.22 2003/04/24 04:37:08 grog Exp $
- * $FreeBSD$
- */
-
-/* Information needed to set up a transfer */
-
-enum xferinfo {
- XFR_NORMAL_READ = 1,
- XFR_NORMAL_WRITE = 2, /* write request in normal mode */
- XFR_RECOVERY_READ = 4,
- XFR_DEGRADED_WRITE = 8,
- XFR_PARITYLESS_WRITE = 0x10,
- XFR_NO_PARITY_STRIPE = 0x20, /* parity stripe is not available */
- XFR_DATA_BLOCK = 0x40, /* data block in request */
- XFR_PARITY_BLOCK = 0x80, /* parity block in request */
- XFR_BAD_SUBDISK = 0x100, /* this subdisk is dead */
- XFR_MALLOCED = 0x200, /* this buffer is malloced */
-#ifdef VINUMDEBUG
- XFR_PHASE2 = 0x800, /* documentation only: 2nd phase write */
-#endif
- XFR_REVIVECONFLICT = 0x1000, /* possible conflict with a revive operation */
- XFR_BUFLOCKED = 0x2000, /* BUF_LOCK performed on this buffer */
- XFR_COPYBUF = 0x4000, /* data buffer was copied */
- /* operations that need a parity block */
- XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
- /* operations that use the group parameters */
- XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),
- /* operations that use the data parameters */
- XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),
- /* operations requiring read before write */
- XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),
- /* operations that need a malloced buffer */
- XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
-};
-
-/*
- * Describe one low-level request, part of a
- * high-level request. This is an extended
- * struct buf buffer, and the first element
- * *must* be a struct buf. We pass this
- * structure to the I/O routines instead of a
- * struct buf in order to be able to locate the
- * high-level request when it completes.
- *
- * All offsets and lengths are in sectors.
- */
-
-struct rqelement {
- struct buf b; /* buf structure */
- struct rqgroup *rqg; /* pointer to our group */
- /* Information about the transfer */
- daddr_t sdoffset; /* offset in subdisk */
- int useroffset; /* offset in user buffer of normal data */
- /*
- * dataoffset and datalen refer to "individual" data
- * transfers which involve only this drive (normal read,
- * parityless write) and also degraded write.
- *
- * groupoffset and grouplen refer to the other "group"
- * operations (normal write, recovery read) which involve
- * more than one drive. Both the offsets are relative to
- * the start of the local buffer.
- */
- int dataoffset; /* offset in buffer of the normal data */
- int groupoffset; /* offset in buffer of group data */
- short datalen; /* length of normal data (sectors) */
- short grouplen; /* length of group data (sectors) */
- short buflen; /* total buffer length to allocate */
- short flags; /* really enum xferinfo (see above) */
- /* Ways to find other components */
- short sdno; /* subdisk number */
- short driveno; /* drive number */
- struct timeval launchtime; /* time of launch, for info function */
-};
-
-/*
- * A group of requests built to satisfy an I/O
- * transfer on a single plex.
- */
-struct rqgroup {
- struct rqgroup *next; /* pointer to next group */
- struct request *rq; /* pointer to the request */
- short count; /* number of requests in this group */
- short active; /* and number active */
- short plexno; /* index of plex */
- int badsdno; /* index of bad subdisk or -1 */
- enum xferinfo flags; /* description of transfer */
- struct rangelock *lock; /* lock for this transfer */
- daddr_t lockbase; /* and lock address */
- struct rqelement rqe[0]; /* and the elements of this request */
-};
-
-/*
- * Describe one high-level request and the
- * work we have to do to satisfy it.
- */
-struct request {
- struct buf *bp; /* pointer to the high-level request */
- caddr_t save_data; /* for copied write buffers */
- enum xferinfo flags;
- union {
- int volno; /* volume index */
- int plexno; /* or plex index */
- } volplex;
- int error; /* current error indication */
- int sdno; /* reviving subdisk (XFR_REVIVECONFLICT) */
- short isplex; /* set if this is a plex request */
- short active; /* number of subrequests still active */
- struct rqgroup *rqg; /* pointer to the first group of requests */
- struct rqgroup *lrqg; /* and to the last group of requests */
- struct request *next; /* link of waiting requests */
-};
-
-/*
- * Extended buffer header for subdisk I/O. Includes
- * a pointer to the user I/O request.
- */
-struct sdbuf {
- struct buf b; /* our buffer */
- struct buf *bp; /* and pointer to parent */
- short driveno; /* drive index */
- short sdno; /* and subdisk index */
-};
-
-/*
- * Values returned by rqe and friends. Be careful
- * with these: they are in order of increasing
- * seriousness. Some routines check for
- * > REQUEST_RECOVERED to indicate a failed request. XXX
- */
-enum requeststatus {
- REQUEST_OK, /* request built OK */
- REQUEST_RECOVERED, /* request OK, but involves RAID5 recovery */
- REQUEST_DEGRADED, /* parts of request failed */
- REQUEST_EOF, /* parts of request failed: outside plex */
- REQUEST_DOWN, /* all of request failed: subdisk(s) down */
- REQUEST_ENOMEM /* all of request failed: ran out of memory */
-};
-
-#ifdef VINUMDEBUG
-/* Trace entry for request info (DEBUG_LASTREQS) */
-enum rqinfo_type {
- loginfo_unused, /* never been used */
- loginfo_user_bp, /* this is the bp when strategy is called */
- loginfo_user_bpl, /* and this is the bp at launch time */
- loginfo_rqe, /* user RQE */
- loginfo_iodone, /* iodone */
- loginfo_raid5_data, /* write RAID-5 data block */
- loginfo_raid5_parity, /* write RAID-5 parity block */
- loginfo_sdio, /* subdisk I/O */
- loginfo_sdiol, /* subdisk I/O launch */
- loginfo_sdiodone, /* subdisk iodone */
- loginfo_lockwait, /* wait for range lock */
- loginfo_lock, /* lock range */
- loginfo_unlock, /* unlock range */
-};
-
-/*
- * This is the rangelock structure with an added
- * buffer pointer and plex number. We don't need
- * the plex number for the locking protocol, but
- * it does help a lot when logging.
- */
-struct rangelockinfo {
- daddr_t stripe; /* address + 1 of the range being locked */
- struct buf *bp; /* user's buffer pointer */
- int plexno;
-};
-
-union rqinfou { /* info to pass to logrq */
- struct buf *bp;
- struct rqelement *rqe; /* address of request, for correlation */
- struct rangelockinfo *lockinfo;
-};
-
-struct rqinfo {
- enum rqinfo_type type; /* kind of event */
- struct timeval timestamp; /* time it happened */
- struct buf *bp; /* point to user buffer */
- int devmajor; /* major and minor device info */
- int devminor;
- union {
- struct buf b; /* yup, the *whole* buffer header */
- struct rqelement rqe; /* and the whole rqe */
- struct rangelock lockinfo;
- } info;
-};
-
-#define RQINFO_SIZE 128 /* number of info slots in buffer */
-
-void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp);
-#endif
-
-/* Structures for the daemon */
-
-/* types of request to the daemon */
-enum daemonrq {
- daemonrq_none, /* dummy to catch bugs */
- daemonrq_ioerror, /* error occurred on I/O */
- daemonrq_saveconfig, /* save configuration */
- daemonrq_return, /* return to userland */
- daemonrq_ping, /* show sign of life */
- daemonrq_init, /* initialize a plex */
- daemonrq_revive, /* revive a subdisk */
- daemonrq_closedrive, /* close a drive */
-};
-
-/* info field for daemon requests */
-union daemoninfo { /* and the request information */
- struct request *rq; /* for daemonrq_ioerror */
- struct sd *sd; /* for daemonrq_revive */
- struct plex *plex; /* for daemonrq_init */
- struct drive *drive; /* for daemonrq_closedrive */
- int nothing; /* for passing NULL */
-};
-
-struct daemonq {
- struct daemonq *next; /* pointer to next element in queue */
- enum daemonrq type; /* type of request */
- int privateinuse; /* private element, being used */
- union daemoninfo info; /* and the request information */
-};
-
-void queue_daemon_request(enum daemonrq type, union daemoninfo info);
-
-extern int daemon_options;
-
-enum daemon_option {
- daemon_verbose = 1, /* talk about what we're doing */
- daemon_stopped = 2,
- daemon_noupdate = 4, /* don't update the disk config, for recovery */
-};
-
-void freerq(struct request *rq);
-void unlockrange(int plexno, struct rangelock *);
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/statetexts.h b/sys/dev/vinum/statetexts.h
deleted file mode 100644
index 88cfc17..0000000
--- a/sys/dev/vinum/statetexts.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Created by ./makestatetext on Wed Jan 5 10:05:30 CST 2000. Do not edit */
-
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $FreeBSD$
- */
-
-/* Drive state texts */
-char *drivestatetext[] =
-{
- "unallocated",
- "referenced",
- "down",
- "up",
-};
-
-/* Subdisk state texts */
-char *sdstatetext[] =
-{
- "unallocated",
- "uninit",
- "referenced",
- "init",
- "empty",
- "initializing",
- "initialized",
- "obsolete",
- "stale",
- "crashed",
- "down",
- "reviving",
- "reborn",
- "up",
-};
-
-/* Plex state texts */
-char *plexstatetext[] =
-{
- "unallocated",
- "referenced",
- "init",
- "faulty",
- "down",
- "initializing",
- "corrupt",
- "degraded",
- "flaky",
- "up",
-};
-
-/* Volume state texts */
-char *volstatetext[] =
-{
- "unallocated",
- "uninit",
- "down",
- "up",
-};
diff --git a/sys/dev/vinum/vinum.c b/sys/dev/vinum/vinum.c
deleted file mode 100644
index 5fb990d..0000000
--- a/sys/dev/vinum/vinum.c
+++ /dev/null
@@ -1,542 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinum.c,v 1.44 2003/05/23 00:50:55 grog Exp grog $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#define STATIC static /* nothing while we're testing */
-
-#include <dev/vinum/vinumhdr.h>
-#include <sys/sysproto.h> /* for sync(2) */
-#ifdef VINUMDEBUG
-#include <sys/reboot.h>
-int debug = 0; /* debug flags */
-extern int total_malloced;
-extern int malloccount;
-extern struct mc malloced[];
-#endif
-#include <dev/vinum/request.h>
-
-struct cdevsw vinum_cdevsw = {
- .d_version = D_VERSION,
- .d_open = vinumopen,
- .d_close = vinumclose,
- .d_read = physread,
- .d_write = physwrite,
- .d_ioctl = vinumioctl,
- .d_strategy = vinumstrategy,
- .d_name = "vinum",
- .d_flags = D_DISK | D_NEEDGIANT
-};
-
-/* Called by main() during pseudo-device attachment. */
-void vinumattach(void *);
-STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused);
-STATIC void vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev);
-
-struct _vinum_conf vinum_conf; /* configuration information */
-
-struct cdev *vinum_daemon_dev;
-struct cdev *vinum_super_dev;
-
-static eventhandler_tag dev_clone_tag;
-
-/*
- * Mutexes for plex synchronization. Ideally each plex
- * should have its own mutex, but the fact that the plex
- * struct can move makes that very complicated. Instead,
- * have plexes share these mutexes based on modulo plex
- * number.
- */
-struct mtx plexmutex[PLEXMUTEXES];
-
-/*
- * Called by main() during pseudo-device attachment. All we need
- * to do is allocate enough space for devices to be configured later, and
- * add devsw entries.
- */
-void
-vinumattach(void *dummy)
-{
- char *envp;
- int i;
-#define MUTEXNAMELEN 16
- char mutexname[MUTEXNAMELEN];
-#if PLEXMUTEXES > 10000
-#error Increase size of MUTEXNAMELEN
-#endif
-/* modload should prevent multiple loads, so this is worth a panic */
- if ((vinum_conf.flags & VF_LOADED) != 0)
- panic("vinum: already loaded");
-
- log(LOG_INFO, "vinum: loaded\n");
-#ifdef VINUMDEBUG
- vinum_conf.flags |= VF_LOADED | VF_HASDEBUG; /* we're loaded now, and we support debug */
-#else
- vinum_conf.flags |= VF_LOADED; /* we're loaded now */
-#endif
-
- daemonq = NULL; /* initialize daemon's work queue */
- dqend = NULL;
-
- vinum_daemon_dev = make_dev(&vinum_cdevsw,
- VINUM_DAEMON_MINOR,
- UID_ROOT,
- GID_WHEEL,
- S_IRUSR | S_IWUSR,
- "vinum/controld");
- vinum_super_dev = make_dev(&vinum_cdevsw,
- VINUM_SUPERDEV_MINOR,
- UID_ROOT,
- GID_WHEEL,
- S_IRUSR | S_IWUSR,
- "vinum/control");
-
- vinum_conf.version = VINUMVERSION; /* note what version we are */
-
- /* allocate space: drives... */
- DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
- CHECKALLOC(DRIVE, "vinum: no memory\n");
- bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES);
- vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
- vinum_conf.drives_used = 0; /* and number in use */
-
- /* volumes, ... */
- VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
- CHECKALLOC(VOL, "vinum: no memory\n");
- bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES);
- vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
- vinum_conf.volumes_used = 0; /* and number in use */
-
- /* plexes, ... */
- PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
- CHECKALLOC(PLEX, "vinum: no memory\n");
- bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES);
- vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
- vinum_conf.plexes_used = 0; /* and number in use */
-
- for (i = 0; i < PLEXMUTEXES; i++) {
- snprintf(mutexname, MUTEXNAMELEN, "vinumplex%d", i);
- mtx_init(&plexmutex[i], mutexname, "plex", MTX_DEF);
- }
-
- /* and subdisks */
- SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
- CHECKALLOC(SD, "vinum: no memory\n");
- bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS);
- vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
- vinum_conf.subdisks_used = 0; /* and number in use */
- dev_clone_tag = EVENTHANDLER_REGISTER(dev_clone, vinum_clone, 0, 1000);
-
- /*
- * See if the loader has passed us any of the autostart
- * options.
- */
- envp = NULL;
- if ((envp = getenv("vinum.autostart")) != NULL) { /* start all drives now */
- vinum_scandisk(NULL);
- freeenv(envp);
- } else if ((envp = getenv("vinum.drives")) != NULL) {
- vinum_scandisk(envp);
- freeenv(envp);
- }
-}
-
-/*
- * Check if we have anything open. If confopen is != 0,
- * that goes for the super device as well, otherwise
- * only for volumes.
- *
- * Return 0 if something is still open (active), 1 if inactive.
- */
-int
-vinum_inactive(int confopen)
-{
- int i;
- int can_do = 1; /* assume we can do it */
-
- if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */
- return 0; /* can't do it while we're open */
- lock_config();
- for (i = 0; i < vinum_conf.volumes_allocated; i++) {
- if ((VOL[i].state > volume_down)
- && (VOL[i].flags & VF_OPEN)) { /* volume is open */
- can_do = 0;
- break;
- }
- }
- unlock_config();
- return can_do;
-}
-
-/*
- * Free all structures.
- * If cleardrive is 0, save the configuration; otherwise
- * remove the configuration from the drive.
- *
- * Before coming here, ensure that no volumes are open.
- */
-void
-free_vinum(int cleardrive)
-{
- int i;
- int drives_allocated = vinum_conf.drives_allocated;
-
- while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN))
- == (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */
- queue_daemon_request(daemonrq_return, (union daemoninfo) 0); /* stop the daemon */
- tsleep(&vinumclose, PUSER, "vstop", 1); /* and wait for it */
- }
- if (DRIVE != NULL) {
- if (cleardrive) { /* remove the vinum config */
- for (i = 0; i < drives_allocated; i++)
- remove_drive(i); /* remove the drive */
- } else { /* keep the config */
- for (i = 0; i < drives_allocated; i++)
- free_drive(&DRIVE[i]); /* close files and things */
- }
- Free(DRIVE);
- }
- if (SD != NULL) {
- for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
- struct sd *sd = &SD[i];
-
- if (sd->state != sd_unallocated)
- free_sd(i);
- }
- Free(SD);
- }
- if (PLEX != NULL) {
- for (i = 0; i < vinum_conf.plexes_allocated; i++) {
- struct plex *plex = &PLEX[i];
-
- if (plex->state != plex_unallocated) /* we have real data there */
- free_plex(i);
- }
- Free(PLEX);
- }
- if (VOL != NULL) {
- for (i = 0; i < vinum_conf.volumes_allocated; i++) {
- struct volume *volume = &VOL[i];
-
- if (volume->state != volume_unallocated)
- free_volume(i);
- }
- Free(VOL);
- }
- bzero(&vinum_conf, sizeof(vinum_conf));
- vinum_conf.version = VINUMVERSION; /* reinstate version number */
-}
-
-STATIC int
-vinum_modevent(module_t mod, modeventtype_t type, void *unused)
-{
- struct sync_args dummyarg =
- {0};
- int i;
-
- switch (type) {
- case MOD_LOAD:
- vinumattach(NULL);
- return 0; /* OK */
- case MOD_UNLOAD:
- if (!vinum_inactive(1)) /* is anything open? */
- return EBUSY; /* yes, we can't do it */
- vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */
- sync(curthread, &dummyarg); /* write out buffers */
- free_vinum(0); /* clean up */
-#ifdef VINUMDEBUG
- if (total_malloced) {
- int i;
-#ifdef INVARIANTS
- int *poke;
-#endif
-
- for (i = 0; i < malloccount; i++) {
- if (debug & DEBUG_WARNINGS) /* want to hear about them */
- log(LOG_WARNING,
- "vinum: exiting with %d bytes malloced from %s:%d\n",
- malloced[i].size,
- malloced[i].file,
- malloced[i].line);
-#ifdef INVARIANTS
- poke = &((int *) malloced[i].address)
- [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */
- if (*poke == 0xdeadc0de) /* already freed */
- log(LOG_ERR,
- "vinum: exiting with malloc table inconsistency at %p from %s:%d\n",
- malloced[i].address,
- malloced[i].file,
- malloced[i].line);
-#endif
- Free(malloced[i].address);
- }
- }
-#endif
- destroy_dev(vinum_daemon_dev); /* daemon device */
- destroy_dev(vinum_super_dev);
- for (i = 0; i < PLEXMUTEXES; i++)
- mtx_destroy(&plexmutex[i]);
- log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */
- EVENTHANDLER_DEREGISTER(dev_clone, dev_clone_tag);
- return 0;
- default:
- return EOPNOTSUPP;
- break;
- }
- return 0;
-}
-
-static moduledata_t vinum_mod =
-{
- "vinum",
- (modeventhand_t) vinum_modevent,
- 0
-};
-DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
-
-/* ARGSUSED */
-/* Open a vinum object */
-int
-vinumopen(struct cdev *dev,
- int flags,
- int fmt,
- struct thread *td)
-{
- int error;
- unsigned int index;
- struct volume *vol;
- struct plex *plex;
- struct sd *sd;
- int devminor; /* minor number */
-
- devminor = minor(dev);
- error = 0;
- /* First, decide what we're looking at */
- switch (DEVTYPE(dev)) {
- case VINUM_VOLUME_TYPE:
- /*
- * The super device and daemon device are the last two
- * volume numbers, so check for them first.
- */
- if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */
- ||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */
- error = suser(td); /* are we root? */
-
- if (error == 0) { /* yes, can do */
- if (devminor == VINUM_DAEMON_MINOR) /* daemon device */
- vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */
- else /* superdev */
- vinum_conf.flags |= VF_OPEN; /* we're open */
- }
- return error;
- }
- /* Must be a real volume. Check. */
- index = Volno(dev);
- if (index >= vinum_conf.volumes_allocated)
- return ENXIO; /* no such device */
- vol = &VOL[index];
-
- switch (vol->state) {
- case volume_unallocated:
- case volume_uninit:
- return ENXIO;
-
- case volume_up:
- vol->flags |= VF_OPEN; /* note we're open */
- return 0;
-
- case volume_down:
- return EIO;
-
- default:
- return EINVAL;
- }
-
- case VINUM_PLEX_TYPE:
- index = Plexno(dev); /* get plex index in vinum_conf */
- if (index >= vinum_conf.plexes_allocated)
- return ENXIO; /* no such device */
- plex = &PLEX[index];
-
- switch (plex->state) {
- case plex_unallocated:
- return ENXIO;
-
- case plex_referenced:
- return EINVAL;
-
- default:
- plex->flags |= VF_OPEN; /* note we're open */
- return 0;
- }
-
- case VINUM_SD_TYPE:
- case VINUM_SD2_TYPE:
- index = Sdno(dev); /* get the subdisk number */
- if (index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */
- return ENXIO; /* no such device */
- sd = &SD[index];
-
- /*
- * Opening a subdisk is always a special operation, so
- * we ignore the state as long as it represents a real
- * subdisk.
- */
- switch (sd->state) {
- case sd_unallocated:
- return ENXIO;
-
- case sd_uninit:
- case sd_referenced:
- return EINVAL;
-
- default:
- sd->flags |= VF_OPEN; /* note we're open */
- return 0;
- }
- }
- return 0; /* to keep the compiler happy */
-}
-
-/* ARGSUSED */
-int
-vinumclose(struct cdev *dev,
- int flags,
- int fmt,
- struct thread *td)
-{
- unsigned int index;
- struct volume *vol;
- int devminor;
-
- devminor = minor(dev);
- /* First, decide what we're looking at */
- switch (DEVTYPE(dev)) {
- case VINUM_VOLUME_TYPE:
- /*
- * The super device and daemon device are the last two
- * volume numbers, so check for them first.
- */
- if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */
- ||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */
- /*
- * don't worry about whether we're root:
- * nobody else would get this far.
- */
- if (devminor == VINUM_SUPERDEV_MINOR) /* normal superdev */
- vinum_conf.flags &= ~VF_OPEN; /* no longer open */
- else { /* the daemon device */
- vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */
- if (vinum_conf.flags & VF_STOPPING) /* we're trying to stop, */
- wakeup(&vinumclose); /* we can continue now */
- }
- return 0;
- }
- /* Real volume */
- index = Volno(dev);
- if (index >= vinum_conf.volumes_allocated)
- return ENXIO; /* no such device */
- vol = &VOL[index];
-
- switch (vol->state) {
- case volume_unallocated:
- case volume_uninit:
- return ENXIO;
-
- case volume_up:
- vol->flags &= ~VF_OPEN; /* reset our flags */
- return 0;
-
- case volume_down:
- return EIO;
-
- default:
- return EINVAL;
- }
-
- case VINUM_PLEX_TYPE:
- if (Volno(dev) >= vinum_conf.volumes_allocated)
- return ENXIO;
- index = Plexno (dev);
- if (index >= vinum_conf.plexes_allocated) /* no such plex */
- return ENXIO;
- PLEX [index].flags &= ~VF_OPEN; /* no longer open */
- return 0;
-
- case VINUM_SD_TYPE:
- if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */
- (Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */
- return ENXIO; /* no such device */
- index = Sdno (dev);
- if (index >= vinum_conf.subdisks_allocated) /* no such sd */
- return ENXIO;
- SD [index].flags &= ~VF_OPEN; /* no longer open */
- return 0;
-
-
- default:
- return ENODEV; /* don't know what to do with these */
- }
-}
-
-void
-vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev)
-{
- struct volume *vol;
- int i;
-
- if (*dev != NULL)
- return;
- if (strncmp(name, "vinum/", sizeof("vinum/") - 1) != 0)
- return;
-
- name += sizeof("vinum/") - 1;
- if ((i = find_volume(name, 0)) == -1)
- return;
-
- vol = &VOL[i];
- *dev = vol->dev;
-}
-
-
-/* Local Variables: */
-/* fill-column: 60 */
-/* End: */
diff --git a/sys/dev/vinum/vinumconfig.c b/sys/dev/vinum/vinumconfig.c
deleted file mode 100644
index 989af88..0000000
--- a/sys/dev/vinum/vinumconfig.c
+++ /dev/null
@@ -1,2166 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumconfig.c,v 1.41 2003/05/23 00:57:34 grog Exp grog $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#define STATIC static
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-#define MAXTOKEN 64 /* maximum number of tokens in a line */
-
-/*
- * We can afford the luxury of global variables here,
- * since start_config ensures that these functions
- * are single-threaded.
- */
-
-/* These are indices in vinum_conf of the last-mentioned of each kind of object */
-static int current_drive; /* note the last drive we mention, for
- * some defaults */
-static int current_plex; /* and the same for the last plex */
-static int current_volume; /* and the last volme */
-static struct _ioctl_reply *ioctl_reply; /* struct to return via ioctl */
-
-
-/* These values are used by most of these routines, so set them as globals */
-static char *token[MAXTOKEN]; /* pointers to individual tokens */
-static int tokens; /* number of tokens */
-
-#define TOCONS 0x01
-#define TOTTY 0x02
-#define TOLOG 0x04
-
-struct putchar_arg {
- int flags;
- struct tty *tty;
-};
-
-#define MSG_MAX 1024 /* maximum length of a formatted message */
-/*
- * Format an error message and return to the user
- * in the reply. CARE: This routine is designed
- * to be called only from the configuration
- * routines, so it assumes it's the owner of the
- * configuration lock, and unlocks it on exit.
- */
-void
-throw_rude_remark(int error, char *msg,...)
-{
- int retval;
- va_list ap;
- char *text;
- static int finishing; /* don't recurse */
- int was_finishing;
-
- if ((vinum_conf.flags & VF_LOCKED) == 0) /* bug catcher */
- panic("throw_rude_remark: called without config lock");
- va_start(ap, msg);
- if ((ioctl_reply != NULL) /* we're called from the user */
- &&(!(vinum_conf.flags & VF_READING_CONFIG))) { /* and not reading from disk: return msg */
- /*
- * We can't just format to ioctl_reply, since it
- * may contain our input parameters
- */
- text = Malloc(MSG_MAX);
- if (text == NULL) {
- log(LOG_ERR, "vinum: can't allocate error message buffer\n");
- printf("vinum: ");
- vprintf(msg, ap); /* print to the console */
- printf("\n");
- } else {
- retval = kvprintf(msg, NULL, (void *) text, 10, ap);
- text[retval] = '\0'; /* delimit */
- strlcpy(ioctl_reply->msg, text, sizeof(ioctl_reply->msg));
- ioctl_reply->error = error; /* first byte is the error number */
- Free(text);
- }
- } else {
- printf("vinum: ");
- vprintf(msg, ap); /* print to the console */
- printf("\n");
- }
- va_end(ap);
-
- if (vinum_conf.flags & VF_READING_CONFIG) { /* go through to the bitter end, */
- if ((vinum_conf.flags & VF_READING_CONFIG) /* we're reading from disk, */
- &&((daemon_options & daemon_noupdate) == 0)) {
- log(LOG_NOTICE, "Disabling configuration updates\n");
- daemon_options |= daemon_noupdate;
- }
- return;
- }
- /*
- * We have a problem here: we want to unlock the
- * configuration, which implies tidying up, but
- * if we find an error while tidying up, we
- * could recurse for ever. Use this kludge to
- * only try once.
- */
- was_finishing = finishing;
- finishing = 1;
- finish_config(was_finishing); /* unlock anything we may be holding */
- finishing = was_finishing;
- longjmp(command_fail, error);
-}
-
-/*
- * Check a volume to see if the plex is already assigned to it.
- * Return index in volume->plex, or -1 if not assigned
- */
-int
-my_plex(int volno, int plexno)
-{
- int i;
- struct volume *vol;
-
- vol = &VOL[volno]; /* point to volno */
- for (i = 0; i < vol->plexes; i++)
- if (vol->plex[i] == plexno)
- return i;
- return -1; /* not found */
-}
-
-/*
- * Check a plex to see if the subdisk is already assigned to it.
- * Return index in plex->sd, or -1 if not assigned
- */
-int
-my_sd(int plexno, int sdno)
-{
- int i;
- struct plex *plex;
-
- plex = &PLEX[plexno];
- for (i = 0; i < plex->subdisks; i++)
- if (plex->sdnos[i] == sdno)
- return i;
- return -1; /* not found */
-}
-
-/* Add plex to the volume if possible */
-int
-give_plex_to_volume(int volno, int plexno, int preferme)
-{
- struct volume *vol;
- int i;
- int volplexno;
-
- /*
- * It's not an error for the plex to already
- * belong to the volume, but we need to check a
- * number of things to make sure it's done right.
- * Some day.
- */
- volplexno = my_plex(volno, plexno);
- vol = &VOL[volno]; /* point to volume */
- if (volplexno < 0) {
- if (vol->plexes == MAXPLEX) /* all plexes allocated */
- throw_rude_remark(ENOSPC,
- "Too many plexes for volume %s",
- vol->name);
- else if ((vol->plexes > 0) /* we have other plexes */
- &&((vol->flags & VF_CONFIG_SETUPSTATE) == 0)) /* and we're not setting up state */
- invalidate_subdisks(&PLEX[plexno], sd_stale); /* make our subdisks invalid */
- vol->plex[vol->plexes] = plexno; /* this one */
- vol->plexes++; /* add another plex */
- PLEX[plexno].volno = volno; /* note the number of our volume */
-
- /* Find out how big our volume is */
- for (i = 0; i < vol->plexes; i++)
- vol->size = max(vol->size, PLEX[vol->plex[i]].length);
- volplexno = vol->plexes - 1; /* number of plex in volume */
- }
- if (preferme) {
- if (vol->preferred_plex >= 0) /* already had a facourite, */
- printf("vinum: changing preferred plex for %s from %s to %s\n",
- vol->name,
- PLEX[vol->plex[vol->preferred_plex]].name,
- PLEX[plexno].name);
- vol->preferred_plex = volplexno;
- }
- return volplexno;
-}
-
-/*
- * Add subdisk to a plex if possible
- */
-int
-give_sd_to_plex(int plexno, int sdno)
-{
- int i;
- struct plex *plex;
- struct sd *sd;
-
- /*
- * It's not an error for the sd to already
- * belong to the plex, but we need to check a
- * number of things to make sure it's done right.
- * Some day.
- */
- i = my_sd(plexno, sdno);
- if (i >= 0) /* does it already belong to us? */
- return i; /* that's it */
-
- plex = &PLEX[plexno]; /* point to the plex */
- sd = &SD[sdno]; /* and the subdisk */
-
- /* Do we have an offset? Otherwise put it after the last one */
- if (sd->plexoffset < 0) { /* no offset specified */
- if (plex->subdisks > 0) {
- struct sd *lastsd = &SD[plex->sdnos[plex->subdisks - 1]]; /* last subdisk */
-
- if (plex->organization == plex_concat) /* concat, */
- sd->plexoffset = lastsd->sectors + lastsd->plexoffset; /* starts here */
- else /* striped, RAID-4 or RAID-5 */
- sd->plexoffset = plex->stripesize * plex->subdisks; /* starts here */
- } else /* first subdisk */
- sd->plexoffset = 0; /* start at the beginning */
- }
- if (plex->subdisks == MAXSD) { /* we already have our maximum */
- if (sd->state == sd_unallocated) /* haven't finished allocating the sd, */
- free_sd(sdno); /* free it to return drive space */
- throw_rude_remark(ENOSPC, /* crap out */
- "Can't add %s to %s: plex full",
- sd->name,
- plex->name);
- }
- plex->subdisks++; /* another entry */
- if (plex->subdisks >= plex->subdisks_allocated) /* need more space */
- EXPAND(plex->sdnos, int, plex->subdisks_allocated, INITIAL_SUBDISKS_IN_PLEX);
-
- /* Adjust size of plex and volume. */
- if (isparity(plex)) /* RAID-4 or RAID-5 */
- plex->length = (plex->subdisks - 1) * sd->sectors; /* size is one disk short */
- else
- plex->length += sd->sectors; /* plex gets this much bigger */
- if (plex->volno >= 0) /* we have a volume */
- VOL[plex->volno].size = max(VOL[plex->volno].size, plex->length); /* adjust its size */
-
- /*
- * We need to check that the subdisks don't overlap,
- * but we can't do that until a point where we *must*
- * know the size of all the subdisks. That's not
- * here. But we need to sort them by offset
- */
- for (i = 0; i < plex->subdisks - 1; i++) {
- if (sd->plexoffset < SD[plex->sdnos[i]].plexoffset) { /* it fits before this one */
- /* First move any remaining subdisks by one */
- int j;
-
- for (j = plex->subdisks - 1; j > i; j--) /* move up one at a time */
- plex->sdnos[j] = plex->sdnos[j - 1];
- plex->sdnos[i] = sdno;
- sd->plexsdno = i; /* note where we are in the subdisk */
- return i;
- }
- }
-
- /*
- * The plex doesn't have any subdisk with a
- * larger offset. Insert it here.
- */
- plex->sdnos[i] = sdno;
- sd->plexsdno = i; /* note where we are in the subdisk */
- sd->plexno = plex->plexno; /* and who we belong to */
- return i;
-}
-
-/*
- * Add a subdisk to drive if possible. The
- * pointer to the drive must already be stored in
- * the sd structure, but the drive doesn't know
- * about the subdisk yet.
- */
-void
-give_sd_to_drive(int sdno)
-{
- struct sd *sd; /* pointer to subdisk */
- struct drive *drive; /* and drive */
- int fe; /* index in free list */
- int sfe; /* and index of subdisk when assigning max */
-
- sd = &SD[sdno]; /* point to sd */
- drive = &DRIVE[sd->driveno]; /* and drive */
-
- if (drive->state != drive_up) {
- update_sd_state(sdno); /* that crashes the subdisk */
- return;
- }
- sd->sectorsize = drive->sectorsize; /* get sector size from drive */
- if (drive->flags & VF_HOTSPARE) /* the drive is a hot spare, */
- throw_rude_remark(ENOSPC,
- "Can't place %s on hot spare drive %s",
- sd->name,
- drive->label.name);
- if ((drive->sectors_available == 0) /* no space left */
- ||(sd->sectors > drive->sectors_available)) { /* or too big, */
- sd->driveoffset = -1; /* don't be confusing */
- free_sd(sd->sdno);
- throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
- return; /* in case we come back here */
- }
- drive->subdisks_used++; /* one more subdisk */
-
- if (sd->sectors == 0) { /* take the largest chunk */
- sfe = 0; /* to keep the compiler happy */
- for (fe = 0; fe < drive->freelist_entries; fe++) {
- if (drive->freelist[fe].sectors >= sd->sectors) { /* more space here */
- sd->sectors = drive->freelist[fe].sectors; /* take it */
- sd->driveoffset = drive->freelist[fe].offset;
- sfe = fe; /* and note the index for later */
- }
- }
- if (sd->sectors == 0) { /* no luck, */
- sd->driveoffset = -1; /* don't be confusing */
- free_sd(sd->sdno);
- throw_rude_remark(ENOSPC, /* give up */
- "No space for %s on %s",
- sd->name,
- drive->label.name);
- }
- if (sfe < (drive->freelist_entries - 1)) /* not the last one, */
- bcopy(&drive->freelist[sfe + 1],
- &drive->freelist[sfe],
- (drive->freelist_entries - sfe) * sizeof(struct drive_freelist));
- drive->freelist_entries--; /* one less entry */
- drive->sectors_available -= sd->sectors; /* and note how much less space we have */
- } else if (sd->driveoffset < 0) { /* no offset specified, find one */
- for (fe = 0; fe < drive->freelist_entries; fe++) {
- if (drive->freelist[fe].sectors >= sd->sectors) { /* it'll fit here */
- sd->driveoffset = drive->freelist[fe].offset;
- if (sd->sectors == drive->freelist[fe].sectors) { /* used up the entire entry */
- if (fe < (drive->freelist_entries - 1)) /* not the last one, */
- bcopy(&drive->freelist[fe + 1],
- &drive->freelist[fe],
- (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
- drive->freelist_entries--; /* one less entry */
- } else {
- drive->freelist[fe].sectors -= sd->sectors; /* this much less space */
- drive->freelist[fe].offset += sd->sectors; /* this much further on */
- }
- drive->sectors_available -= sd->sectors; /* and note how much less space we have */
- break;
- }
- }
- if (sd->driveoffset < 0)
- /*
- * Didn't find anything. Although the drive has
- * enough space, it's too fragmented
- */
- {
- free_sd(sd->sdno);
- throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name);
- }
- } else { /* specific offset */
- /*
- * For a specific offset to work, the space must be
- * entirely in a single freelist entry. Look for it.
- */
- u_int64_t sdend = sd->driveoffset + sd->sectors; /* end of our subdisk */
- for (fe = 0; fe < drive->freelist_entries; fe++) {
- u_int64_t dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of entry */
- if (dend >= sdend) { /* fits before here */
- if (drive->freelist[fe].offset > sd->driveoffset) { /* starts after the beginning of sd area */
- sd->driveoffset = -1; /* don't be confusing */
- set_sd_state(sd->sdno, sd_down, setstate_force);
- throw_rude_remark(ENOSPC,
- "No space for %s on drive %s at offset %lld",
- sd->name,
- drive->label.name,
- sd->driveoffset);
- return;
- }
- /*
- * We've found the space, and we can allocate it.
- * We don't need to say that to the subdisk, which
- * already knows about it. We need to tell it to
- * the free list, though. We have four possibilities:
- *
- * 1. The subdisk exactly eats up the entry. That's the
- * same as above.
- * 2. The subdisk starts at the beginning and leaves space
- * at the end.
- * 3. The subdisk starts after the beginning and leaves
- * space at the end as well: we end up with another
- * fragment.
- * 4. The subdisk leaves space at the beginning and finishes
- * at the end.
- */
- drive->sectors_available -= sd->sectors; /* note how much less space we have */
- if (sd->driveoffset == drive->freelist[fe].offset) { /* 1 or 2 */
- if (sd->sectors == drive->freelist[fe].sectors) { /* 1: used up the entire entry */
- if (fe < (drive->freelist_entries - 1)) /* not the last one, */
- bcopy(&drive->freelist[fe + 1],
- &drive->freelist[fe],
- (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
- drive->freelist_entries--; /* one less entry */
- } else { /* 2: space at the end */
- drive->freelist[fe].sectors -= sd->sectors; /* this much less space */
- drive->freelist[fe].offset += sd->sectors; /* this much further on */
- }
- } else { /* 3 or 4 */
- drive->freelist[fe].sectors = sd->driveoffset - drive->freelist[fe].offset;
- if (dend > sdend) { /* 3: space at the end as well */
- if (fe < (drive->freelist_entries - 1)) /* not the last one */
- bcopy(&drive->freelist[fe], /* move the rest down */
- &drive->freelist[fe + 1],
- (drive->freelist_entries - fe) * sizeof(struct drive_freelist));
- drive->freelist_entries++; /* one less entry */
- drive->freelist[fe + 1].offset = sdend; /* second entry starts after sd */
- drive->freelist[fe + 1].sectors = dend - sdend; /* and is this long */
- }
- }
- break;
- }
- }
- }
- drive->opencount++; /* one more subdisk attached */
-}
-
-/* Get an empty drive entry from the drive table */
-int
-get_empty_drive(void)
-{
- int driveno;
- struct drive *drive;
-
- /* first see if we have one which has been deallocated */
- for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
- if (DRIVE[driveno].state == drive_unallocated) /* bingo */
- break;
- }
-
- if (driveno >= vinum_conf.drives_allocated) /* we've used all our allocation */
- EXPAND(DRIVE, struct drive, vinum_conf.drives_allocated, INITIAL_DRIVES);
-
- /* got a drive entry. Make it pretty */
- drive = &DRIVE[driveno];
- bzero(drive, sizeof(struct drive));
- drive->driveno = driveno; /* put number in structure */
- drive->flags |= VF_NEWBORN; /* newly born drive */
- drive->dev = NULL;
- strcpy(drive->devicename, "unknown"); /* and make the name ``unknown'' */
- return driveno; /* return the index */
-}
-
-/*
- * Find the named drive in vinum_conf.drive,
- * return the index in vinum_conf.drive.
- * Don't mark the drive as allocated (XXX SMP)
- * If create != 0, create an entry if it doesn't exist
- */
-/* XXX check if we have it open from attach */
-int
-find_drive(const char *name, int create)
-{
- int driveno;
- struct drive *drive;
-
- if (name != NULL) {
- for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
- drive = &DRIVE[driveno]; /* point to drive */
- if ((drive->label.name[0] != '\0') /* it has a name */
- &&(strcmp(drive->label.name, name) == 0) /* and it's this one */
- &&(drive->state > drive_unallocated)) /* and it's a real one: found */
- return driveno;
- }
- }
- /* the drive isn't in the list. Add it if he wants */
- if (create == 0) /* don't want to create */
- return -1; /* give up */
-
- driveno = get_empty_drive();
- drive = &DRIVE[driveno];
- if (name != NULL)
- strlcpy(drive->label.name, /* put in its name */
- name,
- sizeof(drive->label.name));
- drive->state = drive_referenced; /* in use, nothing worthwhile there */
- return driveno; /* return the index */
-}
-
-/*
- * Find a drive given its device name.
- * devname must be valid.
- * Otherwise the same as find_drive above.
- */
-int
-find_drive_by_name(const char *devname, int create)
-{
- int driveno;
- struct drive *drive;
-
- for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
- drive = &DRIVE[driveno]; /* point to drive */
- if ((strcmp(drive->devicename, devname) == 0) /* it's this device */
- &&(drive->state > drive_unallocated)) /* and it's a real one: found */
- return driveno;
- }
-
- /* the drive isn't in the list. Add it if he wants */
- if (create == 0) /* don't want to create */
- return -1; /* give up */
-
- driveno = get_empty_drive();
- drive = &DRIVE[driveno];
- bcopy(devname, /* put in its name */
- drive->devicename,
- min(sizeof(drive->devicename),
- strlen(devname)));
- drive->state = drive_referenced; /* in use, nothing worthwhile there */
- return driveno; /* return the index */
-}
-
-/* Find an empty subdisk in the subdisk table */
-int
-get_empty_sd(void)
-{
- int sdno;
- struct sd *sd;
-
- /* first see if we have one which has been deallocated */
- for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
- if (SD[sdno].state == sd_unallocated) /* bingo */
- break;
- }
- if (sdno >= vinum_conf.subdisks_allocated)
- /*
- * We've run out of space. sdno is pointing
- * where we want it, but at the moment we
- * don't have the space. Get it.
- *
- * XXX We should check for overflow here. We
- * shouldn't allocate more than VINUM_MAXSD
- * subdisks (currently at least a quarter of a
- * million).
- */
- EXPAND(SD, struct sd, vinum_conf.subdisks_allocated, INITIAL_SUBDISKS);
-
- /* initialize some things */
- sd = &SD[sdno]; /* point to it */
- bzero(sd, sizeof(struct sd)); /* initialize */
- sd->flags |= VF_NEWBORN; /* newly born subdisk */
- sd->plexno = -1; /* no plex */
- sd->sectors = -1; /* no space */
- sd->driveno = -1; /* no drive */
- sd->plexoffset = -1; /* and no offsets */
- sd->driveoffset = -1;
- return sdno; /* return the index */
-}
-
-/* return a drive to the free pool */
-void
-free_drive(struct drive *drive)
-{
- LOCKDRIVE(drive);
- if (drive->flags & VF_OPEN) /* it's open, */
- close_locked_drive(drive); /* close it */
- if (drive->freelist)
- Free(drive->freelist);
- if (drive->dev != NULL)
- dev_rel(drive->dev);
- bzero(drive, sizeof(struct drive)); /* this also sets drive_unallocated */
- unlockdrive(drive);
-}
-
-/*
- * Find the named subdisk in vinum_conf.sd.
- *
- * If create != 0, create an entry if it doesn't exist
- *
- * Return index in vinum_conf.sd
- */
-int
-find_subdisk(const char *name, int create)
-{
- int sdno;
- struct sd *sd;
-
- for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
- if (strcmp(SD[sdno].name, name) == 0) /* found it */
- return sdno;
- }
-
- /* the subdisk isn't in the list. Add it if he wants */
- if (create == 0) /* don't want to create */
- return -1; /* give up */
-
- /* Allocate one and insert the name */
- sdno = get_empty_sd();
- sd = &SD[sdno];
- bcopy(name, sd->name, min(sizeof(sd->name), strlen(name))); /* put in its name */
- return sdno; /* return the pointer */
-}
-
-/* Return space to a drive */
-void
-return_drive_space(int driveno, int64_t offset, int length)
-{
- struct drive *drive;
- int fe; /* free list entry */
- u_int64_t sdend; /* end of our subdisk */
- u_int64_t dend; /* end of our freelist entry */
-
- drive = &DRIVE[driveno];
- if (drive->state == drive_up) {
- sdend = offset + length; /* end of our subdisk */
-
- /* Look for where to return the sd address space */
- for (fe = 0;
- (fe < drive->freelist_entries) && (drive->freelist[fe].offset < offset);
- fe++);
- /*
- * Now we are pointing to the last entry, the first
- * with a higher offset than the subdisk, or both.
- */
- if ((fe > 1) /* not the first entry */
- &&((fe == drive->freelist_entries) /* gone past the end */
- ||(drive->freelist[fe].offset > offset))) /* or past the block were looking for */
- fe--; /* point to the block before */
- dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of the entry */
-
- /*
- * At this point, we are pointing to the correct
- * place in the free list. A number of possibilities
- * exist:
- *
- * 1. The block to be freed starts at the end of the
- * block to which we are pointing. This has two
- * subcases:
- *
- * a. The block to be freed ends at the beginning
- * of the following block. Merge the three
- * areas into a single block.
- *
- * b. The block is shorter than the space between
- * the current block and the next one. Enlarge
- * the current block.
- *
- * 2. The block to be freed starts after the end
- * of the block. Again, we have two cases:
- *
- * a. It ends before the start of the following block.
- * Create a new free block.
- *
- * b. It ends at the start of the following block.
- * Enlarge the following block downwards.
- *
- * When there is only one free space block, and the
- * space to be returned is before it, the pointer is
- * to a non-existent zeroth block. XXX check this
- */
- if (offset == dend) { /* Case 1: it starts at the end of this block */
- if ((fe < drive->freelist_entries - 1) /* we're not the last block in the free list */
- /* and the subdisk ends at the start of the next block */
- &&(sdend == drive->freelist[fe + 1].offset)) {
- drive->freelist[fe].sectors /* 1a: merge all three blocks */
- = drive->freelist[fe + 1].sectors;
- if (fe < drive->freelist_entries - 2) /* still more blocks after next */
- bcopy(&drive->freelist[fe + 2], /* move down one */
- &drive->freelist[fe + 1],
- (drive->freelist_entries - 2 - fe)
- * sizeof(struct drive_freelist));
- drive->freelist_entries--; /* one less entry in the free list */
- } else /* 1b: just enlarge this block */
- drive->freelist[fe].sectors += length;
- } else { /* Case 2 */
- if (offset > dend) /* it starts after this block */
- fe++; /* so look at the next block */
- if ((fe < drive->freelist_entries) /* we're not the last block in the free list */
- /* and the subdisk ends at the start of this block: case 4 */
- &&(sdend == drive->freelist[fe].offset)) {
- drive->freelist[fe].offset = offset; /* it starts where the sd was */
- drive->freelist[fe].sectors += length; /* and it's this much bigger */
- } else { /* case 3: non-contiguous */
- if (fe < drive->freelist_entries) /* not after the last block, */
- bcopy(&drive->freelist[fe], /* move the rest up one entry */
- &drive->freelist[fe + 1],
- (drive->freelist_entries - fe)
- * sizeof(struct drive_freelist));
- drive->freelist_entries++; /* one less entry */
- drive->freelist[fe].offset = offset; /* this entry represents the sd */
- drive->freelist[fe].sectors = length;
- }
- }
- drive->sectors_available += length; /* the sectors are now available */
- }
-}
-
-/*
- * Free an allocated sd entry.
- * This performs memory management only. remove()
- * is responsible for checking relationships.
- */
-void
-free_sd(int sdno)
-{
- struct sd *sd;
-
- sd = &SD[sdno];
- if ((sd->driveno >= 0) /* we have a drive, */
- &&(sd->sectors > 0)) /* and some space on it */
- return_drive_space(sd->driveno, /* return the space */
- sd->driveoffset,
- sd->sectors);
- if (sd->plexno >= 0)
- PLEX[sd->plexno].subdisks--; /* one less subdisk */
- /*
- * If we come here as the result of a
- * configuration error, we may not yet have
- * created a device entry for the subdisk.
- */
- if (sd->dev)
- destroy_dev(sd->dev);
- bzero(sd, sizeof(struct sd)); /* and clear it out */
- sd->state = sd_unallocated;
- vinum_conf.subdisks_used--; /* one less sd */
-}
-
-/* Find an empty plex in the plex table */
-int
-get_empty_plex(void)
-{
- int plexno;
- struct plex *plex; /* if we allocate one */
-
- /* first see if we have one which has been deallocated */
- for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
- if (PLEX[plexno].state == plex_unallocated) /* bingo */
- break; /* and get out of here */
- }
-
- if (plexno >= vinum_conf.plexes_allocated)
- EXPAND(PLEX, struct plex, vinum_conf.plexes_allocated, INITIAL_PLEXES);
-
- /* Found a plex. Give it an sd structure */
- plex = &PLEX[plexno]; /* this one is ours */
- bzero(plex, sizeof(struct plex)); /* polish it up */
- plex->sdnos = (int *) Malloc(sizeof(int) * INITIAL_SUBDISKS_IN_PLEX); /* allocate sd table */
- CHECKALLOC(plex->sdnos, "vinum: Can't allocate plex subdisk table");
- bzero(plex->sdnos, (sizeof(int) * INITIAL_SUBDISKS_IN_PLEX)); /* do we need this? */
- plex->flags |= VF_NEWBORN; /* newly born plex */
- plex->subdisks = 0; /* no subdisks in use */
- plex->subdisks_allocated = INITIAL_SUBDISKS_IN_PLEX; /* and we have space for this many */
- plex->organization = plex_disorg; /* and it's not organized */
- plex->volno = -1; /* no volume yet */
- return plexno; /* return the index */
-}
-
-/*
- * Find the named plex in vinum_conf.plex
- *
- * If create != 0, create an entry if it doesn't exist
- * return index in vinum_conf.plex
- */
-int
-find_plex(const char *name, int create)
-{
- int plexno;
- struct plex *plex;
-
- for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) {
- if (strcmp(PLEX[plexno].name, name) == 0) /* found it */
- return plexno;
- }
-
- /* the plex isn't in the list. Add it if he wants */
- if (create == 0) /* don't want to create */
- return -1; /* give up */
-
- /* Allocate one and insert the name */
- plexno = get_empty_plex();
- plex = &PLEX[plexno]; /* point to it */
- bcopy(name, plex->name, min(sizeof(plex->name), strlen(name))); /* put in its name */
- return plexno; /* return the pointer */
-}
-
-/*
- * Free an allocated plex entry
- * and its associated memory areas
- */
-void
-free_plex(int plexno)
-{
- struct plex *plex;
-
- plex = &PLEX[plexno];
- if (plex->sdnos)
- Free(plex->sdnos);
- if (plex->lock)
- Free(plex->lock);
- if (plex->dev)
- destroy_dev(plex->dev);
- bzero(plex, sizeof(struct plex)); /* and clear it out */
- plex->state = plex_unallocated;
-}
-
-/* Find an empty volume in the volume table */
-int
-get_empty_volume(void)
-{
- int volno;
- struct volume *vol;
- int i;
-
- /* first see if we have one which has been deallocated */
- for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
- if (VOL[volno].state == volume_unallocated) /* bingo */
- break;
- }
-
- if (volno >= vinum_conf.volumes_allocated)
- EXPAND(VOL, struct volume, vinum_conf.volumes_allocated, INITIAL_VOLUMES);
-
- /* Now initialize fields */
- vol = &VOL[volno];
- bzero(vol, sizeof(struct volume));
- vol->flags |= VF_NEWBORN | VF_CREATED; /* newly born volume */
- vol->preferred_plex = ROUND_ROBIN_READPOL; /* round robin */
- for (i = 0; i < MAXPLEX; i++) /* mark the plexes missing */
- vol->plex[i] = -1;
- return volno; /* return the index */
-}
-
-/*
- * Find the named volume in vinum_conf.volume.
- *
- * If create != 0, create an entry if it doesn't exist
- * return the index in vinum_conf
- */
-int
-find_volume(const char *name, int create)
-{
- int volno;
- struct volume *vol;
-
- for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) {
- if (strcmp(VOL[volno].name, name) == 0) /* found it */
- return volno;
- }
-
- /* the volume isn't in the list. Add it if he wants */
- if (create == 0) /* don't want to create */
- return -1; /* give up */
-
- /* Allocate one and insert the name */
- volno = get_empty_volume();
- vol = &VOL[volno];
- bcopy(name, vol->name, min(sizeof(vol->name), strlen(name))); /* put in its name */
- vol->blocksize = DEV_BSIZE; /* block size of this volume */
- return volno; /* return the pointer */
-}
-
-/*
- * Free an allocated volume entry
- * and its associated memory areas
- */
-void
-free_volume(int volno)
-{
- struct volume *vol;
-
- vol = &VOL[volno];
- if (vol->dev)
- destroy_dev(vol->dev);
- bzero(vol, sizeof(struct volume)); /* and clear it out */
- vol->state = volume_unallocated;
-}
-
-/*
- * Handle a drive definition. We store the information in the global variable
- * drive, so we don't need to allocate.
- *
- * If we find an error, print a message and return
- */
-void
-config_drive(int update)
-{
- enum drive_label_info partition_status; /* info about the partition */
- int parameter;
- int driveno; /* index of drive in vinum_conf */
- struct drive *drive; /* and pointer to it */
- int otherdriveno; /* index of possible second drive */
- int sdno;
-
- if (tokens < 2) /* not enough tokens */
- throw_rude_remark(EINVAL, "Drive has no name\n");
- driveno = find_drive(token[1], 1); /* allocate a drive to initialize */
- drive = &DRIVE[driveno]; /* and get a pointer */
- if (update && ((drive->flags & VF_NEWBORN) == 0)) /* this drive exists already */
- return; /* don't do anything */
- drive->flags &= ~VF_NEWBORN; /* no longer newly born */
-
- if (drive->state != drive_referenced) { /* we already know this drive */
- /*
- * XXX Check which definition is more up-to-date. Give
- * preference for the definition on its own drive.
- */
- return; /* XXX */
- }
- for (parameter = 2; parameter < tokens; parameter++) { /* look at the other tokens */
- switch (get_keyword(token[parameter], &keyword_set)) {
- case kw_device:
- parameter++;
- otherdriveno = find_drive_by_name(token[parameter], 0); /* see if it exists already */
- if (otherdriveno >= 0) { /* yup, */
- drive->state = drive_unallocated; /* deallocate the drive */
- throw_rude_remark(EEXIST, /* and complain */
- "Drive %s would have same device as drive %s",
- token[1],
- DRIVE[otherdriveno].label.name);
- }
- if (drive->devicename[0] == '/') { /* we know this drive... */
- if (strcmp(drive->devicename, token[parameter])) /* different name */
- close_drive(drive); /* close it if it's open */
- else /* no change */
- break;
- }
- /* open the device and get the configuration */
- bcopy(token[parameter], /* insert device information */
- drive->devicename,
- min(sizeof(drive->devicename),
- strlen(token[parameter])));
- partition_status = read_drive_label(drive, 1);
- switch (partition_status) {
- case DL_CANT_OPEN: /* not our kind */
- close_drive(drive);
- if (drive->lasterror == EFTYPE) /* wrong kind of partition */
- throw_rude_remark(drive->lasterror,
- "Drive %s has invalid partition type",
- drive->label.name);
- else /* I/O error of some kind */
- throw_rude_remark(drive->lasterror,
- "Can't initialize drive %s",
- drive->label.name);
- break;
-
- case DL_WRONG_DRIVE: /* valid drive, not the name we expected */
- if (vinum_conf.flags & VF_FORCECONFIG) { /* but we'll accept that */
- bcopy(token[1], drive->label.name, sizeof(drive->label.name));
- break;
- }
- close_drive(drive);
- /*
- * There's a potential race condition here:
- * the rude remark refers to a field in an
- * unallocated drive, which potentially could
- * be reused. This works because we're the only
- * thread accessing the config at the moment.
- */
- drive->state = drive_unallocated; /* throw it away completely */
- throw_rude_remark(drive->lasterror,
- "Incorrect drive name %s specified for drive %s",
- token[1],
- drive->label.name);
- break;
-
- case DL_DELETED_LABEL: /* it was a drive, but we deleted it */
- case DL_NOT_OURS: /* nothing to do with the rest */
- case DL_OURS:
- break;
- }
- /*
- * read_drive_label overwrites the device name.
- * If we get here, we can have the drive,
- * so put it back again
- */
- bcopy(token[parameter],
- drive->devicename,
- min(sizeof(drive->devicename),
- strlen(token[parameter])));
- break;
-
- case kw_state:
- parameter++; /* skip the keyword */
- if (vinum_conf.flags & VF_READING_CONFIG)
- drive->state = DriveState(token[parameter]); /* set the state */
- break;
-
- case kw_hotspare: /* this drive is a hot spare */
- drive->flags |= VF_HOTSPARE;
- break;
-
- default:
- close_drive(drive);
- throw_rude_remark(EINVAL,
- "Drive %s, invalid keyword: %s",
- token[1],
- token[parameter]);
- }
- }
-
- if (drive->devicename[0] != '/') {
- drive->state = drive_unallocated; /* deallocate the drive */
- throw_rude_remark(EINVAL, "No device name for %s", drive->label.name);
- }
- vinum_conf.drives_used++; /* passed all hurdles: one more in use */
- /*
- * If we're replacing a drive, it could be that
- * we already have subdisks referencing this
- * drive. Note where they should be and change
- * their state to obsolete.
- */
- for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
- if ((SD[sdno].state > sd_referenced)
- && (SD[sdno].driveno == driveno)) {
- give_sd_to_drive(sdno);
- if (SD[sdno].state > sd_stale)
- SD[sdno].state = sd_stale;
- }
- }
-}
-
-/*
- * Handle a subdisk definition.
- *
- * A fresh slot is taken from the SD table with get_empty_sd() and filled
- * in from the keyword/value pairs found in the global token[] array
- * (tokens entries; token[0] is the "sd" keyword itself).
- *
- * update is nonzero when an existing configuration is being re-read:
- * a subdisk whose name is already known is then skipped silently
- * instead of being reported as a duplicate.
- *
- * On error throw a message back to the caller with throw_rude_remark().
- * NOTE(review): the code after each throw_rude_remark() call is written
- * as if that function does not return -- confirm against its definition.
- */
-void
-config_subdisk(int update)
-{
-    int parameter;
-    int sdno;					/* index of sd in vinum_conf */
-    struct sd *sd;				/* and pointer to it */
-    u_int64_t size;
-    int detached = 0;				/* set to 1 if this is a detached subdisk */
-    int sdindex = -1;				/* index in plexes subdisk table */
-    enum sdstate state = sd_unallocated;	/* state to set, if specified */
-    int autosize = 0;				/* set if we autosize in give_sd_to_drive */
-    int namedsdno;				/* index of another with this name */
-    char partition = 0;				/* partition of external subdisk (validated, not otherwise used here) */
-
-    sdno = get_empty_sd();			/* allocate an SD to initialize */
-    sd = &SD[sdno];				/* and get a pointer */
-
-    for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */
-	switch (get_keyword(token[parameter], &keyword_set)) {
-	    /*
-	     * If we have a 'name' parameter, it must
-	     * come first, because we're too lazy to tidy
-	     * up dangling refs if it comes later.
-	     */
-	case kw_name:
-	    namedsdno = find_subdisk(token[++parameter], 0); /* find an existing sd with this name */
-	    if (namedsdno >= 0) {		/* got one */
-		if (SD[namedsdno].state == sd_referenced) { /* we've been told about this one */
-		    if (parameter > 2)
-			throw_rude_remark(EINVAL,
-			    "sd %s: name parameter must come first\n", /* no go */
-			    token[parameter]);
-		    else {
-			int i;
-			struct plex *plex;	/* for tidying up dangling references */
-
-			*sd = SD[namedsdno];	/* copy from the referenced one */
-			SD[namedsdno].state = sd_unallocated; /* and deallocate the referenced one */
-			plex = &PLEX[sd->plexno]; /* now take a look at our plex */
-			for (i = 0; i < plex->subdisks; i++) { /* look for the pointer */
-			    if (plex->sdnos[i] == namedsdno) /* pointing to the old subdisk */
-				plex->sdnos[i] = sdno; /* bend it to point here */
-			}
-		    }
-		}
-		/*
-		 * NOTE(review): unlike the equivalent code in
-		 * config_plex(), a referenced subdisk that has just
-		 * been taken over still ends up here and is reported
-		 * as a duplicate.  Left unchanged; confirm intent.
-		 */
-		if (update)			/* are we updating? */
-		    return;			/* that's OK, nothing more to do */
-		else
-		    throw_rude_remark(EINVAL, "Duplicate subdisk %s", token[parameter]);
-	    } else
-		/* strlcpy always NUL-terminates, even for an over-long name */
-		strlcpy(sd->name, token[parameter], sizeof(sd->name));
-	    break;
-
-	case kw_detached:
-	    detached = 1;
-	    break;
-
-	case kw_plexoffset:
-	    size = sizespec(token[++parameter]);
-	    if ((size == -1)			/* unallocated */
-	    &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */
-		break;				/* invalid sd; just ignore it */
-	    if ((size % DEV_BSIZE) != 0)
-		throw_rude_remark(EINVAL,
-		    "sd %s, bad plex offset alignment: %lld",
-		    sd->name,
-		    (long long) size);
-	    else
-		sd->plexoffset = size / DEV_BSIZE;
-	    break;
-
-	case kw_driveoffset:
-	    size = sizespec(token[++parameter]);
-	    if ((size == -1)			/* unallocated */
-	    &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */
-		break;				/* invalid sd; just ignore it */
-	    if ((size % DEV_BSIZE) != 0)
-		throw_rude_remark(EINVAL,
-		    "sd %s, bad drive offset alignment: %lld",
-		    sd->name,
-		    (long long) size);
-	    else
-		sd->driveoffset = size / DEV_BSIZE;
-	    break;
-
-	case kw_len:
-	    if (get_keyword(token[++parameter], &keyword_set) == kw_max) /* select maximum size from drive */
-		size = 0;			/* this is how we say it :-) */
-	    else
-		size = sizespec(token[parameter]);
-	    if ((size % DEV_BSIZE) != 0)
-		/* size is 64 bits wide: print it the same way as the offsets above */
-		throw_rude_remark(EINVAL,
-		    "sd %s, length %lld not multiple of sector size",
-		    sd->name,
-		    (long long) size);
-	    else
-		sd->sectors = size / DEV_BSIZE;
-	    /*
-	     * We have a problem with autosizing: we need to
-	     * give the drive to the plex before we give it
-	     * to the drive, in order to be clean if we give
-	     * up in the middle, but at this time the size hasn't
-	     * been set.  Note that we have to fix up after
-	     * giving the subdisk to the drive.
-	     */
-	    if (size == 0)
-		autosize = 1;			/* note that we're autosizing */
-	    break;
-
-	case kw_drive:
-	    sd->driveno = find_drive(token[++parameter], 1); /* insert drive information */
-	    break;
-
-	case kw_plex:
-	    sd->plexno = find_plex(token[++parameter], 1); /* insert plex information */
-	    break;
-
-	    /*
-	     * Set the state.  We can't do this directly,
-	     * because give_sd_to_plex may change it
-	     */
-	case kw_state:
-	    parameter++;			/* skip the keyword */
-	    if (vinum_conf.flags & VF_READING_CONFIG)
-		state = SdState(token[parameter]); /* set the state */
-	    break;
-
-	case kw_partition:
-	    parameter++;			/* skip the keyword */
-	    if ((strlen(token[parameter]) != 1)
-		|| (token[parameter][0] < 'a')
-		|| (token[parameter][0] > 'h'))
-		throw_rude_remark(EINVAL,
-		    "%s: invalid partition %c",
-		    sd->name,
-		    token[parameter][0]);
-	    else
-		partition = token[parameter][0];
-	    break;
-
-	case kw_retryerrors:
-	    sd->flags |= VF_RETRYERRORS;
-	    break;
-
-	default:
-	    throw_rude_remark(EINVAL, "%s: invalid keyword: %s", sd->name, token[parameter]);
-	}
-    }
-
-    /* Check we have a drive name */
-    if (sd->driveno < 0) {			/* didn't specify a drive */
-	sd->driveno = current_drive;		/* set to the current drive */
-	if (sd->driveno < 0)			/* no current drive? */
-	    throw_rude_remark(EINVAL, "Subdisk %s is not associated with a drive", sd->name);
-    }
-    if (DRIVE[sd->driveno].state != drive_up)
-	sd->state = sd_crashed;
-
-    if (autosize != 0)				/* need to find a size, */
-	give_sd_to_drive(sdno);			/* do it before the plex */
-
-    /* Check for a plex name */
-    if ((sd->plexno < 0)			/* didn't specify a plex */
-    &&(!detached))				/* and didn't say not to, */
-	sd->plexno = current_plex;		/* set to the current plex */
-
-    if (sd->plexno >= 0)
-	sdindex = give_sd_to_plex(sd->plexno, sdno); /* now tell the plex that it has this sd */
-
-    sd->sdno = sdno;				/* point to our entry in the table */
-
-    /* Does the subdisk have a name?  If not, give it one */
-    if (sd->name[0] == '\0') {			/* no name */
-	char sdsuffix[8];			/* form sd name suffix here */
-
-	/* Do we have a plex name? */
-	if (sdindex >= 0)			/* we have a plex */
-	    strlcpy(sd->name,			/* take it from there */
-		PLEX[sd->plexno].name,
-		sizeof(sd->name));
-	else {					/* no way */
-	    if (sd->state == sd_unallocated) {	/* haven't finished allocating the sd, */
-		if (autosize != 0) {		/* but we might have allocated drive space */
-		    vinum_conf.subdisks_used++;	/* ugly hack needed for free_sd() */
-		    free_sd(sdno);		/* free it to return drive space */
-		} else {			/* just clear it */
-		    bzero(sd, sizeof(struct sd));
-		    sd->state = sd_unallocated;
-		}
-	    }
-	    throw_rude_remark(EINVAL, "Unnamed sd is not associated with a plex");
-	}
-	/* bounded: a large sdindex must not overrun the 8 byte buffer */
-	snprintf(sdsuffix, sizeof(sdsuffix), ".s%d", sdindex); /* form the suffix */
-	strlcat(sd->name, sdsuffix, sizeof(sd->name)); /* and add it to the name */
-    }
-    /* do we have complete info for this subdisk? */
-    if (sd->sectors < 0)
-	throw_rude_remark(EINVAL, "sd %s has no length spec", sd->name);
-
-    if (sd->dev == NULL)
-	/*
-	 * sdno can (at least theoretically) overflow
-	 * into the low order bit of the type field.
-	 * This gives rise to a subdisk with type
-	 * VINUM_SD2_TYPE.  This is a feature, not a
-	 * bug.
-	 */
-	sd->dev = make_dev(&vinum_cdevsw,
-	    VINUMMINOR(sdno, VINUM_SD_TYPE),
-	    UID_ROOT,
-	    GID_OPERATOR,
-	    S_IRUSR | S_IWUSR | S_IRGRP,
-	    "vinum/sd/%s",
-	    sd->name);
-    if (state != sd_unallocated)		/* we had a specific state to set */
-	sd->state = state;			/* do it now */
-    else if (sd->state == sd_unallocated)	/* no, nothing set yet, */
-	sd->state = sd_empty;			/* must be empty */
-    if (autosize == 0)				/* no autoconfig, do the drive now */
-	give_sd_to_drive(sdno);
-    vinum_conf.subdisks_used++;			/* one more in use */
-}
-
-/*
- * Handle a plex definition.
- *
- * A fresh slot is taken from the PLEX table with get_empty_plex() and
- * filled in from the keyword/value pairs in the global token[] array.
- * On success the new plex becomes current_plex.
- *
- * update is nonzero when an existing configuration is being re-read:
- * a plex whose name is already known (and is more than a mere
- * reference) is then skipped silently instead of being reported as a
- * duplicate.
- *
- * Errors are reported with throw_rude_remark().
- * NOTE(review): the code after each throw_rude_remark() call is written
- * as if that function does not return -- confirm against its definition.
- */
-void
-config_plex(int update)
-{
-    int parameter;
-    int plexno;					/* index of plex in vinum_conf */
-    struct plex *plex;				/* and pointer to it */
-    int pindex = MAXPLEX;			/* index in volume's plex list */
-    int detached = 0;				/* don't give it to a volume */
-    int namedplexno;
-    enum plexstate state = plex_init;		/* state to set at end */
-    int preferme = 0;				/* set if we want to be preferred access */
-    int stripesize = 0;
-    int wants_stripesize;			/* organization keyword takes a stripe size */
-
-    current_plex = -1;				/* forget the previous plex */
-    plexno = get_empty_plex();			/* allocate a plex */
-    plex = &PLEX[plexno];			/* and point to it */
-    plex->plexno = plexno;			/* and back to the config */
-
-    for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */
-	switch (get_keyword(token[parameter], &keyword_set)) {
-	    /*
-	     * If we have a 'name' parameter, it must
-	     * come first, because we're too lazy to tidy
-	     * up dangling refs if it comes later.
-	     */
-	case kw_name:
-	    namedplexno = find_plex(token[++parameter], 0); /* find an existing plex with this name */
-	    if (namedplexno >= 0) {		/* plex exists already, */
-		if (PLEX[namedplexno].state == plex_referenced) { /* we've been told about this one */
-		    if (parameter > 2)		/* we've done other things first, */
-			throw_rude_remark(EINVAL,
-			    "plex %s: name parameter must come first\n", /* no go */
-			    token[parameter]);
-		    else {
-			int i;
-			struct volume *vol;	/* for tidying up dangling references */
-
-			*plex = PLEX[namedplexno]; /* get the info */
-			PLEX[namedplexno].state = plex_unallocated; /* and deallocate the other one */
-			vol = &VOL[plex->volno]; /* point to the volume */
-			for (i = 0; i < MAXPLEX; i++) { /* for each plex */
-			    if (vol->plex[i] == namedplexno)
-				vol->plex[i] = plexno; /* bend the pointer */
-			}
-		    }
-		    break;			/* use this one */
-		}
-		if (update)			/* are we updating? */
-		    return;			/* yes: that's OK, just return */
-		else
-		    throw_rude_remark(EINVAL, "Duplicate plex %s", token[parameter]);
-	    } else
-		/* strlcpy always NUL-terminates, even for an over-long name */
-		strlcpy(plex->name, token[parameter], sizeof(plex->name));
-	    break;
-
-	case kw_detached:
-	    detached = 1;
-	    break;
-
-	case kw_org:				/* plex organization */
-	    wants_stripesize = 1;		/* true for everything except concat */
-	    switch (get_keyword(token[++parameter], &keyword_set)) {
-	    case kw_concat:
-		plex->organization = plex_concat;
-		wants_stripesize = 0;
-		break;
-
-	    case kw_striped:
-		plex->organization = plex_striped;
-		break;
-
-	    case kw_raid4:
-		plex->organization = plex_raid4;
-		break;
-
-	    case kw_raid5:
-		plex->organization = plex_raid5;
-		break;
-
-	    default:
-		wants_stripesize = 0;
-		throw_rude_remark(EINVAL, "Invalid plex organization");
-	    }
-	    if (wants_stripesize) {		/* the next token is the stripe size */
-		if (++parameter >= tokens)	/* No stripe size specified. */
-		    stripesize = 0;
-		else
-		    stripesize = sizespec(token[parameter]);
-	    }
-	    if (isstriped(plex)) {
-		if (stripesize == 0)		/* didn't specify a valid stripe size */
-		    throw_rude_remark(EINVAL, "Need a stripe size parameter");
-		else if (stripesize % DEV_BSIZE != 0)
-		    throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size",
-			plex->name,
-			stripesize);
-		else
-		    plex->stripesize = stripesize / DEV_BSIZE;
-	    }
-	    break;
-
-	    /*
-	     * We're the preferred plex of our volume.
-	     * Unfortunately, we don't know who our
-	     * volume is yet.  Note that we want to be
-	     * preferred, and actually do it after we
-	     * get a volume.
-	     */
-	case kw_preferred:
-	    preferme = 1;
-	    break;
-
-	case kw_volume:
-	    plex->volno = find_volume(token[++parameter], 1); /* insert a pointer to the volume */
-	    break;
-
-	case kw_sd:				/* add a subdisk */
-	    {
-		int sdno;
-
-		sdno = find_subdisk(token[++parameter], 1); /* find a subdisk */
-		SD[sdno].plexoffset = sizespec(token[++parameter]); /* get the offset */
-		give_sd_to_plex(plexno, sdno);	/* and insert it there */
-		break;
-	    }
-
-	case kw_state:
-	    parameter++;			/* skip the keyword */
-	    if (vinum_conf.flags & VF_READING_CONFIG)
-		state = PlexState(token[parameter]); /* set the state */
-	    break;
-
-	default:
-	    throw_rude_remark(EINVAL, "plex %s, invalid keyword: %s",
-		plex->name,
-		token[parameter]);
-	}
-    }
-
-    if (plex->organization == plex_disorg)
-	throw_rude_remark(EINVAL, "No plex organization specified");
-
-    if ((plex->volno < 0)			/* we don't have a volume */
-    &&(!detached))				/* and we wouldn't object */
-	plex->volno = current_volume;
-
-    if (plex->volno >= 0)
-	pindex = give_plex_to_volume(plex->volno, /* Now tell the volume that it has this plex */
-	    plexno,
-	    preferme);
-
-    /* Does the plex have a name?  If not, give it one */
-    if (plex->name[0] == '\0') {		/* no name */
-	char plexsuffix[8];			/* form plex name suffix here */
-
-	/* Do we have a volume name? */
-	if (plex->volno >= 0)			/* we have a volume */
-	    strlcpy(plex->name,			/* take it from there */
-		VOL[plex->volno].name,
-		sizeof(plex->name));
-	else					/* no way */
-	    throw_rude_remark(EINVAL, "Unnamed plex is not associated with a volume");
-	/* bounded: the suffix must never overrun the 8 byte buffer */
-	snprintf(plexsuffix, sizeof(plexsuffix), ".p%d", pindex); /* form the suffix */
-	strlcat(plex->name, plexsuffix, sizeof(plex->name)); /* and add it to the name */
-    }
-    if (isstriped(plex)) {
-	plex->lock = (struct rangelock *)
-	    Malloc(PLEX_LOCKS * sizeof(struct rangelock));
-	CHECKALLOC(plex->lock, "vinum: Can't allocate lock table\n");
-	bzero((char *) plex->lock, PLEX_LOCKS * sizeof(struct rangelock));
-	plex->lockmtx = &plexmutex[plexno % PLEXMUTEXES]; /* use this mutex for locking */
-    }
-    /* Note the last plex we configured */
-    current_plex = plexno;
-    plex->state = state;			/* set whatever state we chose */
-    vinum_conf.plexes_used++;			/* one more in use */
-    if (plex->dev == NULL)
-	plex->dev = make_dev(&vinum_cdevsw,
-	    VINUMMINOR(plexno, VINUM_PLEX_TYPE),
-	    UID_ROOT,
-	    GID_OPERATOR,
-	    S_IRUSR | S_IWUSR | S_IRGRP,
-	    "vinum/plex/%s",
-	    plex->name);
-}
-
-/*
- * Handle a volume definition.
- *
- * token[1] names the volume; the remaining tokens are keyword/value
- * pairs.  The volume is looked up (and created if necessary) with
- * find_volume() and becomes current_volume on success.
- *
- * update is nonzero when an existing configuration is being re-read;
- * a volume that does not carry VF_CREATED is then left untouched.
- *
- * Errors are reported with throw_rude_remark().
- * NOTE(review): the code after each throw_rude_remark() call is written
- * as if that function does not return -- confirm against its definition.
- */
-void
-config_volume(int update)
-{
-    int parameter;
-    int volno;
-    struct volume *vol;				/* collect volume info here */
-    int i;
-
-    if (tokens < 2)				/* not enough tokens */
-	throw_rude_remark(EINVAL, "Volume has no name");
-    current_volume = -1;			/* forget the previous volume */
-    volno = find_volume(token[1], 1);		/* allocate a volume to initialize */
-    vol = &VOL[volno];				/* and get a pointer */
-    if (update && ((vol->flags & VF_CREATED) == 0)) /* this volume exists already */
-	return;					/* don't do anything */
-    vol->flags &= ~VF_CREATED;			/* it exists now */
-
-    for (parameter = 2; parameter < tokens; parameter++) { /* look at all tokens */
-	switch (get_keyword(token[parameter], &keyword_set)) {
-	case kw_plex:
-	    {
-		int plexno;			/* index of this plex */
-		int myplexno;			/* and index if it's already ours */
-
-		plexno = find_plex(token[++parameter], 1); /* find a plex */
-		if (plexno < 0)			/* couldn't */
-		    break;			/* we've already had an error message */
-		/*
-		 * my_plex() is expected to return the position in
-		 * vol->plex[], or a negative value if the plex is not
-		 * ours (TODO confirm).  Position 0 is a valid hit.
-		 */
-		myplexno = my_plex(volno, plexno); /* does it already belong to us? */
-		if (myplexno >= 0)		/* yes, shouldn't get it again */
-		    throw_rude_remark(EINVAL,
-			"Plex %s already belongs to volume %s",
-			token[parameter],
-			vol->name);
-		else if (vol->plexes >= MAXPLEX) /* table is full: check before counting */
-		    throw_rude_remark(EINVAL,
-			"Too many plexes for volume %s",
-			vol->name);
-		else {
-		    vol->plex[vol->plexes++] = plexno; /* another entry */
-		    PLEX[plexno].state = plex_referenced; /* we know something about it */
-		    PLEX[plexno].volno = volno;	/* and this volume references it */
-		}
-	    }
-	    break;
-
-	case kw_readpol:
-	    switch (get_keyword(token[++parameter], &keyword_set)) { /* decide what to do */
-	    case kw_round:
-		vol->preferred_plex = ROUND_ROBIN_READPOL; /* default */
-		break;
-
-	    case kw_prefer:
-		{
-		    int plexno;			/* number of the preferred plex */
-		    int myplexno;		/* its position in our list, if any */
-
-		    plexno = find_plex(token[++parameter], 1); /* find a plex */
-		    if (plexno < 0) {		/* couldn't */
-			printf("vinum: couldn't find preferred plex %s for %s\n",
-			    token[parameter],
-			    vol->name);
-			break;			/* we've already had an error message */
-		    }
-		    myplexno = my_plex(volno, plexno); /* does it already belong to us? */
-		    if (myplexno >= 0)		/* yes */
-			vol->preferred_plex = myplexno; /* just note the index */
-		    else if (vol->plexes >= MAXPLEX) /* no room for another entry */
-			throw_rude_remark(EINVAL, "Too many plexes");
-		    else {			/* space for the new plex */
-			/* store the plex number, not the (failed) lookup result */
-			vol->plex[vol->plexes] = plexno; /* add it to our list */
-			vol->preferred_plex = vol->plexes; /* and note the index */
-			vol->plexes++;
-		    }
-		}
-		break;
-
-	    default:
-		throw_rude_remark(EINVAL, "Invalid read policy");
-	    }
-	    break;				/* don't fall into kw_setupstate */
-
-	case kw_setupstate:
-	    vol->flags |= VF_CONFIG_SETUPSTATE;	/* set the volume up later on */
-	    break;
-
-	case kw_state:
-	    parameter++;			/* skip the keyword */
-	    if (vinum_conf.flags & VF_READING_CONFIG)
-		vol->state = VolState(token[parameter]); /* set the state */
-	    break;
-
-	    /*
-	     * XXX experimental ideas.  These are not
-	     * documented, and will not be until I
-	     * decide they're worth keeping.
-	     */
-	case kw_writethrough:			/* set writethrough mode */
-	    vol->flags |= VF_WRITETHROUGH;
-	    break;
-
-	case kw_writeback:			/* set writeback mode */
-	    vol->flags &= ~VF_WRITETHROUGH;
-	    break;
-
-	default:
-	    throw_rude_remark(EINVAL, "volume %s, invalid keyword: %s",
-		vol->name,
-		token[parameter]);
-	}
-    }
-    current_volume = volno;			/* note last referred volume */
-    vol->volno = volno;				/* also note in volume */
-
-    /*
-     * Before we can actually use the volume, we need
-     * a volume label.  We could start to fake one here,
-     * but it will be a lot easier when we have some
-     * to copy from the drives, so defer it until we
-     * set up the configuration.  XXX
-     */
-    if (vol->state == volume_unallocated)
-	vol->state = volume_down;		/* now ready to bring up at the end */
-
-    /* Find out how big our volume is */
-    for (i = 0; i < vol->plexes; i++)
-	vol->size = max(vol->size, PLEX[vol->plex[i]].length);
-    vinum_conf.volumes_used++;			/* one more in use */
-    if (vol->dev == NULL)
-	vol->dev = make_dev(&vinum_cdevsw,
-	    VINUMMINOR(volno, VINUM_VOLUME_TYPE),
-	    UID_ROOT,
-	    GID_OPERATOR,
-	    S_IRUSR | S_IWUSR | S_IRGRP,
-	    "vinum/%s",
-	    vol->name);
-}
-
-/*
- * Parse one configuration line.
- *
- * CARE!  The line is tokenized in place: tokenize() is handed cptr and
- * the global token[] array, so the original text of the entry is not
- * preserved (a \0 ends up after each token).
- *
- * The first token selects the object type and the matching config_*()
- * handler is called with the update flag.
- *
- * Returns the (non-positive) token count if the line is empty or could
- * not be tokenized, and 0 for a comment line or a dispatched entry.
- * Too many parameters or an unknown first keyword are reported with
- * throw_rude_remark() (E2BIG and EINVAL respectively).
- */
-int
-parse_config(char *cptr, struct keywordset *keyset, int update)
-{
-    tokens = tokenize(cptr, token, MAXTOKEN);	/* split the line up */
-
-    if (tokens <= 0)				/* nothing usable on this line */
-	return tokens;
-    if (tokens == MAXTOKEN)			/* the token table is full */
-	throw_rude_remark(E2BIG,
-	    "Configuration error for %s: too many parameters",
-	    token[1]);
-
-    if (token[0][0] == '#')			/* comments are silently accepted */
-	return 0;
-
-    /* Hand the line to whichever object handler the keyword names */
-    switch (get_keyword(token[0], keyset)) {
-    case kw_volume:
-	config_volume(update);
-	break;
-
-    case kw_plex:
-	config_plex(update);
-	break;
-
-    case kw_subdisk:
-	config_subdisk(update);
-	break;
-
-    case kw_drive:
-	config_drive(update);
-	break;
-
-    default:					/* nothing else is valid here */
-	throw_rude_remark(EINVAL,		/* should we die? */
-	    "Invalid configuration information: %s",
-	    token[0]);
-    }
-    return 0;
-}
-
-/*
- * Parse a configuration line that arrived from userland via ioctl.
- *
- * The only difference from parse_config() is how errors get back to
- * the caller: the buffer holding the line doubles as the ioctl reply,
- * so the file-global ioctl_reply pointer is aimed at it for the
- * duration of the parse and cleared again afterwards.  According to
- * the original author this is safe because configuration is performed
- * in a single-threaded manner.
- *
- * Returns whatever parse_config() returns (update is always 0 here).
- */
-int
-parse_user_config(char *cptr, struct keywordset *keyset)
-{
-    int result;
-
-    ioctl_reply = (struct _ioctl_reply *) cptr;	/* errors are reported in place */
-    result = parse_config(cptr, keyset, 0);
-    ioctl_reply = NULL;				/* reply buffer no longer ours */
-
-    return result;
-}
-
-/*
- * Remove an object on behalf of an ioctl.
- *
- * msg describes the object (type, index, force and recurse flags).  The
- * same buffer is reused for the reply, so the request is copied to a
- * local before the reply fields are reset.  After a drive, subdisk,
- * plex or volume removal has been attempted, updateconfig(0) is called;
- * any other type yields EINVAL and "Invalid object type" in the reply.
- */
-void
-remove(struct vinum_ioctl_msg *msg)
-{
-    struct vinum_ioctl_msg request = *msg;	/* private copy: msg is about to be overwritten */
-
-    ioctl_reply = (struct _ioctl_reply *) msg;	/* reinstate the address to reply to */
-    ioctl_reply->error = 0;			/* start out with a clean reply */
-    ioctl_reply->msg[0] = '\0';
-
-    switch (request.type) {
-    case drive_object:
-	remove_drive_entry(request.index, request.force);
-	break;
-
-    case sd_object:
-	remove_sd_entry(request.index, request.force, request.recurse);
-	break;
-
-    case plex_object:
-	remove_plex_entry(request.index, request.force, request.recurse);
-	break;
-
-    case volume_object:
-	remove_volume_entry(request.index, request.force, request.recurse);
-	break;
-
-    default:					/* unknown object: nothing was changed */
-	ioctl_reply->error = EINVAL;
-	strcpy(ioctl_reply->msg, "Invalid object type");
-	return;
-    }
-    updateconfig(0);				/* recalculate after the removal */
-}
-
-/*
- * Remove a drive.
- *
- * driveno is the index into the DRIVE table.  If the drive is in use
- * (opencount > 0) it is only removed when force is set, in which case
- * every allocated subdisk on the drive is removed first.
- *
- * Failures are reported through ioctl_reply: EINVAL plus "No such
- * drive" for an index outside the table or an unallocated slot, EBUSY
- * for a drive that is in use and not forced.
- */
-void
-remove_drive_entry(int driveno, int force)
-{
-    struct drive *drive;
-    int sdno;
-
-    /*
-     * Validate the index before touching the table: valid entries are
-     * 0 .. drives_allocated - 1.
-     */
-    if ((driveno < 0)
-	|| (driveno >= vinum_conf.drives_allocated) /* not a valid drive */
-	|| (DRIVE[driveno].state == drive_unallocated)) { /* or nothing there */
-	ioctl_reply->error = EINVAL;
-	strcpy(ioctl_reply->msg, "No such drive");
-	return;
-    }
-    drive = &DRIVE[driveno];
-
-    if (drive->opencount > 0) {			/* we have subdisks */
-	if (!force) {
-	    ioctl_reply->error = EBUSY;		/* can't do that */
-	    return;
-	}
-	/* do it at any cost: get rid of everything living on the drive */
-	for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
-	    if ((SD[sdno].state != sd_unallocated) /* subdisk is allocated */
-	    &&(SD[sdno].driveno == driveno))	/* and it belongs to this drive */
-		remove_sd_entry(sdno, force, 0);
-	}
-    }
-    remove_drive(driveno);			/* now remove it */
-    vinum_conf.drives_used--;			/* one less drive */
-}
-
-/*
- * Remove a subdisk.
- *
- * sdno is the index into the SD table.  A subdisk that belongs to a
- * plex is only removed when force is set; it is then taken out of the
- * plex's subdisk list and, unless the plex is concatenated, the plex
- * is forced to plex_faulty.  recurse is accepted for symmetry with the
- * other remove_*_entry() functions and is not used.
- *
- * Failures are reported through ioctl_reply: EINVAL plus "No such
- * subdisk" for an index outside the table or an unallocated slot,
- * EBUSY for an open subdisk or one that is attached and not forced.
- */
-void
-remove_sd_entry(int sdno, int force, int recurse)
-{
-    struct sd *sd;
-
-    /* valid entries are 0 .. subdisks_allocated - 1 */
-    if ((sdno < 0)
-	|| (sdno >= vinum_conf.subdisks_allocated) /* not a valid sd */
-	|| (SD[sdno].state == sd_unallocated)) { /* or nothing there */
-	ioctl_reply->error = EINVAL;
-	strcpy(ioctl_reply->msg, "No such subdisk");
-	return;
-    }
-    sd = &SD[sdno];
-
-    if (sd->flags & VF_OPEN) {			/* we're open */
-	ioctl_reply->error = EBUSY;		/* no getting around that */
-	return;
-    }
-    if (sd->plexno >= 0) {			/* we have a plex */
-	struct plex *plex;
-	int mysdno;
-
-	if (!force) {
-	    ioctl_reply->error = EBUSY;		/* can't do that */
-	    return;
-	}
-	plex = &PLEX[sd->plexno];		/* point to our plex */
-	for (mysdno = 0;			/* look for ourselves */
-	    mysdno < plex->subdisks && plex->sdnos[mysdno] != sdno;
-	    mysdno++);
-	if (mysdno == plex->subdisks)		/* didn't find it */
-	    /*
-	     * mysdno is a position in the plex's list, not a
-	     * subdisk number: name the subdisk we were asked about.
-	     */
-	    log(LOG_ERR,
-		"Error removing subdisk %s: not found in plex %s\n",
-		sd->name,
-		plex->name);
-	else {					/* remove the subdisk from plex */
-	    if (mysdno < (plex->subdisks - 1))	/* not the last subdisk */
-		bcopy(&plex->sdnos[mysdno + 1],
-		    &plex->sdnos[mysdno],
-		    (plex->subdisks - 1 - mysdno) * sizeof(int));
-	    plex->subdisks--;
-	    sd->plexno = -1;			/* disown the subdisk */
-	}
-
-	/*
-	 * Removing a subdisk from a striped or
-	 * RAID-4 or RAID-5 plex really tears the
-	 * hell out of the structure, and it needs
-	 * to be reinitialized.
-	 */
-	if (plex->organization != plex_concat)	/* not concatenated, */
-	    set_plex_state(plex->plexno, plex_faulty, setstate_force); /* need to reinitialize */
-    }
-    log(LOG_INFO, "vinum: removing %s\n", sd->name);
-    free_sd(sdno);
-}
-
-/*
- * Remove a plex.
- *
- * plexno is the index into the PLEX table.  A plex that still has
- * subdisks, or that belongs to a volume, is only removed when force is
- * set.  With force, recurse decides what happens to the subdisks: they
- * are either freed (recurse) or merely detached (plexno set to -1).
- * If the plex belongs to a volume it is taken out of the volume's plex
- * list, closing the gap.
- *
- * Failures are reported through ioctl_reply: EINVAL plus "No such
- * plex" for an index outside the table or an unallocated slot, EBUSY
- * for an open plex or one that is in use and not forced.
- */
-void
-remove_plex_entry(int plexno, int force, int recurse)
-{
-    struct plex *plex;
-    int sdno;
-
-    /* valid entries are 0 .. plexes_allocated - 1 */
-    if ((plexno < 0)
-	|| (plexno >= vinum_conf.plexes_allocated) /* not a valid plex */
-	|| (PLEX[plexno].state == plex_unallocated)) { /* or nothing there */
-	ioctl_reply->error = EINVAL;
-	strcpy(ioctl_reply->msg, "No such plex");
-	return;					/* nothing to remove */
-    }
-    plex = &PLEX[plexno];
-
-    if (plex->flags & VF_OPEN) {		/* we're open */
-	ioctl_reply->error = EBUSY;		/* no getting around that */
-	return;
-    }
-    if (plex->subdisks) {
-	int sds = plex->subdisks;		/* snapshot of the subdisk count */
-
-	if (!force) {				/* can't do it without force */
-	    ioctl_reply->error = EBUSY;		/* can't do that */
-	    return;
-	}
-	if (recurse) {				/* remove all below */
-	    for (sdno = 0; sdno < sds; sdno++)
-		free_sd(plex->sdnos[sdno]);	/* free all subdisks */
-	} else {				/* just tear them out */
-	    for (sdno = 0; sdno < sds; sdno++)
-		SD[plex->sdnos[sdno]].plexno = -1; /* no plex any more */
-	}
-    }
-    if (plex->volno >= 0) {			/* we are part of a volume */
-	struct volume *vol;
-	int myplexno;
-
-	if (!force) {
-	    ioctl_reply->error = EBUSY;		/* can't do that */
-	    return;
-	}
-	vol = &VOL[plex->volno];
-	for (myplexno = 0; myplexno < vol->plexes; myplexno++)
-	    if (vol->plex[myplexno] == plexno)	/* found it */
-		break;
-	if (myplexno == vol->plexes)		/* didn't find it.  Huh? */
-	    log(LOG_ERR,
-		"Error removing plex %s: not found in volume %s\n",
-		plex->name,
-		vol->name);
-	else {					/* take it out of the volume's list */
-	    if (myplexno < (vol->plexes - 1))	/* not the last plex in the list */
-		/* the length is in bytes, so scale by the element size */
-		bcopy(&vol->plex[myplexno + 1],
-		    &vol->plex[myplexno],
-		    (vol->plexes - 1 - myplexno) * sizeof(vol->plex[0]));
-	    vol->plexes--;
-	}
-    }
-    log(LOG_INFO, "vinum: removing %s\n", plex->name);
-    free_plex(plexno);
-    vinum_conf.plexes_used--;			/* one less plex */
-}
-
-/*
- * Remove a volume.
- *
- * volno is the index into the VOL table.  A volume that still has
- * plexes is only removed when both recurse and force are set, in which
- * case each plex is removed with remove_plex_entry() first.
- *
- * Failures are reported through ioctl_reply: EINVAL plus "No such
- * volume" for an index outside the table or an unallocated slot, EBUSY
- * for an open volume or one with plexes and no recurse+force.
- */
-void
-remove_volume_entry(int volno, int force, int recurse)
-{
-    struct volume *vol;
-    int plexno;
-
-    /* valid entries are 0 .. volumes_allocated - 1 */
-    if ((volno < 0)
-	|| (volno >= vinum_conf.volumes_allocated) /* not a valid volume */
-	|| (VOL[volno].state == volume_unallocated)) { /* or nothing there */
-	ioctl_reply->error = EINVAL;
-	strcpy(ioctl_reply->msg, "No such volume");
-	return;
-    }
-    vol = &VOL[volno];
-
-    if (vol->flags & VF_OPEN) {			/* we're open */
-	ioctl_reply->error = EBUSY;		/* no getting around that */
-	return;
-    }
-    if (vol->plexes) {
-	if (!(recurse && force)) {
-	    ioctl_reply->error = EBUSY;		/* can't do that */
-	    return;
-	}
-	/*
-	 * Remove all below.  Each successful removal takes the plex
-	 * out of vol->plex[] and shifts the entries behind it down,
-	 * so walk the list from the end: the entries still to be
-	 * visited are then never moved.
-	 *
-	 * NOTE(review): as before, a plex that refuses to go (e.g.
-	 * because it is open) does not stop the volume from being
-	 * freed -- confirm that this is intended.
-	 */
-	for (plexno = vol->plexes - 1; plexno >= 0; plexno--)
-	    remove_plex_entry(vol->plex[plexno], force, recurse);
-    }
-    log(LOG_INFO, "vinum: removing %s\n", vol->name);
-    free_volume(volno);
-    vinum_conf.volumes_used--;			/* one less volume */
-}
-
-/*
- * Finish the configuration of subdisk sdno.
- *
- * When diskconfig is zero, set_sd_state() is asked to take the subdisk
- * to sd_up (setstate_configuring); in either case the subdisk stops
- * being VF_NEWBORN.  diskconfig is presumably nonzero when the
- * configuration was read from disk, as for updateconfig() -- confirm.
- *
- * Currently called only from ioctl.
- */
-void
-update_sd_config(int sdno, int diskconfig)
-{
-    if (diskconfig == 0)
-	set_sd_state(sdno, sd_up, setstate_configuring);
-
-    SD[sdno].flags &= ~VF_NEWBORN;		/* no longer newly born */
-}
-/*
- * Re-validate plex plexno after a configuration change.
- *
- * Plexes whose state is below plex_init are ignored.  For the others
- * this function:
- *  - notes whether the plex was added to an already existing volume
- *    (its subdisks are then marked sd_stale),
- *  - logs an error if a striped, RAID-4 or RAID-5 plex has too few
- *    subdisks,
- *  - trims a partial stripe off the end of the plex, handing the
- *    trimmed space back with return_drive_space(),
- *  - checks subdisk sizes and sector sizes and recomputes plex->length
- *    from the subdisks,
- *  - calls update_plex_state() and clears VF_NEWBORN.
- *
- * diskconfig is compared against 0 when deciding whether the plex was
- * added later; per the comment in the code it is nonzero when the
- * configuration was read from disk.
- *
- * NOTE(review): the local variable state is assigned in several places
- * but never read; the final state comes from update_plex_state().
- */
-void
-update_plex_config(int plexno, int diskconfig)
-{
- u_int64_t size;
- int sdno;
- struct plex *plex = &PLEX[plexno];
- enum plexstate state = plex_up; /* state we want the plex in (never read, see header) */
- int remainder; /* size of fractional stripe at end */
- int added_plex; /* set if we add a plex to a volume */
- int required_sds; /* number of subdisks we need */
- struct sd *sd;
- struct volume *vol;
- int data_sds = 0; /* number of sds carrying data */
-
- if (plex->state < plex_init) /* not a real plex, */
- return;
- added_plex = 0;
- if (plex->volno >= 0) { /* we have a volume */
- vol = &VOL[plex->volno];
-
- /*
- * If we're newly born,
- * and the volume isn't,
- * and it has other plexes,
- * and we didn't read this mess from disk,
- * we were added later.
- */
- if ((plex->flags & VF_NEWBORN)
- && ((vol->flags & VF_NEWBORN) == 0)
- && (vol->plexes > 0)
- && (diskconfig == 0)) {
- added_plex = 1;
- state = plex_down; /* so take ourselves down */
- }
- }
- /*
- * Check that our subdisks make sense. For
- * striped plexes, we need at least two
- * subdisks, and for RAID-4 and RAID-5 plexes we
- * need at least three subdisks. In each case
- * they must all be the same size.
- */
- if (plex->organization == plex_striped) {
- data_sds = plex->subdisks; /* every subdisk carries data */
- required_sds = 2;
- } else if (isparity(plex)) { /* RAID 4 or 5 */
- data_sds = plex->subdisks - 1; /* one subdisk's worth of space is not data */
- required_sds = 3;
- } else
- required_sds = 0; /* no minimum for other organizations */
- if (required_sds > 0) { /* striped, RAID-4 or RAID-5 */
- if (plex->subdisks < required_sds) {
- log(LOG_ERR,
- "vinum: plex %s does not have at least %d subdisks\n",
- plex->name,
- required_sds);
- state = plex_faulty;
- }
- /*
- * Now see if the plex size is a multiple of
- * the stripe size. If not, trim off the end
- * of each subdisk and return it to the drive.
- */
- if (plex->length > 0) {
- if (data_sds > 0) {
- if (plex->stripesize > 0) {
- remainder = (int) (plex->length /* are we exact? */
- % ((u_int64_t) plex->stripesize * data_sds));
- if (remainder) { /* no */
- log(LOG_INFO, "vinum: removing %d blocks of partial stripe at the end of %s\n",
- remainder,
- plex->name);
- plex->length -= remainder; /* shorten the plex */
- remainder /= data_sds; /* spread the remainder amongst the sds */
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
- return_drive_space(sd->driveno, /* return the space */
- sd->driveoffset + sd->sectors - remainder,
- remainder);
- sd->sectors -= remainder; /* and shorten it */
- }
- }
- } else /* NOTE(review): this else pairs with the stripesize test above, not with data_sds */
- plex->length = 0; /* reset length */
- }
- }
- }
- size = 0; /* accumulates the sectors of all subdisks */
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- sd = &SD[plex->sdnos[sdno]];
- if (isstriped(plex)
- && (sdno > 0)
- && (sd->sectors != SD[plex->sdnos[sdno - 1]].sectors)) { /* differs from its predecessor */
- log(LOG_ERR, "vinum: %s must have equal sized subdisks\n", plex->name);
- state = plex_down;
- }
- size += sd->sectors;
- if (added_plex) /* we were added later */
- sd->state = sd_stale; /* stale until proven otherwise */
- if (plex->sectorsize != 0) {
- if (sd->sectorsize != plex->sectorsize) /* incompatible sector sizes? */
- printf("vinum: incompatible sector sizes. "
- "%s has %d bytes, %s has %d bytes. Ignored.\n",
- sd->name,
- sd->sectorsize,
- plex->name,
- plex->sectorsize);
- } else /* not set yet, */
- plex->sectorsize = sd->sectorsize; /* so adopt this subdisk's sector size */
- }
-
- if (plex->subdisks) { /* plex has subdisks, calculate size */
- /*
- * XXX We shouldn't need to calculate the size any
- * more. Check this some time
- */
- if (isparity(plex))
- size = size / plex->subdisks * (plex->subdisks - 1); /* less space for RAID-4 and RAID-5 */
- if (plex->length != size)
- log(LOG_INFO,
- "Correcting length of %s: was %lld, is %lld\n",
- plex->name,
- (long long) plex->length,
- (long long) size);
- plex->length = size;
- } else { /* no subdisks, */
- plex->length = 0; /* no size */
- state = plex_down; /* take it down */
- }
- update_plex_state(plexno); /* set the state */
- plex->flags &= ~VF_NEWBORN; /* no longer newly born */
-}
-
-/*
- * Recompute the derived fields of volume volno from its plexes.
- *
- * For an allocated volume the size becomes the length of its longest
- * plex, each plex is told its index within the volume, and the volume
- * adopts the sector size of the first plex if it has none yet.  A plex
- * with a differing sector size is reported but otherwise ignored.  In
- * all cases the volume stops being VF_NEWBORN.
- */
-void
-update_volume_config(int volno)
-{
-    struct volume *vol = &VOL[volno];
-    int i;
-
-    if (vol->state != volume_unallocated) {
-        /*
-         * The size can change if the original plexes were not a
-         * multiple of the stripe size, so start from scratch.
-         */
-        vol->size = 0;
-        for (i = 0; i < vol->plexes; i++) {
-            struct plex *p = &PLEX[vol->plex[i]];
-
-            p->volplexno = i; /* note our position in the plex */
-            /*
-             * NOTE(review): if max() resolves to a u_int helper rather
-             * than a type-generic macro, wide lengths would be
-             * truncated here - confirm.
-             */
-            vol->size = max(p->length, vol->size); /* longest plex wins */
-            if (vol->sectorsize == 0) { /* not set yet, */
-                vol->sectorsize = p->sectorsize; /* take it from this plex */
-                continue;
-            }
-            if (p->sectorsize != vol->sectorsize) /* incompatible sector sizes? */
-                printf("vinum: incompatible sector sizes. "
-                    "%s has %d, %s has %d. Ignored.\n",
-                    p->name,
-                    p->sectorsize,
-                    vol->name,
-                    vol->sectorsize);
-        }
-    }
-    vol->flags &= ~VF_NEWBORN; /* no longer newly born */
-}
-
-/*
- * Update the global configuration.  This is called after
- * configuration changes.
- *
- * Every allocated plex slot is passed to update_plex_config(),
- * together with diskconfig.  Every volume whose state is beyond
- * volume_uninit then loses VF_CONFIG_SETUPSTATE and has its state and
- * configuration recalculated.  Finally the configuration is saved.
- *
- * diskconfig is != 0 if we're reading in a config from disk.  In this
- * case, we don't try to bring the devices up, though we will bring
- * them down if there's some error which got missed when writing to
- * disk.
- */
-void
-updateconfig(int diskconfig)
-{
-    int i;
-
-    for (i = 0; i < vinum_conf.plexes_allocated; i++)
-        update_plex_config(i, diskconfig);
-
-    for (i = 0; i < vinum_conf.volumes_allocated; i++) {
-        if (VOL[i].state <= volume_uninit) /* nothing to update here */
-            continue;
-        VOL[i].flags &= ~VF_CONFIG_SETUPSTATE; /* no more setupstate */
-        update_volume_state(i);
-        update_volume_config(i);
-    }
-    save_config();
-}
-
-/*
- * Start manual changes to the configuration and lock out
- * others who may wish to do so.
- * XXX why do we need this and lock_config too?
- *
- * force != 0 additionally sets VF_FORCECONFIG.
- *
- * Returns 0 once we own the configuration, or the error from tsleep()
- * if the wait for the current owner was interrupted.
- */
-int
-start_config(int force)
-{
-    int error;
-
-    /*
-     * Forget the last drive, plex and volume we mentioned; they
-     * supply some defaults.
-     */
-    current_drive = -1;
-    current_plex = -1;
-    current_volume = -1;
-
-    /* Somebody else is configuring: ask for a wakeup and wait our turn */
-    while (vinum_conf.flags & VF_CONFIGURING) {
-        vinum_conf.flags |= VF_WILL_CONFIGURE;
-        error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0);
-        if (error != 0)
-            return error;
-    }
-
-    /*
-     * We need two flags here: VF_CONFIGURING tells other processes
-     * to hold off (this function), and VF_CONFIG_INCOMPLETE tells
-     * the state change routines not to propagate incremental state
-     * changes.  VF_FORCECONFIG, if requested, allows overwriting
-     * differently named drives.
-     */
-    vinum_conf.flags |= VF_CONFIGURING | VF_CONFIG_INCOMPLETE
-        | (force ? VF_FORCECONFIG : 0);
-
-    /* Reset the defaults again now that the configuration is ours */
-    current_drive = -1;
-    current_plex = -1;
-    current_volume = -1;
-    return 0;
-}
-
-/*
- * Finish a configuration started with start_config() and unlock it.
- *
- * updateconfig() is always called: with 0 if update is set (full
- * update), with 1 otherwise (do some updates only).  Afterwards
- * VF_CONFIGURING is dropped, and one process that announced itself
- * with VF_WILL_CONFIGURE is woken up.
- */
-void
-finish_config(int update)
-{
-    /* we've finished our config */
-    vinum_conf.flags &= ~(VF_CONFIG_INCOMPLETE | VF_READING_CONFIG | VF_FORCECONFIG);
-    updateconfig(update ? 0 : 1);
-    vinum_conf.flags &= ~VF_CONFIGURING; /* and now other people can take a turn */
-    if (vinum_conf.flags & VF_WILL_CONFIGURE) { /* somebody is waiting */
-        vinum_conf.flags &= ~VF_WILL_CONFIGURE;
-        wakeup_one(&vinum_conf);
-    }
-}
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumdaemon.c b/sys/dev/vinum/vinumdaemon.c
deleted file mode 100644
index 121357d..0000000
--- a/sys/dev/vinum/vinumdaemon.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/* vinumdaemon.c: kernel part of Vinum daemon */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumdaemon.c,v 1.8 2000/01/03 05:22:03 grog Exp grog $
- */
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-#ifdef VINUMDEBUG
-#include <sys/reboot.h>
-#endif
-
-/* Forward declaration: recover_io() is defined further down in this file. */
-void recover_io(struct request *rq);
-
-int daemon_options = 0; /* daemon option flags, replaced wholesale by vinum_setdaemonopts() */
-int daemonpid; /* PID of daemon; vinum_finddaemon() treats 0 as "no daemon" */
-struct daemonq *daemonq; /* daemon's work queue (head of a singly linked list) */
-struct daemonq *dqend; /* and the end of the queue; NULL when the queue is empty */
-
-/*
- * We normally call Malloc to get a queue element. In interrupt
- * context, we can't guarantee that we'll get one, since we're not
- * allowed to wait. If malloc fails, use one of these elements.
- *
- * NOTE(review): intqp has no initializer here, so it starts out NULL.
- * Its users must either cope with that or it must be pointed at intq
- * before first use - confirm.
- */
-
-#define INTQSIZE 4
-struct daemonq intq[INTQSIZE]; /* queue elements for interrupt context */
-struct daemonq *intqp; /* and pointer in it: the next spare to hand out */
-
-/*
- * Dispose of a queue element that has been taken off the work queue.
- *
- * queue_daemon_request() sets privateinuse only on elements taken from
- * the static intq array.  Those must merely be marked free again;
- * everything else came from Malloc and goes back with Free.
- */
-static void
-daemonq_release(struct daemonq *qelt)
-{
-    if (qelt->privateinuse) /* one of ours, */
-        qelt->privateinuse = 0; /* no longer in use */
-    else
-        Free(qelt); /* return it */
-}
-
-/*
- * Main loop of the vinum daemon.
- *
- * Marks the current process as a system process, saves the
- * configuration once, records its PID in daemonpid and then sleeps on
- * &vinum_daemon.  Each time it is woken it drains the work queue
- * (daemonq) and handles every request according to its type.
- *
- * Returns when a daemonrq_return request is processed, or when it
- * notices after a sleep that daemonpid no longer matches its own PID.
- */
-void
-vinum_daemon(void)
-{
-    int s;
-    struct daemonq *request;
-
-    PROC_LOCK(curproc);
-    curproc->p_flag |= P_SYSTEM; /* we're a system process */
-    mtx_lock_spin(&sched_lock);
-    curproc->p_sflag |= PS_INMEM;
-    mtx_unlock_spin(&sched_lock);
-    PROC_UNLOCK(curproc);
-    daemon_save_config(); /* start by saving the configuration */
-    daemonpid = curproc->p_pid; /* mark our territory */
-    while (1) {
-        tsleep(&vinum_daemon, PRIBIO, "vinum", 0); /* wait for something to happen */
-
-        /*
-         * It's conceivable that, as the result of an
-         * I/O error, we'll be out of action long
-         * enough that another daemon gets started.
-         * That's OK, just give up gracefully.
-         */
-        if (curproc->p_pid != daemonpid) { /* we've been ousted in our sleep */
-            if (daemon_options & daemon_verbose)
-                log(LOG_INFO, "vinum: abdicating\n");
-            return;
-        }
-        while (daemonq != NULL) { /* we have work to do, */
-            s = splhigh(); /* don't get interrupted here */
-            request = daemonq; /* get the request */
-            daemonq = daemonq->next; /* and detach it */
-            if (daemonq == NULL) /* got to the end, */
-                dqend = NULL; /* no end any more */
-            splx(s);
-
-            switch (request->type) {
-                /*
-                 * We had an I/O error on a request. Go through the
-                 * request and try to salvage it
-                 */
-            case daemonrq_ioerror:
-                if (daemon_options & daemon_verbose) {
-                    struct request *rq = request->info.rq;
-
-                    log(LOG_WARNING,
-                        "vinum: recovering I/O request: %p\n%s dev %d.%d, offset 0x%llx, length %ld\n",
-                        rq,
-                        rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
-                        major(rq->bp->b_dev),
-                        minor(rq->bp->b_dev),
-                        (long long)rq->bp->b_blkno,
-                        rq->bp->b_bcount);
-                }
-                recover_io(request->info.rq); /* the failed request */
-                break;
-
-                /*
-                 * Write the config to disk. We could end up with
-                 * quite a few of these in a row. Only honour the
-                 * last one
-                 */
-            case daemonrq_saveconfig:
-                if ((daemonq == NULL) /* no more requests */
-                    ||(daemonq->type != daemonrq_saveconfig)) { /* or the next isn't the same */
-                    if (((daemon_options & daemon_noupdate) == 0) /* we're allowed to do it */
-                        &&((vinum_conf.flags & VF_READING_CONFIG) == 0)) { /* and we're not building the config now */
-                        /*
-                         * We obviously don't want to save a
-                         * partial configuration. Less obviously,
-                         * we don't need to do anything if we're
-                         * asked to write the config when we're
-                         * building it up, because we save it at
-                         * the end.
-                         */
-                        if (daemon_options & daemon_verbose)
-                            log(LOG_INFO, "vinum: saving config\n");
-                        daemon_save_config(); /* save it */
-                    }
-                }
-                break;
-
-            case daemonrq_return: /* been told to stop */
-                if (daemon_options & daemon_verbose)
-                    log(LOG_INFO, "vinum: stopping\n");
-                daemon_options |= daemon_stopped; /* note that we've stopped */
-                /*
-                 * Any of these elements may be a spare from intq, so
-                 * they must not be handed to Free unconditionally.
-                 */
-                daemonq_release(request);
-                while (daemonq != NULL) { /* backed up requests, */
-                    request = daemonq; /* get the request */
-                    daemonq = daemonq->next; /* and detach it */
-                    daemonq_release(request); /* then dispose of it */
-                }
-                dqend = NULL; /* the queue is empty, no end any more */
-                wakeup(&vinumclose); /* and wake any waiting vinum(8)s */
-                return;
-
-            case daemonrq_ping: /* tell the caller we're here */
-                if (daemon_options & daemon_verbose)
-                    log(LOG_INFO, "vinum: ping reply\n");
-                wakeup(&vinum_finddaemon); /* wake up the caller */
-                break;
-
-            case daemonrq_closedrive: /* close a drive */
-                close_drive(request->info.drive); /* do it */
-                break;
-
-            case daemonrq_init: /* initialize a plex */
-                /* XXX */
-            case daemonrq_revive: /* revive a subdisk */
-                /* XXX */
-                /* FALLTHROUGH */
-            default:
-                log(LOG_WARNING, "Invalid request\n");
-                break;
-            }
-            daemonq_release(request); /* we're done with this element */
-        }
-    }
-}
-
-/*
- * Recover a failed I/O operation.
- *
- * The correct way to do this is to examine the request and determine
- * how to recover each individual failure. In the case of a write,
- * this could be as simple as doing nothing: the defective drives may
- * already be down, and there may be nothing else to do. In case of
- * a read, it will be necessary to retry if there are alternative
- * copies of the data.
- *
- * The easy way (here) is just to reissue the request. This will take
- * a little longer, but nothing like as long as the failure will have
- * taken.
- *
- * rq is the failed request; only its buffer (rq->bp) is used.
- */
-void
-recover_io(struct request *rq)
-{
- /*
- * This should read:
- *
- * vinumstrategy(rq->bp);
- *
- * Negotiate with phk to get it fixed.
- */
- DEV_STRATEGY(rq->bp); /* reissue the command */
-}
-
-/* Functions called to interface with the daemon */
-
-/*
- * Queue a request for the daemon and wake it up.
- *
- * type selects the operation and info carries its argument; both are
- * copied into a freshly obtained queue element which is appended to
- * the work queue.
- *
- * The element normally comes from Malloc.  If that fails, one of the
- * INTQSIZE static spares in intq is used instead, and if the next
- * spare is still in use the request is silently dropped.
- */
-void
-queue_daemon_request(enum daemonrq type, union daemoninfo info)
-{
-    int s;
-
-    struct daemonq *qelt = (struct daemonq *) Malloc(sizeof(struct daemonq));
-
-    if (qelt == NULL) { /* malloc failed, we're prepared for that */
-        /*
-         * intqp has no initializer, so it is NULL until somebody
-         * points it at the spares.  Do that here on first use rather
-         * than dereference a null pointer.
-         */
-        if (intqp == NULL)
-            intqp = intq;
-        /*
-         * Take one of our spares. Give up if it's still in use; the only
-         * message we're likely to get here is a 'drive failed' message,
-         * and that'll come by again if we miss it.
-         */
-        if (intqp->privateinuse) /* still in use? */
-            return; /* yes, give up */
-        qelt = intqp++;
-        if (intqp == &intq[INTQSIZE]) /* got to the end, */
-            intqp = intq; /* wrap around */
-        qelt->privateinuse = 1; /* it's ours, and it's in use */
-    } else
-        qelt->privateinuse = 0;
-
-    qelt->next = NULL; /* end of the chain */
-    qelt->type = type;
-    qelt->info = info;
-    s = splhigh(); /* the daemon detaches elements at splhigh too */
-    if (daemonq) { /* something queued already */
-        dqend->next = qelt;
-        dqend = qelt;
-    } else { /* queue is empty, */
-        daemonq = qelt; /* this is the whole queue */
-        dqend = qelt;
-    }
-    splx(s);
-    wakeup(&vinum_daemon); /* and give the daemon a kick */
-}
-
-/*
- * Make sure a daemon is running.
- *
- * If daemonpid says there is one, ping it and wait up to two seconds
- * (2 * hz) for the reply.  If there is no daemon, or the wait did not
- * end with a wakeup, run vinum_daemon() directly from this context.
- *
- * Always returns 0.
- */
-int
-vinum_finddaemon()
-{
-    if (daemonpid != 0) { /* we think we have a daemon, */
-        queue_daemon_request(daemonrq_ping, (union daemoninfo) 0); /* queue a ping */
-        if (tsleep(&vinum_finddaemon, PUSER, "reap", 2 * hz) == 0)
-            return 0; /* yup, the daemon's up and running */
-    }
-    /* no daemon, or we couldn't talk to it: start it */
-    vinum_daemon();
-    return 0;
-}
-
-/*
- * Replace the daemon option flags (daemon_options) with options.
- * Always returns 0.
- */
-int
-vinum_setdaemonopts(int options)
-{
- daemon_options = options;
- return 0;
-}
diff --git a/sys/dev/vinum/vinumext.h b/sys/dev/vinum/vinumext.h
deleted file mode 100644
index ed2e6a5..0000000
--- a/sys/dev/vinum/vinumext.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumext.h,v 1.33 2003/05/23 00:57:48 grog Exp $
- * $FreeBSD$
- */
-
-/* vinumext.h: external definitions */
-
-/* *sigh* We still need this at the moment. */
-#ifdef _KERNEL
-extern struct _vinum_conf vinum_conf; /* configuration information */
-extern struct mtx plexmutex[]; /* mutexes for plexes to use */
-#else
-extern struct __vinum_conf vinum_conf; /* configuration information */
-#endif
-
-#ifdef VINUMDEBUG
-extern int debug; /* debug flags */
-#endif
-
-/*
- * Physical read and write drive: thin wrappers which pass their four
- * arguments straight through to driveio() and supply the transfer
- * direction (BIO_READ or BIO_WRITE) as the fifth.
- */
-#define read_drive(a, b, c, d) driveio (a, b, c, d, BIO_READ)
-#define write_drive(a, b, c, d) driveio (a, b, c, d, BIO_WRITE)
-
-/*
- * Bail out of the current configuration command if an allocation
- * failed: when ptr is NULL, print msg and longjmp to command_fail.
- *
- * Wrapped in do { } while (0) so that the macro behaves like a single
- * statement (safe in an unbraced if/else).  msg is printed via "%s" so
- * that it is never interpreted as a format string.
- */
-#define CHECKALLOC(ptr, msg) \
-    do { \
-        if ((ptr) == NULL) { \
-            printf ("%s", (msg)); \
-            longjmp (command_fail, -1); \
-        } \
-    } while (0)
-#ifndef _KERNEL
-struct vnode;
-struct thread;
-#endif
-
-#ifdef _KERNEL
-int vinum_inactive(int);
-void free_vinum(int);
-int give_sd_to_plex(int plexno, int sdno);
-void give_sd_to_drive(int sdno);
-int give_plex_to_volume(int, int, int);
-struct drive *check_drive(char *);
-enum drive_label_info read_drive_label(struct drive *, int);
-int parse_config(char *, struct keywordset *, int);
-int parse_user_config(char *cptr, struct keywordset *keyset);
-u_int64_t sizespec(char *spec);
-int volume_index(struct volume *volume);
-int plex_index(struct plex *plex);
-int sd_index(struct sd *sd);
-int drive_index(struct drive *drive);
-int my_plex(int volno, int plexno);
-int my_sd(int plexno, int sdno);
-int get_empty_drive(void);
-int find_drive(const char *name, int create);
-int find_drive_by_name(const char *devname, int create);
-int get_empty_sd(void);
-int find_subdisk(const char *name, int create);
-void return_drive_space(int driveno, int64_t offset, int length);
-void free_sd(int sdno);
-void free_volume(int volno);
-int get_empty_plex(void);
-int find_plex(const char *name, int create);
-void free_plex(int plexno);
-int get_empty_volume(void);
-int find_volume(const char *name, int create);
-void config_subdisk(int);
-void config_plex(int);
-void config_volume(int);
-void config_drive(int);
-void updateconfig(int);
-void update_sd_config(int sdno, int kernelstate);
-void update_plex_config(int plexno, int kernelstate);
-void update_volume_config(int volno);
-void update_config(void);
-void drive_io_done(struct buf *);
-void save_config(void);
-void daemon_save_config(void);
-void write_config(char *, int);
-int start_config(int);
-void finish_config(int);
-void remove(struct vinum_ioctl_msg *msg);
-void remove_drive_entry(int driveno, int force);
-void remove_sd_entry(int sdno, int force, int recurse);
-void remove_plex_entry(int plexno, int force, int recurse);
-void remove_volume_entry(int volno, int force, int recurse);
-
-void checkdiskconfig(char *);
-int open_drive(struct drive *, struct thread *, int);
-void close_drive(struct drive *drive);
-void close_locked_drive(struct drive *drive);
-int driveio(struct drive *, char *, size_t, off_t, int);
-int set_drive_parms(struct drive *drive);
-int init_drive(struct drive *, int);
-/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
-void throw_rude_remark(int, char *,...);
-
-void format_config(char *config, int len);
-void checkkernel(char *op);
-void free_drive(struct drive *drive);
-void down_drive(struct drive *drive);
-void remove_drive(int driveno);
-
-int vinum_scandisk(char *drivename);
-
-/* I/O */
-d_open_t vinumopen;
-d_close_t vinumclose;
-d_strategy_t vinumstrategy;
-d_ioctl_t vinumioctl;
-
-int vinum_super_ioctl(struct cdev *, u_long, caddr_t);
-int vinumstart(struct buf *bp, int reviveok);
-int launch_requests(struct request *rq, int reviveok);
-void sdio(struct buf *bp);
-
-/* XXX Do we need this? */
-int vinumpart(struct cdev *);
-
-extern jmp_buf command_fail; /* return here if config fails */
-
-#ifdef VINUMDEBUG
-/* Memory allocation and request tracing */
-void vinum_meminfo(caddr_t data);
-int vinum_mallocinfo(caddr_t data);
-int vinum_rqinfo(caddr_t data);
-void LongJmp(jmp_buf, int);
-char *basename(char *);
-#endif
-
-#ifdef VINUMDEBUG
-void expand_table(void **, int, int, char *, int);
-#else
-void expand_table(void **, int, int);
-#endif
-
-struct disklabel;
-struct request;
-struct rqgroup *allocrqg(struct request *rq, int elements);
-void deallocrqg(struct rqgroup *rqg);
-
-/* Device number decoding */
-int Volno(struct cdev *x);
-int Plexno(struct cdev *x);
-int Sdno(struct cdev *x);
-
-/* State transitions */
-int set_drive_state(int driveno, enum drivestate state, enum setstateflags flags);
-int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
-enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
-int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
-int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
-void update_sd_state(int sdno);
-void forceup(int plexno);
-void update_plex_state(int plexno);
-void update_volume_state(int volno);
-void invalidate_subdisks(struct plex *, enum sdstate);
-void start_object(struct vinum_ioctl_msg *);
-void stop_object(struct vinum_ioctl_msg *);
-void setstate(struct vinum_ioctl_msg *msg);
-void setstate_by_force(struct vinum_ioctl_msg *msg);
-void vinum_label(int);
-int vinum_writedisklabel(struct volume *, struct disklabel *);
-int initsd(int, int);
-struct buf *parityrebuild(struct plex *, u_int64_t, int, enum parityop, struct rangelock **, off_t *);
-enum requeststatus sddownstate(struct request *rq);
-
-int restart_plex(int plexno);
-int revive_read(struct sd *sd);
-int revive_block(int sdno);
-void parityops(struct vinum_ioctl_msg *);
-
-/* Auxiliary functions */
-enum sdstates sdstatemap(struct plex *plex);
-enum volplexstate vpstate(struct plex *plex);
-#endif
-
-struct drive *validdrive(int driveno, struct _ioctl_reply *);
-struct sd *validsd(int sdno, struct _ioctl_reply *);
-struct plex *validplex(int plexno, struct _ioctl_reply *);
-struct volume *validvol(int volno, struct _ioctl_reply *);
-void resetstats(struct vinum_ioctl_msg *msg);
-
-/* Locking */
-#ifdef VINUMDEBUG
-int lockdrive(struct drive *drive, char *, int);
-#else
-int lockdrive(struct drive *drive);
-#endif
-void unlockdrive(struct drive *drive);
-int lockvol(struct volume *vol);
-void unlockvol(struct volume *vol);
-int lockplex(struct plex *plex);
-void unlockplex(struct plex *plex);
-struct rangelock *lockrange(daddr_t stripe, struct buf *bp, struct plex *plex);
-int lock_config(void);
-void unlock_config(void);
-
-/* Dæmon */
-
-void vinum_daemon(void);
-int vinum_finddaemon(void);
-int vinum_setdaemonopts(int);
-extern struct daemonq *daemonq; /* daemon's work queue */
-extern struct daemonq *dqend; /* and the end of the queue */
-extern struct cdevsw vinum_cdevsw;
-
-/*
- * Allocation and drive-locking wrappers.  Three variants:
- *
- * - kernel with VINUMDEBUG: Malloc, Free and LOCKDRIVE pass the
- *   caller's file and line on to MMalloc, FFree and lockdrive;
- * - kernel without VINUMDEBUG: Malloc and Free map to the kernel
- *   malloc/free with type M_DEVBUF.  Malloc waits (M_WAITOK) only when
- *   td_intr_nesting_level is 0, otherwise it uses M_NOWAIT;
- * - outside the kernel: plain malloc and free.
- */
-#undef Free /* defined in some funny net stuff */
-#ifdef _KERNEL
-#ifdef VINUMDEBUG
-#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
-#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */
-caddr_t MMalloc(int size, char *, int);
-void FFree(void *mem, char *, int);
-#define LOCKDRIVE(d) lockdrive (d, __FILE__, __LINE__)
-#else
-#define Malloc(x) malloc((x), M_DEVBUF, \
- curthread->td_intr_nesting_level == 0? M_WAITOK: M_NOWAIT)
-#define Free(x) free((x), M_DEVBUF)
-#define LOCKDRIVE(d) lockdrive (d)
-#endif
-#else
-#define Malloc(x) malloc ((x)) /* just the size */
-#define Free(x) free ((x)) /* just the address */
-#endif
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumhdr.h b/sys/dev/vinum/vinumhdr.h
deleted file mode 100644
index 4117821..0000000
--- a/sys/dev/vinum/vinumhdr.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- */
-
-/* Header files used by all modules */
-/*
- * $Id: vinumhdr.h,v 1.19 2001/05/22 04:07:22 grog Exp grog $
- * $FreeBSD$
- */
-
-#include <sys/param.h>
-#ifdef _KERNEL
-#include "opt_vinum.h"
-#include <sys/systm.h>
-#include <sys/kdb.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/conf.h>
-#include <sys/mount.h>
-#include <sys/vnode.h>
-#include <sys/sysctl.h>
-#endif
-#include <sys/errno.h>
-#include <sys/time.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/malloc.h>
-#include <sys/uio.h>
-#include <sys/namei.h>
-#include <sys/stat.h>
-#include <sys/disk.h>
-#include <sys/disklabel.h>
-#include <sys/syslog.h>
-#include <sys/fcntl.h>
-#include <sys/queue.h>
-#ifdef _KERNEL
-#include <machine/setjmp.h>
-#include <machine/stdarg.h>
-#else
-#include <setjmp.h>
-#include <stdarg.h>
-#endif
-#include <vm/vm.h>
-#include <dev/vinum/vinumvar.h>
-#include <dev/vinum/vinumio.h>
-#include <dev/vinum/vinumkw.h>
-#include <dev/vinum/vinumext.h>
-#include <dev/vinum/vinumutil.h>
-#include <machine/cpu.h>
diff --git a/sys/dev/vinum/vinuminterrupt.c b/sys/dev/vinum/vinuminterrupt.c
deleted file mode 100644
index d031ef9..0000000
--- a/sys/dev/vinum/vinuminterrupt.c
+++ /dev/null
@@ -1,473 +0,0 @@
-/* vinuminterrupt.c: bottom half of the driver */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinuminterrupt.c,v 1.41 2003/08/24 17:55:56 obrien Exp $
- */
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-#include <sys/resourcevar.h>
-
-void complete_raid5_write(struct rqelement *);
-void complete_rqe(struct buf *bp);
-void sdio_done(struct buf *bp);
-
-/*
- * Take a completed buffer, transfer the data back if
- * it's a read, and complete the high-level request
- * if this is the last subrequest.
- *
- * The bp parameter is in fact a struct rqelement, which
- * includes a couple of extras at the end.
- *
- * In order, this function:
- *
- * - decrements the per-drive and global active counts and
- *   wakes up sleepers on launch_requests if the drive count
- *   is now DRIVE_MAXACTIVE - 1 or the global count is now
- *   VINUM_MAXACTIVE;
- * - on BIO_ERROR, stores an error number in the request and
- *   in the subdisk and logs the failure.  If the error is
- *   ENXIO, or VF_RETRYERRORS is not set for the subdisk, the
- *   subdisk is forced to sd_crashed (read) or sd_stale
- *   (write); ENXIO additionally forces the drive down;
- * - updates the read or write statistics of the drive, the
- *   subdisk, the plex and, if the plex has one, the volume;
- * - for a recovery read, XORs this element's group data into
- *   the buffer of the element for the bad subdisk, and copies
- *   normally read data to the user buffer; for a group write
- *   with only this element still active, starts the second
- *   phase with complete_raid5_write;
- * - once no request group is active any more, either
- *   completes the user buffer and frees the request, or, if
- *   the request has an error, flags the user buffer (plex
- *   request) or queues the request to the daemon (volume
- *   request).
- *
- * NOTE(review): in the plex error branch neither bufdone()
- * nor freerq() is called from here; presumably that happens
- * elsewhere -- confirm.
- */
-void
-complete_rqe(struct buf *bp)
-{
- struct rqelement *rqe;
- struct request *rq;
- struct rqgroup *rqg;
- struct buf *ubp; /* user buffer */
- struct drive *drive;
- struct sd *sd;
- char *gravity; /* for error messages */
-
- rqe = (struct rqelement *) bp; /* point to the element that completed */
- rqg = rqe->rqg; /* and the request group */
- rq = rqg->rq; /* and the complete request */
- ubp = rq->bp; /* user buffer */
-
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_iodone, (union rqinfou) rqe, ubp);
-#endif
- drive = &DRIVE[rqe->driveno];
- drive->active--; /* one less outstanding I/O on this drive */
- vinum_conf.active--; /* one less outstanding I/O globally */
- if ((drive->active == (DRIVE_MAXACTIVE - 1)) /* we were at the drive limit */
- ||(vinum_conf.active == VINUM_MAXACTIVE)) /* or the global limit */
- wakeup(&launch_requests); /* let another one at it */
- if ((bp->b_io.bio_flags & BIO_ERROR) != 0) { /* transfer in error */
- gravity = ""; /* severity text for the log messages below */
- sd = &SD[rqe->sdno];
-
- if (bp->b_error != 0) /* did it return a number? */
- rq->error = bp->b_error; /* yes, put it in. */
- else if (rq->error == 0) /* no: do we have one already? */
- rq->error = EIO; /* no: catchall "I/O error" */
- sd->lasterror = rq->error; /* remember it against the subdisk too */
- if (bp->b_iocmd == BIO_READ) { /* read operation */
- if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
- gravity = " fatal";
- set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */
- }
- log(LOG_ERR,
- "%s:%s read error, block %lld for %ld bytes\n",
- gravity,
- sd->name,
- (long long)bp->b_blkno,
- bp->b_bcount);
- } else { /* write operation */
- if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
- gravity = "fatal ";
- set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */
- }
- log(LOG_ERR,
- "%s:%s write error, block %lld for %ld bytes\n",
- gravity,
- sd->name,
- (long long)bp->b_blkno,
- bp->b_bcount);
- }
- log(LOG_ERR,
- "%s: user buffer block %lld for %ld bytes\n",
- sd->name,
- (long long)ubp->b_blkno,
- ubp->b_bcount);
- if (rq->error == ENXIO) { /* the drive's down too */
- log(LOG_ERR,
- "%s: fatal drive I/O error, block %lld for %ld bytes\n",
- DRIVE[rqe->driveno].label.name,
- (long long)bp->b_blkno,
- bp->b_bcount);
- DRIVE[rqe->driveno].lasterror = rq->error;
- set_drive_state(rqe->driveno, /* take the drive down */
- drive_down,
- setstate_force);
- }
- }
- /* Now update the statistics */
- if (bp->b_iocmd == BIO_READ) { /* read operation */
- DRIVE[rqe->driveno].reads++;
- DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
- SD[rqe->sdno].reads++;
- SD[rqe->sdno].bytes_read += bp->b_bcount;
- PLEX[rqe->rqg->plexno].reads++;
- PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
- if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
- VOL[PLEX[rqe->rqg->plexno].volno].reads++;
- VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount;
- }
- } else { /* write operation */
- DRIVE[rqe->driveno].writes++;
- DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
- SD[rqe->sdno].writes++;
- SD[rqe->sdno].bytes_written += bp->b_bcount;
- PLEX[rqe->rqg->plexno].writes++;
- PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
- if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
- VOL[PLEX[rqe->rqg->plexno].volno].writes++;
- VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount;
- }
- }
- if (rqg->flags & XFR_RECOVERY_READ) { /* recovery read, */
- int *sdata; /* source */
- int *data; /* and group data */
- int length; /* and count involved */
- int count; /* loop counter */
- struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */
-
- /* XOR destination is the user data */
- sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */
- data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */
- length = urqe->grouplen * (DEV_BSIZE / sizeof(int)); /* and number of ints */
-
- for (count = 0; count < length; count++)
- data[count] ^= sdata[count];
-
- /*
- * In a normal read, we will normally read directly
- * into the user buffer. This doesn't work if
- * we're also doing a recovery, so we have to
- * copy it
- */
- if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */
- char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */
- char *dst;
-
- dst = (char *) ubp->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */
- length = rqe->datalen << DEV_BSHIFT; /* and count involved */
- bcopy(src, dst, length); /* move it */
- }
- } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 4/5 group write operation */
- &&(rqg->active == 1)) /* and this is the last active request */
- complete_raid5_write(rqe);
- /*
- * This is the earliest place where we can be
- * sure that the request has really finished,
- * since complete_raid5_write can issue new
- * requests.
- */
- rqg->active--; /* this request now finished */
- if (rqg->active == 0) { /* request group finished, */
- rq->active--; /* one less */
- if (rqg->lock) { /* got a lock? */
- unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
- rqg->lock = 0; /* so that freerq doesn't unlock it a second time */
- }
- }
- if (rq->active == 0) { /* request finished, */
-#ifdef VINUMDEBUG
- if (debug & DEBUG_RESID) {
- if (ubp->b_resid != 0) /* still something to transfer? */
- kdb_enter("resid");
- }
-#endif
-
- if (rq->error) { /* did we have an error? */
- if (rq->isplex) { /* plex operation, */
- ubp->b_io.bio_flags |= BIO_ERROR; /* yes, propagate to user */
- ubp->b_error = rq->error;
- } else /* try to recover */
- queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq); /* let the daemon complete */
- } else {
- ubp->b_resid = 0; /* completed our transfer */
- if (rq->isplex == 0) /* volume request, */
- VOL[rq->volplex.volno].active--; /* another request finished */
- if (rq->flags & XFR_COPYBUF) {
- Free(ubp->b_data);
- ubp->b_data = rq->save_data;
- }
- bufdone(ubp); /* top level buffer completed */
- freerq(rq); /* return the request storage */
- }
- }
-}
-
-/*
- * Release a request and everything attached to it.  For each
- * request group in the chain: drop the range lock if one is
- * still recorded, free every element data buffer that was
- * malloced, release the buffer locks taken on elements, and
- * free the group.  Finally free the request structure itself.
- */
-void
-freerq(struct request *rq)
-{
-    struct rqgroup *group;			    /* group being torn down */
-    struct rqgroup *following;			    /* its successor in the chain */
-    struct rqelement *element;			    /* element within the group */
-    int idx;
-
-    group = rq->rqg;
-    while (group != NULL) {
-	if (group->lock)			    /* range lock still held, */
-	    unlockrange(group->plexno, group->lock); /* give it back */
-	for (idx = 0; idx < group->count; idx++) {
-	    element = &group->rqe[idx];
-	    if ((element->flags & XFR_MALLOCED)	    /* we allocated the data buffer */
-	    &&element->b.b_data)		    /* and really got one */
-		Free(element->b.b_data);
-	    if (element->flags & XFR_BUFLOCKED) {   /* buffer lock was taken, */
-		BUF_UNLOCK(&element->b);	    /* release it */
-		BUF_LOCKFREE(&element->b);
-	    }
-	}
-	following = group->next;		    /* fetch before the group goes away */
-	Free(group);
-	group = following;
-    }
-    Free(rq);					    /* and now the request itself */
-}
-
-/*
- * Completion routine for I/O done directly on a subdisk.
- * bp is treated as a struct sdbuf.  Pass any error and the
- * residual count on to the caller's buffer, account for the
- * transfer in the drive and subdisk statistics, complete the
- * caller's buffer, then unlock and free the sdbuf.
- */
-void
-sdio_done(struct buf *bp)
-{
-    struct sdbuf *sbp = (struct sdbuf *) bp;
-    struct buf *ubp = sbp->bp;			    /* the caller's buffer */
-    struct drive *drive;
-    struct sd *sd;
-
-    if (sbp->b.b_io.bio_flags & BIO_ERROR) {	    /* the transfer failed, */
-	ubp->b_io.bio_flags |= BIO_ERROR;	    /* tell the caller */
-	ubp->b_error = sbp->b.b_error;
-    }
-#ifdef VINUMDEBUG
-    if (debug & DEBUG_LASTREQS)
-	logrq(loginfo_sdiodone, (union rqinfou) bp, bp);
-#endif
-    ubp->b_resid = sbp->b.b_resid;		    /* hand back the residual count */
-
-    /* Statistics for the drive and the subdisk */
-    drive = &DRIVE[sbp->driveno];
-    sd = &SD[sbp->sdno];
-    if (bp->b_iocmd != BIO_READ) {		    /* anything else counts as a write */
-	drive->writes++;
-	drive->bytes_written += sbp->b.b_bcount;
-	sd->writes++;
-	sd->bytes_written += sbp->b.b_bcount;
-    } else {
-	drive->reads++;
-	drive->bytes_read += sbp->b.b_bcount;
-	sd->reads++;
-	sd->bytes_read += sbp->b.b_bcount;
-    }
-    bufdone(ubp);				    /* the caller's I/O is finished */
-    BUF_UNLOCK(&sbp->b);
-    BUF_LOCKFREE(&sbp->b);
-    Free(sbp);
-}
-
-/*
- * Start the second phase of a RAID-4 or RAID-5 group write operation.
- *
- * rqe is an element of the request group; the parity block is
- * element 0 of that group.  This function builds the new parity
- * data in the parity element's buffer and then issues the writes:
- *
- * - if rqe is flagged XFR_DEGRADED_WRITE, the parity range is zeroed
- *   and the group range of every other element is XORed into it;
- * - if the group is flagged XFR_NORMAL_WRITE, then for each good
- *   data block the old data is XORed out of the parity block and
- *   the new data from the user buffer is XORed in; an element that
- *   was a read is turned into a write of the user data and started
- *   with DEV_STRATEGY;
- * - finally the parity element itself is turned into a write and
- *   started.
- *
- * Each write started here increments the group's, the drive's and
- * the global active counts.  The function panics if a computed XOR
- * range lies outside its buffer, or if a data block handled by the
- * normal write path does not have a malloced buffer.
- *
- * NOTE(review): the data elements clear XFR_PARITYOP in rqe->flags,
- * the parity element clears it in rqg->flags; confirm that this
- * difference is intended.
- */
-void
-complete_raid5_write(struct rqelement *rqe)
-{
- int *sdata; /* source */
- int *pdata; /* and parity block data */
- int length; /* and count involved */
- int count; /* loop counter */
- int rqno; /* request index */
- int rqoffset; /* offset of request data from parity data */
- struct buf *ubp; /* user buffer header */
- struct request *rq; /* pointer to our request */
- struct rqgroup *rqg; /* and to the request group */
- struct rqelement *prqe; /* point to the parity block */
- struct drive *drive; /* drive to access */
-
- rqg = rqe->rqg; /* and to our request group */
- rq = rqg->rq; /* point to our request */
- ubp = rq->bp; /* user's buffer header */
- prqe = &rqg->rqe[0]; /* point to the parity block */
-
- /*
- * If we get to this function, we have normal or
- * degraded writes, or a combination of both. We do
- * the same thing in each case: we perform an
- * exclusive or to the parity block. The only
- * difference is the origin of the data and the
- * address range.
- */
- if (rqe->flags & XFR_DEGRADED_WRITE) { /* do the degraded write stuff */
- pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */
- bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */
-
- /* Now get what data we need from each block */
- for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
- rqe = &rqg->rqe[rqno]; /* this request */
- sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */
- length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count of ints involved; the "- 2" presumably assumes sizeof(int) == 4 */
-
- /*
- * Add the data block to the parity block. Before
- * we started the request, we zeroed the parity
- * block, so the result of adding all the other
- * blocks and the block we want to write will be
- * the correct parity block.
- */
- for (count = 0; count < length; count++)
- pdata[count] ^= sdata[count];
- if ((rqe->flags & XFR_MALLOCED) /* the buffer was malloced, */
- &&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */
- Free(rqe->b.b_data); /* free it now */
- rqe->flags &= ~XFR_MALLOCED; /* so that freerq doesn't free it again */
- }
- }
- }
- if (rqg->flags & XFR_NORMAL_WRITE) { /* do normal write stuff */
- /* Get what data we need from each block */
- for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
- rqe = &rqg->rqe[rqno]; /* this request */
- if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE))
- == (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */
- sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */
- rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */
- pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */
- length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */
-
- /*
- * "remove" the old data block
- * from the parity block
- */
- if ((pdata < ((int *) prqe->b.b_data))
- || (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount)))
- || (sdata < ((int *) rqe->b.b_data))
- || (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount))))
- panic("complete_raid5_write: bounds overflow");
- for (count = 0; count < length; count++)
- pdata[count] ^= sdata[count];
-
- /* "add" the new data block */
- sdata = (int *) (&ubp->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */
- if ((sdata < ((int *) ubp->b_data))
- || (&sdata[length] > ((int *) (ubp->b_data + ubp->b_bcount))))
- panic("complete_raid5_write: bounds overflow");
- for (count = 0; count < length; count++)
- pdata[count] ^= sdata[count];
-
- /* Free the malloced buffer */
- if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */
- Free(rqe->b.b_data); /* free it */
- rqe->flags &= ~XFR_MALLOCED;
- } else
- panic("complete_raid5_write: malloc conflict");
-
- if ((rqe->b.b_iocmd == BIO_READ) /* this was a read */
- &&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */
- rqe->b.b_flags &= ~B_DONE; /* start a new request */
- rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
- rqe->b.b_iodone = complete_rqe; /* call us here when done */
- rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
- rqe->b.b_data = &ubp->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */
- rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */
- rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim more */
- rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
- rqe->b.b_blkno += rqe->dataoffset; /* point to the correct block */
- rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT; /* same position, shifted by DEV_BSHIFT */
- rqe->b.b_iooffset = rqe->b.b_offset;
- rqg->active++; /* another active request */
- drive = &DRIVE[rqe->driveno]; /* drive to access */
-
- /* We can't sleep here, so we just increment the counters. */
- drive->active++;
- if (drive->active >= drive->maxactive)
- drive->maxactive = drive->active;
- vinum_conf.active++;
- if (vinum_conf.active >= vinum_conf.maxactive)
- vinum_conf.maxactive = vinum_conf.active;
-#ifdef VINUMDEBUG
- if (debug & DEBUG_ADDRESSES)
- log(LOG_DEBUG,
- " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
- rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
- major(rqe->b.b_dev),
- minor(rqe->b.b_dev),
- rqe->sdno,
- (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
- (long long)rqe->b.b_blkno,
- rqe->b.b_bcount);
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_raid5_data, (union rqinfou) rqe, ubp);
-#endif
- DEV_STRATEGY(&rqe->b);
- }
- }
- }
- }
- /* Finally, write the parity block */
- rqe = &rqg->rqe[0];
- rqe->b.b_flags &= ~B_DONE; /* we're not done */
- rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
- rqe->b.b_iodone = complete_rqe; /* call us here when done */
- rqg->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
- rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */
- rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
- rqe->b.b_iooffset = rqe->b.b_offset;
- rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim we have more */
- rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
- rqg->active++; /* another active request */
- drive = &DRIVE[rqe->driveno]; /* drive to access */
-
- /* We can't sleep here, so we just increment the counters. */
- drive->active++;
- if (drive->active >= drive->maxactive)
- drive->maxactive = drive->active;
- vinum_conf.active++;
- if (vinum_conf.active >= vinum_conf.maxactive)
- vinum_conf.maxactive = vinum_conf.active;
-
-#ifdef VINUMDEBUG
- if (debug & DEBUG_ADDRESSES)
- log(LOG_DEBUG,
- " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
- rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
- major(rqe->b.b_dev),
- minor(rqe->b.b_dev),
- rqe->sdno,
- (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
- (long long)rqe->b.b_blkno,
- rqe->b.b_bcount);
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubp);
-#endif
- DEV_STRATEGY(&rqe->b);
-}
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumio.c b/sys/dev/vinum/vinumio.c
deleted file mode 100644
index d7d554d..0000000
--- a/sys/dev/vinum/vinumio.c
+++ /dev/null
@@ -1,918 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumio.c,v 1.39 2003/05/23 00:59:53 grog Exp grog $
- * $FreeBSD$
- */
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-static char *sappend(char *txt, char *s);
-static int drivecmp(const void *va, const void *vb);
-
-/*
- * Look up the disk device named in drive->devicename, take a
- * reference on it and open it for reading and writing.
- *
- * Returns EBUSY if the drive is already marked open and ENOENT
- * if no such disk is found; neither case touches
- * drive->lasterror.  Otherwise the result of the open attempt
- * is stored in drive->lasterror and returned.  On failure the
- * drive state is forced to drive_down and, if verbose is set,
- * a warning is logged.
- */
-int
-open_drive(struct drive *drive, struct thread *td, int verbose)
-{
-    struct cdevsw *dsw;				    /* device switch entry */
-    int error;
-
-    if ((drive->flags & VF_OPEN) != 0)		    /* we already have it open */
-	return EBUSY;
-
-    drive->dev = getdiskbyname(drive->devicename);
-    if (drive->dev == NULL)			    /* no such disk */
-	return ENOENT;
-    dev_ref(drive->dev);
-
-    drive->dev->si_iosize_max = DFLTPHYS;
-    dsw = devsw(drive->dev);
-    if (dsw == NULL)				    /* sanity, should not happen */
-	error = ENOENT;
-    else if ((dsw->d_flags & D_DISK) == 0)	    /* not a disk device */
-	error = ENOTBLK;
-    else {
-	DROP_GIANT();
-	error = (dsw->d_open) (drive->dev, FWRITE | FREAD, 0, td);
-	PICKUP_GIANT();
-    }
-    drive->lasterror = error;
-
-    if (error == 0) {
-	drive->flags |= VF_OPEN;		    /* remember that it's open */
-	return drive->lasterror;
-    }
-    drive->state = drive_down;			    /* couldn't open: force it down */
-    if (verbose)
-	log(LOG_WARNING,
-	    "vinum open_drive %s: failed with error %d\n",
-	    drive->devicename, drive->lasterror);
-    return drive->lasterror;
-}
-
-/*
- * Derive the remaining drive parameters from the sector and
- * media sizes already stored in the drive structure: block
- * size, sectors per block, label contents (host name, creation
- * time, size), available sectors and an initial free list
- * consisting of one entry that starts at DATASTART.
- *
- * Returns 0 on success.  Returns ENOSPC if the drive is smaller
- * than MINVINUMSLICE (the drive is then forced down) or if the
- * free list can't be allocated.
- */
-int
-set_drive_parms(struct drive *drive)
-{
-    drive->blocksize = BLKDEV_IOSIZE;		    /* do we need this? */
-    drive->secsperblock = drive->blocksize / drive->sectorsize;
-
-    /* Fill in the label */
-    bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* our host name */
-    microtime(&drive->label.date_of_birth);	    /* created now */
-    drive->label.drive_size = drive->mediasize;	    /* size in bytes */
-#ifdef VINUMDEBUG
-    if (debug & DEBUG_BIGDRIVE)			    /* pretend we're 100 times as big */
-	drive->label.drive_size *= 100;
-#endif
-
-    /* What's left for subdisks, in sectors */
-    drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
-
-    /*
-     * Bug in 3.0 as of January 1998: you can open
-     * non-existent slices.  They have a length of 0.
-     */
-    if (drive->label.drive_size < MINVINUMSLICE) {  /* not worth having */
-	set_drive_state(drive->driveno, drive_down, setstate_force);
-	drive->lasterror = ENOSPC;
-	return ENOSPC;
-    }
-
-    /* Set up a free list with a single entry: everything after DATASTART */
-    drive->freelist_size = INITIAL_DRIVE_FREELIST;
-    drive->freelist = (struct drive_freelist *)
-	Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
-    if (drive->freelist == NULL)		    /* no memory */
-	return ENOSPC;
-    drive->freelist_entries = 1;
-    drive->freelist[0].offset = DATASTART;
-    drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART;
-
-    if (drive->label.name[0] == '\0')		    /* no name: we only know of it */
-	drive->state = drive_referenced;
-    else					    /* named drive: it's accessible */
-	set_drive_state(drive->driveno, drive_up, setstate_force);
-    return 0;
-}
-
-/*
- * Bring a drive into service: open the device, ask it for its
- * sector size and media size, and let set_drive_parms fill in
- * the rest.
- *
- * Returns 0 or an error number, which is also left in
- * drive->lasterror when the open or one of the ioctls fails.
- * If an ioctl fails the drive is closed again; with verbose
- * set, that failure is logged.
- */
-int
-init_drive(struct drive *drive, int verbose)
-{
-    int error;
-
-    error = open_drive(drive, curthread, verbose);
-    drive->lasterror = error;
-    if (error != 0)				    /* couldn't open it */
-	return drive->lasterror;
-
-    DROP_GIANT();
-    error = (*devsw(drive->dev)->d_ioctl) (drive->dev,
-	DIOCGSECTORSIZE,
-	(caddr_t) & drive->sectorsize,
-	FREAD,
-	curthread);
-    if (error == 0)				    /* got the sector size, now the media size */
-	error = (*devsw(drive->dev)->d_ioctl) (drive->dev,
-	    DIOCGMEDIASIZE,
-	    (caddr_t) & drive->mediasize,
-	    FREAD,
-	    curthread);
-    drive->lasterror = error;
-    PICKUP_GIANT();
-
-    if (drive->lasterror == 0)
-	return set_drive_parms(drive);		    /* work out the rest */
-
-    if (verbose)
-	log(LOG_ERR,
-	    "vinum: Can't get drive dimensions for %s: error %d\n",
-	    drive->devicename,
-	    drive->lasterror);
-    close_drive(drive);
-    return drive->lasterror;
-}
-
-/*
- * Close a drive.  With the drive locked, close the device if
- * it is marked open and make sure that the drive state is no
- * higher than drive_down.
- */
-void
-close_drive(struct drive *drive)
-{
-    LOCKDRIVE(drive);				    /* keep the daemon out */
-    if ((drive->flags & VF_OPEN) != 0) {	    /* it's open, */
-	close_locked_drive(drive);		    /* so close it */
-    }
-    if (drive->state > drive_down) {		    /* it can't stay up */
-	drive->state = drive_down;
-    }
-    unlockdrive(drive);
-}
-
-/*
- * Do the real work of closing a drive.  The caller holds the
- * drive lock and has checked that the drive is open.  Nothing
- * is returned: an error from the device close routine is kept
- * in drive->lasterror unless an earlier error is already
- * recorded there.
- */
-void
-close_locked_drive(struct drive *drive)
-{
-    int rc;					    /* result of the device close */
-
-    DROP_GIANT();
-    rc = (*devsw(drive->dev)->d_close) (drive->dev, FWRITE | FREAD, 0, NULL);
-    PICKUP_GIANT();
-    drive->flags &= ~VF_OPEN;			    /* it's closed now */
-    if ((rc != 0) && (drive->lasterror == 0))	    /* first error on this drive */
-	drive->lasterror = rc;
-}
-
-/*
- * Take a drive out of the configuration.  The caller must make
- * sure that the drive isn't active.
- *
- * Nothing happens unless the drive state is above
- * drive_referenced.  If the drive is up, its on-disk header is
- * read, the magic number is replaced by VINUM_NOMAGIC and the
- * header is written back; a read error is recorded in
- * drive->lasterror instead.  The drive is then freed and the
- * configuration saved.
- */
-void
-remove_drive(int driveno)
-{
-    struct drive *drive = &vinum_conf.drive[driveno];
-    struct vinum_hdr *vhdr;			    /* header buffer */
-    int error;
-
-    if (drive->state <= drive_referenced)	    /* not a real drive */
-	return;
-
-    if (drive->state == drive_up) {
-	vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);
-	CHECKALLOC(vhdr, "Can't allocate memory");
-	error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
-	if (error == 0) {
-	    vhdr->magic = VINUM_NOMAGIC;	    /* wipe the magic, keep everything else */
-	    write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
-	} else
-	    drive->lasterror = error;
-	Free(vhdr);
-    }
-    free_drive(drive);				    /* close it and release its resources */
-    save_config();				    /* and save the new configuration */
-}
-
-/*
- * Transfer data between memory and a drive.  Usually reached
- * through the read_drive and write_drive macros, which supply
- * the flag argument; flag is stored in the buffer's b_iocmd.
- *
- * length and offset are in bytes, but must be multiples of
- * the sector size.  This is *not* checked, and the block
- * number is obtained by integer division of the offset.
- *
- * The transfer is split into pieces of at most MAXBSIZE
- * bytes, each one waited for before the next is started.
- * Returns 0, or the error from the first piece that fails.
- */
-int
-driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
-{
-    struct buf *bp;
-    int error = 0;				    /* nothing to transfer is not an error */
-    int len;					    /* size of the current piece */
-
-    for (; length != 0; length -= len, buf += len, offset += len) {
-	len = min(length, MAXBSIZE);		    /* maximum block device transfer is MAXBSIZE */
-
-	bp = geteblk(len);			    /* borrow a buffer header */
-	bp->b_flags = 0;
-	bp->b_iocmd = flag;
-	bp->b_dev = drive->dev;
-	bp->b_blkno = offset / drive->sectorsize;
-	bp->b_offset = offset;
-	bp->b_iooffset = offset;
-	bp->b_saveaddr = bp->b_data;		    /* remember the buffer's own data area */
-	bp->b_data = buf;			    /* and transfer to or from ours instead */
-	bp->b_bcount = len;
-	DEV_STRATEGY(bp);			    /* start the transfer */
-	error = bufwait(bp);			    /* and wait for it */
-	bp->b_data = bp->b_saveaddr;		    /* put the data area back */
-	bp->b_flags |= B_INVAL | B_AGE;
-	bp->b_ioflags &= ~BIO_ERROR;
-	brelse(bp);
-	if (error)				    /* stop at the first failure */
-	    break;
-    }
-    return error;
-}
-
-/*
- * Check a drive for a vinum header.  If found,
- * update the drive information.  We come here
- * with a partially populated drive structure
- * which includes the device name.
- *
- * Return information on what we found:
- *
- *  DL_CANT_OPEN	the drive couldn't be initialized, or
- *			the header couldn't be read.  In the
- *			second case the error is stored in
- *			drive->lasterror and the drive is closed.
- *  DL_OURS		the header has the vinum magic number
- *  DL_WRONG_DRIVE	as DL_OURS, but the drive already had a
- *			name and the header has a different one
- *  DL_DELETED_LABEL	the magic number is VINUM_NOMAGIC
- *  DL_NOT_OURS		anything else
- *
- * For DL_OURS and DL_WRONG_DRIVE the label in the
- * header is copied to the drive structure.
- *
- * This function is called from two places: check_drive,
- * which wants to find out whether the drive is a
- * Vinum drive, and config_drive, which asserts that
- * it is a vinum drive.  In the first case, we don't
- * print error messages (verbose==0), in the second
- * we do (verbose==1).  Only the header read failure
- * is reported here; init_drive is always called quietly.
- */
-enum drive_label_info
-read_drive_label(struct drive *drive, int verbose)
-{
-    int error;
-    int result;					    /* result of our search */
-    struct vinum_hdr *vhdr;			    /* and as header */
-
-    error = init_drive(drive, 0);		    /* find the drive */
-    if (error)					    /* couldn't */
-	return DL_CANT_OPEN;			    /* not ours */
-
-    vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);	/* allocate buffers */
-    CHECKALLOC(vhdr, "Can't allocate memory");
-
-    drive->state = drive_up;			    /* be optimistic */
-    error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
-    if (error) {
-	/*
-	 * The read failed, so the buffer doesn't contain
-	 * the header.  Don't look at the magic number:
-	 * treat the drive like one we couldn't open.
-	 */
-	drive->lasterror = error;
-	if (verbose)
-	    log(LOG_ERR,
-		"vinum: Can't read label of %s: error %d\n",
-		drive->devicename,
-		error);
-	Free(vhdr);
-	close_drive(drive);			    /* closes it and takes it down */
-	return DL_CANT_OPEN;
-    }
-    if (vhdr->magic == VINUM_MAGIC) {		    /* ours! */
-	if (drive->label.name[0]		    /* we have a name for this drive */
-	&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
-	    drive->lasterror = EINVAL;
-	    result = DL_WRONG_DRIVE;		    /* it's the wrong drive */
-	    drive->state = drive_unallocated;	    /* put it back, it's not ours */
-	} else
-	    result = DL_OURS;
-	/*
-	 * We copy the drive anyway so that we have
-	 * the correct name in the drive info.  This
-	 * may not be the name specified
-	 */
-	drive->label = vhdr->label;		    /* put in the label information */
-    } else if (vhdr->magic == VINUM_NOMAGIC)	    /* was ours, but we gave it away */
-	result = DL_DELETED_LABEL;		    /* and return the info */
-    else
-	result = DL_NOT_OURS;			    /* we could have it, but we don't yet */
-    Free(vhdr);					    /* that's all. */
-    return result;
-}
-
-/*
- * Check a device for a vinum header and, if it has one,
- * incorporate the drive into the configuration.
- *
- * Returns a pointer to the drive entry, or NULL if the device
- * isn't one of our drives (the entry is then closed and
- * freed).  An entry whose state is already drive_down or
- * higher is returned without looking at the device.
- *
- * If another allocated entry has the same drive name and a
- * device name starting with '/', drive->lasterror is set to
- * EEXIST.  If the other entry has no such device name, every
- * allocated subdisk pointing to it is moved to the drive found
- * here and the other entry is zeroed.
- */
-struct drive *
-check_drive(char *devicename)
-{
-    struct drive *drive;
-    struct drive *other;			    /* entry with the same drive name */
-    int driveno;
-    int sdno;
-    int i;
-
-    driveno = find_drive_by_name(devicename, 1);    /* creates the entry if there isn't one */
-    drive = &vinum_conf.drive[driveno];
-
-    if (drive->state >= drive_down)		    /* up or down, we know it */
-	return drive;
-
-    if (read_drive_label(drive, 0) != DL_OURS) {    /* not one of ours, */
-	close_drive(drive);
-	free_drive(drive);			    /* forget about it */
-	return NULL;
-    }
-    for (i = 0; i < vinum_conf.drives_allocated; i++) {
-	if (i == driveno)			    /* that's us */
-	    continue;
-	if (DRIVE[i].state == drive_unallocated)    /* unused entry */
-	    continue;
-	if (strcmp(DRIVE[i].label.name,
-		DRIVE[driveno].label.name) != 0)    /* different drive name */
-	    continue;
-
-	other = &DRIVE[i];
-	if (other->devicename[0] == '/') {	    /* it has a device name */
-	    /*
-	     * Note the error, but leave the drive up:
-	     * taking it down would only cause unneeded
-	     * error messages.
-	     */
-	    drive->lasterror = EEXIST;
-	    break;
-	}
-	/* The other entry is only a placeholder: take over its subdisks */
-	for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
-	    if ((SD[sdno].driveno == i)		    /* it's on the placeholder */
-	    &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
-		SD[sdno].driveno = drive->driveno;  /* it belongs to us now */
-		update_sd_state(sdno);
-	    }
-	}
-	bzero(other, sizeof(struct drive));	    /* don't deallocate it, just remove it */
-    }
-    return drive;
-}
-
-/*
- * Copy the string txt, including its terminating NUL, to s.
- * Returns a pointer to the NUL written to s, so that the
- * result can be used as the destination of the next append.
- * Nothing checks that the text fits.
- */
-static char *
-sappend(char *txt, char *s)
-{
-    char c;
-
-    for (;;) {
-	c = *txt++;
-	*s = c;
-	if (c == '\0')				    /* copied the terminator: stay on it */
-	    return s;
-	s++;
-    }
-}
-
-void
-format_config(char *config, int len)
-{ /*
- * Write a textual description of the current configuration
- * to config, which is len bytes long.  The buffer is zeroed
- * first; then one line is written for each volume, each plex
- * and each subdisk whose state and name pass the checks
- * below.  Panics if the text ends beyond config[len - 2].
- *
- * NOTE(review): the snprintf calls are limited to the space
- * up to configend, but sappend is not bounded, and the
- * overflow check is only made at the very end.
- */
- int i;
- int j;
- char *s = config; /* current write position in config */
- char *configend = &config[len]; /* first byte after the buffer */
-
- bzero(config, len);
-
- /* First write the volume configuration */
- for (i = 0; i < vinum_conf.volumes_allocated; i++) {
- struct volume *vol;
-
- vol = &vinum_conf.volume[i];
- if ((vol->state > volume_uninit)
- && (vol->name[0] != '\0')) { /* paranoia */
- snprintf(s,
- configend - s,
- "volume %s state %s",
- vol->name,
- volume_state(vol->state));
- while (*s)
- s++; /* find the end */
- s = sappend("\n", s);
- }
- }
-
- /* Then the plex configuration */
- for (i = 0; i < vinum_conf.plexes_allocated; i++) {
- struct plex *plex;
- struct volume *vol;
-
- plex = &vinum_conf.plex[i];
- if ((plex->state > plex_referenced)
- && (plex->name[0] != '\0')) { /* paranoia */
- snprintf(s,
- configend - s,
- "plex name %s state %s org %s ",
- plex->name,
- plex_state(plex->state),
- plex_org(plex->organization));
- while (*s)
- s++; /* find the end */
- if (isstriped(plex)) {
- snprintf(s,
- configend - s,
- "%ds ",
- (int) plex->stripesize);
- while (*s)
- s++; /* find the end */
- }
- if (plex->volno >= 0) { /* we have a volume */
- vol = &VOL[plex->volno];
- snprintf(s,
- configend - s,
- "vol %s ",
- vol->name);
- while (*s)
- s++; /* find the end */
- if ((vol->preferred_plex >= 0) /* has a preferred plex */
- &&vol->plex[vol->preferred_plex] == i) /* and it's us */
- snprintf(s, configend - s, "preferred ");
- while (*s)
- s++; /* find the end */
- }
- for (j = 0; j < plex->subdisks; j++) { /* NOTE(review): s is not advanced in this loop, so each entry overwrites the previous one and the "\n" appended below overwrites the start of the last; confirm whether this is intended */
- snprintf(s,
- configend - s,
- " sd %s",
- vinum_conf.sd[plex->sdnos[j]].name);
- }
- s = sappend("\n", s);
- }
- }
-
- /* And finally the subdisk configuration */
- for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
- struct sd *sd;
- char *drivename;
-
- sd = &SD[i];
- if ((sd->state != sd_referenced)
- && (sd->state != sd_unallocated)
- && (sd->name[0] != '\0')) { /* paranoia */
- drivename = vinum_conf.drive[sd->driveno].label.name;
- /*
- * XXX We've seen cases of dead subdisks
- * which don't have a drive. If we let them
- * through here, the drive name is null, so
- * they get the drive named 'plex'.
- *
- * This is a breakage limiter, not a fix.
- */
- if (drivename[0] == '\0')
- drivename = "*invalid*";
- snprintf(s,
- configend - s,
- "sd name %s drive %s len %llus driveoffset %llus state %s",
- sd->name,
- drivename,
- (unsigned long long) sd->sectors,
- (unsigned long long) sd->driveoffset,
- sd_state(sd->state));
- while (*s)
- s++; /* find the end */
- if (sd->plexno >= 0)
- snprintf(s,
- configend - s,
- " plex %s plexoffset %llds",
- vinum_conf.plex[sd->plexno].name,
- (long long) sd->plexoffset);
- else
- snprintf(s, configend - s, " detached");
- while (*s)
- s++; /* find the end */
- if (sd->flags & VF_RETRYERRORS) {
- snprintf(s, configend - s, " retryerrors");
- while (*s)
- s++; /* find the end */
- }
- snprintf(s, configend - s, " \n");
- while (*s)
- s++; /* find the end */
- }
- }
- if (s > &config[len - 2]) /* the text reached the last bytes of the buffer */
- panic("vinum: configuration data overflow");
-}
-
-/*
- * issue a save config request to the dæmon. The actual work
- * is done in process context by daemon_save_config.
- */
-void
-save_config(void)
-{
- queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) 0);
-}
-
-/*
- * Write the configuration to all vinum slices. This
- * is performed by the daemon only.
- */
-void
-daemon_save_config(void)
-{
- int error;
- int written_config; /* set when we first write the config to disk */
- int driveno;
- struct drive *drive; /* point to current drive info */
- struct vinum_hdr *vhdr; /* and as header */
- char *config; /* point to config data */
-
- /* don't save the configuration while we're still working on it */
- if (vinum_conf.flags & VF_CONFIGURING)
- return;
- written_config = 0; /* no config written yet */
- /* Build a volume header */
- vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
- CHECKALLOC(vhdr, "Can't allocate config data");
- vhdr->magic = VINUM_MAGIC; /* magic number */
- vhdr->config_length = MAXCONFIG; /* length of following config info */
-
- config = Malloc(MAXCONFIG); /* get space for the config data */
- CHECKALLOC(config, "Can't allocate config data");
-
- format_config(config, MAXCONFIG);
- error = 0; /* no errors yet */
- for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
- drive = &vinum_conf.drive[driveno]; /* point to drive */
- if (drive->state > drive_referenced) {
- LOCKDRIVE(drive); /* don't let it change */
-
- /*
- * First, do some drive consistency checks. Some
- * of these are kludges, others require a process
- * context and couldn't be done before.
- */
- if ((drive->devicename[0] == '\0')
- || (drive->label.name[0] == '\0')) {
- unlockdrive(drive);
- free_drive(drive); /* get rid of it */
- break;
- }
- if (((drive->flags & VF_OPEN) == 0) /* drive not open */
- &&(drive->state > drive_down)) { /* and it thinks it's not down */
- unlockdrive(drive);
- set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
- continue;
- }
- if ((drive->state == drive_down) /* it's down */
- &&(drive->flags & VF_OPEN)) { /* but open, */
- unlockdrive(drive);
- close_drive(drive); /* close it */
- } else if (drive->state > drive_down) {
- microtime(&drive->label.last_update); /* time of last update is now */
- bcopy((char *) &drive->label, /* and the label info from the drive structure */
- (char *) &vhdr->label,
- sizeof(vhdr->label));
- if ((drive->state != drive_unallocated)
- && (drive->state != drive_referenced)) { /* and it's a real drive */
- error = write_drive(drive,
- (char *) vhdr,
- VINUMHEADERLEN,
- VINUM_LABEL_OFFSET);
- if (error == 0) /* first config copy */
- error = write_drive(drive,
- config,
- MAXCONFIG,
- VINUM_CONFIG_OFFSET);
- if (error == 0)
- error = write_drive(drive, /* second copy */
- config,
- MAXCONFIG,
- VINUM_CONFIG_OFFSET + MAXCONFIG);
- unlockdrive(drive);
- if (error) {
- log(LOG_ERR,
- "vinum: Can't write config to %s, error %d\n",
- drive->devicename,
- error);
- set_drive_state(drive->driveno, drive_down, setstate_force);
- } else
- written_config = 1; /* we've written it on at least one drive */
- }
- } else /* not worth looking at, */
- unlockdrive(drive); /* just unlock it again */
- }
- }
- Free(vhdr);
- Free(config);
-}
-
-/*
- * Search disks on system for vinum slices and add
- * them to the configuuration if they're not
- * there already. devicename is a blank-separate
- * list of device names. If not provided, use
- * sysctl to get a list of all disks on the
- * system.
- *
- * Return an error indication.
- */
-int
-vinum_scandisk(char *devicename)
-{
- struct drive *volatile drive;
- volatile int driveno;
- int firstdrive; /* first drive in this list */
- volatile int gooddrives; /* number of usable drives found */
- int firsttime; /* set if we have never configured before */
- int error;
- char *config_text; /* read the config info from disk into here */
- char *volatile cptr; /* pointer into config information */
- char *eptr; /* end pointer into config information */
- char *config_line; /* copy the config line to */
- volatile int status;
- int *drivelist; /* list of drive indices */
- char *partname; /* for creating partition names */
- char *cp; /* pointer to start of disk name */
- char *ep; /* and to first char after name */
- char *np; /* name pointer in naem we build */
- size_t alloclen;
- int malloced;
- int partnamelen; /* length of partition name */
- int drives;
- int goodpart; /* good vinum drives on this disk */
-
- malloced = 0; /* devicename not malloced */
- if (devicename == NULL) { /* no devices specified, */
- /* get a list of all disks in the system */
- /* Get size of disk list */
- error = kernel_sysctlbyname(&thread0, "kern.disks", NULL,
- NULL, NULL, 0, &alloclen);
- if (error) {
- log(LOG_ERR, "vinum: can't get disk list: %d\n", error);
- return EINVAL;
- }
- devicename = Malloc(alloclen);
- if (devicename == NULL) {
- printf("vinum: can't allocate memory for drive list");
- return ENOMEM;
- } else
- malloced = 1;
- /* Now get the list of disks */
- kernel_sysctlbyname(&thread0, "kern.disks", devicename,
- &alloclen, NULL, 0, NULL);
- }
- status = 0; /* success indication */
- vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */
- partname = Malloc(MAXPATHLEN); /* extract name of disk here */
- if (partname == NULL) {
- printf("vinum_scandisk: can't allocate memory for drive name");
- return ENOMEM;
- }
- gooddrives = 0; /* number of usable drives found */
- firstdrive = vinum_conf.drives_used; /* the first drive */
- firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */
-
- /* allocate a drive pointer list */
- drives = 256; /* should be enough for most cases */
- drivelist = (int *) Malloc(drives * sizeof(int));
- CHECKALLOC(drivelist, "Can't allocate memory");
- error = lock_config(); /* make sure we're alone here */
- if (error)
- return error;
- error = setjmp(command_fail); /* come back here on error */
- if (error) /* longjmped out */
- return error;
-
- /* Open all drives and find which was modified most recently */
- for (cp = devicename; *cp; cp = ep) {
- char part; /* UNIX partition */
-#ifdef __i386__
- int slice;
-#endif
-
- while (*cp == ' ')
- cp++; /* find start of name */
- if (*cp == '\0') /* done, */
- break;
- ep = cp;
- while (*ep && (*ep != ' ')) /* find end of name */
- ep++;
-
- np = partname; /* start building up a name here */
- if (*cp != '/') { /* name doesn't start with /, */
- strcpy(np, "/dev/"); /* assume /dev */
- np += strlen("/dev/");
- }
- memcpy(np, cp, ep - cp); /* put in name */
- np += ep - cp; /* and point past */
-
- goodpart = 0; /* no partitions on this disk yet */
- partnamelen = MAXPATHLEN + np - partname; /* remaining length in partition name */
-#ifdef __i386__
- /* first try the partition table */
- for (slice = 1; slice < 5; slice++)
- for (part = 'a'; part < 'i'; part++) {
- if (part != 'c') { /* don't do the c partition */
- snprintf(np,
- partnamelen,
- "s%d%c",
- slice,
- part);
- drive = check_drive(partname); /* try to open it */
- if (drive) { /* got something, */
- if (drive->flags & VF_CONFIGURED) /* already read this config, */
- log(LOG_WARNING,
- "vinum: already read config from %s\n", /* say so */
- drive->label.name);
- else {
- if (gooddrives == drives) /* ran out of entries */
- EXPAND(drivelist, int, drives, drives); /* double the size */
- drivelist[gooddrives] = drive->driveno; /* keep the drive index */
- drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
- gooddrives++;
- goodpart++;
- }
- }
- }
- }
-#endif
- /*
- * If the machine doesn't have a BIOS
- * partition table, try normal devices.
- */
- if (goodpart == 0) { /* didn't find anything, */
- for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */
- if (part != 'c') { /* don't do the c partition */
- snprintf(np,
- partnamelen,
- "%c",
- part);
- drive = check_drive(partname); /* try to open it */
- if (drive) { /* got something, */
- if (drive->flags & VF_CONFIGURED) /* already read this config, */
- log(LOG_WARNING,
- "vinum: already read config from %s\n", /* say so */
- drive->label.name);
- else {
- if (gooddrives == drives) /* ran out of entries */
- EXPAND(drivelist, int, drives, drives); /* double the size */
- drivelist[gooddrives] = drive->driveno; /* keep the drive index */
- drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
- gooddrives++;
- goodpart++;
- }
- }
- }
- }
- }
- Free(partname);
-
- if (gooddrives == 0) {
- if (firsttime)
- log(LOG_WARNING, "vinum: no drives found\n");
- else
- log(LOG_INFO, "vinum: no additional drives found\n");
- if (malloced)
- Free(devicename);
- unlock_config();
- return ENOENT;
- }
- /*
- * We now have at least one drive open. Sort
- * them in order of config time and merge the
- * config info with what we have already.
- */
- qsort(drivelist, gooddrives, sizeof(int), drivecmp);
- config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
- CHECKALLOC(config_text, "Can't allocate memory");
- config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
- CHECKALLOC(config_line, "Can't allocate memory");
- for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */
- drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
-
- if (firsttime && (driveno == 0)) /* we've never configured before, */
- log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
- else
- log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
-
- if (drive->state == drive_up)
- /* Read in both copies of the configuration information */
- error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
- else {
- error = EIO;
- printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
- }
-
- if (error != 0) {
- log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
- free_drive(drive); /* give it back */
- status = error;
- }
- /*
- * At this point, check that the two copies
- * are the same, and do something useful if
- * not. In particular, consider which is
- * newer, and what this means for the
- * integrity of the data on the drive.
- */
- else {
- vinum_conf.drives_used++; /* another drive in use */
- /* Parse the configuration, and add it to the global configuration */
- for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
- volatile int parse_status; /* return value from parse_config */
-
- for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
- *eptr++ = *cptr++;
- *eptr = '\0'; /* and delimit */
- if (setjmp(command_fail) == 0) { /* come back here on error and continue */
- parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
- /*
- * parse_config recognizes referenced
- * drives and builds a drive entry for
- * them. This may expand the drive
- * table, thus invalidating the pointer.
- */
- drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
-
- if (parse_status < 0) { /* error in config */
- /*
- * This config should have been parsed
- * in user space. If we run into
- * problems here, something serious is
- * afoot. Complain and let the user
- * snarf the config to see what's
- * wrong.
- */
- log(LOG_ERR,
- "vinum: Config error on %s, aborting integration\n",
- drive->devicename);
- free_drive(drive); /* give it back */
- status = EINVAL;
- }
- }
- while (*cptr == '\n')
- cptr++; /* skip to next line */
- }
- }
- drive->flags |= VF_CONFIGURED; /* this drive's configuration is complete */
- }
-
- Free(config_line);
- Free(config_text);
- Free(drivelist);
- vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */
- if (status != 0)
- printf("vinum: couldn't read configuration");
- else
- updateconfig(VF_READING_CONFIG); /* update from disk config */
- if (malloced)
- Free(devicename);
- unlock_config();
- return status;
-}
-
-/*
- * Compare the modification dates of the drives, for qsort.
- * Return 1 if a < b, 0 if a == b, 01 if a > b: in other
- * words, sort backwards.
- */
-int
-drivecmp(const void *va, const void *vb)
-{
- const struct drive *a = &DRIVE[*(const int *) va];
- const struct drive *b = &DRIVE[*(const int *) vb];
-
- if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
- && (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
- return 0;
- else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
- || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
- && (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
- return -1;
- else
- return 1;
-}
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumio.h b/sys/dev/vinum/vinumio.h
deleted file mode 100644
index bf5134a..0000000
--- a/sys/dev/vinum/vinumio.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumio.h,v 1.23 2003/05/04 05:25:46 grog Exp grog $
- * $FreeBSD$
- */
-
-#define L 'F' /* ID letter of our ioctls */
-
-#define MAX_IOCTL_REPLY 1024
-
-#ifdef VINUMDEBUG
-struct debuginfo {
- int changeit;
- int param;
-};
-
-#endif
-
-enum objecttype {
- drive_object,
- sd_object,
- plex_object,
- volume_object,
- invalid_object
-};
-
-/*
- * The state to set with VINUM_SETSTATE. Since each object has a
- * different set of states, we need to translate later.
- */
-enum objectstate {
- object_down,
- object_initializing,
- object_initialized,
- object_up
-};
-
-/*
- * This structure is used for modifying objects
- * (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
- * VINUM_DETACH, VINUM_REPLACE
- */
-struct vinum_ioctl_msg {
- int index;
- enum objecttype type;
- enum objectstate state; /* state to set (VINUM_SETSTATE) */
- enum parityop op; /* for parity ops */
- int force; /* do it even if it doesn't make sense */
- int recurse; /* recurse (VINUM_REMOVE) */
- int verify; /* verify (initsd, rebuildparity) */
- int otherobject; /* superordinate object (attach),
- * replacement object (replace) */
- int rename; /* rename object (attach) */
- int64_t offset; /* offset of subdisk (for attach) */
- int blocksize; /* size of block to revive (bytes) */
-};
-
-/* VINUM_CREATE returns a buffer of this kind */
-struct _ioctl_reply {
- int error;
- char msg[MAX_IOCTL_REPLY];
-};
-
-struct vinum_rename_msg {
- int index;
- int recurse; /* rename subordinate objects too */
- enum objecttype type;
- char newname[MAXNAME]; /* new name to give to object */
-};
-
-/* ioctl requests */
-#define BUFSIZE 1024 /* size of buffer, including continuations */
-#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
-#define VINUM_GETCONFIG _IOR(L, 65, struct __vinum_conf) /* get global config */
-#define VINUM_DRIVECONFIG _IOWR(L, 66, struct _drive) /* get drive config */
-#define VINUM_SDCONFIG _IOWR(L, 67, struct _sd) /* get subdisk config */
-#define VINUM_PLEXCONFIG _IOWR(L, 68, struct _plex) /* get plex config */
-#define VINUM_VOLCONFIG _IOWR(L, 69, struct _volume) /* get volume config */
-#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct _sd) /* get sd config for plex (plex, sdno) */
-#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
-#define VINUM_SAVECONFIG _IOW(L, 72, int) /* write config to disk */
-#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
-#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */
-#define VINUM_READCONFIG _IOC(IOC_IN | IOC_OUT, L, 75, BUFSIZE) /* read config from disk */
-#ifdef VINUMDEBUG
-#define VINUM_DEBUG _IOWR(L, 127, struct debuginfo) /* call the debugger from ioctl () */
-#endif
-
-/*
- * Start an object. Pass two integers:
- * msg [0] index in vinum_conf.<object>
- * msg [1] type of object (see below)
- *
- * Return ioctl_reply
- */
-#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
-#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
-#define VINUM_STARTCONFIG _IOW(L, 78, int) /* start a configuration operation */
-#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
-#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
-#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
-#define VINUM_REMOVE _IOWR(L, 83, struct _ioctl_reply) /* remove an object */
-#define VINUM_READPOL _IOWR(L, 84, struct _ioctl_reply) /* set read policy */
-#define VINUM_SETSTATE_FORCE _IOC(IOC_IN | IOC_OUT, L, 85, MAX_IOCTL_REPLY) /* diddle object state */
-#define VINUM_RESETSTATS _IOWR(L, 86, struct _ioctl_reply) /* reset object stats */
-#define VINUM_ATTACH _IOWR(L, 87, struct _ioctl_reply) /* attach an object */
-#define VINUM_DETACH _IOWR(L, 88, struct _ioctl_reply) /* remove an object */
-
-#define VINUM_RENAME _IOWR(L, 89, struct _ioctl_reply) /* rename an object */
-#define VINUM_REPLACE _IOWR(L, 90, struct _ioctl_reply) /* replace an object */
-
-#ifdef VINUMDEBUG
-#define VINUM_RQINFO _IOWR(L, 91, struct rqinfo) /* get request info [i] from trace buffer */
-#endif
-
-#define VINUM_DAEMON _IOC(0, L, 92, 0) /* perform the kernel part of Vinum daemon */
-#define VINUM_FINDDAEMON _IOC(0, L, 93, 0) /* check for presence of Vinum daemon */
-#define VINUM_SETDAEMON _IOW(L, 94, int) /* set daemon flags */
-#define VINUM_GETDAEMON _IOR(L, 95, int) /* get daemon flags */
-#define VINUM_PARITYOP _IOWR(L, 96, struct _ioctl_reply) /* check/rebuild RAID-4/5 parity */
-#define VINUM_MOVE _IOWR(L, 98, struct _ioctl_reply) /* move an object */
diff --git a/sys/dev/vinum/vinumioctl.c b/sys/dev/vinum/vinumioctl.c
deleted file mode 100644
index 235b125..0000000
--- a/sys/dev/vinum/vinumioctl.c
+++ /dev/null
@@ -1,960 +0,0 @@
-/*
- * XXX replace all the checks on object validity with
- * calls to valid<object>
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumioctl.c,v 1.23 2003/05/23 01:02:22 grog Exp grog $
- */
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-#ifdef VINUMDEBUG
-#include <sys/reboot.h>
-#endif
-
-void attachobject(struct vinum_ioctl_msg *);
-void detachobject(struct vinum_ioctl_msg *);
-void renameobject(struct vinum_rename_msg *);
-void replaceobject(struct vinum_ioctl_msg *);
-void moveobject(struct vinum_ioctl_msg *);
-void setreadpol(struct vinum_ioctl_msg *);
-
-jmp_buf command_fail; /* return on a failed command */
-
-/* ioctl routine */
-int
-vinumioctl(struct cdev *dev,
- u_long cmd,
- caddr_t data,
- int flag,
- struct thread *td)
-{
- unsigned int objno;
- struct sd *sd;
- struct plex *plex;
- struct volume *vol;
-
- /* First, decide what we're looking at */
- if ((minor(dev) == VINUM_SUPERDEV_MINOR)
- || (minor(dev) == VINUM_DAEMON_MINOR))
- return vinum_super_ioctl(dev, cmd, data);
- else /* real device */
- switch (DEVTYPE(dev)) {
- case VINUM_SD_TYPE:
- case VINUM_SD2_TYPE: /* second half of sd namespace */
- objno = Sdno(dev);
-
- sd = &SD[objno];
-
- switch (cmd) {
- case DIOCGSECTORSIZE:
- *(u_int *) data = sd->sectorsize;
- return 0;
-
- case DIOCGMEDIASIZE:
- *(u_int64_t *) data = sd->sectors * sd->sectorsize;
- return 0;
-
- /*
- * We don't have this stuff on hardware,
- * so just pretend to do it so that
- * utilities don't get upset.
- */
- case DIOCWDINFO: /* write partition info */
- case DIOCSDINFO: /* set partition info */
- return 0; /* not a titty */
-
- default:
- return ENOTTY; /* not my kind of ioctl */
- }
-
- return 0; /* pretend we did it */
-
- case VINUM_PLEX_TYPE:
- objno = Plexno(dev);
-
- plex = &PLEX[objno];
-
- switch (cmd) {
- case DIOCGSECTORSIZE:
- *(u_int64_t *) data = plex->sectorsize;
- return 0;
-
- case DIOCGMEDIASIZE:
- *(u_int64_t *) data = plex->length * plex->sectorsize;
- return 0;
-
- /*
- * We don't have this stuff on hardware,
- * so just pretend to do it so that
- * utilities don't get upset.
- */
- case DIOCWDINFO: /* write partition info */
- case DIOCSDINFO: /* set partition info */
- return 0; /* not a titty */
-
- default:
- return ENOTTY; /* not my kind of ioctl */
- }
-
- return 0; /* pretend we did it */
-
- case VINUM_VOLUME_TYPE:
- objno = Volno(dev);
-
- if ((unsigned) objno >= (unsigned) vinum_conf.volumes_allocated) /* not a valid volume */
- return ENXIO;
- vol = &VOL[objno];
- if (vol->state != volume_up) /* not up, */
- return EIO; /* I/O error */
-
- switch (cmd) {
- case DIOCGSECTORSIZE:
- *(u_int *) data = vol->sectorsize;
- return 0;
-
- case DIOCGMEDIASIZE:
- *(u_int64_t *) data = vol->size * vol->sectorsize;
- return 0;
-
- /*
- * We don't have this stuff on hardware,
- * so just pretend to do it so that
- * utilities don't get upset.
- */
- case DIOCWDINFO: /* write partition info */
- case DIOCSDINFO: /* set partition info */
- return 0; /* not a titty */
-
- default:
- return ENOTTY; /* not my kind of ioctl */
- }
- break;
- }
- return 0; /* XXX */
-}
-
-/* Handle ioctls for the super device */
-int
-vinum_super_ioctl(struct cdev *dev,
- u_long cmd,
- caddr_t data)
-{
- int error = 0;
- unsigned int index; /* for transferring config info */
- unsigned int sdno; /* for transferring config info */
- int fe; /* free list element number */
- struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
-
- ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
- if (error) /* bombed out */
- return 0; /* the reply will contain meaningful info */
- switch (cmd) {
-#ifdef VINUMDEBUG
- case VINUM_DEBUG:
- if (((struct debuginfo *) data)->changeit) /* change debug settings */
- debug = (((struct debuginfo *) data)->param);
- else {
- if (debug & DEBUG_REMOTEGDB)
- boothowto |= RB_GDB; /* serial debug line */
- else
- boothowto &= ~RB_GDB; /* local ddb */
- kdb_enter("vinum debug");
- }
- ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
- ioctl_reply->error = 0;
- return 0;
-#endif
-
- case VINUM_CREATE: /* create a vinum object */
- error = lock_config(); /* get the config for us alone */
- if (error) /* can't do it, */
- return error; /* give up */
- error = setjmp(command_fail); /* come back here on error */
- if (error == 0) /* first time, */
- ioctl_reply->error = parse_user_config((char *) data, /* update the config */
- &keyword_set);
- else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
- ioctl_reply->error = EINVAL; /* note that something's up */
- ioctl_reply->msg[0] = '\0'; /* no message? */
- }
- unlock_config();
- return 0; /* must be 0 to return the real error info */
-
- case VINUM_GETCONFIG: /* get the configuration information */
- bcopy(&vinum_conf, data, sizeof(vinum_conf));
- return 0;
-
- /* start configuring the subsystem */
- case VINUM_STARTCONFIG:
- return start_config(*(int *) data); /* just lock it. Parameter is 'force' */
-
- /*
- * Move the individual parts of the config to user space.
- *
- * Specify the index of the object in the first word of data,
- * and return the object there
- */
- case VINUM_DRIVECONFIG:
- index = *(int *) data; /* get the index */
- if (index >= (unsigned) vinum_conf.drives_allocated) /* can't do it */
- return ENXIO; /* bang */
- bcopy(&DRIVE[index], data, sizeof(struct _drive)); /* copy the config item out */
- return 0;
-
- case VINUM_SDCONFIG:
- index = *(int *) data; /* get the index */
- if (index >= (unsigned) vinum_conf.subdisks_allocated) /* can't do it */
- return ENXIO; /* bang */
- bcopy(&SD[index], data, sizeof(struct _sd)); /* copy the config item out */
- return 0;
-
- case VINUM_PLEXCONFIG:
- index = *(int *) data; /* get the index */
- if (index >= (unsigned) vinum_conf.plexes_allocated) /* can't do it */
- return ENXIO; /* bang */
- bcopy(&PLEX[index], data, sizeof(struct _plex)); /* copy the config item out */
- return 0;
-
- case VINUM_VOLCONFIG:
- index = *(int *) data; /* get the index */
- if (index >= (unsigned) vinum_conf.volumes_allocated) /* can't do it */
- return ENXIO; /* bang */
- bcopy(&VOL[index], data, sizeof(struct _volume)); /* copy the config item out */
- return 0;
-
- case VINUM_PLEXSDCONFIG:
- index = *(int *) data; /* get the plex index */
- sdno = ((int *) data)[1]; /* and the sd index */
- if ((index >= (unsigned) vinum_conf.plexes_allocated) /* plex doesn't exist */
- ||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
- return ENXIO; /* bang */
- bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
- data,
- sizeof(struct _sd));
- return 0;
-
- /*
- * We get called in two places: one from the
- * userland config routines, which call us
- * to complete the config and save it. This
- * call supplies the value 0 as a parameter.
- *
- * The other place is from the user "saveconfig"
- * routine, which can only work if we're *not*
- * configuring. In this case, supply parameter 1.
- */
- case VINUM_SAVECONFIG:
- if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
- if (*(int *) data == 0) /* finish config */
- finish_config(1); /* finish the configuration and update it */
- else
- return EBUSY; /* can't do it now */
- }
- save_config(); /* save configuration to disk */
- return 0;
-
- case VINUM_RELEASECONFIG: /* release the config */
- if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
- finish_config(0); /* finish the configuration, don't change it */
- save_config(); /* save configuration to disk */
- } else
- error = EINVAL; /* release what config? */
- return error;
-
- case VINUM_READCONFIG:
- if (((char *) data)[0] == '\0')
- ioctl_reply->error = vinum_scandisk(NULL); /* built your own list */
- else
- ioctl_reply->error = vinum_scandisk((char *) data);
- if (ioctl_reply->error == ENOENT) {
- if (vinum_conf.drives_used > 0)
- strcpy(ioctl_reply->msg, "no additional drives found");
- else
- strcpy(ioctl_reply->msg, "no drives found");
- } else if (ioctl_reply->error)
- strcpy(ioctl_reply->msg, "can't read configuration information, see log file");
- return 0; /* must be 0 to return the real error info */
-
- case VINUM_INIT:
- ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
- ioctl_reply->error = 0;
- return 0;
-
- case VINUM_RESETCONFIG:
- if (vinum_inactive(0)) { /* if the volumes are not active */
- /*
- * Note the open count. We may be called from v, so we'll be open.
- * Keep the count so we don't underflow
- */
- free_vinum(1); /* clean up everything */
- log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n");
- ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
- ioctl_reply->error = 0;
- return 0;
- }
- return EBUSY;
-
- case VINUM_SETSTATE:
- setstate((struct vinum_ioctl_msg *) data); /* set an object state */
- return 0;
-
- /*
- * Set state by force, without changing
- * anything else.
- */
- case VINUM_SETSTATE_FORCE:
- setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */
- return 0;
-
-#ifdef VINUMDEBUG
- case VINUM_MEMINFO:
- vinum_meminfo(data);
- return 0;
-
- case VINUM_MALLOCINFO:
- return vinum_mallocinfo(data);
-
- case VINUM_RQINFO:
- return vinum_rqinfo(data);
-#endif
-
- case VINUM_REMOVE:
- remove((struct vinum_ioctl_msg *) data); /* remove an object */
- return 0;
-
- case VINUM_GETFREELIST: /* get a drive free list element */
- index = *(int *) data; /* get the drive index */
- fe = ((int *) data)[1]; /* and the free list element */
- if ((index >= (unsigned) vinum_conf.drives_allocated) /* plex doesn't exist */
- ||(DRIVE[index].state == drive_unallocated))
- return ENODEV;
- if (fe >= DRIVE[index].freelist_entries) /* no such entry */
- return ENOENT;
- bcopy(&DRIVE[index].freelist[fe],
- data,
- sizeof(struct drive_freelist));
- return 0;
-
- case VINUM_RESETSTATS:
- resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
- return 0;
-
- /* attach an object to a superordinate object */
- case VINUM_ATTACH:
- attachobject((struct vinum_ioctl_msg *) data);
- return 0;
-
- /* detach an object from a superordinate object */
- case VINUM_DETACH:
- detachobject((struct vinum_ioctl_msg *) data);
- return 0;
-
- /* rename an object */
- case VINUM_RENAME:
- renameobject((struct vinum_rename_msg *) data);
- return 0;
-
- /* replace an object */
- case VINUM_REPLACE:
- replaceobject((struct vinum_ioctl_msg *) data);
- return 0;
-
- case VINUM_DAEMON:
- vinum_daemon(); /* perform the daemon */
- return 0;
-
- case VINUM_FINDDAEMON: /* check for presence of daemon */
- return vinum_finddaemon();
- return 0;
-
- case VINUM_SETDAEMON: /* set daemon flags */
- return vinum_setdaemonopts(*(int *) data);
-
- case VINUM_GETDAEMON: /* get daemon flags */
- *(int *) data = daemon_options;
- return 0;
-
- case VINUM_PARITYOP: /* check/rebuild RAID-4/5 parity */
- parityops((struct vinum_ioctl_msg *) data);
- return 0;
-
- /* move an object */
- case VINUM_MOVE:
- moveobject((struct vinum_ioctl_msg *) data);
- return 0;
-
- case VINUM_READPOL:
- setreadpol((struct vinum_ioctl_msg *) data);
- return 0;
-
- default:
- /* FALLTHROUGH */
- break;
- }
- return 0; /* to keep the compiler happy */
-}
-
-/*
- * The following four functions check the supplied
- * object index and return a pointer to the object
- * if it exists. Otherwise they store an error message
- * and ENOENT in the reply and return NULL.
- */
-/*
- * Return a pointer to drive number driveno if the index lies inside
- * the drive table and the drive's state is beyond drive_referenced.
- * Otherwise store "No such drive" and ENOENT in *reply and return NULL.
- */
-struct drive *
-validdrive(int driveno, struct _ioctl_reply *reply)
-{
-    /* The index is a signed int: refuse negative values before indexing */
-    if ((driveno >= 0)
-        && (driveno < vinum_conf.drives_allocated)
-        && (DRIVE[driveno].state > drive_referenced))
-        return &DRIVE[driveno];
-    strcpy(reply->msg, "No such drive");
-    reply->error = ENOENT;
-    return NULL;
-}
-
-/*
- * Return a pointer to subdisk number sdno if the index lies inside
- * the subdisk table and the subdisk's state is beyond sd_referenced.
- * Otherwise store "No such subdisk" and ENOENT in *reply and return NULL.
- */
-struct sd *
-validsd(int sdno, struct _ioctl_reply *reply)
-{
-    /* The index is a signed int: refuse negative values before indexing */
-    if ((sdno >= 0)
-        && (sdno < vinum_conf.subdisks_allocated)
-        && (SD[sdno].state > sd_referenced))
-        return &SD[sdno];
-    strcpy(reply->msg, "No such subdisk");
-    reply->error = ENOENT;
-    return NULL;
-}
-
-/*
- * Return a pointer to plex number plexno if the index lies inside
- * the plex table and the plex's state is beyond plex_referenced.
- * Otherwise store "No such plex" and ENOENT in *reply and return NULL.
- */
-struct plex *
-validplex(int plexno, struct _ioctl_reply *reply)
-{
-    /* The index is a signed int: refuse negative values before indexing */
-    if ((plexno >= 0)
-        && (plexno < vinum_conf.plexes_allocated)
-        && (PLEX[plexno].state > plex_referenced))
-        return &PLEX[plexno];
-    strcpy(reply->msg, "No such plex");
-    reply->error = ENOENT;
-    return NULL;
-}
-
-/*
- * Return a pointer to volume number volno if the index lies inside
- * the volume table and the volume's state is beyond volume_uninit.
- * Otherwise store "No such volume" and ENOENT in *reply and return NULL.
- */
-struct volume *
-validvol(int volno, struct _ioctl_reply *reply)
-{
-    /* The index is a signed int: refuse negative values before indexing */
-    if ((volno >= 0)
-        && (volno < vinum_conf.volumes_allocated)
-        && (VOL[volno].state > volume_uninit))
-        return &VOL[volno];
-    strcpy(reply->msg, "No such volume");
-    reply->error = ENOENT;
-    return NULL;
-}
-
-/*
- * Reset the statistics counters of the object selected by msg->type
- * and msg->index.
- *
- * The reply is written into the message buffer itself.  reply->error
- * is set to 0 if the counters were reset, and to EINVAL if the index
- * is out of range, the object is not in a usable state, or the object
- * type is not one we know.
- */
-void
-resetstats(struct vinum_ioctl_msg *msg)
-{
-    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
-    /*
-     * Collect the result in a local and store it last: reply and msg
-     * are the same memory, so the request must be read completely
-     * before the reply is written.
-     */
-    int error = EINVAL;
-
-    switch (msg->type) {
-    case drive_object:
-        if ((msg->index >= 0)
-            && (msg->index < vinum_conf.drives_allocated)) {
-            struct drive *drive = &DRIVE[msg->index];
-
-            if (drive->state > drive_referenced) {
-                drive->reads = 0;           /* number of reads on this drive */
-                drive->writes = 0;          /* number of writes on this drive */
-                drive->bytes_read = 0;      /* number of bytes read */
-                drive->bytes_written = 0;   /* number of bytes written */
-                error = 0;
-            }
-        }
-        break;                              /* don't fall into the subdisk case */
-
-    case sd_object:
-        if ((msg->index >= 0)
-            && (msg->index < vinum_conf.subdisks_allocated)) {
-            struct sd *sd = &SD[msg->index];
-
-            if (sd->state > sd_referenced) {
-                sd->reads = 0;              /* number of reads on this subdisk */
-                sd->writes = 0;             /* number of writes on this subdisk */
-                sd->bytes_read = 0;         /* number of bytes read */
-                sd->bytes_written = 0;      /* number of bytes written */
-                error = 0;
-            }
-        }
-        break;
-
-    case plex_object:
-        if ((msg->index >= 0)
-            && (msg->index < vinum_conf.plexes_allocated)) {
-            struct plex *plex = &PLEX[msg->index];
-
-            if (plex->state > plex_referenced) {
-                plex->reads = 0;            /* number of reads on this plex */
-                plex->writes = 0;           /* number of writes on this plex */
-                plex->bytes_read = 0;       /* number of bytes read */
-                plex->bytes_written = 0;    /* number of bytes written */
-                plex->recovered_reads = 0;  /* number of recovered read operations */
-                plex->degraded_writes = 0;  /* number of degraded writes */
-                plex->parityless_writes = 0; /* number of parityless writes */
-                plex->multiblock = 0;       /* requests that needed more than one block */
-                plex->multistripe = 0;      /* requests that needed more than one stripe */
-                error = 0;
-            }
-        }
-        break;
-
-    case volume_object:
-        if ((msg->index >= 0)
-            && (msg->index < vinum_conf.volumes_allocated)) {
-            struct volume *vol = &VOL[msg->index];
-
-            if (vol->state > volume_uninit) {
-                vol->bytes_read = 0;        /* number of bytes read */
-                vol->bytes_written = 0;     /* number of bytes written */
-                vol->reads = 0;             /* number of reads on this volume */
-                vol->writes = 0;            /* number of writes on this volume */
-                vol->recovered_reads = 0;   /* reads recovered from another plex */
-                error = 0;
-            }
-        }
-        break;
-
-    case invalid_object:                    /* can't get this */
-    default:
-        break;
-    }
-    reply->error = error;
-}
-
-/*
- * Attach an object to a superior object: either subdisk msg->index
- * to plex msg->otherobject, or plex msg->index to volume
- * msg->otherobject.
- *
- * The reply is written into the message buffer itself.  reply->error
- * is 0 on success, EAGAIN if an attached subdisk ends up in state
- * sd_reviving, EINVAL for an object type that cannot be attached or a
- * plex organization that needs msg->force, EBUSY if the object already
- * has an owner, ENOSPC if the volume already has MAXPLEX plexes, and
- * whatever validsd()/validplex()/validvol() report for a bad index.
- */
-void
-attachobject(struct vinum_ioctl_msg *msg)
-{
-    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
-    int sdno;
-    struct sd *sd;
-    struct plex *plex;
-    struct volume *vol;
-
-    switch (msg->type) {
-    case drive_object:                      /* you can't attach a drive to anything */
-    case volume_object:                     /* nor a volume */
-    case invalid_object:                    /* "this can't happen" */
-        reply->error = EINVAL;
-        reply->msg[0] = '\0';               /* vinum(8) doesn't do this */
-        return;
-
-    case sd_object:
-        sd = validsd(msg->index, reply);
-        if (sd == NULL)                     /* not a valid subdisk */
-            return;
-        plex = validplex(msg->otherobject, reply);
-        if (plex) {
-            /*
-             * We should be more intelligent about this.
-             * We should be able to reattach a dead
-             * subdisk, but if we want to increase the total
-             * number of subdisks, we have a lot of reshuffling
-             * to do. XXX
-             */
-            if ((plex->organization != plex_concat) /* can't attach to striped and RAID-4/5 */
-                && (!msg->force)) {         /* without using force */
-                reply->error = EINVAL;
-                strcpy(reply->msg, "Can't attach to this plex organization");
-            } else if (sd->plexno >= 0) {   /* already belongs to a plex */
-                reply->error = EBUSY;
-                /*
-                 * Name the owning plex.  sd->plexno is an index into
-                 * the plex table, not an offset from this subdisk.
-                 */
-                sprintf(reply->msg, "%s is already attached to %s",
-                    sd->name,
-                    PLEX[sd->plexno].name);
-            } else {
-                sd->plexoffset = msg->offset; /* this is where we want it */
-                set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
-                give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
-                update_sd_config(sd->sdno, 0);
-                save_config();
-                if (sd->state == sd_reviving)
-                    reply->error = EAGAIN;  /* need to revive it */
-                else
-                    reply->error = 0;
-            }
-        }
-        break;
-
-    case plex_object:
-        plex = validplex(msg->index, reply); /* get plex */
-        if (plex == NULL)
-            return;
-        vol = validvol(msg->otherobject, reply); /* and volume information */
-        if (vol) {
-            if (vol->plexes == MAXPLEX) {   /* we have too many already */
-                reply->error = ENOSPC;      /* nowhere to put it */
-                strcpy(reply->msg, "Too many plexes");
-            } else if (plex->volno >= 0) {  /* the plex has an owner */
-                reply->error = EBUSY;
-                sprintf(reply->msg, "%s is already attached to %s",
-                    plex->name,
-                    VOL[plex->volno].name);
-            } else {
-                for (sdno = 0; sdno < plex->subdisks; sdno++) {
-                    sd = &SD[plex->sdnos[sdno]];
-
-                    if (sd->state > sd_down) /* real subdisk, vaguely accessible */
-                        set_sd_state(plex->sdnos[sdno], sd_stale, setstate_force); /* make it stale */
-                }
-                set_plex_state(plex->plexno, plex_up, setstate_none); /* update plex state */
-                give_plex_to_volume(msg->otherobject, msg->index, 0); /* and give it to the volume */
-                update_plex_config(plex->plexno, 0);
-                save_config();
-                reply->error = 0;           /* all went well */
-            }
-        }
-    }
-}
-
-/*
- * Detach an object from a superior object: subdisk
- * msg->index from its plex, or plex msg->index from
- * its volume. The reply is written into the message
- * buffer itself: 0 on success, EINVAL for an object
- * type that cannot be detached, ENOENT if the object
- * is not attached, EBUSY if it is still needed and
- * msg->force is not set.
- */
-void
-detachobject(struct vinum_ioctl_msg *msg)
-{
- struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
- struct sd *sd;
- struct plex *plex;
- struct volume *vol;
- int sdno;
- int plexno;
-
- switch (msg->type) {
- case drive_object: /* you can't detach a drive from anything */
- case volume_object: /* nor a volume */
- case invalid_object: /* "this can't happen" */
- reply->error = EINVAL;
- reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
- return;
-
- case sd_object: /* detach a subdisk from its plex */
- sd = validsd(msg->index, reply);
- if (sd == NULL)
- return;
- if (sd->plexno < 0) { /* doesn't belong to a plex */
- reply->error = ENOENT;
- strcpy(reply->msg, "Subdisk is not attached");
- return;
- } else { /* valid plex number */
- plex = &PLEX[sd->plexno];
- if ((!msg->force) /* don't force things */
- &&((plex->state == plex_up) /* and the plex is up */
- ||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
- reply->error = EBUSY; /* we need this sd */
- reply->msg[0] = '\0';
- return;
- }
- sd->plexno = -1; /* anonymous sd */
- if (plex->subdisks == 1) { /* this was the only subdisk */
- Free(plex->sdnos); /* free the subdisk array */
- plex->sdnos = NULL; /* and note the fact */
- plex->subdisks_allocated = 0; /* no subdisk space */
- } else {
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* look for the subdisk in the plex's list */
- if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
- break;
- }
- if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
- bcopy(&plex->sdnos[sdno + 1],
- &plex->sdnos[sdno],
- (plex->subdisks - 1 - sdno) * sizeof(int));
- }
- plex->subdisks--; /* NOTE(review): also decremented if the search above found no match -- confirm that can't happen */
- if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
- /*
-  * This subdisk is named after the plex: prefix its name with "ex-".
-  * NOTE(review): the length includes plex->name's terminating NUL,
-  * so this only matches when the two names are identical -- confirm
-  * that a prefix comparison was not intended.
-  */
- {
- bcopy(sd->name,
- &sd->name[3],
- min(strlen(sd->name) + 1, MAXSDNAME - 3));
- bcopy("ex-", sd->name, 3);
- sd->name[MAXSDNAME - 1] = '\0';
- }
- update_plex_config(plex->plexno, 0);
- if (isstriped(plex)) /* we've just mutilated our plex, */
- set_plex_state(plex->plexno,
- plex_down,
- setstate_force | setstate_configuring);
- if (plex->volno >= 0) /* plex attached to volume, */
- update_volume_config(plex->volno);
- save_config();
- reply->error = 0;
- }
- return;
-
- case plex_object: /* detach a plex from its volume */
- plex = validplex(msg->index, reply); /* get plex */
- if (plex == NULL)
- return;
- if (plex->volno >= 0) {
- int volno = plex->volno;
-
- vol = &VOL[volno];
- if ((!msg->force) /* don't force things */
- &&((vol->state == volume_up) /* and the volume is up */
- &&(vol->plexes == 1))) { /* and this is the last plex */
- /*
- * XXX As elsewhere, check whether we will lose
- * mapping by removing this plex
- */
- reply->error = EBUSY; /* we need this plex */
- reply->msg[0] = '\0';
- return;
- }
- plex->volno = -1; /* anonymous plex */
- for (plexno = 0; plexno < vol->plexes; plexno++) { /* look for the plex in the volume's list */
- if (vol->plex[plexno] == msg->index) /* found our plex */
- break;
- }
- if (plexno < (vol->plexes - 1)) /* not the last one, compact */
- bcopy(&vol->plex[plexno + 1],
- &vol->plex[plexno],
- (vol->plexes - 1 - plexno) * sizeof(int));
- vol->plexes--;
- vol->last_plex_read = 0; /* don't go beyond the end */
- if (!bcmp(vol->name, plex->name, strlen(vol->name) + 1))
- /* this plex is named after the volume (same comparison as for subdisks above, NUL included) */
- {
- /* First rename the subdisks that are named after the plex, if asked to recurse */
- if (msg->recurse) {
- int sdno;
-
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- struct sd *sd = &SD[plex->sdnos[sdno]];
-
- if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
- /* subdisk is named after the plex */
- {
- bcopy(sd->name,
- &sd->name[3],
- min(strlen(sd->name) + 1, MAXSDNAME - 3));
- bcopy("ex-", sd->name, 3);
- sd->name[MAXSDNAME - 1] = '\0';
- }
- }
- }
- bcopy(plex->name,
- &plex->name[3],
- min(strlen(plex->name) + 1, MAXPLEXNAME - 3));
- bcopy("ex-", plex->name, 3);
- plex->name[MAXPLEXNAME - 1] = '\0';
- }
- update_volume_config(volno);
- save_config();
- reply->error = 0;
- } else {
- reply->error = ENOENT;
- strcpy(reply->msg, "Plex is not attached");
- }
- }
-}
-
-/*
- * Rename the object selected by msg->type and msg->index to
- * msg->newname.
- *
- * The reply is written into the message buffer itself.  reply->error
- * is 0 on success, EEXIST if an object of the same type already has
- * the new name, EINVAL for invalid_object, and whatever the valid*()
- * lookup reports if the index does not name an existing object.
- */
-void
-renameobject(struct vinum_rename_msg *msg)
-{
-    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
-    struct drive *drive;
-    struct sd *sd;
-    struct plex *plex;
-    struct volume *vol;
-
-    switch (msg->type) {
-    case drive_object:                      /* rename a drive */
-        if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
-            reply->error = EEXIST;
-            reply->msg[0] = '\0';
-            return;
-        }
-        drive = validdrive(msg->index, reply);
-        if (drive) {
-            bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
-            /* don't rely on the caller having terminated newname */
-            drive->label.name[MAXDRIVENAME - 1] = '\0';
-            save_config();
-            reply->error = 0;
-        }
-        return;
-
-    case sd_object:                         /* rename a subdisk */
-        if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
-            reply->error = EEXIST;
-            reply->msg[0] = '\0';
-            return;
-        }
-        sd = validsd(msg->index, reply);
-        if (sd) {
-            bcopy(msg->newname, sd->name, MAXSDNAME);
-            sd->name[MAXSDNAME - 1] = '\0'; /* always terminated */
-            update_sd_config(sd->sdno, 0);
-            save_config();
-            reply->error = 0;
-        }
-        return;
-
-    case plex_object:                       /* rename a plex */
-        if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
-            reply->error = EEXIST;
-            reply->msg[0] = '\0';
-            return;
-        }
-        plex = validplex(msg->index, reply);
-        if (plex) {
-            bcopy(msg->newname, plex->name, MAXPLEXNAME);
-            plex->name[MAXPLEXNAME - 1] = '\0'; /* always terminated */
-            update_plex_config(plex->plexno, 0);
-            save_config();
-            reply->error = 0;
-        }
-        return;
-
-    case volume_object:                     /* rename a volume */
-        if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
-            reply->error = EEXIST;
-            reply->msg[0] = '\0';
-            return;
-        }
-        vol = validvol(msg->index, reply);
-        if (vol) {
-            bcopy(msg->newname, vol->name, MAXVOLNAME);
-            vol->name[MAXVOLNAME - 1] = '\0'; /* always terminated */
-            update_volume_config(msg->index);
-            save_config();
-            reply->error = 0;
-        }
-        return;
-
-    case invalid_object:
-        reply->error = EINVAL;
-        reply->msg[0] = '\0';
-    }
-}
-
-/*
- * Replace one object with another; only drives are
- * envisaged so far.
- * msg->index is the drive number of the old drive,
- * msg->otherobject the drive number of the new drive.
- *
- * Not implemented: the reply is always ENODEV with an
- * explanatory message, and the configuration is not saved.
- */
-void
-replaceobject(struct vinum_ioctl_msg *msg)
-{
-    struct _ioctl_reply *reply;
-
-    reply = (struct _ioctl_reply *) msg;    /* the reply reuses the message buffer */
-    reply->error = ENODEV;                  /* until I know how to do this */
-    strcpy(reply->msg, "replace not implemented yet");
-}
-
-/*
- * Move subdisk msg->otherobject to drive msg->index.  The subdisk is
- * made stale (or empty), its space on the old drive is returned, and
- * the new drive chooses the offset.
- *
- * The reply is written into the message buffer itself.  reply->error
- * is 0 on success, including when the subdisk is already on that
- * drive, or whatever validdrive()/validsd() report for a bad index.
- */
-void
-moveobject(struct vinum_ioctl_msg *msg)
-{
-    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
-    struct drive *drive;
-    struct sd *sd;
-
-    /* Check that our objects are valid (i.e. they exist) */
-    drive = validdrive(msg->index, reply);
-    if (drive == NULL)
-        return;
-    sd = validsd(msg->otherobject, reply);
-    if (sd == NULL)
-        return;
-    if (sd->driveno == msg->index) {        /* sd already belongs to drive */
-        /*
-         * Nothing to do, but say so explicitly: the reply shares
-         * the request buffer, so leaving error unset would hand
-         * back part of the request as the result.
-         */
-        reply->error = 0;
-        return;
-    }
-
-    if (sd->state > sd_stale)
-        set_sd_state(sd->sdno, sd_stale, setstate_force); /* make the subdisk stale */
-    else
-        sd->state = sd_empty;
-    if (sd->plexno >= 0)                    /* part of a plex, */
-        update_plex_state(sd->plexno);      /* update its state */
-
-    /* Return the space on the old drive */
-    if ((sd->driveno >= 0)                  /* we have a drive, */
-        && (sd->sectors > 0))               /* and some space on it */
-        return_drive_space(sd->driveno,     /* return the space */
-            sd->driveoffset,
-            sd->sectors);
-
-    /* Reassign the old subdisk */
-    sd->driveno = msg->index;
-    sd->driveoffset = -1;                   /* let the drive decide where to put us */
-    give_sd_to_drive(sd->sdno);
-    reply->error = 0;
-}
-
-/*
- * Set the read policy of volume msg->index.  If msg->otherobject is
- * not negative it names a plex, which must exist and be attached to
- * the volume; its position in the volume (as returned by my_plex())
- * becomes the volume's preferred plex.  Otherwise the preferred plex
- * is set to -1.
- *
- * The reply is written into the message buffer itself.  reply->error
- * is 0 on success, ENOENT if the plex is not attached to the volume,
- * or whatever validvol()/validplex() report for a bad index.
- */
-void
-setreadpol(struct vinum_ioctl_msg *msg)
-{
-    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
-    struct volume *vol;
-    struct plex *plex;
-    int myplexno = -1;
-
-    /* Check that our objects are valid (i.e. they exist) */
-    vol = validvol(msg->index, reply);
-    if (vol == NULL)
-        return;
-
-    /* If a plex was specified, check that it is valid */
-    if (msg->otherobject >= 0) {
-        plex = validplex(msg->otherobject, reply);
-        if (plex == NULL)                   /* no such plex, reply is already set */
-            return;
-
-        /* Is it attached to this volume? */
-        myplexno = my_plex(msg->index, msg->otherobject);
-        if (myplexno < 0) {
-            strcpy(reply->msg, "Plex is not attached to volume");
-            reply->error = ENOENT;
-            return;
-        }
-    }
-    lock_config();
-    vol->preferred_plex = myplexno;
-    save_config();
-    unlock_config();
-    reply->error = 0;
-}
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumkw.h b/sys/dev/vinum/vinumkw.h
deleted file mode 100644
index d7bc7a5..0000000
--- a/sys/dev/vinum/vinumkw.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumkw.h,v 1.20 2003/05/07 03:32:09 grog Exp grog $
- * $FreeBSD$
- */
-
-/*
- * Command keywords that vinum knows. These include both user-level
- * and kernel-level stuff
- */
-
-/*
- * Our complete vocabulary. The names of the commands are
- * the same as the identifier without the kw_ at the beginning
- * (i.e. kw_create defines the "create" keyword). Preprocessor
- * magic in parser.c does the rest.
- *
- * To add a new word: put it in the table below and one of the
- * lists in vinumparser.c (probably keywords).
- */
-enum keyword {
- kw_create,
- kw_modify,
- kw_list,
- kw_l = kw_list,
- kw_ld, /* list drive */
- kw_ls, /* list subdisk */
- kw_lp, /* list plex */
- kw_lv, /* list volume */
- kw_set,
- kw_rm,
- kw_mv, /* move object */
- kw_move, /* synonym for mv */
- kw_start,
- kw_stop,
- kw_makedev, /* make /dev/vinum devices */
- kw_setdaemon, /* set daemon flags */
- kw_getdaemon, /* get daemon flags */
- kw_help,
- kw_drive,
- kw_partition,
- kw_sd,
- kw_subdisk = kw_sd,
- kw_plex,
- kw_volume,
- kw_vol = kw_volume,
- kw_read,
- kw_readpol,
- kw_org,
- kw_name,
- kw_concat,
- kw_striped,
- kw_raid4,
- kw_raid5,
- kw_driveoffset,
- kw_plexoffset,
- kw_len,
- kw_length = kw_len,
- kw_size = kw_len,
- kw_state,
- kw_setupstate,
- kw_d, /* flag names */
- kw_f,
- kw_r,
- kw_s,
- kw_v,
- kw_w,
- kw_round, /* round robin */
- /*
- * The first of these is a volume attribute ("prefer plex"), and the
- * second is a plex attribute ("preferred" means that the volume
- * prefers this plex).
- */
- kw_prefer, /* prefer plex */
- kw_preferred, /* preferred plex */
- kw_device,
- kw_init,
- kw_resetconfig,
- kw_writethrough,
- kw_writeback,
- kw_replace,
- kw_resetstats,
- kw_attach,
- kw_detach,
- kw_rename,
- kw_printconfig,
- kw_saveconfig,
- kw_hotspare,
- kw_detached,
- kw_debug, /* go into debugger */
- kw_stripe,
- kw_mirror,
- kw_info,
- kw_quit,
- kw_max,
- kw_setstate,
- kw_checkparity,
- kw_rebuildparity,
- kw_dumpconfig,
- kw_retryerrors,
- kw_invalid_keyword = -1
-};
-
-struct _keywords { /* one keyword table entry: a name and its enum value */
- char *name; /* the keyword as text */
- enum keyword keyword; /* the enum keyword value it stands for */
-};
-
-struct keywordset { /* a table of struct _keywords entries */
- int size; /* presumably the number of entries in k -- confirm in vinumparser.c */
- struct _keywords *k; /* the entries themselves */
-};
-
-extern struct _keywords keywords[];
-extern struct _keywords flag_keywords[];
-
-extern struct keywordset keyword_set;
-extern struct keywordset flag_set;
-
-/* Parser functions */
-
-enum keyword get_keyword(char *, struct keywordset *);
-int tokenize(char *, char *[], int);
diff --git a/sys/dev/vinum/vinumlock.c b/sys/dev/vinum/vinumlock.c
deleted file mode 100644
index f1a2ea3..0000000
--- a/sys/dev/vinum/vinumlock.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumlock.c,v 1.19 2003/05/23 01:07:18 grog Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-/*
- * Lock a drive, wait if it's in use.
- *
- * Returns 0 when the lock has been taken, and also (after
- * logging a warning) when the drive is already locked by
- * the current process. Returns the tsleep() error if the
- * wait fails. With VINUMDEBUG the caller's file and line
- * are recorded in the drive.
- */
-#ifdef VINUMDEBUG
-int
-lockdrive(struct drive *drive, char *file, int line)
-#else
-int
-lockdrive(struct drive *drive)
-#endif
-{
- int error;
-
- /* XXX get rid of drive->flags |= VF_LOCKING; */
- if ((drive->flags & VF_LOCKED) /* it's locked */
- &&(drive->pid == curproc->p_pid)) { /* by us! */
-#ifdef VINUMDEBUG
- log(LOG_WARNING,
- "vinum lockdrive: already locking %s from %s:%d, called from %s:%d\n",
- drive->label.name,
- drive->lockfilename,
- drive->lockline,
- basename(file),
- line);
-#else
- log(LOG_WARNING,
- "vinum lockdrive: already locking %s\n",
- drive->label.name);
-#endif
- return 0; /* recursive attempt: report success without locking again */
- }
- while ((drive->flags & VF_LOCKED) != 0) {
- /*
- * There are problems sleeping on a unique identifier,
- * since the drive structure can move, and the unlock
- * function can be called after killing the drive.
- * Solve this by waiting on this function; the number
- * of conflicts is negligible.
- */
- if ((error = tsleep(&lockdrive,
- PRIBIO,
- "vindrv",
- 0)) != 0)
- return error;
- }
- drive->flags |= VF_LOCKED;
- drive->pid = curproc->p_pid; /* it's a panic error if curproc is null */
-#ifdef VINUMDEBUG
- bcopy(basename(file), drive->lockfilename, 15); /* NOTE(review): always reads 15 bytes, even if the name is shorter -- confirm this is harmless */
- drive->lockfilename[15] = '\0'; /* truncate if necessary */
- drive->lockline = line;
-#endif
- return 0;
-}
-
-/*
- * Release the lock on a drive and wake up everybody
- * sleeping on the lockdrive wait channel.
- */
-void
-unlockdrive(struct drive *drive)
-{
-    /* drive->pid is left alone: it's kept for historical interest only */
-    drive->flags = drive->flags & ~VF_LOCKED;
-    wakeup(&lockdrive);
-}
-
-/*
- * Lock a stripe of a plex, wait if it's in use.
- *
- * Sleeps while every entry of the plex's lock table is
- * taken, and while an entry for the same stripe belongs
- * to a different request (bp). Then records stripe + 1
- * and bp in a free table entry and returns a pointer to
- * that entry. The table is scanned with plex->lockmtx held.
- */
-struct rangelock *
-lockrange(daddr_t stripe, struct buf *bp, struct plex *plex)
-{
- struct rangelock *lock;
- struct rangelock *pos; /* position of first free lock */
- int foundlocks; /* number of locks found */
-
- /*
- * We could get by without counting the number
- * of locks we find, but we have a linear search
- * through a table which in most cases will be
- * empty. It's faster to stop when we've found
- * all the locks that are there. This is also
- * the reason why we put pos at the beginning
- * instead of the end, though it requires an
- * extra test.
- */
- pos = NULL;
- foundlocks = 0;
-
- /*
- * we can't use 0 as a valid address, so
- * increment all addresses by 1.
- */
- stripe++;
- mtx_lock(plex->lockmtx);
-
- /* Wait here if the table is full */
- while (plex->usedlocks == PLEX_LOCKS) /* all in use */
- msleep(&plex->usedlocks, plex->lockmtx, PRIBIO, "vlock", 0);
-
-#ifdef DIAGNOSTIC
- if (plex->usedlocks >= PLEX_LOCKS)
- panic("lockrange: Too many locks in use");
-#endif
-
- lock = plex->lock; /* pointer in lock table */
- if (plex->usedlocks > 0) /* something locked, */
- /* Search the lock table for our stripe */
- for (; lock < &plex->lock[PLEX_LOCKS]
- && foundlocks < plex->usedlocks;
- lock++) {
- if (lock->stripe) { /* in use */
- foundlocks++; /* found another one in use */
- if ((lock->stripe == stripe) /* it's our stripe */
- &&(lock->bp != bp)) { /* but not our request */
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LOCKREQS) {
- struct rangelockinfo lockinfo;
-
- lockinfo.stripe = stripe;
- lockinfo.bp = bp;
- lockinfo.plexno = plex->plexno;
- logrq(loginfo_lockwait, (union rqinfou) &lockinfo, bp);
- }
-#endif
- plex->lockwaits++; /* waited one more time */
- msleep(lock, plex->lockmtx, PRIBIO, "vrlock", 0);
- lock = &plex->lock[-1]; /* start again: the loop's lock++ brings this back to the first entry */
- foundlocks = 0;
- pos = NULL;
- }
- } else if (pos == NULL) /* still looking for somewhere? */
- pos = lock; /* a place to put this one */
- }
- /*
- * This untidy looking code ensures that we'll
- * always end up pointing to the first free lock
- * entry, thus minimizing the number of
- * iterations necessary.
- */
- if (pos == NULL) /* didn't find one on the way, */
- pos = lock; /* use the one we're pointing to */
-
- /*
- * The address range is free, and we're pointing
- * to the first unused entry. Make it ours.
- */
- pos->stripe = stripe;
- pos->bp = bp;
- plex->usedlocks++; /* one more lock */
- mtx_unlock(plex->lockmtx);
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LOCKREQS) {
- struct rangelockinfo lockinfo;
-
- lockinfo.stripe = stripe;
- lockinfo.bp = bp;
- lockinfo.plexno = plex->plexno;
- logrq(loginfo_lock, (union rqinfou) &lockinfo, bp);
- }
-#endif
- return pos;
-}
-
-/*
- * Release a range lock on plex plexno and wake up
- * anybody waiting for it. lock must point into that
- * plex's lock table (checked only under DIAGNOSTIC).
- */
-void
-unlockrange(int plexno, struct rangelock *lock)
-{
- struct plex *plex;
-
- plex = &PLEX[plexno];
-#ifdef DIAGNOSTIC
- if (lock < &plex->lock[0] || lock >= &plex->lock[PLEX_LOCKS])
- panic("vinum: rangelock %p on plex %d invalid, not between %p and %p",
- lock,
- plexno,
- &plex->lock[0],
- &plex->lock[PLEX_LOCKS]);
-#endif
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LOCKREQS) {
- struct rangelockinfo lockinfo;
-
- lockinfo.stripe = lock->stripe;
- lockinfo.bp = lock->bp;
- lockinfo.plexno = plex->plexno;
- logrq(loginfo_lockwait, (union rqinfou) &lockinfo, lock->bp); /* NOTE(review): logged as loginfo_lockwait, the code lockrange uses when it has to wait -- confirm this is intended for an unlock */
- }
-#endif
- lock->stripe = 0; /* no longer used. NOTE(review): the table is changed here without taking plex->lockmtx, which lockrange holds while scanning -- confirm */
- plex->usedlocks--; /* one less lock */
- if (plex->usedlocks == PLEX_LOCKS - 1) /* we were full, */
- wakeup(&plex->usedlocks); /* get a waiter if one's there */
- wakeup((void *) lock); /* wake anybody sleeping on this entry */
-}
-
-/*
- * Take the lock on the global configuration, sleeping
- * until it is free. Returns 0 once the lock is held, or
- * the error from tsleep() if the wait fails.
- */
-int
-lock_config(void)
-{
-    int error;
-
-    for (;;) {
-        if ((vinum_conf.flags & VF_LOCKED) == 0)    /* nobody has it */
-            break;
-        vinum_conf.flags |= VF_LOCKING;             /* tell the holder somebody is waiting */
-        error = tsleep(&vinum_conf, PRIBIO, "vincfg", 0);
-        if (error != 0)
-            return error;
-    }
-    vinum_conf.flags |= VF_LOCKED;
-    return 0;
-}
-
-/*
- * Release the lock on the global configuration. If a
- * waiter has announced itself with VF_LOCKING, clear
- * that flag and wake the waiters.
- */
-void
-unlock_config(void)
-{
-    vinum_conf.flags &= ~VF_LOCKED;
-    if ((vinum_conf.flags & VF_LOCKING) == 0)       /* nobody waiting */
-        return;
-    vinum_conf.flags &= ~VF_LOCKING;
-    wakeup(&vinum_conf);
-}
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinummemory.c b/sys/dev/vinum/vinummemory.c
deleted file mode 100644
index 43e1937..0000000
--- a/sys/dev/vinum/vinummemory.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinummemory.c,v 1.31 2003/05/23 01:08:36 grog Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <dev/vinum/vinumhdr.h>
-
-#ifdef VINUMDEBUG
-#include <dev/vinum/request.h>
-extern struct rqinfo rqinfo[];
-extern struct rqinfo *rqip;
-int rqinfo_size = RQINFO_SIZE; /* for debugger */
-
-#undef longjmp /* this was defined as LongJmp */
-#define strrchr rindex
-#ifdef __i386__ /* check for validity */
-/*
- * Checked replacement for longjmp (i386 debug builds only).
- *
- * Interprets buf according to the layout deduced below and panics
- * unless the saved stack pointer, frame pointer and return address are
- * all at or above 0xc0000000; otherwise it hands over to the real
- * longjmp with the same arguments.
- * NOTE(review): 0xc0000000 is presumably the start of kernel address
- * space on this platform -- confirm.
- */
-void
-LongJmp(jmp_buf buf, int retval)
-{
-/*
- * longjmp is not documented, not even jmp_buf.
- * This is what's in i386/i386/support.s:
- * ENTRY(longjmp)
- * movl 4(%esp),%eax
- * movl (%eax),%ebx restore ebx
- * movl 4(%eax),%esp restore esp
- * movl 8(%eax),%ebp restore ebp
- * movl 12(%eax),%esi restore esi
- * movl 16(%eax),%edi restore edi
- * movl 20(%eax),%edx get rta
- * movl %edx,(%esp) put in return frame
- * xorl %eax,%eax return(1);
- * incl %eax
- * ret
- *
- * from which we deduce the structure of jmp_buf:
- */
- struct JmpBuf {
- int jb_ebx;
- int jb_esp;
- int jb_ebp;
- int jb_esi;
- int jb_edi;
- int jb_eip;
- };
-
- struct JmpBuf *jb = (struct JmpBuf *) buf;
-
- /* the constant does not fit in an int, so these compare as unsigned */
- if ((jb->jb_esp < 0xc0000000)
- || (jb->jb_ebp < 0xc0000000)
- || (jb->jb_eip < 0xc0000000))
- panic("Invalid longjmp");
- longjmp(buf, retval);
-}
-
-#else /* not i386 */
-#define LongJmp longjmp /* just use the kernel function */
-#endif /* i386 */
-
-/*
- * Return the last component of a path name: the text following the
- * final '/', or the whole string if it contains no '/'.  The result
- * points into file; nothing is copied or modified.
- */
-char *
-basename(char *file)
-{
-    char *slash = strrchr(file, '/');       /* last directory separator, if any */
-
-    return (slash != NULL) ? slash + 1 : file;
-}
-#endif /* VINUMDEBUG */
-
-/*
- * Grow a table.
- *
- * If newsize is larger than oldsize, allocate newsize bytes, zero them,
- * copy the first oldsize bytes of the existing table (if there is one)
- * into the new memory, free the old table and store the new pointer in
- * *table.  If newsize is not larger than oldsize, do nothing.  Both
- * sizes are byte counts: they are passed unchanged to the allocator,
- * bzero and bcopy.
- *
- * The debug build takes the caller's file name and line number and
- * passes them to the tracking allocator.
- * NOTE(review): what CHECKALLOC does on allocation failure is not
- * visible here -- confirm before relying on temp being non-NULL.
- */
-#ifdef VINUMDEBUG
-void
-expand_table(void **table, int oldsize, int newsize, char *file, int line)
-#else
-void
-expand_table(void **table, int oldsize, int newsize)
-#endif
-{
- if (newsize > oldsize) {
- int *temp;
- int s;
-
- s = splhigh();
-#ifdef VINUMDEBUG
- temp = (int *) MMalloc(newsize, file, line); /* allocate a new table */
-#else
- temp = (int *) Malloc(newsize); /* allocate a new table */
-#endif
- CHECKALLOC(temp, "vinum: Can't expand table\n");
- bzero((char *) temp, newsize); /* clean it all out */
- if (*table != NULL) { /* already something there, */
- bcopy((char *) *table, (char *) temp, oldsize); /* copy the old contents to the new table */
-#ifdef VINUMDEBUG
- FFree(*table, file, line);
-#else
- Free(*table);
-#endif
- }
- *table = temp;
- splx(s);
- }
-}
-
-#ifdef VINUMDEBUG
-/*
- * Bookkeeping for the debug allocator (MMalloc / FFree).
- * malloced[] holds one entry per outstanding allocation; the first
- * malloccount entries are in use.
- */
-#define MALLOCENTRIES 16384
-int malloccount = 0;
-int highwater = 0; /* highest index ever allocated */
-struct mc malloced[MALLOCENTRIES];
-
-/*
- * Ring buffer of the most recent frees, filled by FFree when
- * DEBUG_MEMFREE is set; lastfree is the next slot to write.
- */
-#define FREECOUNT 64
-int freecount = FREECOUNT; /* for debugger */
-int lastfree = 0;
-struct mc freeinfo[FREECOUNT];
-
-int total_malloced; /* sum of the sizes of all outstanding allocations */
-static int mallocseq = 0; /* sequence number given to the next allocation */
-
-/*
- * Debug allocator: allocate size bytes from M_DEVBUF and record the
- * allocation in malloced[] (address, size, sequence number, time, and
- * the base name and line number of the caller).
- *
- * file and line identify the caller.  Returns the new memory, or NULL
- * if the tracking table is full or malloc fails.  The allocation waits
- * for memory (M_WAITOK) unless we are called from interrupt context.
- * Enters the debugger if the new block overlaps one that is already
- * recorded.
- */
-caddr_t
-MMalloc(int size, char *file, int line)
-{
-    int s;
-    caddr_t result;
-    int i;
-
-    if (malloccount >= MALLOCENTRIES) {     /* too many */
-        log(LOG_ERR, "vinum: can't allocate table space to trace memory allocation");
-        return NULL;                        /* can't continue */
-    }
-    /* Wait for malloc if we can */
-    result = malloc(size,
-        M_DEVBUF,
-        curthread->td_intr_nesting_level == 0 ? M_WAITOK : M_NOWAIT);
-    if (result == NULL) {
-        log(LOG_ERR, "vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
-        return NULL;
-    }
-    s = splhigh();
-    /*
-     * The check above was made before an allocation that may have
-     * slept, so the table can have filled up in the meantime.  Check
-     * again now that we are about to take a slot, rather than write
-     * past the end of malloced[].
-     */
-    if (malloccount >= MALLOCENTRIES) {
-        splx(s);
-        free(result, M_DEVBUF);
-        log(LOG_ERR, "vinum: can't allocate table space to trace memory allocation");
-        return NULL;
-    }
-    for (i = 0; i < malloccount; i++) {
-        if (((result + size) > malloced[i].address)
-            && (result < malloced[i].address + malloced[i].size)) /* overlap */
-            kdb_enter("Malloc overlap");
-    }
-    i = malloccount++;                      /* take the next free slot */
-    total_malloced += size;
-    microtime(&malloced[i].time);
-    malloced[i].seq = mallocseq++;
-    malloced[i].size = size;
-    malloced[i].line = line;
-    malloced[i].address = result;
-    strlcpy(malloced[i].file, basename(file), MCFILENAMELEN);
-    if (malloccount > highwater)
-        highwater = malloccount;
-    splx(s);
-    return result;
-}
-
-/*
- * Debug counterpart of MMalloc: free mem and remove it from
- * malloced[].
- *
- * The memory is zeroed before it is freed.  If DEBUG_MEMFREE is set,
- * the free is also recorded in the freeinfo[] ring buffer together
- * with the base name of file and line, which identify the caller.
- * If mem is not in the table, log an error and enter the debugger.
- */
-void
-FFree(void *mem, char *file, int line)
-{
-    int s;
-    int i;
-
-    s = splhigh();
-    for (i = 0; i < malloccount; i++) {
-        if ((caddr_t) mem != malloced[i].address)
-            continue;
-        /* found it */
-        bzero(mem, malloced[i].size);       /* XXX */
-        free(mem, M_DEVBUF);
-        malloccount--;
-        total_malloced -= malloced[i].size;
-        if (debug & DEBUG_MEMFREE) {        /* keep track of recent frees */
-            microtime(&freeinfo[lastfree].time);
-            freeinfo[lastfree].seq = malloced[i].seq;
-            freeinfo[lastfree].size = malloced[i].size;
-            freeinfo[lastfree].line = line;
-            freeinfo[lastfree].address = mem;
-            /*
-             * Bounded string copy, as in MMalloc: never read past the
-             * end of the file name and always terminate the copy.
-             */
-            strlcpy(freeinfo[lastfree].file, basename(file), MCFILENAMELEN);
-            if (++lastfree == FREECOUNT)    /* wrap around */
-                lastfree = 0;
-        }
-        if (i < malloccount)                /* more coming after, close the gap */
-            bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
-        splx(s);
-        return;
-    }
-    splx(s);
-    log(LOG_ERR,
-        "Freeing unallocated data at 0x%p from %s, line %d\n",
-        mem,
-        file,
-        line);
-    kdb_enter("Free");
-}
-
-/*
- * Fill in the struct meminfo at data with the current state of the
- * debug allocator: number of outstanding allocations, their total
- * size, the high water mark and the address of the tracking table.
- */
-void
-vinum_meminfo(caddr_t data)
-{
-    struct meminfo *info = (struct meminfo *) data;
-
-    info->malloced = malloced;
-    info->mallocs = malloccount;
-    info->highwater = highwater;
-    info->total_malloced = total_malloced;
-}
-
-/*
- * Return one entry of the allocation tracking table.
- *
- * On entry the seq field of the struct mc at data holds the index of
- * the entry wanted.  On success the address, size, line, seq and file
- * fields are overwritten with those of the entry and 0 is returned.
- * Returns ENOENT if the index is not below the number of entries in
- * use.
- */
-int
-vinum_mallocinfo(caddr_t data)
-{
-    struct mc *reply = (struct mc *) data;
-    unsigned int idx = reply->seq;          /* index of entry to return */
-    struct mc *src;
-
-    if (idx >= malloccount)
-        return ENOENT;
-    src = &malloced[idx];
-    reply->seq = src->seq;
-    reply->line = src->line;
-    reply->size = src->size;
-    reply->address = src->address;
-    strlcpy(reply->file, src->file, MCFILENAMELEN);
-    return 0;
-}
-
-/*
- * Return the nth request trace buffer entry.  This is indexed back
- * from the current entry (which has index 0).
- *
- * On entry the first int at data is the index wanted; on success the
- * whole struct rqinfo at data is overwritten with the entry and 0 is
- * returned.  Returns ENOENT if the index is negative or not below
- * RQINFO_SIZE.
- */
-int
-vinum_rqinfo(caddr_t data)
-{
-    struct rqinfo *rq = (struct rqinfo *) data;
-    int ent = *(int *) data;                /* 1st word is index */
-    int lastent = rqip - rqinfo;            /* entry number of current entry */
-
-    /*
-     * The index comes from the caller.  A negative value would make
-     * the subtraction below produce an index beyond the end of the
-     * table, so reject it along with values that are too large.
-     */
-    if (ent < 0 || ent >= RQINFO_SIZE)      /* out of the table */
-        return ENOENT;
-    if ((ent = lastent - ent - 1) < 0)
-        ent += RQINFO_SIZE;                 /* roll over backwards */
-    bcopy(&rqinfo[ent], rq, sizeof(struct rqinfo));
-    return 0;
-}
-#endif
diff --git a/sys/dev/vinum/vinumobj.h b/sys/dev/vinum/vinumobj.h
deleted file mode 100644
index d6a4d87..0000000
--- a/sys/dev/vinum/vinumobj.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumobj.h,v 1.7 2003/05/23 01:08:58 grog Exp $
- * $FreeBSD$
- */
-
-/*
- * Definitions of Vinum objects: drive, subdisk, plex and volume.
- * This file is included both by userland programs and by kernel code.
- * The userland structures are a subset of the kernel structures, and
- * all userland fields are at the beginning, so that a simple copy in
- * the length of the userland structure will be sufficient. In order
- * to perform this copy, vinumioctl must know both structures, so it
- * includes this file again with _KERNEL reset.
- */
-
-#ifndef _KERNEL
-/*
- * Flags for all objects. Most of them only apply
- * to specific objects, but we currently have
- * space for all in any 32 bit flags word.
- * Each value is a distinct single bit, so flags can be combined
- * with | and tested with &.
- */
-enum objflags {
- VF_LOCKED = 1, /* somebody has locked access to this object */
- VF_LOCKING = 2, /* we want access to this object */
- VF_OPEN = 4, /* object has openers */
- VF_WRITETHROUGH = 8, /* volume: write through */
- VF_INITED = 0x10, /* unit has been initialized */
- VF_WLABEL = 0x20, /* label area is writable */
- VF_LABELLING = 0x40, /* unit is currently being labelled */
- VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
- VF_RAW = 0x100, /* raw volume (no file system) */
- VF_LOADED = 0x200, /* module is loaded */
- VF_CONFIGURING = 0x400, /* somebody is changing the config */
- VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
- VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
- VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
- VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
- VF_FORCECONFIG = 0x8000, /* configure drives even with different names */
- VF_NEWBORN = 0x10000, /* for objects: we've just created it */
- VF_CONFIGURED = 0x20000, /* for drives: we read the config */
- VF_STOPPING = 0x40000, /* for vinum_conf: stop on last close */
- VF_DAEMONOPEN = 0x80000, /* the daemon has us open (only superdev) */
- VF_CREATED = 0x100000, /* for volumes: freshly created, more than new */
- VF_HOTSPARE = 0x200000, /* for drives: use as hot spare */
- VF_RETRYERRORS = 0x400000, /* don't down subdisks on I/O errors */
- VF_HASDEBUG = 0x800000, /* set if we support debug */
-};
-
-#endif
-
-/*
- * Global configuration information for the vinum subsystem.
- *
- * Compiled as struct _vinum_conf in the kernel and as struct
- * __vinum_conf in userland.  The kernel-only debug members are at the
- * end, so that the userland structure is a prefix of the kernel one;
- * do not add or reorder members in front of them.
- */
-#ifdef _KERNEL
-struct _vinum_conf
-#else
-struct __vinum_conf
-#endif
-{
- int version; /* version of structures */
-#ifdef _KERNEL
- /* Pointers to vinum structures */
- struct drive *drive;
- struct sd *sd;
- struct plex *plex;
- struct volume *volume;
-#else
- /* Pointers to vinum structures */
- struct _drive *drive;
- struct _sd *sd;
- struct _plex *plex;
- struct _volume *volume;
-#endif
-
- /* the number allocated of each object */
- int drives_allocated;
- int subdisks_allocated;
- int plexes_allocated;
- int volumes_allocated;
-
- /* and the number currently in use */
- /*
- * Note that drives_used is not valid during drive recognition
- * (vinum_scandisk and friends). Many invalid drives are added and
- * later removed; the count isn't correct until we leave
- * vinum_scandisk.
- */
- int drives_used;
- int subdisks_used;
- int plexes_used;
- int volumes_used;
-
- int flags; /* VF_* flags, see enum objflags above */
-
-#define VINUM_MAXACTIVE 30000 /* maximum number of active requests */
- int active; /* current number of requests outstanding */
- int maxactive; /* maximum number of requests ever outstanding */
-#ifdef _KERNEL
-#ifdef VINUMDEBUG
- struct request *lastrq;
- struct buf *lastbuf;
-#endif
-#endif
-};
-
-/* Use these defines to simplify code */
-#define DRIVE vinum_conf.drive
-#define SD vinum_conf.sd
-#define PLEX vinum_conf.plex
-#define VOL vinum_conf.volume
-#define VFLAGS vinum_conf.flags
-
-/*
- * A drive corresponds to a disk slice. We use a different term to show
- * the difference in usage: it doesn't have to be a slice, and could
- * theoretically be a complete, unpartitioned disk
- *
- * Compiled as struct drive in the kernel and as struct _drive in
- * userland.  The kernel-only members are at the end, so that the
- * userland structure is a prefix of the kernel one; do not add or
- * reorder members in front of them.
- */
-
-#ifdef _KERNEL
-struct drive
-#else
-struct _drive
-#endif
-{
- char devicename[MAXDRIVENAME]; /* name of the slice it's on */
- struct vinum_label label; /* and the label information */
- enum drivestate state; /* current state */
- int flags; /* flags */
- int subdisks_allocated; /* number of entries in sd */
- int subdisks_used; /* and the number used */
- int blocksize; /* size of fs blocks */
- int pid; /* of locker */
- u_int64_t sectors_available; /* number of sectors still available */
- int secsperblock;
- int lasterror; /* last error on drive */
- int driveno; /* index of drive in vinum_conf */
- int opencount; /* number of up subdisks */
- u_int64_t reads; /* number of reads on this drive */
- u_int64_t writes; /* number of writes on this drive */
- u_int64_t bytes_read; /* number of bytes read */
- u_int64_t bytes_written; /* number of bytes written */
-#define DRIVE_MAXACTIVE 30000 /* maximum number of active requests */
- int active; /* current number of requests outstanding */
- int maxactive; /* maximum number of requests ever outstanding */
- int freelist_size; /* number of entries alloced in free list */
- int freelist_entries; /* number of entries used in free list */
- struct drive_freelist *freelist; /* sorted list of free space on drive */
-#ifdef _KERNEL
- /* kernel-only members from here on */
- u_int sectorsize;
- off_t mediasize;
- struct cdev *dev; /* device information */
-#ifdef VINUMDEBUG
- char lockfilename[16]; /* name of file from which we were locked */
- int lockline; /* and the line number */
-#endif
-#endif
-};
-
-/*
- * A subdisk: a range of sectors on a drive (driveno, driveoffset,
- * sectors) which may belong to a plex (plexno, plexoffset).
- *
- * Compiled as struct sd in the kernel and as struct _sd in userland.
- * The kernel-only members are at the end, so that the userland
- * structure is a prefix of the kernel one; do not add or reorder
- * members in front of them.
- */
-#ifdef _KERNEL
-struct sd
-#else
-struct _sd
-#endif
-{
- char name[MAXSDNAME]; /* name of subdisk */
- enum sdstate state; /* state */
- int flags;
- int lasterror; /* last error occurred */
- /* offsets in blocks */
- int64_t driveoffset; /* offset on drive */
- /*
- * plexoffset is the offset from the beginning
- * of the plex to the very first part of the
- * subdisk, in sectors. For striped, RAID-4 and
- * RAID-5 plexes, only the first stripe is
- * located at this offset
- */
- int64_t plexoffset; /* offset in plex */
- u_int64_t sectors; /* and length in sectors */
- int sectorsize; /* sector size for DIOCGSECTORSIZE */
- int plexno; /* index of plex, if it belongs */
- int driveno; /* index of the drive on which it is located */
- int sdno; /* our index in vinum_conf */
- int plexsdno; /* and our number in our plex */
- /* (undefined if no plex) */
- u_int64_t reads; /* number of reads on this subdisk */
- u_int64_t writes; /* number of writes on this subdisk */
- u_int64_t bytes_read; /* number of bytes read */
- u_int64_t bytes_written; /* number of bytes written */
- /* revive parameters */
- u_int64_t revived; /* block number of current revive request */
- int revive_blocksize; /* revive block size (bytes) */
- int revive_interval; /* and time to wait between transfers */
- pid_t reviver; /* PID of reviving process */
- /* init parameters */
- u_int64_t initialized; /* block number of current init request */
- int init_blocksize; /* init block size (bytes) */
- int init_interval; /* and time to wait between transfers */
-#ifdef _KERNEL
- /* kernel-only members from here on */
- struct request *waitlist; /* list of requests waiting on revive op */
- struct cdev *dev; /* associated device */
-#endif
-};
-
-/*
- * A plex: a set of subdisks (sdnos) in a given organization, which may
- * belong to a volume (volno).
- *
- * Compiled as struct plex in the kernel and as struct _plex in
- * userland.  The kernel-only members are at the end, so that the
- * userland structure is a prefix of the kernel one; do not add or
- * reorder members in front of them.
- */
-#ifdef _KERNEL
-struct plex
-#else
-struct _plex
-#endif
-{
- enum plexorg organization; /* Plex organization */
- enum plexstate state; /* and current state */
- u_int64_t length; /* total length of plex (sectors) */
- int flags;
- int stripesize; /* size of stripe or raid band, in sectors */
- int sectorsize; /* sector size for DIOCGSECTORSIZE */
- int subdisks; /* number of associated subdisks */
- int subdisks_allocated; /* number of subdisks allocated space for */
- int *sdnos; /* list of component subdisks */
- int plexno; /* index of plex in vinum_conf */
- int volno; /* index of volume */
- int volplexno; /* number of plex in volume */
- /* Statistics */
- u_int64_t reads; /* number of reads on this plex */
- u_int64_t writes; /* number of writes on this plex */
- u_int64_t bytes_read; /* number of bytes read */
- u_int64_t bytes_written; /* number of bytes written */
- u_int64_t recovered_reads; /* number of recovered read operations */
- u_int64_t degraded_writes; /* number of degraded writes */
- u_int64_t parityless_writes; /* number of parityless writes */
- u_int64_t multiblock; /* requests that needed more than one block */
- u_int64_t multistripe; /* requests that needed more than one stripe */
- int sddowncount; /* number of subdisks down */
- /* Lock information */
- int usedlocks; /* number currently in use */
- int lockwaits; /* and number of waits for locks */
- off_t checkblock; /* block number for parity op */
- char name[MAXPLEXNAME]; /* name of plex */
-#ifdef _KERNEL
- /* kernel-only members from here on */
- struct rangelock *lock; /* ranges of locked addresses */
- struct mtx *lockmtx; /* lock mutex, one of plexmutex [] */
- daddr_t last_addr; /* last address read from this plex */
- struct cdev *dev; /* associated device */
-#endif
-};
-
-/*
- * A volume: up to MAXPLEX plexes, referred to by index in plex[].
- *
- * Compiled as struct volume in the kernel and as struct _volume in
- * userland.  The kernel-only member is at the end, so that the
- * userland structure is a prefix of the kernel one; do not add or
- * reorder members in front of it.
- */
-#ifdef _KERNEL
-struct volume
-#else
-struct _volume
-#endif
-{
- char name[MAXVOLNAME]; /* name of volume */
- enum volumestate state; /* current state */
- int plexes; /* number of plexes */
- int preferred_plex; /* index of plex to read from,
- * -1 for round-robin */
- /*
- * index of plex used for last read, for
- * round-robin.
- */
- int last_plex_read;
- int volno; /* volume number */
- int flags; /* status and configuration flags */
- int openflags; /* flags supplied to last open(2) */
- u_int64_t size; /* size of volume */
- int blocksize; /* logical block size */
- int sectorsize; /* sector size for DIOCGSECTORSIZE */
- int active; /* number of outstanding requests active */
- int subops; /* and the number of suboperations */
- /* Statistics */
- u_int64_t bytes_read; /* number of bytes read */
- u_int64_t bytes_written; /* number of bytes written */
- u_int64_t reads; /* number of reads on this volume */
- u_int64_t writes; /* number of writes on this volume */
- u_int64_t recovered_reads; /* reads recovered from another plex */
- /*
- * Unlike subdisks in the plex, space for the
- * plex pointers is static.
- */
- int plex[MAXPLEX]; /* index of plexes */
-#ifdef _KERNEL
- /* kernel-only member */
- struct cdev *dev; /* associated device */
-#endif
-};
diff --git a/sys/dev/vinum/vinumparser.c b/sys/dev/vinum/vinumparser.c
deleted file mode 100644
index 49da34b..0000000
--- a/sys/dev/vinum/vinumparser.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumparser.c,v 1.25 2003/05/07 03:33:28 grog Exp grog $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * This file contains the parser for the configuration routines. It's used
- * both in the kernel and in the user interface program, thus the separate file.
- */
-
-/*
- * Go through a text and split up into text tokens. These are either non-blank
- * sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
- * " characters may be escaped by \, which otherwise has no special meaning.
- *
- * Delimit by following with a \0, and return pointers to the starts at token [].
- * Return the number of tokens found as the return value.
- *
- * This method has the restriction that a closing " or ' must be followed by
- * grey space.
- *
- * Error conditions are end of line before end of quote, or no space after
- * a closing quote. In this case, tokenize() returns -1.
- */
-
-#include <sys/param.h>
-#include <dev/vinum/vinumkw.h>
-#ifdef _KERNEL
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <machine/setjmp.h>
-/* All this mess for a single struct definition */
-#include <sys/uio.h>
-#include <sys/namei.h>
-#include <sys/mount.h>
-
-#include <dev/vinum/vinumvar.h>
-#include <dev/vinum/vinumio.h>
-#include <dev/vinum/vinumext.h>
-#define iswhite(c) ((c == ' ') || (c == '\t')) /* check for white space */
-#else /* userland */
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <string.h>
-#define iswhite isspace /* use the ctype macro */
-#endif
-
-/* enum keyword is defined in vinumvar.h */
-
-#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
-#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
-#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
-
-/*
- * Normal keywords. These are all the words that vinum knows.
- *
- * Each keypair(x) entry maps the string "x" to the enum value kw_x.
- * keyword_set bundles the table with its entry count for get_keyword,
- * which returns the first entry whose name matches.
- * NOTE(review): "replace" and "readpol" each appear twice below; the
- * second occurrence can never be the first match -- confirm that the
- * duplicates are unintentional before removing them.
- */
-struct _keywords keywords[] =
-{keypair(drive),
- keypair(partition),
- keypair(sd),
- keypair(subdisk),
- keypair(plex),
- keypair(volume),
- keypair(vol),
- keypair(setupstate),
- keypair(readpol),
- keypair(org),
- keypair(name),
- keypair(writethrough),
- keypair(writeback),
- keypair(device),
- keypair(concat),
- keypair(raid4),
- keypair(raid5),
- keypair(striped),
- keypair(plexoffset),
- keypair(driveoffset),
- keypair(length),
- keypair(len),
- keypair(size),
- keypair(state),
- keypair(round),
- keypair(prefer),
- keypair(preferred),
- keypair(rename),
- keypair(detached),
-#ifndef _KERNEL /* for vinum(8) only */
- keypair(debug),
- keypair(stripe),
- keypair(mirror),
-#endif
- keypair(attach),
- keypair(detach),
- keypair(printconfig),
- keypair(saveconfig),
- keypair(replace),
- keypair(create),
- keypair(read),
- keypair(modify),
- keypair(list),
- keypair(l),
- keypair(ld),
- keypair(ls),
- keypair(lp),
- keypair(lv),
- keypair(info),
- keypair(set),
- keypair(rm),
- keypair(mv),
- keypair(move),
- keypair(init),
- keypair(resetconfig),
- keypair(start),
- keypair(stop),
- keypair(makedev),
- keypair(help),
- keypair(quit),
- keypair(setdaemon),
- keypair(getdaemon),
- keypair(max),
- keypair(replace),
- keypair(readpol),
- keypair(resetstats),
- keypair(setstate),
- keypair(checkparity),
- keypair(rebuildparity),
- keypair(dumpconfig),
- keypair(retryerrors)
-};
-struct keywordset keyword_set = KEYWORDSET(keywords);
-
-#ifndef _KERNEL
-/*
- * Option flags (userland only).  Each flagkeypair(x) entry maps the
- * string "-x" to the enum value kw_x; flag_set bundles the table with
- * its entry count.
- */
-struct _keywords flag_keywords[] =
-{flagkeypair(f),
- flagkeypair(d),
- flagkeypair(v),
- flagkeypair(s),
- flagkeypair(r),
- flagkeypair(w)
-};
-struct keywordset flag_set = KEYWORDSET(flag_keywords);
-
-#endif
-
-/*
- * Take a blank separated list of tokens and turn it into a list of
- * individual nul-delimited strings.  Build a list of pointers at
- * token, which must have space for maxtoken entries.
- *
- * The text at cptr is modified in place.  Scanning stops at the end of
- * the string, at a newline, at a '#' which starts a token, or when
- * maxtoken tokens have been found; in the last case the final token is
- * not delimited.  A token starting with ' or " extends to the next
- * unescaped occurrence of the same quote, which must be followed by
- * white space; the token keeps both of its quote characters.
- *
- * Returns the number of tokens, or -1 on error (a missing closing
- * quote, or no white space after a closing quote).
- */
-int
-tokenize(char *cptr, char *token[], int maxtoken)
-{
-    char delim;                             /* delimiter for searching for the partner */
-    int tokennr;                            /* index of this token */
-
-    for (tokennr = 0; tokennr < maxtoken;) {
-        while (iswhite(*cptr))
-            cptr++;                         /* skip initial white space */
-        if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
-            return tokennr;                 /* return number of tokens found */
-        delim = *cptr;
-        token[tokennr] = cptr;              /* point to it */
-        tokennr++;                          /* one more */
-        if (tokennr == maxtoken)            /* run off the end? */
-            return tokennr;
-        if ((delim == '\'') || (delim == '"')) { /* delimitered */
-            for (;;) {
-                cptr++;
-                if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
-                    cptr++;                 /* move on past */
-                    if (!iswhite(*cptr))    /* error, no space after closing quote */
-                        return -1;
-                    *cptr++ = '\0';         /* delimit */
-                    /*
-                     * This token is complete.  Without leaving the
-                     * loop here we would go on scanning the rest of
-                     * the line as if still inside the quotes, and fail.
-                     */
-                    break;
-                } else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
-                    return -1;
-            }
-        } else {                            /* not quoted */
-            while ((*cptr != '\0') && (!iswhite(*cptr)) && (*cptr != '\n'))
-                cptr++;
-            if (*cptr != '\0')              /* not end of the line, */
-                *cptr++ = '\0';             /* delimit and move to the next */
-        }
-    }
-    return maxtoken;                        /* can't get here */
-}
-
-/*
- * Look up name in keywordset and return the keyword value of the first
- * entry whose name matches exactly.  Returns kw_invalid_keyword if name
- * is NULL or is not in the set.
- */
-enum keyword
-get_keyword(char *name, struct keywordset *keywordset)
-{
-    struct _keywords *entry = keywordset->k; /* first entry of the table */
-    int left;
-
-    if (name == NULL)                       /* no parameter */
-        return kw_invalid_keyword;
-    for (left = keywordset->size; left > 0; left--, entry++) {
-        if (strcmp(name, entry->name) == 0)
-            return (enum keyword) entry->keyword;
-    }
-    return kw_invalid_keyword;
-}
diff --git a/sys/dev/vinum/vinumraid5.c b/sys/dev/vinum/vinumraid5.c
deleted file mode 100644
index 4bdd64f..0000000
--- a/sys/dev/vinum/vinumraid5.c
+++ /dev/null
@@ -1,700 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Cybernet Corporation and Nan Yang Computer Services Limited.
- * All rights reserved.
- *
- * This software was developed as part of the NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Cybernet Corporation
- * and Nan Yang Computer Services Limited
- * 4. Neither the name of the Companies nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumraid5.c,v 1.23 2003/02/08 03:32:45 grog Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-#include <sys/resourcevar.h>
-
-/*
- * Parameters which describe the current transfer.
- * These are only used for calculation, but they
- * need to be passed to other functions, so it's
- * tidier to put them in a struct
- */
-/*
- * Working values for one pass of the stripe loop in bre5().
- * bre5() recalculates them for every stripe touched by the
- * request and hands them to setrqebounds() to fill in each
- * request element.
- */
-struct metrics {
- daddr_t stripebase; /* plex-relative address of the start of the stripe */
- int stripeoffset; /* offset of the start address from the start of the stripe */
- int stripesectors; /* total sectors to transfer in this stripe */
- daddr_t sdbase; /* offset in subdisk of stripe base */
- int sdcount; /* number of data subdisks involved in this transfer; bre5 adds 1 when it builds a parity request */
- daddr_t diskstart; /* remember where this transfer starts (*diskaddr on entry to bre5) */
- int psdno; /* number of parity subdisk (index into plex->sdnos) */
- int badsdno; /* number of down subdisk, if there is one, otherwise -1 */
- int firstsdno; /* first data subdisk number */
- /* These correspond to the fields in rqelement, sort of */
- int useroffset; /* offset of the current block in the user buffer */
- /*
- * Initial offset and length values for the first
- * data block
- */
- int initoffset; /* start address of block to transfer */
- /* NOTE(review): initlen is a short, the other lengths are int; confirm that a stripe size always fits */
- short initlen; /* length in sectors of data transfer */
- /* Define a normal operation */
- int dataoffset; /* start address of block to transfer */
- int datalen; /* length in sectors of data transfer */
- /* Define a group operation */
- int groupoffset; /* subdisk offset of group operation */
- int grouplen; /* length in sectors of group operation */
- /* Define a normal write operation */
- int writeoffset; /* subdisk offset of normal write */
- int writelen; /* length in sectors of write operation */
- enum xferinfo flags; /* to check what we're doing */
- int rqcount; /* number of elements in request */
-};
-
-/*
- * Forward declarations.  bre5 and setrqebounds are defined
- * below; complete_raid5_write and build_rq_buffer are not
- * defined in this part of the file.
- */
-/* Build the request groups for a RAID-4/RAID-5 plex; the definition names the third parameter diskaddr */
-enum requeststatus bre5(struct request *rq,
- int plexno,
- daddr_t * diskstart,
- daddr_t diskend);
-void complete_raid5_write(struct rqelement *);
-/* bre5 treats a non-zero return from build_rq_buffer as an allocation failure */
-enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
-/* Set the offsets and lengths of one request element from the metrics */
-void setrqebounds(struct rqelement *rqe, struct metrics *mp);
-
-/*
- * Define the low-level requests needed to perform
- * a high-level I/O operation for a specific RAID-4
- * or RAID-5 plex 'plexno'.
- *
- * The range from *diskaddr to diskend is handled
- * one stripe at a time.  For each stripe a request
- * group is allocated and filled with a parity
- * request (if the operation needs parity), the
- * data requests, and, for recovery operations,
- * requests for the remaining subdisks.
- *
- * Return values, as coded below:
- *
- * REQUEST_OK: requests were built for the whole
- * range.
- * REQUEST_EOF: *diskaddr is at or beyond the end
- * of the plex (plex->length).
- * REQUEST_DOWN: a read found a subdisk which is
- * not available when another subdisk of the
- * stripe (parity included) was already known
- * to be unavailable.  A single unavailable
- * subdisk is handled by a recovery read.
- * REQUEST_ENOMEM: allocrqg() returned NULL or
- * build_rq_buffer() returned non-zero.
- *
- * A write which finds a second unavailable subdisk
- * returns the status reported by checksdstate().
- *
- * Modify *diskaddr to point to the end address:
- * it is advanced past each block for which a data
- * request has been built.
- *
- * On entry to this routine, the prq structures
- * are not assigned. The assignment is performed
- * by expandrq(). Strictly speaking, the elements
- * rqe->sdno of all entries should be set to -1,
- * since 0 (from bzero) is a valid subdisk number.
- * We avoid this problem by initializing the ones
- * we use, and not looking at the others (index >=
- * prq->requests).
- */
-enum requeststatus
-bre5(struct request *rq,
- int plexno,
- daddr_t * diskaddr,
- daddr_t diskend)
-{
- struct metrics m; /* most of the information */
- struct sd *sd;
- struct plex *plex;
- struct buf *bp; /* user's bp */
- struct rqgroup *rqg; /* the request group that we will create */
- struct rqelement *rqe; /* point to this request information */
- int rsectors; /* sectors remaining in this stripe */
- int mysdno; /* another sd index in loops */
- int rqno; /* request number */
-
- rqg = NULL; /* initialized only to keep the compiler quiet */
- m.diskstart = *diskaddr; /* start of transfer */
- bp = rq->bp; /* buffer pointer */
- plex = &PLEX[plexno]; /* point to the plex */
-
-
- while (*diskaddr < diskend) { /* until we get it all sorted out */
- if (*diskaddr >= plex->length) /* beyond the end of the plex */
- return REQUEST_EOF; /* can't continue */
-
- m.badsdno = -1; /* no bad subdisk yet */
-
- /* Part A: Define the request */
- /*
- * First, calculate some sizes:
- * The offset of the start address from
- * the start of the stripe.  One stripe holds
- * stripesize sectors on each of the
- * (subdisks - 1) data subdisks.
- */
- m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));
-
- /*
- * The plex-relative address of the
- * start of the stripe.
- */
- m.stripebase = *diskaddr - m.stripeoffset;
-
- /*
- * subdisk containing the parity stripe: for RAID-5 it
- * moves down by one subdisk with each successive
- * stripe, for RAID-4 it is always the last subdisk
- */
- if (plex->organization == plex_raid5)
- m.psdno = plex->subdisks - 1
- - (*diskaddr / (plex->stripesize * (plex->subdisks - 1)))
- % plex->subdisks;
- else /* RAID-4 */
- m.psdno = plex->subdisks - 1;
-
- /*
- * The number of the subdisk in which
- * the start is located.
- */
- m.firstsdno = m.stripeoffset / plex->stripesize;
- if (m.firstsdno >= m.psdno) /* at or past parity sd */
- m.firstsdno++; /* increment it */
-
- /*
- * The offset from the beginning of
- * the stripe on this subdisk.
- */
- m.initoffset = m.stripeoffset % plex->stripesize;
-
- /* The offset of the stripe start relative to this subdisk */
- m.sdbase = m.stripebase / (plex->subdisks - 1);
-
- m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */
-
- /*
- * The number of sectors to transfer in the
- * current (first) subdisk.
- */
- m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */
- plex->stripesize - m.initoffset); /* and the amount left in this block */
-
- /*
- * The number of sectors to transfer in this stripe
- * is the minimum of the amount remaining to transfer
- * and the amount left in this stripe.
- */
- m.stripesectors = min(diskend - *diskaddr,
- plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);
-
- /* The number of data subdisks involved in this request */
- m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;
-
- /* Part B: decide what kind of transfer this will be.
- *
- * start and end addresses of the transfer in
- * the current block.
- *
- * There are a number of different kinds of
- * transfer, each of which relates to a
- * specific subdisk:
- *
- * 1. Normal read. All participating subdisks
- * are up, and the transfer can be made
- * directly to the user buffer. The bounds
- * of the transfer are described by
- * m.dataoffset and m.datalen. We have
- * already calculated m.initoffset and
- * m.initlen, which define the parameters
- * for the first data block.
- *
- * 2. Recovery read. One participating
- * subdisk is down. To recover data, all
- * the other subdisks, including the parity
- * subdisk, must be read. The data is
- * recovered by exclusive-oring all the
- * other blocks. The bounds of the
- * transfer are described by m.groupoffset
- * and m.grouplen.
- *
- * 3. A read request may request reading both
- * available data (normal read) and
- * non-available data (recovery read).
- * This can be a problem if the address
- * ranges of the two reads do not coincide:
- * in this case, the normal read needs to
- * be extended to cover the address range
- * of the recovery read, and must thus be
- * performed out of malloced memory.
- *
- * 4. Normal write. All the participating
- * subdisks are up. The bounds of the
- * transfer are described by m.dataoffset
- * and m.datalen. Since these values
- * differ for each block, we calculate the
- * bounds for the parity block
- * independently as the maximum of the
- * individual blocks and store these values
- * in m.writeoffset and m.writelen. This
- * write proceeds in four phases:
- *
- * i. Read the old contents of each block
- * and the parity block.
- * ii. ``Remove'' the old contents from
- * the parity block with exclusive or.
- * iii. ``Insert'' the new contents of the
- * block in the parity block, again
- * with exclusive or.
- *
- * iv. Write the new contents of the data
- * blocks and the parity block. The data
- * block transfers can be made directly from
- * the user buffer.
- *
- * 5. Degraded write where the data block is
- * not available. The bounds of the
- * transfer are described by m.groupoffset
- * and m.grouplen. This requires the
- * following steps:
- *
- * i. Read in all the other data blocks,
- * excluding the parity block.
- *
- * ii. Recreate the parity block from the
- * other data blocks and the data to be
- * written.
- *
- * iii. Write the parity block.
- *
- * 6. Parityless write, a write where the
- * parity block is not available. This is
- * in fact the simplest: just write the
- * data blocks. This can proceed directly
- * from the user buffer. The bounds of the
- * transfer are described by m.dataoffset
- * and m.datalen.
- *
- * 7. Combination of degraded data block write
- * and normal write. In this case the
- * address ranges of the reads may also
- * need to be extended to cover all
- * participating blocks.
- *
- * All requests in a group transfer transfer
- * the same address range relative to their
- * subdisk. The individual transfers may
- * vary, but since our group of requests is
- * all in a single slice, we can define a
- * range in which they all fall.
- *
- * In the following code section, we determine
- * which kind of transfer we will perform. If
- * there is a group transfer, we also decide
- * its bounds relative to the subdisks. At
- * the end, we have the following values:
- *
- * m.flags indicates the kinds of transfers
- * we will perform.
- * m.initoffset indicates the offset of the
- * beginning of any data operation relative
- * to the beginning of the stripe base.
- * m.initlen specifies the length of any data
- * operation.
- * m.dataoffset contains the same value as
- * m.initoffset.
- * m.datalen contains the same value as
- * m.initlen. Initially dataoffset and
- * datalen describe the parameters for the
- * first data block; while building the data
- * block requests, they are updated for each
- * block.
- * m.groupoffset indicates the offset of any
- * group operation relative to the beginning
- * of the stripe base.
- * m.grouplen specifies the length of any
- * group operation.
- * m.writeoffset indicates the offset of a
- * normal write relative to the beginning of
- * the stripe base. This value differs from
- * m.dataoffset in that it applies to the
- * entire operation, and not just the first
- * block.
- * m.writelen specifies the total span of a
- * normal write operation. writeoffset and
- * writelen are used to define the parity
- * block.
- */
- m.groupoffset = 0; /* assume no group... */
- m.grouplen = 0; /* until we know we have one */
- m.writeoffset = m.initoffset; /* start offset of transfer */
- m.writelen = 0; /* nothing to write yet */
- m.flags = 0; /* no flags yet */
- rsectors = m.stripesectors; /* remaining sectors to examine */
- m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
- m.datalen = m.initlen;
-
- if (m.sdcount > 1) {
- plex->multiblock++; /* more than one block for the request */
- /*
- * If we have two transfers that don't overlap,
- * (one at the end of the first block, the other
- * at the beginning of the second block),
- * it's cheaper to split them.
- */
- if (rsectors < plex->stripesize) {
- m.sdcount = 1; /* just one subdisk */
- m.stripesectors = m.initlen; /* and just this many sectors */
- rsectors = m.initlen; /* and in the loop counter */
- }
- }
- if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */
- m.badsdno = m.psdno; /* note that it's down */
- if (bp->b_iocmd == BIO_READ) { /* read operation */
- for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
- if (mysdno == m.psdno) /* ignore parity on read */
- mysdno++;
- if (mysdno == plex->subdisks) /* wraparound */
- mysdno = 0;
- if (mysdno == m.psdno) /* wrapped onto the parity subdisk, */
- mysdno++; /* skip it here too */
-
- if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
- if (m.badsdno >= 0) /* we had one already, */
- return REQUEST_DOWN; /* we can't take a second */
- m.badsdno = mysdno; /* got the first */
- m.groupoffset = m.dataoffset; /* define the bounds */
- m.grouplen = m.datalen;
- m.flags |= XFR_RECOVERY_READ; /* we need recovery */
- plex->recovered_reads++; /* count another one */
- } else
- m.flags |= XFR_NORMAL_READ; /* normal read */
-
- /* Update the pointers for the next block */
- m.dataoffset = 0; /* back to the start of the stripe */
- rsectors -= m.datalen; /* remaining sectors to examine */
- m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
- }
- } else { /* write operation */
- for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
- if (mysdno == m.psdno) /* parity stripe, we've dealt with that */
- mysdno++;
- if (mysdno == plex->subdisks) /* wraparound */
- mysdno = 0;
- if (mysdno == m.psdno) /* wrapped onto the parity subdisk, */
- mysdno++; /* skip it here too */
-
- sd = &SD[plex->sdnos[mysdno]];
- if (sd->state != sd_up) {
- enum requeststatus s;
-
- s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
- if (s && (m.badsdno >= 0)) { /* second bad disk, */
- int sdno;
- /*
- * If the parity disk is down, there's
- * no recovery. We make all involved
- * subdisks stale. Otherwise, we
- * should be able to recover, but it's
- * like pulling teeth. Fix it later.
- *
- * NOTE(review): the loop below walks
- * plex->sdnos[0 .. m.sdcount - 1], i.e.
- * the first m.sdcount subdisks of the
- * plex, and its local sd hides the outer
- * one.  Confirm that these are the
- * subdisks intended.
- */
- for (sdno = 0; sdno < m.sdcount; sdno++) {
- struct sd *sd = &SD[plex->sdnos[sdno]];
- if (sd->state >= sd_reborn) /* sort of up, */
- set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
- }
- return s; /* and give up */
- }
- m.badsdno = mysdno; /* note which one is bad */
- m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */
- plex->degraded_writes++; /* count another one */
- m.groupoffset = m.dataoffset; /* define the bounds */
- m.grouplen = m.datalen;
- } else {
- m.flags |= XFR_NORMAL_WRITE; /* normal write operation */
- /* grow writeoffset/writelen so that they span this block too */
- if (m.writeoffset > m.dataoffset) { /* move write operation lower */
- m.writelen = max(m.writeoffset + m.writelen,
- m.dataoffset + m.datalen)
- - m.dataoffset;
- m.writeoffset = m.dataoffset;
- } else
- m.writelen = max(m.writeoffset + m.writelen,
- m.dataoffset + m.datalen)
- - m.writeoffset;
- }
-
- /* Update the pointers for the next block */
- m.dataoffset = 0; /* back to the start of the stripe */
- rsectors -= m.datalen; /* remaining sectors to examine */
- m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
- }
- if (m.badsdno == m.psdno) { /* got a bad parity block, */
- struct sd *psd = &SD[plex->sdnos[m.psdno]];
-
- if (psd->state == sd_down)
- set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
- else if (psd->state == sd_crashed)
- set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
- m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */
- m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */
- plex->parityless_writes++; /* count another one */
- }
- }
-
- /* reset the initial transfer values */
- m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
- m.datalen = m.initlen;
-
- /* decide how many requests we need */
- if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))
- /* doing a recovery read or degraded write, */
- m.rqcount = plex->subdisks; /* all subdisks */
- else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */
- m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */
- else /* parityless write or normal read */
- m.rqcount = m.sdcount; /* just the data blocks */
-
- /* Part C: build the requests */
- rqg = allocrqg(rq, m.rqcount); /* get a request group */
- if (rqg == NULL) { /* malloc failed */
- bp->b_error = ENOMEM;
- bp->b_ioflags |= BIO_ERROR;
- return REQUEST_ENOMEM;
- }
- rqg->plexno = plexno;
- rqg->flags = m.flags;
- rqno = 0; /* index in the request group */
-
- /* 1: PARITY BLOCK */
- /*
- * Are we performing an operation which requires parity? In that case,
- * work out the parameters and define the parity block.
- * XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
- */
- if (m.flags & XFR_PARITYOP) { /* need parity */
- rqe = &rqg->rqe[rqno]; /* point to element */
- sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */
- rqe->rqg = rqg; /* point back to group */
- rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
- &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuff */
- setrqebounds(rqe, &m); /* set up the bounds of the transfer */
- rqe->sdno = sd->sdno; /* subdisk number */
- rqe->driveno = sd->driveno;
- /* NOTE(review): rqg is not released on this path; presumably the caller cleans up rq - confirm */
- if (build_rq_buffer(rqe, plex)) /* build the buffer */
- return REQUEST_ENOMEM; /* can't do it */
- rqe->b.b_iocmd = BIO_READ; /* we must read first */
- m.sdcount++; /* adjust the subdisk count */
- rqno++; /* and point to the next request */
- }
- /*
- * 2: DATA BLOCKS
- * Now build up requests for the blocks required
- * for individual transfers
- */
- for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
- if (mysdno == m.psdno) /* parity, */
- mysdno++; /* we've given already */
- if (mysdno == plex->subdisks) /* got to the end, */
- mysdno = 0; /* wrap around */
- if (mysdno == m.psdno) /* parity, */
- mysdno++; /* we've given already */
-
- rqe = &rqg->rqe[rqno]; /* point to element */
- sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
- rqe->rqg = rqg; /* point to group */
- if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */
- rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
- else
- rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */
- if (mysdno == m.badsdno) { /* this is the bad subdisk */
- rqg->badsdno = rqno; /* note which one */
- rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */
- /*
- * we can't read or write from/to it,
- * but we don't need to malloc
- */
- rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
- }
- setrqebounds(rqe, &m); /* set up the bounds of the transfer */
- rqe->useroffset = m.useroffset; /* offset in user buffer */
- rqe->sdno = sd->sdno; /* subdisk number */
- rqe->driveno = sd->driveno;
- if (build_rq_buffer(rqe, plex)) /* build the buffer */
- return REQUEST_ENOMEM; /* can't do it */
- /*
- * NOTE(review): the test below looks at m.flags, but
- * XFR_BAD_SUBDISK is set in rqe->flags above and is not
- * set in m.flags anywhere in this function.  Confirm
- * whether rqe->flags was intended.
- */
- if ((m.flags & XFR_PARITYOP) /* parity operation, */
- &&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */
- rqe->b.b_iocmd = BIO_READ; /* we must read first */
-
- /* Now update pointers for the next block */
- *diskaddr += m.datalen; /* skip past what we've done */
- m.stripesectors -= m.datalen; /* deduct from what's left */
- m.useroffset += m.datalen; /* and move on in the user buffer */
- m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */
- m.dataoffset = 0; /* start at the beginning of next block */
- }
-
- /*
- * 3: REMAINING BLOCKS FOR RECOVERY
- * Finally, if we have a recovery operation, build
- * up transfers for the other subdisks. Follow the
- * subdisks around until we get to where we started.
- * These requests use only the group parameters.
- */
- if ((rqno < m.rqcount) /* haven't done them all already */
- &&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
- for (; rqno < m.rqcount; rqno++, mysdno++) {
- if (mysdno == m.psdno) /* parity, */
- mysdno++; /* we've given already */
- if (mysdno == plex->subdisks) /* got to the end, */
- mysdno = 0; /* wrap around */
- if (mysdno == m.psdno) /* parity, */
- mysdno++; /* we've given already */
-
- rqe = &rqg->rqe[rqno]; /* point to element */
- sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
- rqe->rqg = rqg; /* point to group */
-
- rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */
- rqe->dataoffset = 0; /* for tidiness' sake */
- rqe->groupoffset = 0; /* group starts at the beginning */
- rqe->datalen = 0;
- rqe->grouplen = m.grouplen;
- rqe->buflen = m.grouplen;
- rqe->flags = (m.flags | XFR_MALLOCED) /* transfer flags without data op stuff */
- &~XFR_DATAOP;
- rqe->sdno = sd->sdno; /* subdisk number */
- rqe->driveno = sd->driveno;
- if (build_rq_buffer(rqe, plex)) /* build the buffer */
- return REQUEST_ENOMEM; /* can't do it */
- rqe->b.b_iocmd = BIO_READ; /* we must read first */
- }
- }
- /*
- * We need to lock the address range before
- * doing anything. We don't have to be
- * performing a recovery operation: somebody
- * else could be doing so, and the results could
- * influence us. Note the fact here, we'll perform
- * the lock in launch_requests.
- */
- rqg->lockbase = m.stripebase;
- if (*diskaddr < diskend) /* didn't finish the request on this stripe */
- plex->multistripe++; /* count another one */
- }
- return REQUEST_OK;
-}
-
-/*
- * Helper function for bre5: set the bounds of the
- * transfer for one request element so that its
- * buffer is no larger than necessary.
- *
- * An element can cover up to two ranges, both given
- * relative to the stripe base on the subdisk
- * (mp->sdbase):
- *
- * 1. A data range.  For the parity block of a
- *    normal write (XFR_NORMAL_WRITE and
- *    XFR_PARITY_BLOCK both set in rqe->flags) this
- *    is mp->writeoffset and mp->writelen.  For
- *    any other element with an XFR_DATAOP flag it
- *    is mp->dataoffset and mp->datalen.
- * 2. A group range, mp->groupoffset and
- *    mp->grouplen, for recovery read and degraded
- *    write.
- *
- * The transfer starts at the lower of the ranges
- * in use.  On return, rqe->sdoffset is the start
- * of the transfer on the subdisk, rqe->dataoffset
- * and rqe->groupoffset are relative to that
- * start, and rqe->buflen covers both ranges.  A
- * range which is not in use gets offset 0 and
- * length 0.
- *
- * With a combined normal and degraded write, the
- * parity element still covers the area of the
- * degraded write, since the buffer size is taken
- * from the length of the read.  FIXME XXX
- */
-void
-setrqebounds(struct rqelement *rqe, struct metrics *mp)
-{
-    int wantdata;               /* element has a data range */
-    int wantgroup;              /* element has a group range */
-    int dstart = 0;             /* start of the data range */
-    int dlen = 0;               /* length of the data range */
-
-    /* First decide which ranges this element uses */
-    if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK))
-        == (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) {
-        /* parity block of a normal write: covers the whole write */
-        wantdata = 1;
-        dstart = mp->writeoffset;
-        dlen = mp->writelen;
-        wantgroup = (rqe->flags & XFR_DEGRADED_WRITE) != 0;
-    } else if (rqe->flags & XFR_DATAOP) {
-        /* data operation: covers the current data block */
-        wantdata = 1;
-        dstart = mp->dataoffset;
-        dlen = mp->datalen;
-        wantgroup = (rqe->flags & XFR_GROUPOP) != 0;
-    } else {
-        /* nothing but a group operation */
-        wantdata = 0;
-        wantgroup = 1;
-    }
-
-    /* Then lay the ranges out from the lower start address */
-    if (wantdata && wantgroup) {
-        if (mp->groupoffset < dstart) {
-            /* the group operation starts lower */
-            rqe->sdoffset = mp->sdbase + mp->groupoffset;
-            rqe->dataoffset = dstart - mp->groupoffset;
-            rqe->groupoffset = 0;
-        } else {
-            /* the data starts first, or both start together */
-            rqe->sdoffset = mp->sdbase + dstart;
-            rqe->dataoffset = 0;
-            rqe->groupoffset = mp->groupoffset - dstart;
-        }
-        rqe->datalen = dlen;
-        rqe->grouplen = mp->grouplen;
-    } else if (wantdata) {
-        rqe->sdoffset = mp->sdbase + dstart;
-        rqe->dataoffset = 0;
-        rqe->groupoffset = 0;
-        rqe->datalen = dlen;
-        rqe->grouplen = 0;
-    } else {
-        rqe->sdoffset = mp->sdbase + mp->groupoffset;
-        rqe->dataoffset = 0;
-        rqe->groupoffset = 0;
-        rqe->datalen = 0;
-        rqe->grouplen = mp->grouplen;
-    }
-
-    /* total buffer length: up to the end of whichever range ends last */
-    rqe->buflen = max(rqe->dataoffset + rqe->datalen,
-        rqe->groupoffset + rqe->grouplen);
-}
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumrequest.c b/sys/dev/vinum/vinumrequest.c
deleted file mode 100644
index 0915f8c..0000000
--- a/sys/dev/vinum/vinumrequest.c
+++ /dev/null
@@ -1,1125 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumrequest.c,v 1.69 2003/10/18 17:57:48 phk Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-#include <sys/resourcevar.h>
-
-/*
- * Forward declarations for the request handling functions.
- * Note that the third parameter of bre() is called diskstart
- * here and diskaddr in the definition further down; it is the
- * same in/out plex address.
- */
-enum requeststatus bre(struct request *rq,
- int plexno,
- daddr_t * diskstart,
- daddr_t diskend);
-enum requeststatus bre5(struct request *rq,
- int plexno,
- daddr_t * diskstart,
- daddr_t diskend);
-enum requeststatus build_read_request(struct request *rq, int volplexno);
-enum requeststatus build_write_request(struct request *rq);
-enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
-int find_alternate_sd(struct request *rq);
-int check_range_covered(struct request *);
-void complete_rqe(struct buf *bp);
-void complete_raid5_write(struct rqelement *);
-int abortrequest(struct request *rq, int error);
-void sdio_done(struct buf *bp);
-int vinum_bounds_check(struct buf *bp, struct volume *vol);
-caddr_t allocdatabuf(struct rqelement *rqe);
-void freedatabuf(struct rqelement *rqe);
-
-#ifdef VINUMDEBUG
-/* Circular log of recent request events, and the next slot to fill. */
-struct rqinfo rqinfo[RQINFO_SIZE];
-struct rqinfo *rqip = rqinfo;
-
-/*
- * Record one event in the circular request log.
- *
- * type selects which member of info is valid and
- * how much of it is copied into the log slot; ubp
- * is the user buffer header the event belongs to.
- * The whole update runs between splhigh() and
- * splx().
- */
-void
-logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp)
-{
-    int oldspl;
-
-    oldspl = splhigh();
-    microtime(&rqip->timestamp);			    /* time of the event */
-    rqip->type = type;
-    rqip->bp = ubp;					    /* user buffer */
-
-    switch (type) {
-	/* Events described by a struct buf */
-    case loginfo_user_bp:
-    case loginfo_user_bpl:
-    case loginfo_sdio:
-    case loginfo_sdiol:
-    case loginfo_sdiodone:
-	bcopy(info.bp, &rqip->info.b, sizeof(struct buf));
-	rqip->devmajor = major(info.bp->b_dev);
-	rqip->devminor = minor(info.bp->b_dev);
-	break;
-
-	/* Events described by a request element */
-    case loginfo_iodone:
-    case loginfo_rqe:
-    case loginfo_raid5_data:
-    case loginfo_raid5_parity:
-	bcopy(info.rqe, &rqip->info.rqe, sizeof(struct rqelement));
-	rqip->devmajor = major(info.rqe->b.b_dev);
-	rqip->devminor = minor(info.rqe->b.b_dev);
-	break;
-
-	/* Events described by a range lock */
-    case loginfo_lockwait:
-    case loginfo_lock:
-    case loginfo_unlock:
-	bcopy(info.lockinfo, &rqip->info.lockinfo, sizeof(struct rangelock));
-	break;
-
-    case loginfo_unused:
-	break;
-    }
-
-    /* Advance to the next slot, wrapping at the end of the array. */
-    if (++rqip >= &rqinfo[RQINFO_SIZE])
-	rqip = rqinfo;
-    splx(oldspl);
-}
-
-#endif
-
-/*
- * Strategy entry point.  Dispatch on the kind of vinum
- * object the buffer is addressed to:
- *
- * - subdisks go straight to sdio();
- * - volumes must be up and pass vinum_bounds_check(),
- *   then continue like plexes;
- * - plexes are handed to vinumstart();
- * - anything else is completed with EIO.
- */
-void
-vinumstrategy(struct bio *biop)
-{
-    struct buf *bp = (struct buf *) biop;
-    struct volume *vol;
-
-    switch (DEVTYPE(bp->b_dev)) {
-    case VINUM_SD_TYPE:
-    case VINUM_SD2_TYPE:
-	sdio(bp);
-	return;
-
-    case VINUM_VOLUME_TYPE:				    /* volume I/O */
-	vol = &VOL[Volno(bp->b_dev)];
-	if (vol->state != volume_up) {			    /* can't access this volume */
-	    bp->b_error = EIO;
-	    bp->b_io.bio_flags |= BIO_ERROR;
-	    bufdone(bp);
-	    return;
-	}
-	if (vinum_bounds_check(bp, vol) <= 0) {		    /* bounds check rejected or completed it */
-	    bufdone(bp);
-	    return;
-	}
-	/*
-	 * From here on a volume request is started
-	 * exactly like a plex request.
-	 */
-	/* FALLTHROUGH */
-    case VINUM_PLEX_TYPE:
-	bp->b_resid = bp->b_bcount;			    /* transfer everything */
-	vinumstart(bp, 0);
-	return;
-
-    default:						    /* not something we can do I/O to */
-	bp->b_error = EIO;
-	bp->b_io.bio_flags |= BIO_ERROR;
-	bufdone(bp);
-	return;
-    }
-}
-
-/*
- * Start a transfer for the user buffer bp, which is
- * addressed either to a volume or to a plex.
- *
- * Returns -1 if the request could not be built (in
- * which case bp has already been completed with
- * bufdone()), otherwise the return value of
- * launch_requests().
- *
- * Parameter reviveok is set when doing transfers for
- * revives: it allows transfers to be started
- * immediately when a revive is in progress.  During
- * revive, normal transfers are queued if they share
- * address space with a currently active revive
- * operation.
- */
-int
-vinumstart(struct buf *bp, int reviveok)
-{
-    int plexno;
-    struct volume *vol;
-    struct request *rq;					    /* build up our request here */
-    enum requeststatus status;
-
-#ifdef VINUMDEBUG
-    if (debug & DEBUG_LASTREQS)
-	logrq(loginfo_user_bp, (union rqinfou) bp, bp);
-#endif
-
-    if ((bp->b_bcount % DEV_BSIZE) != 0) {		    /* bad length */
-	bp->b_error = EINVAL;				    /* invalid size */
-	bp->b_io.bio_flags |= BIO_ERROR;
-	bufdone(bp);
-	return -1;
-    }
-    rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */
-    if (rq == NULL) {					    /* can't do it */
-	bp->b_error = ENOMEM;				    /* can't get memory */
-	bp->b_io.bio_flags |= BIO_ERROR;
-	bufdone(bp);
-	return -1;
-    }
-    bzero(rq, sizeof(struct request));
-    rq->bp = bp;					    /* note the user buffer struct */
-
-    /*
-     * Note what we are talking to.  vol stays NULL
-     * for plex I/O, which the code below uses as the
-     * indication for single plex I/O.
-     */
-    if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) {	    /* it's a volume, */
-	rq->volplex.volno = Volno(bp->b_dev);		    /* get the volume number */
-	vol = &VOL[rq->volplex.volno];			    /* and point to it */
-	vol->active++;					    /* one more active request */
-    } else {
-	vol = NULL;					    /* no volume */
-	rq->volplex.plexno = Plexno(bp->b_dev);		    /* point to the plex */
-	rq->isplex = 1;					    /* note that it's a plex */
-    }
-
-    if (bp->b_iocmd == BIO_READ) {
-	/*
-	 * This is a read request.  Decide
-	 * which plex to read from.
-	 *
-	 * There's a potential race condition here,
-	 * since we're not locked, and we could end
-	 * up multiply incrementing the round-robin
-	 * counter.  This doesn't have any serious
-	 * effects, however.
-	 */
-	if (vol != NULL) {
-	    plexno = vol->preferred_plex;		    /* get the plex to use */
-	    if (plexno < 0) {				    /* round robin */
-		/* Prefer a plex whose last access was close to this one */
-		for (plexno = 0; plexno < vol->plexes; plexno++)
-		    if (abs(bp->b_blkno - PLEX[vol->plex[plexno]].last_addr) <= ROUNDROBIN_SWITCH)
-			break;
-		if (plexno >= vol->plexes) {		    /* none close: take the next in turn */
-		    vol->last_plex_read++;
-		    if (vol->last_plex_read >= vol->plexes)
-			vol->last_plex_read = 0;
-		    plexno = vol->last_plex_read;
-		} else {
-		    vol->last_plex_read = plexno;
-		}
-		PLEX[vol->plex[plexno]].last_addr = bp->b_blkno;
-	    }
-	    status = build_read_request(rq, plexno);	    /* build a request */
-	} else {
-	    daddr_t diskaddr = bp->b_blkno;		    /* start offset of transfer */
-	    status = bre(rq,				    /* build a request list */
-		rq->volplex.plexno,
-		&diskaddr,
-		diskaddr + (bp->b_bcount / DEV_BSIZE));
-	}
-
-	if (status > REQUEST_RECOVERED) {		    /* can't satisfy it */
-	    if (status == REQUEST_DOWN) {		    /* not enough subdisks */
-		bp->b_error = EIO;			    /* I/O error */
-		bp->b_io.bio_flags |= BIO_ERROR;
-	    }
-	    bufdone(bp);
-	    freerq(rq);
-	    return -1;
-	}
-	return launch_requests(rq, reviveok);		    /* now start the requests if we can */
-    } else {
-	/*
-	 * This is a write operation.  We write to all plexes.  If this is
-	 * a RAID-4 or RAID-5 plex, we must also update the parity stripe.
-	 */
-	if (vol != NULL) {
-	    /*
-	     * NOTE(review): the test is "plexes > 0", so the
-	     * buffer is copied for every volume that has at
-	     * least one plex, not only for multi-plex volumes.
-	     * Confirm whether "> 1" was intended before
-	     * changing it.
-	     */
-	    if ((vol->plexes > 0)			    /* multiple plex */
-	    ||(isparity((&PLEX[vol->plex[0]])))) {	    /* or RAID-[45], */
-		rq->save_data = bp->b_data;		    /* save the data buffer address */
-		bp->b_data = Malloc(bp->b_bcount);
-		if (bp->b_data == NULL) {		    /* no memory for the copy */
-		    bp->b_data = rq->save_data;		    /* give the user buffer back */
-		    bp->b_error = ENOMEM;
-		    bp->b_io.bio_flags |= BIO_ERROR;
-		    bufdone(bp);
-		    freerq(rq);
-		    return -1;
-		}
-		bcopy(rq->save_data, bp->b_data, bp->b_bcount);	/* make a copy */
-		rq->flags |= XFR_COPYBUF;		    /* and note that we did it */
-	    }
-	    status = build_write_request(rq);
-	} else {					    /* plex I/O */
-	    daddr_t diskstart;
-
-	    diskstart = bp->b_blkno;			    /* start offset of transfer */
-	    status = bre(rq,
-		Plexno(bp->b_dev),
-		&diskstart,
-		bp->b_blkno + (bp->b_bcount / DEV_BSIZE));  /* build requests for the plex */
-	}
-	if (status > REQUEST_RECOVERED) {		    /* can't satisfy it */
-	    if (status == REQUEST_DOWN) {		    /* not enough subdisks */
-		bp->b_error = EIO;			    /* I/O error */
-		bp->b_io.bio_flags |= BIO_ERROR;
-	    }
-	    if (rq->flags & XFR_COPYBUF) {		    /* undo the copy made above */
-		Free(bp->b_data);
-		bp->b_data = rq->save_data;
-	    }
-	    bufdone(bp);
-	    freerq(rq);
-	    return -1;
-	}
-	return launch_requests(rq, reviveok);		    /* now start the requests if we can */
-    }
-}
-
-/*
- * Call the low-level strategy routines to
- * perform the requests in a struct request.
- *
- * If the request is flagged XFR_REVIVECONFLICT and
- * reviveok is not set, the request is not started
- * but appended to the wait list of subdisk rq->sdno,
- * and 0 is returned.  Otherwise the active counts
- * are set up, every request element whose subdisk
- * is not flagged XFR_BAD_SUBDISK is passed to
- * DEV_STRATEGY, and 0 is returned.  With VINUMDEBUG,
- * a request without any request groups is aborted
- * with EINVAL and -1 is returned.
- */
-int
-launch_requests(struct request *rq, int reviveok)
-{
- struct rqgroup *rqg;
- int rqno; /* loop index */
- struct rqelement *rqe; /* current element */
- struct drive *drive;
- int rcount; /* request count */
-
- /*
- * First find out whether we're reviving, and
- * the request contains a conflict. If so, we
- * hang the request at the end of sd->waitlist
- * of the subdisk recorded in rq->sdno.
- */
-
- if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */
- &&(!reviveok)) { /* and we don't want to do it now, */
- struct sd *sd;
- struct request *waitlist; /* point to the waitlist */
-
- sd = &SD[rq->sdno];
- if (sd->waitlist != NULL) { /* something there already, */
- waitlist = sd->waitlist;
- while (waitlist->next != NULL) /* find the end */
- waitlist = waitlist->next;
- waitlist->next = rq; /* hook our request there */
- } else
- sd->waitlist = rq; /* hook our request at the front */
-
-#ifdef VINUMDEBUG
- if (debug & DEBUG_REVIVECONFLICT)
- log(LOG_DEBUG,
- "Revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n",
- rq->sdno,
- rq,
- rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
- major(rq->bp->b_dev),
- minor(rq->bp->b_dev),
- (intmax_t) rq->bp->b_blkno,
- rq->bp->b_bcount);
-#endif
- return 0; /* and get out of here */
- }
- rq->active = 0; /* nothing yet */
-#ifdef VINUMDEBUG
- /* XXX This is probably due to a bug */
- if (rq->rqg == NULL) { /* no request */
- log(LOG_ERR, "vinum: null rqg\n");
- abortrequest(rq, EINVAL);
- return -1;
- }
-#endif
-#ifdef VINUMDEBUG
- if (debug & DEBUG_ADDRESSES)
- log(LOG_DEBUG,
- "Request: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n",
- rq,
- rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
- major(rq->bp->b_dev),
- minor(rq->bp->b_dev),
- (intmax_t) rq->bp->b_blkno,
- rq->bp->b_bcount);
- vinum_conf.lastrq = rq;
- vinum_conf.lastbuf = rq->bp;
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_user_bpl, (union rqinfou) rq->bp, rq->bp);
-#endif
-
- /*
- * We used to have an splbio() here anyway, out
- * of superstition. With the division of labour
- * below (first count the requests, then issue
- * them), it looks as if we don't need this
- * splbio() protection. In fact, as dillon
- * points out, there's a race condition
- * incrementing and decrementing rq->active and
- * rqg->active. This splbio() didn't help
- * there, because the device strategy routine
- * can sleep. Solve this by putting shorter
- * duration locks on the code.
- */
- /*
- * This loop happens without any participation
- * of the bottom half, so it requires no
- * protection.
- *
- * Count pass: rqg->active becomes the number of
- * elements in the group that are not on a bad
- * subdisk, and rq->active the number of groups
- * that have at least one such element.
- */
- for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */
- rqg->active = rqg->count; /* they're all active */
- for (rqno = 0; rqno < rqg->count; rqno++) {
- rqe = &rqg->rqe[rqno];
- if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */
- rqg->active--; /* one less active request */
- }
- if (rqg->active) /* we have at least one active request, */
- rq->active++; /* one more active request group */
- }
-
- /*
- * Now fire off the requests. In this loop the
- * bottom half could be completing requests
- * before we finish. We avoid splbio()
- * protection by ensuring we don't tread in the
- * same places that the bottom half does.
- *
- * Note that the outer loop has no increment
- * expression: rqg is advanced inside the inner
- * loop, just before the last element of the
- * group is issued.
- */
- for (rqg = rq->rqg; rqg != NULL;) { /* through the whole request chain */
- if (rqg->lockbase >= 0) /* this rqg needs a lock first */
- rqg->lock = lockrange(rqg->lockbase, rqg->rq->bp, &PLEX[rqg->plexno]);
- rcount = rqg->count;
- for (rqno = 0; rqno < rcount;) {
- rqe = &rqg->rqe[rqno];
-
- /*
- * Point to next rqg before the bottom half
- * changes the structures.
- */
- if (++rqno >= rcount)
- rqg = rqg->next;
- if ((rqe->flags & XFR_BAD_SUBDISK) == 0) { /* this subdisk is good, */
- drive = &DRIVE[rqe->driveno]; /* look at drive */
- /* Statistics: current and high-water counts of active requests */
- drive->active++;
- if (drive->active >= drive->maxactive)
- drive->maxactive = drive->active;
- vinum_conf.active++;
- if (vinum_conf.active >= vinum_conf.maxactive)
- vinum_conf.maxactive = vinum_conf.active;
-
-#ifdef VINUMDEBUG
- if (debug & DEBUG_ADDRESSES)
- log(LOG_DEBUG,
- " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%jx, length %ld\n",
- rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
- major(rqe->b.b_dev),
- minor(rqe->b.b_dev),
- rqe->sdno,
- (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
- (intmax_t) rqe->b.b_blkno,
- rqe->b.b_bcount);
- if (debug & DEBUG_LASTREQS) {
- microtime(&rqe->launchtime); /* time we launched this request */
- logrq(loginfo_rqe, (union rqinfou) rqe, rq->bp);
- }
-#endif
- /* fire off the request */
- rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
- rqe->b.b_iooffset = rqe->b.b_offset;
- DEV_STRATEGY(&rqe->b);
- }
- }
- }
- return 0;
-}
-
-/*
- * Define the low-level requests needed to perform a
- * high-level I/O operation for a specific plex 'plexno'.
- *
- * rq is the request being built, *diskaddr the plex address at
- * which to start and diskend the plex address at which the
- * transfer ends.  *diskaddr is advanced past every part of the
- * transfer for which a request element has been built.
- *
- * Returns:
- *   REQUEST_OK        the transfer is covered by accessible subdisks;
- *   REQUEST_DEGRADED  a subdisk is down or (concatenated plexes) the
- *                     address falls in a hole.  On read we return on the
- *                     first bad subdisk, so that the caller
- *                     (build_read_request) can try alternatives; on
- *                     write we carry on and flag the bad elements
- *                     XFR_BAD_SUBDISK;
- *   REQUEST_EOF       the transfer is at least partially beyond the end
- *                     of the plex (this includes a concatenated plex
- *                     with no subdisks at all);
- *   REQUEST_ENOMEM    an allocation failed; the user buffer is marked
- *                     with ENOMEM;
- *   REQUEST_DOWN      the plex organization is not recognized.
- * RAID-4 and RAID-5 plexes return whatever bre5() returns.
- *
- * On entry to this routine, the rqg structures are not assigned.  The
- * assignment is performed by expandrq().  Strictly speaking, the
- * elements rqe->sdno of all entries should be set to -1, since 0
- * (from bzero) is a valid subdisk number.  We avoid this problem by
- * initializing the ones we use, and not looking at the others (index
- * >= rqg->requests).
- */
-enum requeststatus
-bre(struct request *rq,
-    int plexno,
-    daddr_t * diskaddr,
-    daddr_t diskend)
-{
-    int sdno;
-    struct sd *sd;
-    struct rqgroup *rqg;
-    struct buf *bp;					    /* user's bp */
-    struct plex *plex;
-    enum requeststatus status;				    /* return value */
-    daddr_t plexoffset;					    /* offset of transfer in plex */
-    daddr_t stripebase;					    /* base address of stripe (1st subdisk) */
-    daddr_t stripeoffset;				    /* offset in stripe */
-    daddr_t blockoffset;				    /* offset in stripe on subdisk */
-    struct rqelement *rqe;				    /* point to this request information */
-    daddr_t diskstart = *diskaddr;			    /* remember where this transfer starts */
-    enum requeststatus s;				    /* temp return value */
-
-    bp = rq->bp;					    /* buffer pointer */
-    status = REQUEST_OK;				    /* return value: OK until proven otherwise */
-    plex = &PLEX[plexno];				    /* point to the plex */
-
-    switch (plex->organization) {
-    case plex_concat:
-	sd = NULL;					    /* stays NULL if the plex has no subdisks */
-	for (sdno = 0; sdno < plex->subdisks; sdno++) {
-	    sd = &SD[plex->sdnos[sdno]];
-	    if (*diskaddr < sd->plexoffset)		    /* we must have a hole, */
-		status = REQUEST_DEGRADED;		    /* note the fact */
-	    if (*diskaddr < (sd->plexoffset + sd->sectors)) { /* the request starts in this subdisk */
-		rqg = allocrqg(rq, 1);			    /* space for the request */
-		if (rqg == NULL) {			    /* malloc failed */
-		    bp->b_error = ENOMEM;
-		    bp->b_io.bio_flags |= BIO_ERROR;
-		    return REQUEST_ENOMEM;
-		}
-		rqg->plexno = plexno;
-
-		rqe = &rqg->rqe[0];			    /* point to the element */
-		rqe->rqg = rqg;				    /* group */
-		rqe->sdno = sd->sdno;			    /* put in the subdisk number */
-		plexoffset = *diskaddr;			    /* start offset in plex */
-		rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */
-		rqe->useroffset = plexoffset - diskstart;   /* start offset in user buffer */
-		rqe->dataoffset = 0;
-		rqe->datalen = min(diskend - *diskaddr,	    /* number of sectors to transfer in this sd */
-		    sd->sectors - rqe->sdoffset);
-		rqe->groupoffset = 0;			    /* no groups for concatenated plexes */
-		rqe->grouplen = 0;
-		rqe->buflen = rqe->datalen;		    /* buffer length is data buffer length */
-		rqe->flags = 0;
-		rqe->driveno = sd->driveno;
-		if (sd->state != sd_up) {		    /* *now* we find the sd is down */
-		    s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
-		    if (s == REQUEST_DOWN) {		    /* down? */
-			rqe->flags = XFR_BAD_SUBDISK;	    /* yup */
-			if (rq->bp->b_iocmd == BIO_READ)    /* read request, */
-			    return REQUEST_DEGRADED;	    /* give up here */
-			/*
-			 * If we're writing, don't give up
-			 * because of a bad subdisk.  Go
-			 * through to the bitter end, but note
-			 * which ones we can't access.
-			 */
-			status = REQUEST_DEGRADED;	    /* can't do it all */
-		    }
-		}
-		*diskaddr += rqe->datalen;		    /* bump the address */
-		if (build_rq_buffer(rqe, plex)) {	    /* build the buffer */
-		    deallocrqg(rqg);
-		    bp->b_error = ENOMEM;
-		    bp->b_io.bio_flags |= BIO_ERROR;
-		    return REQUEST_ENOMEM;		    /* can't do it */
-		}
-	    }
-	    if (*diskaddr == diskend)			    /* we're finished, */
-		break;					    /* get out of here */
-	}
-	/*
-	 * We've got to the end of the plex.  Have we got to the end of
-	 * the transfer?  It would seem that having an offset beyond the
-	 * end of the subdisk is an error, but in fact it can happen if
-	 * the volume has another plex of different size.  There's a valid
-	 * question as to why you would want to do this, but currently
-	 * it's allowed.
-	 *
-	 * In a previous version, I returned REQUEST_DOWN here.  I think
-	 * REQUEST_EOF is more appropriate now.
-	 *
-	 * sd is the last subdisk we looked at.  If the plex has no
-	 * subdisks it is still NULL: there is nothing to transfer to,
-	 * so treat that as EOF too rather than dereferencing it.
-	 */
-	if ((sd == NULL)				    /* no subdisks at all, */
-	||(diskend > sd->sectors + sd->plexoffset))	    /* or pointing beyond EOF? */
-	    status = REQUEST_EOF;
-	break;
-
-    case plex_striped:
-	while (*diskaddr < diskend) {			    /* until we get it all sorted out */
-	    if (*diskaddr >= plex->length)		    /* beyond the end of the plex */
-		return REQUEST_EOF;			    /* can't continue */
-
-	    /* The offset of the start address from the start of the stripe. */
-	    stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks);
-
-	    /* The plex-relative address of the start of the stripe. */
-	    stripebase = *diskaddr - stripeoffset;
-
-	    /* The number of the subdisk in which the start is located. */
-	    sdno = stripeoffset / plex->stripesize;
-
-	    /* The offset from the beginning of the stripe on this subdisk. */
-	    blockoffset = stripeoffset % plex->stripesize;
-
-	    sd = &SD[plex->sdnos[sdno]];		    /* the subdisk in question */
-	    rqg = allocrqg(rq, 1);			    /* space for the request */
-	    if (rqg == NULL) {				    /* malloc failed */
-		bp->b_error = ENOMEM;
-		bp->b_io.bio_flags |= BIO_ERROR;
-		return REQUEST_ENOMEM;
-	    }
-	    rqg->plexno = plexno;
-
-	    rqe = &rqg->rqe[0];				    /* point to the element */
-	    rqe->rqg = rqg;
-	    rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */
-	    rqe->useroffset = *diskaddr - diskstart;	    /* The offset of the start in the user buffer */
-	    rqe->dataoffset = 0;
-	    rqe->datalen = min(diskend - *diskaddr,	    /* the amount remaining to transfer */
-		plex->stripesize - blockoffset);	    /* and the amount left in this stripe */
-	    rqe->groupoffset = 0;			    /* no groups for striped plexes */
-	    rqe->grouplen = 0;
-	    rqe->buflen = rqe->datalen;			    /* buffer length is data buffer length */
-	    rqe->flags = 0;
-	    rqe->sdno = sd->sdno;			    /* put in the subdisk number */
-	    rqe->driveno = sd->driveno;
-
-	    if (sd->state != sd_up) {			    /* *now* we find the sd is down */
-		s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
-		if (s == REQUEST_DOWN) {		    /* down? */
-		    rqe->flags = XFR_BAD_SUBDISK;	    /* yup */
-		    if (rq->bp->b_iocmd == BIO_READ)	    /* read request, */
-			return REQUEST_DEGRADED;	    /* give up here */
-		    /*
-		     * If we're writing, don't give up
-		     * because of a bad subdisk.  Go through
-		     * to the bitter end, but note which
-		     * ones we can't access.
-		     */
-		    status = REQUEST_DEGRADED;		    /* can't do it all */
-		}
-	    }
-	    /*
-	     * It would seem that having an offset
-	     * beyond the end of the subdisk is an
-	     * error, but in fact it can happen if the
-	     * volume has another plex of different
-	     * size.  There's a valid question as to why
-	     * you would want to do this, but currently
-	     * it's allowed.
-	     */
-	    if (rqe->sdoffset + rqe->datalen > sd->sectors) { /* ends beyond the end of the subdisk? */
-		rqe->datalen = sd->sectors - rqe->sdoffset; /* truncate */
-		/*
-		 * build_rq_buffer() sizes the transfer
-		 * from buflen, so it must shrink with
-		 * datalen or the I/O would still run
-		 * past the end of the subdisk.
-		 */
-		rqe->buflen = rqe->datalen;
-#ifdef VINUMDEBUG
-		if (debug & DEBUG_EOFINFO) {		    /* tell on the request */
-		    log(LOG_DEBUG,
-			"vinum: EOF on plex %s, sd %s offset %x (user offset 0x%jx)\n",
-			plex->name,
-			sd->name,
-			(u_int) sd->sectors,
-			(intmax_t) bp->b_blkno);
-		    log(LOG_DEBUG,
-			"vinum: stripebase %#jx, stripeoffset %#jx, blockoffset %#jx\n",
-			(intmax_t) stripebase,
-			(intmax_t) stripeoffset,
-			(intmax_t) blockoffset);
-		}
-#endif
-	    }
-	    if (build_rq_buffer(rqe, plex)) {		    /* build the buffer */
-		deallocrqg(rqg);
-		bp->b_error = ENOMEM;
-		bp->b_io.bio_flags |= BIO_ERROR;
-		return REQUEST_ENOMEM;			    /* can't do it */
-	    }
-	    *diskaddr += rqe->datalen;			    /* look at the remainder */
-	    if ((*diskaddr < diskend)			    /* didn't finish the request on this stripe */
-	    &&(*diskaddr < plex->length)) {		    /* and there's more to come */
-		plex->multiblock++;			    /* count another one */
-		if (sdno == plex->subdisks - 1)		    /* last subdisk, */
-		    plex->multistripe++;		    /* another stripe as well */
-	    }
-	}
-	break;
-
-	/*
-	 * RAID-4 and RAID-5 are complicated enough to have their own
-	 * function.
-	 */
-    case plex_raid4:
-    case plex_raid5:
-	status = bre5(rq, plexno, diskaddr, diskend);
-	break;
-
-    default:
-	log(LOG_ERR, "vinum: invalid plex type %d in bre\n", plex->organization);
-	status = REQUEST_DOWN;				    /* can't access it */
-    }
-
-    return status;
-}
-
-/*
- * Build up a request structure for reading volumes.
- * This function is not needed for plex reads, since there's
- * no recovery if a plex read can't be satisfied.
- *
- * rq is the request under construction and plexindex the index,
- * in the volume's plex table, of the plex to try first.  The
- * transfer is built piece by piece with bre(); whenever the
- * chosen plex cannot supply a piece, the other plexes of the
- * volume are tried for it.
- *
- * Returns the status of the last piece built, REQUEST_ENOMEM if
- * bre() ran out of memory, or REQUEST_DOWN if there is no other
- * plex to recover from.
- */
-enum requeststatus
-build_read_request(struct request *rq,			    /* request */
-    int plexindex)
-{							    /* index in the volume's plex table */
-    struct buf *bp;
-    daddr_t startaddr;					    /* offset of previous part of transfer */
-    daddr_t diskaddr;					    /* offset of current part of transfer */
-    daddr_t diskend;					    /* and end offset of transfer */
-    int plexno;						    /* plex index in vinum_conf */
-    struct volume *vol;					    /* volume in question */
-    int recovered = 0;					    /* set if we recover a read */
-    enum requeststatus status = REQUEST_OK;
-    int plexmask;					    /* bit mask of plexes, for recovery */
-
-    bp = rq->bp;					    /* buffer pointer */
-    diskaddr = bp->b_blkno;				    /* start offset of transfer */
-    diskend = diskaddr + (bp->b_bcount / DEV_BSIZE);	    /* and end offset of transfer */
-    vol = &VOL[rq->volplex.volno];			    /* point to volume */
-
-    while (diskaddr < diskend) {			    /* build up request components */
-	startaddr = diskaddr;
-	status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */
-	switch (status) {
-	case REQUEST_OK:
-	    continue;
-
-	case REQUEST_RECOVERED:
-	    /*
-	     * XXX FIXME if we have more than one plex, and we can
-	     * satisfy the request from another, don't use the
-	     * recovered request, since it's more expensive.
-	     */
-	    recovered = 1;
-	    break;
-
-	case REQUEST_ENOMEM:
-	    return status;
-	    /*
-	     * If we get here, our request is not complete.  Try
-	     * to fill in the missing parts from another plex.
-	     * This can happen multiple times in this function,
-	     * and we reinitialize the plex mask each time, since
-	     * we could have a hole in our plexes.
-	     */
-	case REQUEST_EOF:
-	case REQUEST_DOWN:				    /* can't access the plex */
-	case REQUEST_DEGRADED:				    /* can't access the plex */
-	    plexmask = ((1 << vol->plexes) - 1)		    /* all plexes in the volume */
-	    &~(1 << plexindex);				    /* except for the one we were looking at */
-	    for (plexno = 0; plexno < vol->plexes; plexno++) {
-		if (plexmask == 0)			    /* no plexes left to try */
-		    return REQUEST_DOWN;		    /* failed */
-		diskaddr = startaddr;			    /* start at the beginning again */
-		if (plexmask & (1 << plexno)) {		    /* we haven't tried this plex yet */
-		    bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */
-		    if (diskaddr > startaddr) {		    /* we satisfied another part */
-			recovered = 1;			    /* we recovered from the problem */
-			status = REQUEST_OK;		    /* don't complain about it */
-			break;
-		    }
-		}
-	    }
-	    if (diskaddr == startaddr)			    /* didn't get any further, */
-		return status;
-	}
-	if (recovered)
-	    vol->recovered_reads += recovered;		    /* adjust our recovery count */
-    }
-    return status;
-}
-
-/*
- * Build up a request structure for writes to a volume.
- *
- * Request elements are built with bre() for every plex of
- * the volume, each time starting again from the beginning
- * of the transfer.  The value returned is the smallest
- * (i.e. best) status that bre() reported for any plex, or
- * REQUEST_DOWN if the volume has no plexes.
- */
-enum requeststatus
-build_write_request(struct request *rq)
-{
-    struct buf *ubp = rq->bp;				    /* user buffer header */
-    struct volume *vol = &VOL[rq->volplex.volno];	    /* volume we are writing to */
-    daddr_t diskend = ubp->b_blkno + (ubp->b_bcount / DEV_BSIZE); /* end offset of transfer */
-    enum requeststatus best = REQUEST_DOWN;		    /* assume the worst */
-    int plexno;						    /* index in the volume's plex table */
-
-    for (plexno = 0; plexno < vol->plexes; plexno++) {
-	daddr_t diskstart = ubp->b_blkno;		    /* each plex starts from the beginning */
-
-	/*
-	 * Keep the best result we have seen so far
-	 * (min, not max): we're happy if we can
-	 * write at all.
-	 */
-	best = min(best, bre(rq, vol->plex[plexno], &diskstart, diskend));
-    }
-    return best;
-}
-
-/*
- * Fill in the struct buf part of a request element.
- *
- * rqe is the element to prepare; its sdno, sdoffset, buflen,
- * useroffset, flags and (for accessible subdisks) driveno must
- * already be set.  The plex parameter is currently not used.
- *
- * The buffer is locked and flagged XFR_BUFLOCKED, and its data
- * pointer is set either to a freshly allocated buffer
- * (XFR_MALLOCED) or directly into the user buffer.
- *
- * Returns 0 on success, or REQUEST_ENOMEM if the data buffer could
- * not be allocated.
- */
-enum requeststatus
-build_rq_buffer(struct rqelement *rqe, struct plex *plex)
-{
-    struct sd *sd;					    /* point to subdisk */
-    struct buf *bp;
-    struct buf *ubp;					    /* user (high level) buffer header */
-
-    sd = &SD[rqe->sdno];				    /* point to subdisk */
-    bp = &rqe->b;
-    ubp = rqe->rqg->rq->bp;				    /* pointer to user buffer header */
-
-    /* Initialize the buf struct */
-    /* copy these flags from user bp */
-    bp->b_flags = ubp->b_flags & (B_NOCACHE | B_ASYNC);
-    bp->b_io.bio_flags = 0;
-    bp->b_iocmd = ubp->b_iocmd;
-#ifdef VINUMDEBUG
-    if (rqe->flags & XFR_BUFLOCKED)			    /* paranoia */
-	panic("build_rq_buffer: rqe already locked");	    /* XXX remove this when we're sure */
-#endif
-    BUF_LOCKINIT(bp);					    /* get a lock for the buffer */
-    BUF_LOCK(bp, LK_EXCLUSIVE, NULL);			    /* and lock it */
-    BUF_KERNPROC(bp);
-    rqe->flags |= XFR_BUFLOCKED;
-    bp->b_iodone = complete_rqe;
-    /*
-     * You'd think that we wouldn't need to even
-     * build the request buffer for a dead subdisk,
-     * but in some cases we need information like
-     * the user buffer address.  Err on the side of
-     * generosity and supply what we can.  That
-     * obviously doesn't include drive information
-     * when the drive is dead.
-     */
-    if ((rqe->flags & XFR_BAD_SUBDISK) == 0)		    /* subdisk is accessible, */
-	bp->b_dev = DRIVE[rqe->driveno].dev;		    /* drive device */
-    bp->b_blkno = rqe->sdoffset + sd->driveoffset;	    /* start address */
-    bp->b_bcount = rqe->buflen << DEV_BSHIFT;		    /* number of bytes to transfer */
-    bp->b_resid = bp->b_bcount;				    /* and it's still all waiting */
-    bp->b_bufsize = bp->b_bcount;			    /* and buffer size */
-    bp->b_rcred = FSCRED;				    /* we have the file system credentials */
-    bp->b_wcred = FSCRED;				    /* we have the file system credentials */
-
-    if (rqe->flags & XFR_MALLOCED) {			    /* this operation requires a malloced buffer */
-	bp->b_data = Malloc(bp->b_bcount);		    /* get a buffer to put it in */
-	if (bp->b_data == NULL) {			    /* failed */
-	    /*
-	     * NOTE(review): abortrequest() frees the whole
-	     * request, while callers such as bre() go on to
-	     * call deallocrqg() and their callers freerq()
-	     * again; the buffer lock taken above is also
-	     * still held here.  Verify this path.
-	     */
-	    abortrequest(rqe->rqg->rq, ENOMEM);
-	    return REQUEST_ENOMEM;			    /* no memory */
-	}
-    } else
-	/*
-	 * Point directly to user buffer data.  This means
-	 * that we don't need to do anything when we have
-	 * finished the transfer
-	 */
-	bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
-    /*
-     * On a recovery read, we perform an XOR of
-     * all blocks to the user buffer.  To make
-     * this work, we first clean out the buffer
-     */
-    if ((rqe->flags & (XFR_RECOVERY_READ | XFR_BAD_SUBDISK))
-	== (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) {	    /* bad subdisk of a recovery read */
-	int length = rqe->grouplen << DEV_BSHIFT;	    /* and count involved */
-	char *data = (char *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* destination */
-
-	bzero(data, length);				    /* clean it out */
-    }
-    return 0;
-}
-
-/*
- * Abort a request: release everything attached to
- * rq with freerq() and mark the user buffer as
- * failed with the given error.  The user buffer is
- * not completed here; that is left to the caller.
- * Returns error.
- */
-int
-abortrequest(struct request *rq, int error)
-{
-    struct buf *ubp;
-
-    ubp = rq->bp;					    /* pick this up before rq goes away */
-    ubp->b_error = error;
-    freerq(rq);						    /* free everything we're doing */
-    ubp->b_io.bio_flags |= BIO_ERROR;
-    return error;
-}
-
-/*
- * Check that our transfer will cover the
- * complete address space of the user request.
- *
- * Return 1 if it can, otherwise 0.  This is
- * currently a stub: the request is not examined
- * and the answer is always 1.
- */
-int
-check_range_covered(struct request *rq)
-{
-    (void) rq;						    /* not looked at yet */
-    return 1;
-}
-
-/*
- * Perform I/O on a subdisk.
- *
- * bp is a user buffer addressed directly to a subdisk device.
- * The request fails with EIO if the drive is not up or the
- * subdisk state is below sd_empty, and with ENOMEM if no
- * struct sdbuf can be allocated.  Otherwise a private buffer
- * header is set up which addresses the same data on the
- * underlying drive, the transfer is trimmed to the end of
- * the subdisk, and the header is passed to DEV_STRATEGY;
- * sdio_done is registered as its completion routine.
- */
-void
-sdio(struct buf *bp)
-{
- int s; /* spl */
- struct sd *sd;
- struct sdbuf *sbp;
- daddr_t endoffset;
- struct drive *drive;
-
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_sdio, (union rqinfou) bp, bp);
-#endif
- sd = &SD[Sdno(bp->b_dev)]; /* point to the subdisk */
- drive = &DRIVE[sd->driveno];
-
- /*
- * The drive is gone.  A subdisk in state sd_crashed or
- * better is forced to sd_stale if we were writing and to
- * sd_crashed otherwise, and the request fails.
- */
- if (drive->state != drive_up) {
- if (sd->state >= sd_crashed) {
- if (bp->b_iocmd == BIO_WRITE) /* writing, */
- set_sd_state(sd->sdno, sd_stale, setstate_force);
- else
- set_sd_state(sd->sdno, sd_crashed, setstate_force);
- }
- bp->b_error = EIO;
- bp->b_io.bio_flags |= BIO_ERROR;
- bufdone(bp);
- return;
- }
- /*
- * We allow access to any kind of subdisk as long as we can expect
- * to get the I/O performed.
- */
- if (sd->state < sd_empty) { /* nothing to talk to, */
- bp->b_error = EIO;
- bp->b_io.bio_flags |= BIO_ERROR;
- bufdone(bp);
- return;
- }
- /* Get a buffer */
- sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
- if (sbp == NULL) {
- bp->b_error = ENOMEM;
- bp->b_io.bio_flags |= BIO_ERROR;
- bufdone(bp);
- return;
- }
- bzero(sbp, sizeof(struct sdbuf)); /* start with nothing */
- /* Copy the transfer parameters, relocated to the drive */
- sbp->b.b_flags = bp->b_flags;
- sbp->b.b_iocmd = bp->b_iocmd;
- sbp->b.b_bufsize = bp->b_bcount; /* buffer size */
- sbp->b.b_bcount = bp->b_bcount; /* number of bytes to transfer */
- sbp->b.b_resid = bp->b_resid; /* and amount waiting */
- sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */
- sbp->b.b_data = bp->b_data; /* data buffer */
- sbp->b.b_blkno = bp->b_blkno + sd->driveoffset;
- sbp->b.b_iodone = sdio_done; /* come here on completion */
- BUF_LOCKINIT(&sbp->b); /* get a lock for the buffer */
- BUF_LOCK(&sbp->b, LK_EXCLUSIVE, NULL); /* and lock it */
- BUF_KERNPROC(&sbp->b);
- sbp->bp = bp; /* note the address of the original header */
- sbp->sdno = sd->sdno; /* note for statistics */
- sbp->driveno = sd->driveno;
- /*
- * endoffset is relative to the start of the subdisk, so it
- * can be compared with the subdisk size directly.
- */
- endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */
- if (endoffset > sd->sectors) { /* beyond the end */
- sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
- if (sbp->b.b_bcount <= 0) { /* nothing to transfer */
- /* Complete the user buffer without error and drop our header */
- bp->b_resid = bp->b_bcount; /* nothing transferred */
- bufdone(bp);
- BUF_UNLOCK(&sbp->b);
- BUF_LOCKFREE(&sbp->b);
- Free(sbp);
- return;
- }
- }
-#ifdef VINUMDEBUG
- if (debug & DEBUG_ADDRESSES)
- log(LOG_DEBUG,
- " %s dev %d.%d, sd %d, offset 0x%jx, devoffset 0x%jx, length %ld\n",
- sbp->b.b_iocmd == BIO_READ ? "Read" : "Write",
- major(sbp->b.b_dev),
- minor(sbp->b.b_dev),
- sbp->sdno,
- (intmax_t) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
- (intmax_t) sbp->b.b_blkno,
- sbp->b.b_bcount);
-#endif
- /* The request is issued between splbio() and splx() */
- s = splbio();
-#ifdef VINUMDEBUG
- if (debug & DEBUG_LASTREQS)
- logrq(loginfo_sdiol, (union rqinfou) &sbp->b, &sbp->b);
-#endif
- sbp->b.b_offset = sbp->b.b_blkno << DEV_BSHIFT;
- sbp->b.b_iooffset = sbp->b.b_offset;
- DEV_STRATEGY(&sbp->b);
- splx(s);
-}
-
-/*
- * Simplified version of bounds_check_with_label
- * Determine the size of the transfer, and make sure it is
- * within the boundaries of the partition. Adjust transfer
- * if needed, and signal errors or early completion.
- *
- * Volumes are simpler than disk slices: they only contain
- * one component (though we call them a, b and c to make
- * system utilities happy), and they always take up the
- * complete space of the "partition".
- *
- * I'm still not happy with this: why should the label be
- * protected? If it weren't so damned difficult to write
- * one in the first place (because it's protected), it wouldn't
- * be a problem.
- */
-/*
- * Check a request against the bounds of a volume.
- *
- * Returns:
- *   1  the (possibly shortened) transfer may proceed;
- *   0  nothing to transfer: either the request is
- *      empty or it starts exactly at the end of the
- *      volume (b_resid is set to b_bcount);
- *  -1  error: b_error and BIO_ERROR are set in bp
- *      (EROFS for a forbidden label write, EINVAL
- *      for a start outside the volume).
- */
-int
-vinum_bounds_check(struct buf *bp, struct volume *vol)
-{
-    int64_t maxsize = vol->size;            /* size of the partition (sectors) */
-    int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */
-    int64_t remaining;                      /* sectors between start and end of partition */
-
-#ifdef LABELSECTOR
-    /* Would this transfer overwrite the disk label? */
-    if (bp->b_blkno <= LABELSECTOR          /* starts before or at the label */
-#if LABELSECTOR != 0
-        && bp->b_blkno + size > LABELSECTOR /* and finishes after */
-#endif
-        && (bp->b_iocmd == BIO_WRITE)       /* and it's a write */
-        /*
-         * ... and we're not allowed to write the label.
-         * The negation must apply to the masked flags:
-         * "!vol->flags & mask" negates flags first and
-         * so never tests the two permission bits.
-         */
-        && !(vol->flags & (VF_WLABEL | VF_LABELLING))) {
-        bp->b_error = EROFS;                /* read-only */
-        bp->b_io.bio_flags |= BIO_ERROR;
-        return -1;
-    }
-#endif
-    if (size == 0)                          /* no transfer specified, */
-        return 0;                           /* treat as EOF */
-
-    /*
-     * A negative start can never be satisfied.  It
-     * must not reach the truncation code below, which
-     * would enlarge b_bcount instead of shrinking it.
-     */
-    if (bp->b_blkno < 0) {
-        bp->b_error = EINVAL;
-        bp->b_io.bio_flags |= BIO_ERROR;
-        return -1;
-    }
-
-    /* beyond partition? */
-    if (bp->b_blkno + size > maxsize) {
-        /* if exactly at end of disk, return an EOF */
-        if (bp->b_blkno == maxsize) {
-            bp->b_resid = bp->b_bcount;
-            return 0;
-        }
-        /*
-         * or truncate if part of it fits.  Keep the
-         * difference in 64 bits: narrowing a large
-         * negative value to int could make it look
-         * like a valid positive sector count.
-         */
-        remaining = maxsize - bp->b_blkno;
-        if (remaining <= 0) {               /* nothing to transfer */
-            bp->b_error = EINVAL;
-            bp->b_io.bio_flags |= BIO_ERROR;
-            return -1;
-        }
-        /* remaining < size here, so the new count fits in b_bcount */
-        bp->b_bcount = remaining << DEV_BSHIFT;
-    }
-    return 1;
-}
-
-/*
- * Allocate a request group and hook
- * it in in the list for rq
- */
-/*
- * Create a zeroed request group with room for
- * `elements' request elements and append it to
- * the chain of rq.  Returns the new group, or
- * NULL (leaving rq untouched) if no memory is
- * available.
- */
-struct rqgroup *
-allocrqg(struct request *rq, int elements)
-{
-    int nbytes = sizeof(struct rqgroup) + elements * sizeof(struct rqelement);
-    struct rqgroup *newgrp = (struct rqgroup *) Malloc(nbytes);
-
-    if (newgrp == NULL)                     /* no memory, nothing linked */
-        return NULL;
-
-    bzero(newgrp, nbytes);                  /* no old junk */
-    newgrp->rq = rq;                        /* back pointer to the parent */
-    newgrp->count = elements;               /* elements in this group */
-    newgrp->lockbase = -1;                  /* no lock required yet */
-
-    if (rq->rqg == NULL)                    /* chain is empty, */
-        rq->rqg = newgrp;                   /* we become the head */
-    else
-        rq->lrqg->next = newgrp;            /* otherwise follow the old tail */
-    rq->lrqg = newgrp;                      /* either way we're the new tail */
-    return newgrp;
-}
-
-/*
- * Deallocate a request group out of a chain. We do
- * this by linear search: the chain is short, this
- * almost never happens, and currently it can only
- * happen to the first member of the chain.
- */
-/*
- * Unlink rqg from the chain of its parent request
- * and free it, first releasing any range lock it
- * holds.  If rqg is not on the chain, an error is
- * logged and the group is freed anyway.
- */
-void
-deallocrqg(struct rqgroup *rqg)
-{
-    struct request *rq = rqg->rq;           /* the parent request */
-    struct rqgroup *prev;                   /* group preceding rqg in the chain */
-
-    if (rqg->lock)                          /* got a lock? */
-        unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
-    if (rq->rqg == rqg) {                   /* we're first in line */
-        rq->rqg = rqg->next;                /* unhook ourselves */
-        if (rq->lrqg == rqg)                /* we were also the tail: */
-            rq->lrqg = NULL;                /* don't leave it pointing at freed memory */
-    } else {
-        /*
-         * Find our predecessor.  Testing prev itself
-         * (rather than prev->next) also copes with an
-         * empty chain.
-         */
-        prev = rq->rqg;
-        while ((prev != NULL) && (prev->next != rqg))
-            prev = prev->next;
-        if (prev == NULL)
-            /* arguments in the order the format names them */
-            log(LOG_ERR,
-                "vinum deallocrqg: rqg %p not found in request %p\n",
-                rqg,
-                rq);
-        else {
-            prev->next = rqg->next;         /* make the chain jump over us */
-            if (rq->lrqg == rqg)            /* we were the tail, */
-                rq->lrqg = prev;            /* so allocrqg must append after prev */
-        }
-    }
-    Free(rqg);
-}
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumrevive.c b/sys/dev/vinum/vinumrevive.c
deleted file mode 100644
index 59a91a9..0000000
--- a/sys/dev/vinum/vinumrevive.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumrevive.c,v 1.19 2003/05/08 04:34:47 grog Exp grog $
- */
-
-#include <sys/cdefs.h>
-
-__FBSDID("$FreeBSD$");
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-/*
- * Revive a block of a subdisk. Return an error
- * indication. EAGAIN means successful copy, but
- * that more blocks remain to be copied. EINVAL
- * means that the subdisk isn't associated with a
- * plex (which means a programming error if we get
- * here at all; FIXME).
- *
- * Other return values: 0 once the last block has
- * been written (the subdisk is then set to sd_up
- * and the configuration is saved), ENOMEM if no
- * buffer could be obtained, or the b_error of a
- * failed read or write.
- *
- * Each call handles one block of at most
- * sd->revive_blocksize bytes, starting at sector
- * sd->revived of the subdisk. A data block is
- * read through the volume (or through the plex
- * if it has no volume); a parity block is
- * recomputed by parityrebuild(). The result is
- * then written to the subdisk with sdio(), and
- * any requests queued on sd->waitlist are
- * relaunched.
- */
-
-int
-revive_block(int sdno)
-{
- int s; /* priority level */
- struct sd *sd;
- struct plex *plex;
- struct volume *vol;
- struct buf *bp;
- int error = EAGAIN; /* default: copied OK, more to do */
- int size; /* size of revive block, bytes */
- daddr_t plexblkno; /* lblkno in plex */
- int psd; /* parity subdisk number */
- u_int64_t stripe; /* stripe number */
- int paritysd = 0; /* set if this is the parity stripe */
- struct rangelock *lock; /* for locking */
- daddr_t stripeoffset; /* offset in stripe */
-
- plexblkno = 0; /* to keep the compiler happy */
- sd = &SD[sdno];
- lock = NULL;
- if (sd->plexno < 0) /* no plex? */
- return EINVAL;
- plex = &PLEX[sd->plexno]; /* point to plex */
- if (plex->volno >= 0)
- vol = &VOL[plex->volno];
- else
- vol = NULL;
-
- if ((sd->revive_blocksize == 0) /* no block size */
- ||(sd->revive_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */
- sd->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
- else if (sd->revive_blocksize > MAX_REVIVE_BLOCKSIZE)
- sd->revive_blocksize = MAX_REVIVE_BLOCKSIZE;
- size = min(sd->revive_blocksize >> DEV_BSHIFT, sd->sectors - sd->revived) << DEV_BSHIFT; /* don't run past the end of the subdisk */
- sd->reviver = curproc->p_pid; /* note who last had a bash at it */
-
- /* Now decide where to read from */
- switch (plex->organization) {
- case plex_concat:
- plexblkno = sd->revived + sd->plexoffset; /* corresponding address in plex */
- break;
-
- case plex_striped:
- stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
- if (stripeoffset + (size >> DEV_BSHIFT) > plex->stripesize) /* would cross into the next stripe, */
- size = (plex->stripesize - stripeoffset) << DEV_BSHIFT; /* so stop at the stripe boundary */
- plexblkno = sd->plexoffset /* base */
- + (sd->revived - stripeoffset) * plex->subdisks /* offset to beginning of stripe */
- + stripeoffset; /* offset from beginning of stripe */
- break;
-
- case plex_raid4:
- case plex_raid5:
- stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
- plexblkno = sd->plexoffset /* base */
- + (sd->revived - stripeoffset) * (plex->subdisks - 1) /* offset to beginning of stripe */
- +stripeoffset; /* offset from beginning of stripe */
- stripe = (sd->revived / plex->stripesize); /* stripe number */
-
- /* Make sure we don't go beyond the end of the band. */
- size = min(size, (plex->stripesize - stripeoffset) << DEV_BSHIFT);
- if (plex->organization == plex_raid4)
- psd = plex->subdisks - 1; /* parity subdisk for this stripe */
- else
- psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
- paritysd = plex->sdnos[psd] == sdno; /* note if it's the parity subdisk */
-
- /*
- * Now adjust for the strangenesses
- * in RAID-4 and RAID-5 striping.
- */
- if (sd->plexsdno > psd) /* beyond the parity stripe, */
- plexblkno -= plex->stripesize; /* one stripe less */
- else if (paritysd)
- plexblkno -= plex->stripesize * sd->plexsdno; /* go back to the beginning of the band */
- break;
-
- case plex_disorg: /* to keep the compiler happy */
- break; /* to keep the pedants happy */
- }
-
- if (paritysd) { /* we're reviving a parity block, */
- bp = parityrebuild(plex, sd->revived, size, rebuildparity, &lock, NULL); /* do the grunt work */
- if (bp == NULL) /* no buffer space */
- return ENOMEM; /* chicken out */
- } else { /* data block */
- s = splbio();
- bp = geteblk(size); /* Get a buffer */
- splx(s);
- if (bp == NULL)
- return ENOMEM;
-
- /*
- * Amount to transfer: block size, unless it
- * would overlap the end.
- */
- bp->b_bcount = size;
- bp->b_resid = bp->b_bcount;
- bp->b_blkno = plexblkno; /* start here */
- if (isstriped(plex)) /* we need to lock striped plexes */
- lock = lockrange(plexblkno << DEV_BSHIFT, bp, plex); /* lock it */
- if (vol != NULL) /* it's part of a volume, */
- /*
- * First, read the data from the volume. We
- * don't care which plex, that's bre's job.
- */
- bp->b_dev = VOL[plex->volno].dev; /* create the device number */
- else /* it's an unattached plex */
- bp->b_dev = PLEX[sd->plexno].dev; /* create the device number */
-
- bp->b_iocmd = BIO_READ; /* either way, read it */
- bp->b_flags = 0;
- vinumstart(bp, 1);
- bufwait(bp);
- }
-
- if (bp->b_ioflags & BIO_ERROR) {
- error = bp->b_error;
- if (lock) /* we took a lock, */
- unlockrange(sd->plexno, lock); /* give it back */
- } else
- /* Now write to the subdisk */
- {
- bp->b_dev = SD[sdno].dev; /* create the device number */
- bp->b_flags &= ~B_DONE; /* no longer done */
- bp->b_ioflags = 0;
- bp->b_iocmd = BIO_WRITE;
- bp->b_resid = bp->b_bcount;
- bp->b_blkno = sd->revived; /* write it to here */
- sdio(bp); /* perform the I/O */
- bufwait(bp);
- if (bp->b_ioflags & BIO_ERROR)
- error = bp->b_error;
- else {
- sd->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
- if (sd->revived >= sd->sectors) { /* finished */
- sd->revived = 0;
- set_sd_state(sdno, sd_up, setstate_force); /* bring the sd up */
- log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
- save_config(); /* and save the updated configuration */
- error = 0; /* we're done */
- }
- }
- if (lock) /* we took a lock, */
- unlockrange(sd->plexno, lock); /* give it back */
- while (sd->waitlist) { /* we have waiting requests */
-#ifdef VINUMDEBUG
- struct request *rq = sd->waitlist;
-
- if (debug & DEBUG_REVIVECONFLICT)
- log(LOG_DEBUG,
- "Relaunch revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n",
- rq->sdno,
- rq,
- rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
- major(rq->bp->b_dev),
- minor(rq->bp->b_dev),
- (intmax_t) rq->bp->b_blkno,
- rq->bp->b_bcount);
-#endif
- launch_requests(sd->waitlist, 1); /* do them now */
- sd->waitlist = sd->waitlist->next; /* and move on to the next.
- * NOTE(review): ->next is read after the request
- * has been launched; confirm that launch_requests
- * can neither free nor relink it before this point. */
- }
- }
- if (bp->b_qindex == 0) { /* not on a queue, */
- bp->b_flags |= B_INVAL; /* contents are not worth caching */
- bp->b_ioflags &= ~BIO_ERROR;
- brelse(bp); /* is this kosher? */
- }
- return error;
-}
-
-/*
- * Check or rebuild the parity blocks of a RAID-4
- * or RAID-5 plex.
- *
- * The variables plex->checkblock and
- * plex->rebuildblock represent the
- * subdisk-relative address of the stripe we're
- * looking at, not the plex-relative address. We
- * store it in the plex and not as a local
- * variable because this function could be
- * stopped, and we don't want to repeat the part
- * we've already done. This is also the reason
- * why we don't initialize it here except at the
- * end. It gets initialized with the plex on
- * creation.
- *
- * Each call to this function processes at most
- * one stripe. We can't loop in this function,
- * because we're unstoppable, so we have to be
- * called repeatedly from userland.
- */
-/*
- * ioctl back end for checking and/or rebuilding
- * parity.
- *
- * data->index selects the plex, data->op the
- * operation and data->offset the subdisk-relative
- * sector to start at.  The same memory is reused
- * for the reply, so fields of data are read in
- * the same order as before relative to the writes
- * to reply.
- *
- * Results in reply->error:
- *   EAGAIN  block processed, call again;
- *   0       the end of the subdisks was reached;
- *   EINVAL  the plex is not RAID-4 or RAID-5;
- *   EIO     the plex is not accessible, or
- *           checkparity found a mismatch
- *           (reply->msg gives the offset);
- *   ENOMEM  no buffers were available;
- *   other   b_error of a failed transfer.
- */
-void
-parityops(struct vinum_ioctl_msg *data)
-{
-    int plexno;
-    struct plex *plex;
-    int size;                               /* I/O transfer size, bytes */
-    struct rangelock *lock;                 /* lock on stripe */
-    struct _ioctl_reply *reply;
-    off_t pstripe;                          /* pointer to our stripe counter */
-    struct buf *pbp;
-    off_t errorloc;                         /* offset of parity error */
-    enum parityop op;                       /* operation to perform */
-
-    plexno = data->index;
-    op = data->op;
-    errorloc = -1;                          /* no mismatch known */
-    reply = (struct _ioctl_reply *) data;
-    reply->error = EAGAIN;                  /* expect to repeat this call */
-    plex = &PLEX[plexno];
-    if (!isparity(plex)) {                  /* not RAID-4 or RAID-5 */
-        reply->error = EINVAL;
-        return;
-    } else if (plex->state < plex_flaky) {
-        reply->error = EIO;
-        strcpy(reply->msg, "Plex is not completely accessible\n");
-        return;
-    }
-    pstripe = data->offset;
-    size = min(DEFAULT_REVIVE_BLOCKSIZE,    /* one block at a time */
-        plex->stripesize << DEV_BSHIFT);
-
-    /*
-     * The stripe number and parity subdisk are worked
-     * out by parityrebuild itself; nothing here needs
-     * them.
-     */
-    pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */
-    if (pbp == NULL) {                      /* no buffer space */
-        reply->error = ENOMEM;
-        return;                             /* chicken out */
-    }
-    /*
-     * Now we have a result in the data buffer of
-     * the parity buffer header, which we have kept.
-     * Decide what to do with it.
-     */
-    reply->msg[0] = '\0';                   /* until shown otherwise */
-    if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */
-        if ((op == rebuildparity)
-            || (op == rebuildandcheckparity)) {
-            pbp->b_iocmd = BIO_WRITE;
-            pbp->b_resid = pbp->b_bcount;
-            sdio(pbp);                      /* write the parity block */
-            bufwait(pbp);
-        }
-        if (((op == checkparity)
-                || (op == rebuildandcheckparity))
-            && (errorloc != -1)) {
-            if (op == checkparity)          /* only a pure check treats a mismatch as an error */
-                reply->error = EIO;
-            sprintf(reply->msg,
-                "Parity incorrect at offset 0x%jx\n",
-                (intmax_t) errorloc);
-        }
-        if (reply->error == EAGAIN) {       /* still OK, */
-            plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
-            if (plex->checkblock >= SD[plex->sdnos[0]].sectors) { /* finished */
-                plex->checkblock = 0;
-                reply->error = 0;
-            }
-        }
-    }
-    if (pbp->b_ioflags & BIO_ERROR)         /* read or write failed */
-        reply->error = pbp->b_error;
-    pbp->b_flags |= B_INVAL;
-    pbp->b_ioflags &= ~BIO_ERROR;
-    brelse(pbp);
-    unlockrange(plexno, lock);
-}
-
-/*
- * Rebuild a parity stripe. Return pointer to
- * parity bp. On return,
- *
- * 1. The band is locked. The caller must unlock
- * the band and release the buffer header.
- *
- * 2. All buffer headers except pbp have been
- * released. The caller must release pbp.
- *
- * 3. For checkparity and rebuildandcheckparity,
- * the parity is compared with the current
- * parity block. If it's different, the
- * offset of the error is returned to
- * errorloc. The caller can set the value of
- * the pointer to NULL if this is called for
- * rebuilding parity.
- *
- * pstripe is the subdisk-relative base address of
- * the data to be reconstructed, size is the size
- * of the transfer in bytes.
- */
-/*
- * Compute the parity of one block of a stripe by
- * reading the corresponding block of every data
- * subdisk and XORing them together.
- *
- * plex      RAID-4 or RAID-5 plex to work on
- * pstripe   subdisk-relative start sector
- * size      maximum transfer size in bytes; the
- *           transfer is shortened so that it stays
- *           within one stripe
- * op        rebuildparity, checkparity or
- *           rebuildandcheckparity
- * lockp     receives the range lock taken on the
- *           stripe; the caller must release it
- * errorloc  for the checking operations, receives
- *           the plex-relative byte offset of the
- *           first mismatch, or -1 if the parity is
- *           correct; may be NULL
- *
- * Returns the buffer header holding the computed
- * parity, addressed to the parity subdisk, which
- * the caller must release.  If a read failed,
- * BIO_ERROR and b_error are set in it.  Returns
- * NULL, with no lock taken, if memory or buffers
- * could not be obtained.
- */
-struct buf *
-parityrebuild(struct plex *plex,
-    u_int64_t pstripe,
-    int size,
-    enum parityop op,
-    struct rangelock **lockp,
-    off_t * errorloc)
-{
-    int error;
-    int s;
-    int sdno;
-    u_int64_t stripe;                       /* stripe number */
-    int *parity_buf;                        /* buffer address for current parity block */
-    int *newparity_buf;                     /* and for new parity block */
-    int mysize;                             /* I/O transfer size for this transfer */
-    int isize;                              /* mysize in ints */
-    int i;
-    int psd;                                /* parity subdisk number */
-    int newpsd;                             /* and "subdisk number" of new parity */
-    struct buf **bpp;                       /* pointers to our bps */
-    struct buf *pbp;                        /* buffer header for parity stripe */
-    int *sbuf;
-    int bufcount;                           /* number of buffers we need */
-
-    stripe = pstripe / plex->stripesize;    /* stripe number */
-    /*
-     * RAID-4 keeps all parity on the last subdisk;
-     * only RAID-5 rotates it.  This must agree with
-     * the choice made in revive_block.
-     */
-    if (plex->organization == plex_raid4)
-        psd = plex->subdisks - 1;
-    else
-        psd = plex->subdisks - 1 - stripe % plex->subdisks;
-    parity_buf = NULL;                      /* to keep the compiler happy */
-    error = 0;
-
-    /*
-     * It's possible that the default transfer size
-     * we chose is not a factor of the stripe size.
-     * We *must* limit this operation to a single
-     * stripe, at least for RAID-5 rebuild, since
-     * the parity subdisk changes between stripes,
-     * so in this case we need to perform a short
-     * transfer.  Set variable mysize to reflect
-     * this.
-     */
-    mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT);
-    isize = mysize / (sizeof(int));         /* number of ints in the buffer */
-    bufcount = plex->subdisks + 1;          /* sd buffers plus result buffer */
-    newpsd = plex->subdisks;
-    bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */
-    if (bpp == NULL) {
-        printf("vinum: can't allocate buffer space for parity op.\n");
-        return NULL;
-    }
-    /*
-     * Start with no buffers, so that the failure
-     * path below can tell which slots were filled:
-     * the old-parity slot stays empty when we're
-     * only rebuilding.
-     */
-    bzero(bpp, bufcount * sizeof(struct buf *));
-
-    /* First, build requests for all subdisks */
-    for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */
-        if ((sdno == psd) && (op == rebuildparity))
-            continue;                       /* old parity not wanted */
-
-        /* Get a buffer header and initialize it. */
-        s = splbio();
-        bpp[sdno] = geteblk(mysize);        /* Get a buffer */
-        if (bpp[sdno] == NULL) {
-            while (sdno-- > 0) {            /* release the ones we got */
-                if (bpp[sdno] == NULL)      /* never allocated */
-                    continue;
-                bpp[sdno]->b_flags |= B_INVAL;
-                brelse(bpp[sdno]);          /* give back our resources */
-            }
-            splx(s);
-            Free(bpp);                      /* including the pointer array */
-            printf("vinum: can't allocate buffer space for parity op.\n");
-            return NULL;                    /* no bpps */
-        }
-        splx(s);
-        if (sdno == psd)
-            parity_buf = (int *) bpp[sdno]->b_data;
-        if (sdno == newpsd)                 /* the new one? */
-            bpp[sdno]->b_dev = SD[plex->sdnos[psd]].dev; /* write back to the parity SD */
-        else
-            bpp[sdno]->b_dev = SD[plex->sdnos[sdno]].dev; /* device number */
-        bpp[sdno]->b_iocmd = BIO_READ;      /* either way, read it */
-        bpp[sdno]->b_flags = 0;
-        bpp[sdno]->b_bcount = mysize;
-        bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
-        bpp[sdno]->b_blkno = pstripe;       /* transfer from here */
-    }
-
-    /* Initialize result buffer */
-    pbp = bpp[newpsd];
-    newparity_buf = (int *) bpp[newpsd]->b_data;
-    bzero(newparity_buf, mysize);
-
-    /*
-     * Now lock the stripe with the first non-parity
-     * bp as locking bp.
-     */
-    *lockp = lockrange(pstripe * plex->stripesize * (plex->subdisks - 1),
-        bpp[psd ? 0 : 1],
-        plex);
-
-    /*
-     * Then issue requests for all subdisks in
-     * parallel.  Don't transfer the parity stripe
-     * if we're rebuilding parity, unless we also
-     * want to check it.
-     */
-    for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */
-        if ((sdno != psd) || (op != rebuildparity))
-            sdio(bpp[sdno]);
-    }
-
-    /*
-     * Next, wait for the requests to complete.
-     * We wait in the order in which they were
-     * issued, which isn't necessarily the order in
-     * which they complete, but we don't have a
-     * convenient way of doing the latter, and the
-     * delay is minimal.
-     */
-    for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
-        if ((sdno != psd) || (op != rebuildparity)) {
-            bufwait(bpp[sdno]);
-            if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */
-                error = bpp[sdno]->b_error;
-            else if (sdno != psd) {         /* update parity */
-                sbuf = (int *) bpp[sdno]->b_data;
-                for (i = 0; i < isize; i++)
-                    newparity_buf[i] ^= sbuf[i]; /* xor in the buffer */
-            }
-        }
-        if (sdno != psd) {                  /* release all bps except parity */
-            bpp[sdno]->b_flags |= B_INVAL;
-            brelse(bpp[sdno]);              /* give back our resources */
-        }
-    }
-
-    /*
-     * If we're checking, compare the calculated
-     * and the read parity block.  If they're
-     * different, return the plex-relative offset;
-     * otherwise return -1.
-     */
-    if ((op == checkparity)
-        || (op == rebuildandcheckparity)) {
-        if (errorloc != NULL) {             /* caller wants to know */
-            *errorloc = -1;                 /* no error yet */
-            for (i = 0; i < isize; i++) {
-                if (parity_buf[i] != newparity_buf[i]) {
-                    *errorloc = (off_t) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1)
-                        + i * sizeof(int);
-                    break;
-                }
-            }
-        }
-        bpp[psd]->b_flags |= B_INVAL;
-        brelse(bpp[psd]);                   /* give back our resources */
-    }
-    /* release our resources */
-    Free(bpp);
-    if (error) {                            /* pass a read error on in the result header */
-        pbp->b_ioflags |= BIO_ERROR;
-        pbp->b_error = error;
-    }
-    return pbp;
-}
-
-/*
- * Initialize a subdisk by writing zeroes to the
- * complete address space. If verify is set,
- * check each transfer for correctness.
- *
- * Each call to this function writes (and maybe
- * checks) a single block.
- */
-/*
- * Write one block of zeroes to subdisk sdno at
- * sector sd->initialized and, if verify is set,
- * read it back and check it.  A block that reads
- * back incorrectly is rewritten until it
- * verifies.
- *
- * Returns:
- *   EAGAIN  block written, more remain;
- *   0       the last block was written; the
- *           subdisk is now sd_initialized and the
- *           configuration has been saved;
- *   EINVAL  the subdisk does not belong to a plex;
- *   ENOMEM  no buffer was available;
- *   other   b_error of a failed write or read.
- */
-int
-initsd(int sdno, int verify)
-{
-    int s;                                  /* priority level */
-    struct sd *sd;
-    struct buf *bp;
-    int error;
-    int size;                               /* size of init block, bytes */
-    int verified;                           /* set when we're happy with what we wrote */
-
-    error = 0;
-    sd = &SD[sdno];
-    if (sd->plexno < 0)                     /* no plex? */
-        return EINVAL;
-
-    /*
-     * Apply the same rules as revive_block: the
-     * block size must be a whole number of sectors.
-     * A size of less than one sector would otherwise
-     * give zero-length transfers which never advance
-     * sd->initialized.
-     */
-    if ((sd->init_blocksize == 0)           /* no block size */
-        || (sd->init_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */
-        sd->init_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
-    else if (sd->init_blocksize > MAX_REVIVE_BLOCKSIZE)
-        sd->init_blocksize = MAX_REVIVE_BLOCKSIZE;
-
-    size = min(sd->init_blocksize >> DEV_BSHIFT, sd->sectors - sd->initialized) << DEV_BSHIFT;
-
-    /*
-     * Repeat only while the data fails to verify.
-     * Any I/O or allocation error ends the loop at
-     * once instead of writing the block once more.
-     */
-    verified = 0;
-    while (!verified && (error == 0)) {     /* until we're happy with it, */
-        s = splbio();
-        bp = geteblk(size);                 /* Get a buffer */
-        splx(s);
-        if (bp == NULL)
-            return ENOMEM;
-
-        bp->b_bcount = size;
-        bp->b_resid = bp->b_bcount;
-        bp->b_blkno = sd->initialized;      /* write it to here */
-        bzero(bp->b_data, bp->b_bcount);
-        bp->b_dev = SD[sdno].dev;           /* create the device number */
-        bp->b_iocmd = BIO_WRITE;
-        sdio(bp);                           /* perform the I/O */
-        bufwait(bp);
-        if (bp->b_ioflags & BIO_ERROR)
-            error = bp->b_error;
-        if (bp->b_qindex == 0) {            /* not on a queue, */
-            bp->b_flags |= B_INVAL;
-            bp->b_ioflags &= ~BIO_ERROR;
-            brelse(bp);                     /* is this kosher? */
-        }
-        if ((error == 0) && verify) {       /* check that it got there */
-            s = splbio();
-            bp = geteblk(size);             /* get a buffer */
-            if (bp == NULL) {
-                splx(s);
-                error = ENOMEM;
-            } else {
-                bp->b_bcount = size;
-                bp->b_resid = bp->b_bcount;
-                bp->b_blkno = sd->initialized; /* read from here */
-                bp->b_dev = SD[sdno].dev;   /* create the device number */
-                bp->b_iocmd = BIO_READ;     /* read it back */
-                splx(s);
-                sdio(bp);
-                bufwait(bp);
-                /*
-                 * XXX Bug fix code.  This is hopefully no
-                 * longer needed (21 February 2000).
-                 *
-                 * The buffer is all zeroes iff its first
-                 * byte is zero and every byte equals its
-                 * successor.
-                 */
-                if (bp->b_ioflags & BIO_ERROR)
-                    error = bp->b_error;
-                else if ((*bp->b_data != 0) /* first word spammed */
-                    || (bcmp(bp->b_data, &bp->b_data[1], bp->b_bcount - 1))) { /* or one of the others */
-                    printf("vinum: init error on %s, offset 0x%llx sectors\n",
-                        sd->name,
-                        (long long) sd->initialized);
-                    verified = 0;           /* go round and write it again */
-                } else
-                    verified = 1;
-                if (bp->b_qindex == 0) {    /* not on a queue, */
-                    bp->b_flags |= B_INVAL;
-                    bp->b_ioflags &= ~BIO_ERROR;
-                    brelse(bp);             /* is this kosher? */
-                }
-            }
-        } else
-            verified = 1;                   /* write failed or no check wanted */
-    }
-    if (error == 0) {                       /* did it, */
-        sd->initialized += size >> DEV_BSHIFT; /* moved this much further down */
-        if (sd->initialized >= sd->sectors) { /* finished */
-            sd->initialized = 0;
-            set_sd_state(sdno, sd_initialized, setstate_force); /* bring the sd up */
-            log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
-            save_config();                  /* and save the updated configuration */
-        } else                              /* more to go, */
-            error = EAGAIN;                 /* ya'll come back, see? */
-    }
-    return error;
-}
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumstate.c b/sys/dev/vinum/vinumstate.c
deleted file mode 100644
index 94b45c1..0000000
--- a/sys/dev/vinum/vinumstate.c
+++ /dev/null
@@ -1,1095 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumstate.c,v 2.21 2003/04/28 02:54:43 grog Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/request.h>
-
-/* Update drive state */
-/* Return 1 if the state changes, otherwise 0 */
-int
-set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags)
-{
- struct drive *drive = &DRIVE[driveno];
- int oldstate = drive->state;
- int sdno;
-
- if (drive->state == drive_unallocated) /* no drive to do anything with, */
- return 0;
-
- if (newstate == oldstate) /* don't change it if it's not different */
- return 1; /* all OK */
- if ((newstate == drive_down) /* the drive's going down */
- &&(!(flags & setstate_force))
- && (drive->opencount != 0)) /* we can't do it */
- return 0; /* don't do it */
- drive->state = newstate; /* set the state */
- if (drive->label.name[0] != '\0') /* we have a name, */
- log(LOG_INFO,
- "vinum: drive %s is %s\n",
- drive->label.name,
- drive_state(drive->state));
- if (drive->state != oldstate) { /* state has changed */
- for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */
- if ((SD[sdno].state >= sd_referenced)
- && (SD[sdno].driveno == driveno)) /* belongs to this drive */
- update_sd_state(sdno); /* update the state */
- }
- }
- if (newstate == drive_up) { /* want to bring it up */
- if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */
- init_drive(drive, 1); /* which changes the state again */
- } else /* taking it down or worse */
- queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */
- (union daemoninfo) drive);
- if ((flags & setstate_configuring) == 0) /* configuring? */
- save_config(); /* no: save the updated configuration now */
- return 1;
-}
-
-/*
- * Try to set the subdisk state. Return 1 if
- * state changed to what we wanted, -1 if it
- * changed to something else, and 0 if no change.
- *
- * This routine is called both from the user (up,
- * down states only) and internally.
- *
- * The setstate_force bit in the flags enables the
- * state change even if it could be dangerous to
- * data consistency. It shouldn't allow nonsense.
- */
-int
-set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags)
-{
- struct sd *sd = &SD[sdno];
- struct plex *plex;
- struct volume *vol;
- int oldstate = sd->state;
- int status = 1; /* status to return */
-
- if (newstate == oldstate) /* already there, */
- return 1;
- else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */
- return 0; /* can't do it */
-
- if (sd->driveoffset < 0) { /* not allocated space */
- sd->state = sd_down;
- if (newstate != sd_down) {
- if (sd->plexno >= 0)
- sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */
- return -1;
- }
- } else { /* space allocated */
- switch (newstate) {
- case sd_down: /* take it down? */
- /*
- * If we're attached to a plex, and we're
- * not reborn, we won't go down without
- * use of force.
- */
- if ((!flags & setstate_force)
- && (sd->plexno >= 0)
- && (sd->state != sd_reborn))
- return 0; /* don't do it */
- break;
-
- case sd_initialized:
- if ((sd->state == sd_initializing) /* we were initializing */
- ||(flags & setstate_force)) /* or we forced it */
- break;
- return 0; /* can't do it otherwise */
-
- case sd_up:
- if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */
- return 0; /* not even by force */
- if (flags & setstate_force) /* forcing it, */
- break; /* just do it, and damn the consequences */
- switch (sd->state) {
- /*
- * Perform the necessary tests. To allow
- * the state transition, just break out of
- * the switch.
- */
- case sd_crashed:
- case sd_reborn:
- case sd_down: /* been down, no data lost */
- /*
- * If we're associated with a plex, and
- * the plex isn't up, or we're the only
- * subdisk in the plex, we can do it.
- */
- if ((sd->plexno >= 0)
- && (((PLEX[sd->plexno].state < plex_firstup)
- || (PLEX[sd->plexno].subdisks > 1))))
- break; /* do it */
- if (oldstate != sd_reborn) {
- sd->state = sd_reborn; /* here it is again */
- log(LOG_INFO,
- "vinum: %s is %s, not %s\n",
- sd->name,
- sd_state(sd->state),
- sd_state(newstate));
- }
- status = -1;
- break;
-
- case sd_init: /* brand new */
- if (flags & setstate_configuring) /* we're doing this while configuring */
- break;
- /* otherwise it's like being empty */
- /* FALLTHROUGH */
-
- case sd_empty:
- case sd_initialized:
- /*
- * If we're not part of a plex, or the
- * plex is not part of a volume with other
- * plexes which are up, we can come up
- * without being inconsistent.
- *
- * If we're part of a parity plex, we'll
- * come up if the caller uses force. This
- * is the way we bring them up after
- * initialization.
- */
- if ((sd->plexno < 0)
- || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0)
- || (isparity((&PLEX[sd->plexno]))
- && (flags & setstate_force)))
- break;
-
- /* Otherwise it's just out of date */
- /* FALLTHROUGH */
-
- case sd_stale: /* out of date info, need reviving */
- case sd_obsolete:
- /*
-
- * 1. If the subdisk is not part of a
- * plex, bring it up, don't revive.
- *
- * 2. If the subdisk is part of a
- * one-plex volume or an unattached
- * plex, and it's not RAID-4 or
- * RAID-5, we *can't revive*. The
- * subdisk doesn't change its state.
- *
- * 3. If the subdisk is part of a
- * one-plex volume or an unattached
- * plex, and it's RAID-4 or RAID-5,
- * but more than one subdisk is down,
- * we *still can't revive*. The
- * subdisk doesn't change its state.
- *
- * 4. If the subdisk is part of a
- * multi-plex volume, we'll change to
- * reviving and let the revive
- * routines find out whether it will
- * work or not. If they don't, the
- * revive stops with an error message,
- * but the state doesn't change
- * (FWIW).
- */
- if (sd->plexno < 0) /* no plex associated, */
- break; /* bring it up */
- plex = &PLEX[sd->plexno];
- if (plex->volno >= 0) /* have a volume */
- vol = &VOL[plex->volno];
- else
- vol = NULL;
- /*
- * We can't do it if:
- *
- * 1: we don't have a volume
- * 2: we're the only plex in the volume
- * 3: we're a RAID-4 or RAID-5 plex, and
- * more than one subdisk is down.
- */
- if (((vol == NULL)
- || (vol->plexes == 1))
- && ((!isparity(plex))
- || (plex->sddowncount > 1))) {
- if (sd->state == sd_initializing) /* it's finished initializing */
- sd->state = sd_initialized;
- else
- return 0; /* can't do it */
- } else {
- sd->state = sd_reviving; /* put in reviving state */
- sd->revived = 0; /* nothing done yet */
- status = EAGAIN; /* need to repeat */
- }
- break;
-
- case sd_reviving:
- if (flags & setstate_force) /* insist, */
- break;
- return EAGAIN; /* no, try again */
-
- default: /* can't do it */
- /*
- * There's no way to bring subdisks up directly from
- * other states. First they need to be initialized
- * or revived.
- */
- return 0;
- }
- break;
-
- default: /* other ones, only internal with force */
- if ((flags & setstate_force) == 0) /* no force? What's this? */
- return 0; /* don't do it */
- }
- }
- if (status == 1) { /* we can do it, */
- sd->state = newstate;
- if (flags & setstate_force)
- log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state));
- else
- log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
- } else /* we don't get here with status 0 */
- log(LOG_INFO,
- "vinum: %s is %s, not %s\n",
- sd->name,
- sd_state(sd->state),
- sd_state(newstate));
- if (sd->plexno >= 0) /* we belong to a plex */
- update_plex_state(sd->plexno); /* update plex state */
- if ((flags & setstate_configuring) == 0) /* save config now */
- save_config();
- return status;
-}
-
-/*
- * Set the state of a plex dependent on its subdisks.
- * This time round, we'll let plex state just reflect
- * aggregate subdisk state, so this becomes an order of
- * magnitude less complicated. In particular, ignore
- * the requested state.
- */
-int
-set_plex_state(int plexno, enum plexstate state, enum setstateflags flags)
-{
- struct plex *plex; /* point to our plex */
- enum plexstate oldstate;
- enum volplexstate vps; /* how do we compare with the other plexes? */
-
- plex = &PLEX[plexno]; /* point to our plex */
- oldstate = plex->state;
-
- /* If the plex isn't allocated, we can't do it. */
- if (plex->state == plex_unallocated)
- return 0;
-
- /*
- * If it's already in the the state we want,
- * and it's not up, just return. If it's up,
- * we still need to do some housekeeping.
- */
- if ((state == oldstate)
- && (state != plex_up))
- return 1;
- vps = vpstate(plex); /* how do we compare with the other plexes? */
- switch (state) {
- /*
- * We can't bring the plex up, even by force,
- * unless it's ready. update_plex_state
- * checks that.
- */
- case plex_up: /* bring the plex up */
- update_plex_state(plex->plexno); /* it'll come up if it can */
- break;
-
- case plex_down: /* want to take it down */
- /*
- * If we're the only one, or the only one
- * which is up, we need force to do it.
- */
- if (((vps == volplex_onlyus)
- || (vps == volplex_onlyusup))
- && (!(flags & setstate_force)))
- return 0; /* can't do it */
- plex->state = state; /* do it */
- invalidate_subdisks(plex, sd_down); /* and down all up subdisks */
- break;
-
- /*
- * This is only requested internally.
- * Trust ourselves
- */
- case plex_faulty:
- plex->state = state; /* do it */
- invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */
- break;
-
- case plex_initializing:
- /* XXX consider what safeguards we need here */
- if ((flags & setstate_force) == 0)
- return 0;
- plex->state = state; /* do it */
- break;
-
- /* What's this? */
- default:
- return 0;
- }
- if (plex->state != oldstate) /* we've changed, */
- log(LOG_INFO, /* tell them about it */
- "vinum: %s is %s\n",
- plex->name,
- plex_state(plex->state));
- /*
- * Now see what we have left, and whether
- * we're taking the volume down
- */
- if (plex->volno >= 0) /* we have a volume */
- update_volume_state(plex->volno); /* update its state */
- if ((flags & setstate_configuring) == 0) /* save config now */
- save_config(); /* yes: save the updated configuration */
- return 1;
-}
-
-/* Update the state of a plex dependent on its plexes. */
-int
-set_volume_state(int volno, enum volumestate state, enum setstateflags flags)
-{
- struct volume *vol = &VOL[volno]; /* point to our volume */
-
- if (vol->state == volume_unallocated) /* no volume to do anything with, */
- return 0;
- if (vol->state == state) /* we're there already */
- return 1;
-
- if (state == volume_up) /* want to come up */
- update_volume_state(volno);
- else if (state == volume_down) { /* want to go down */
- if (((vol->flags & VF_OPEN) == 0) /* not open */
- ||((flags & setstate_force) != 0)) { /* or we're forcing */
- vol->state = volume_down;
- log(LOG_INFO,
- "vinum: volume %s is %s\n",
- vol->name,
- volume_state(vol->state));
- if ((flags & setstate_configuring) == 0) /* save config now */
- save_config(); /* yes: save the updated configuration */
- return 1;
- }
- }
- return 0; /* no change */
-}
-
-/* Set the state of a subdisk based on its environment */
-void
-update_sd_state(int sdno)
-{
- struct sd *sd;
- struct drive *drive;
- enum sdstate oldstate;
-
- sd = &SD[sdno];
- oldstate = sd->state;
- drive = &DRIVE[sd->driveno];
-
- if (drive->state == drive_up) {
- switch (sd->state) {
- case sd_down:
- case sd_crashed:
- sd->state = sd_reborn; /* back up again with no loss */
- break;
-
- default:
- break;
- }
- } else { /* down or worse */
- switch (sd->state) {
- case sd_up:
- case sd_reborn:
- case sd_reviving:
- case sd_empty:
- sd->state = sd_crashed; /* lost our drive */
- break;
-
- default:
- break;
- }
- }
- if (sd->state != oldstate) /* state has changed, */
- log(LOG_INFO, /* say so */
- "vinum: %s is %s\n",
- sd->name,
- sd_state(sd->state));
- if (sd->plexno >= 0) /* we're part of a plex, */
- update_plex_state(sd->plexno); /* update its state */
-}
-
-/*
- * Force a plex and all its subdisks
- * into an 'up' state. This is a helper
- * for update_plex_state.
- */
-void
-forceup(int plexno)
-{
- struct plex *plex;
- int sdno;
-
- plex = &PLEX[plexno]; /* point to the plex */
- plex->state = plex_up; /* and bring it up */
-
- /* change the subdisks to up state */
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- SD[plex->sdnos[sdno]].state = sd_up;
- log(LOG_INFO, /* tell them about it */
- "vinum: %s is up\n",
- SD[plex->sdnos[sdno]].name);
- }
-}
-
-/* Set the state of a plex based on its environment */
-void
-update_plex_state(int plexno)
-{
- struct plex *plex; /* point to our plex */
- enum plexstate oldstate;
- enum sdstates statemap; /* get a map of the subdisk states */
- enum volplexstate vps; /* how do we compare with the other plexes? */
-
- plex = &PLEX[plexno]; /* point to our plex */
- oldstate = plex->state;
- statemap = sdstatemap(plex); /* get a map of the subdisk states */
- vps = vpstate(plex); /* how do we compare with the other plexes? */
-
- if (statemap & sd_initstate) /* something initializing? */
- plex->state = plex_initializing; /* yup, that makes the plex the same */
- else if (statemap == sd_upstate)
- /*
- * All the subdisks are up. This also means that
- * they are consistent, so we can just bring
- * the plex up
- */
- plex->state = plex_up;
- else if (isparity(plex) /* RAID-4 or RAID-5 plex */
- &&(plex->sddowncount == 1)) /* and exactly one subdisk down */
- plex->state = plex_degraded; /* limping a bit */
- else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */
- ||((statemap & ~sd_downstate)
- == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) {
- if ((vps & volplex_otherup) == 0) { /* no other plex is up */
- struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */
-
- /*
- * If we're a striped or concat plex
- * associated with a volume, none of whose
- * plexes are up, and we're new and untested,
- * and the volume has the setupstate bit set,
- * we can pretend to be in a consistent state.
- *
- * We need to do this in one swell foop: on
- * the next call we will no longer be just
- * empty.
- *
- * This code assumes that all the other plexes
- * are also capable of coming up (i.e. all the
- * sds are up), but that's OK: we'll come back
- * to this function for the remaining plexes
- * in the volume.
- */
- if ((plex->state == plex_init)
- && (plex->volno >= 0)
- && (vol->flags & VF_CONFIG_SETUPSTATE)) {
- for (plexno = 0; plexno < vol->plexes; plexno++)
- forceup(VOL[plex->volno].plex[plexno]);
- } else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */
- ||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */
- ||(plex->organization == plex_striped))
- forceup(plexno); /* we'll do it */
- /*
- * This leaves a case where things don't get
- * done: the plex is RAID-4 or RAID-5, and
- * the subdisks are all empty. They need to
- * be initialized first.
- */
- } else {
- if (statemap == sd_upstate) /* all subdisks up */
- plex->state = plex_up; /* we can come up too */
- else
- plex->state = plex_faulty;
- }
- } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */
- plex->state = plex_flaky;
- else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */
- plex->state = plex_corrupt; /* corrupt */
- else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */
- plex->state = plex_initializing;
- else /* nothing at all up */
- plex->state = plex_faulty;
-
- if (plex->state != oldstate) /* state has changed, */
- log(LOG_INFO, /* tell them about it */
- "vinum: %s is %s\n",
- plex->name,
- plex_state(plex->state));
- if (plex->volno >= 0) /* we're part of a volume, */
- update_volume_state(plex->volno); /* update its state */
-}
-
-/* Set volume state based on its components */
-void
-update_volume_state(int volno)
-{
- struct volume *vol; /* our volume */
- int plexno;
- enum volumestate oldstate;
-
- vol = &VOL[volno]; /* point to our volume */
- oldstate = vol->state;
-
- for (plexno = 0; plexno < vol->plexes; plexno++) {
- struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */
- if (plex->state >= plex_corrupt) { /* something accessible, */
- vol->state = volume_up;
- break;
- }
- }
- if (plexno == vol->plexes) /* didn't find an up plex */
- vol->state = volume_down;
-
- if (vol->state != oldstate) { /* state changed */
- log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state));
- save_config(); /* save the updated configuration */
- }
-}
-
-/*
- * Called from request routines when they find
- * a subdisk which is not kosher. Decide whether
- * it warrants changing the state. Return
- * REQUEST_DOWN if we can't use the subdisk,
- * REQUEST_OK if we can.
- */
-/*
- * A prior version of this function checked the plex
- * state as well. At the moment, consider plex states
- * information for the user only. We'll ignore them
- * and use the subdisk state only. The last version of
- * this file with the old logic was 2.7. XXX
- */
-enum requeststatus
-checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend)
-{
- struct plex *plex = &PLEX[sd->plexno];
- int writeop = (rq->bp->b_iocmd == BIO_WRITE); /* note if we're writing */
-
- switch (sd->state) {
- /* We shouldn't get called if the subdisk is up */
- case sd_up:
- return REQUEST_OK;
-
- case sd_reviving:
- /*
- * Access to a reviving subdisk depends on the
- * organization of the plex:
- *
- * - If it's concatenated, access the subdisk
- * up to its current revive point. If we
- * want to write to the subdisk overlapping
- * the current revive block, set the
- * conflict flag in the request, asking the
- * caller to put the request on the wait
- * list, which will be attended to by
- * revive_block when it's done.
- * - if it's striped, we can't do it (we could
- * do some hairy calculations, but it's
- * unlikely to work).
- * - if it's RAID-4 or RAID-5, we can do it as
- * long as only one subdisk is down
- */
- if (plex->organization == plex_striped) /* plex is striped, */
- return REQUEST_DOWN;
- else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */
- if (plex->sddowncount > 1) /* with more than one sd down, */
- return REQUEST_DOWN;
- else
- /*
- * XXX We shouldn't do this if we can find a
- * better way. Check the other plexes
- * first, and return a DOWN if another
- * plex will do it better
- */
- return REQUEST_OK; /* OK, we'll find a way */
- }
- if (diskaddr > (sd->revived
- + sd->plexoffset
- + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */
- return REQUEST_DOWN;
- else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */
- if (writeop) {
- rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */
- rq->sdno = sd->sdno; /* and which sd last caused it */
- } else
- return REQUEST_DOWN;
- }
- return REQUEST_OK;
-
- case sd_reborn:
- if (writeop)
- return REQUEST_OK; /* always write to a reborn disk */
- else /* don't allow a read */
- /*
- * Handle the mapping. We don't want to reject
- * a read request to a reborn subdisk if that's
- * all we have. XXX
- */
- return REQUEST_DOWN;
-
- case sd_down:
- if (writeop) /* writing to a consistent down disk */
- set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */
- return REQUEST_DOWN;
-
- case sd_crashed:
- if (writeop) /* writing to a consistent down disk */
- set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */
- return REQUEST_DOWN;
-
- default:
- return REQUEST_DOWN;
- }
-}
-
-/* return a state map for the subdisks of a plex */
-enum sdstates
-sdstatemap(struct plex *plex)
-{
- int sdno;
- enum sdstates statemap = 0; /* note the states we find */
-
- plex->sddowncount = 0; /* no subdisks down yet */
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */
-
- switch (sd->state) {
- case sd_empty:
- statemap |= sd_emptystate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_init:
- statemap |= sd_initstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_down:
- statemap |= sd_downstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_crashed:
- statemap |= sd_crashedstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_obsolete:
- statemap |= sd_obsoletestate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_stale:
- statemap |= sd_stalestate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_reborn:
- statemap |= sd_rebornstate;
- break;
-
- case sd_up:
- statemap |= sd_upstate;
- break;
-
- case sd_initializing:
- statemap |= sd_initstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_initialized:
- statemap |= sd_initializedstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- break;
-
- case sd_unallocated:
- case sd_uninit:
- case sd_reviving:
- case sd_referenced:
- statemap |= sd_otherstate;
- (plex->sddowncount)++; /* another unusable subdisk */
- }
- }
- return statemap;
-}
-
-/* determine the state of the volume relative to this plex */
-enum volplexstate
-vpstate(struct plex *plex)
-{
- struct volume *vol;
- enum volplexstate state = volplex_onlyusdown; /* state to return */
- int plexno;
-
- if (plex->volno < 0) { /* not associated with a volume */
- if (plex->state > plex_degraded)
- return volplex_onlyus; /* just us */
- else
- return volplex_onlyusdown; /* assume the worst */
- }
- vol = &VOL[plex->volno]; /* point to our volume */
- for (plexno = 0; plexno < vol->plexes; plexno++) {
- if (&PLEX[vol->plex[plexno]] == plex) { /* us */
- if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? */
- state |= volplex_onlyus; /* yes */
- } else {
- if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */
- state |= volplex_otherup; /* and when they were up, they were up */
- else
- state |= volplex_alldown; /* and when they were down, they were down */
- }
- }
- return state; /* and when they were only halfway up */
-} /* they were neither up nor down */
-
-/* Check if all bits b are set in a */
-int allset(int a, int b);
-
-int
-allset(int a, int b)
-{
- return (a & b) == b;
-}
-
-/* Invalidate the subdisks belonging to a plex */
-void
-invalidate_subdisks(struct plex *plex, enum sdstate state)
-{
- int sdno;
-
- for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
- struct sd *sd = &SD[plex->sdnos[sdno]];
-
- switch (sd->state) {
- case sd_unallocated:
- case sd_uninit:
- case sd_init:
- case sd_initializing:
- case sd_initialized:
- case sd_empty:
- case sd_obsolete:
- case sd_stale:
- case sd_crashed:
- case sd_down:
- case sd_referenced:
- break;
-
- case sd_reviving:
- case sd_reborn:
- case sd_up:
- set_sd_state(plex->sdnos[sdno], state, setstate_force);
- }
- }
-}
-
-/*
- * Start an object, in other words do what we can to get it up.
- * This is called from vinumioctl (VINUMSTART).
- * Return error indications via ioctl_reply
- */
-void
-start_object(struct vinum_ioctl_msg *data)
-{
- int status;
- int objindex = data->index; /* data gets overwritten */
- struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
- enum setstateflags flags;
-
- if (data->force != 0) /* are we going to use force? */
- flags = setstate_force; /* yes */
- else
- flags = setstate_none; /* no */
-
- switch (data->type) {
- case drive_object:
- status = set_drive_state(objindex, drive_up, flags);
- if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */
- ioctl_reply->error = EBUSY;
- else
- ioctl_reply->error = 0;
- break;
-
- case sd_object:
- if (DRIVE[SD[objindex].driveno].state != drive_up) {
- ioctl_reply->error = EIO;
- strcpy(ioctl_reply->msg, "Drive is down");
- return;
- }
- if (data->blocksize)
- SD[objindex].revive_blocksize = data->blocksize;
- if ((SD[objindex].state == sd_reviving) /* reviving, */
- ||(SD[objindex].state == sd_stale)) { /* or stale, will revive */
- SD[objindex].state = sd_reviving; /* make sure we're reviving */
- ioctl_reply->error = revive_block(objindex); /* revive another block */
- ioctl_reply->msg[0] = '\0'; /* no comment */
- return;
- } else if (SD[objindex].state == sd_initializing) { /* initializing, */
- if (data->blocksize)
- SD[objindex].init_blocksize = data->blocksize;
- ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */
- ioctl_reply->msg[0] = '\0'; /* no comment */
- return;
- }
- status = set_sd_state(objindex, sd_up, flags); /* set state */
- if (status != EAGAIN) { /* not first revive or initialize, */
- if (SD[objindex].state != sd_up) /* set status on whether we really did it */
- ioctl_reply->error = EBUSY;
- else
- ioctl_reply->error = 0;
- } else
- ioctl_reply->error = status;
- break;
-
- case plex_object:
- status = set_plex_state(objindex, plex_up, flags);
- if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */
- ioctl_reply->error = EBUSY;
- else
- ioctl_reply->error = 0;
- break;
-
- case volume_object:
- status = set_volume_state(objindex, volume_up, flags);
- if (VOL[objindex].state != volume_up) /* set status on whether we really did it */
- ioctl_reply->error = EBUSY;
- else
- ioctl_reply->error = 0;
- break;
-
- default:
- ioctl_reply->error = EINVAL;
- strcpy(ioctl_reply->msg, "Invalid object type");
- return;
- }
- /*
- * There's no point in saying anything here:
- * the userland program does it better
- */
- ioctl_reply->msg[0] = '\0';
-}
-
-/*
- * Stop an object, in other words do what we can to get it down
- * This is called from vinumioctl (VINUMSTOP).
- * Return error indications via ioctl_reply.
- */
-void
-stop_object(struct vinum_ioctl_msg *data)
-{
- int status = 1;
- int objindex = data->index; /* save the number from change */
- struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */
-
- switch (data->type) {
- case drive_object:
- status = set_drive_state(objindex, drive_down, data->force);
- break;
-
- case sd_object:
- status = set_sd_state(objindex, sd_down, data->force);
- break;
-
- case plex_object:
- status = set_plex_state(objindex, plex_down, data->force);
- break;
-
- case volume_object:
- status = set_volume_state(objindex, volume_down, data->force);
- break;
-
- default:
- ioctl_reply->error = EINVAL;
- strcpy(ioctl_reply->msg, "Invalid object type");
- return;
- }
- ioctl_reply->msg[0] = '\0';
- if (status == 0) /* couldn't do it */
- ioctl_reply->error = EBUSY;
- else
- ioctl_reply->error = 0;
-}
-
-/*
- * VINUM_SETSTATE ioctl: set an object state.
- * msg is the message passed by the user.
- */
-void
-setstate(struct vinum_ioctl_msg *msg)
-{
- int sdno;
- struct sd *sd;
- struct plex *plex;
- struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
-
- switch (msg->state) {
- case object_down:
- stop_object(msg);
- break;
-
- case object_initializing:
- switch (msg->type) {
- case sd_object:
- sd = &SD[msg->index];
- if ((msg->index >= vinum_conf.subdisks_allocated)
- || (sd->state <= sd_referenced)) {
- sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
- ioctl_reply->error = EFAULT;
- return;
- }
- set_sd_state(msg->index, sd_initializing, msg->force);
- if (sd->state != sd_initializing) {
- strcpy(ioctl_reply->msg, "Can't set state");
- ioctl_reply->error = EBUSY;
- } else
- ioctl_reply->error = 0;
- break;
-
- case plex_object:
- plex = &PLEX[msg->index];
- if ((msg->index >= vinum_conf.plexes_allocated)
- || (plex->state <= plex_unallocated)) {
- sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index);
- ioctl_reply->error = EFAULT;
- return;
- }
- set_plex_state(msg->index, plex_initializing, msg->force);
- if (plex->state != plex_initializing) {
- strcpy(ioctl_reply->msg, "Can't set state");
- ioctl_reply->error = EBUSY;
- } else {
- ioctl_reply->error = 0;
- for (sdno = 0; sdno < plex->subdisks; sdno++) {
- sd = &SD[plex->sdnos[sdno]];
- set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force);
- if (sd->state != sd_initializing) {
- strcpy(ioctl_reply->msg, "Can't set state");
- ioctl_reply->error = EBUSY;
- break;
- }
- }
- }
- break;
-
- default:
- strcpy(ioctl_reply->msg, "Invalid object");
- ioctl_reply->error = EINVAL;
- }
- break;
-
- case object_initialized:
- if (msg->type == sd_object) {
- sd = &SD[msg->index];
- if ((msg->index >= vinum_conf.subdisks_allocated)
- || (sd->state <= sd_referenced)) {
- sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index);
- ioctl_reply->error = EFAULT;
- return;
- }
- set_sd_state(msg->index, sd_initialized, msg->force);
- if (sd->state != sd_initializing) {
- strcpy(ioctl_reply->msg, "Can't set state");
- ioctl_reply->error = EBUSY;
- } else
- ioctl_reply->error = 0;
- } else {
- strcpy(ioctl_reply->msg, "Invalid object");
- ioctl_reply->error = EINVAL;
- }
- break;
-
- case object_up:
- start_object(msg);
- }
-}
-
-/*
- * Brute force set state function. Don't look at
- * any dependencies, just do it. This is mainly
- * intended for testing and recovery.
- */
-void
-setstate_by_force(struct vinum_ioctl_msg *msg)
-{
- struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */
-
- switch (msg->type) {
- case drive_object:
- DRIVE[msg->index].state = msg->state;
- break;
-
- case sd_object:
- SD[msg->index].state = msg->state;
- break;
-
- case plex_object:
- PLEX[msg->index].state = msg->state;
- break;
-
- case volume_object:
- VOL[msg->index].state = msg->state;
- break;
-
- default:
- break;
- }
- ioctl_reply->error = 0;
-}
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumstate.h b/sys/dev/vinum/vinumstate.h
deleted file mode 100644
index 572f317..0000000
--- a/sys/dev/vinum/vinumstate.h
+++ /dev/null
@@ -1,257 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $FreeBSD$
- */
-
-/*
- * This file gets read by makestatetext to create text files
- * with the names of the states, so don't change the file
- * format
- */
-
-enum volumestate {
- volume_unallocated,
- /* present but unused. Must be 0 */
-
- volume_uninit,
- /* mentioned elsewhere but not known to the configuration */
-
- volume_down,
-
- /* The volume is up and functional, but not all plexes may be available */
- volume_up,
- volume_laststate = volume_up /* last value, for table dimensions */
-};
-
-enum plexstate {
- /* An empty entry, not a plex at all. */
- plex_unallocated,
-
- /* The plex has been referenced by a volume */
- plex_referenced,
- /*
- * The plex has been allocated, but there configuration
- * is not complete
- */
- plex_init,
-
- /*
- * A plex which has gone completely down because of
- * I/O errors.
- */
- plex_faulty,
-
- /*
- * A plex which has been taken down by the
- * administrator.
- */
- plex_down,
-
- /* A plex which is being initialized */
- plex_initializing,
-
- /*
- * *** The remaining states represent plexes which are
- * at least partially up. Keep these separate so that
- * they can be checked more easily.
- */
-
- /*
- * A plex entry which is at least partially up. Not
- * all subdisks are available, and an inconsistency
- * has occurred. If no other plex is uncorrupted,
- * the volume is no longer consistent.
- */
- plex_corrupt,
-
- plex_firstup = plex_corrupt, /* first "up" state */
-
- /*
- * A RAID-5 plex entry which is accessible, but one
- * subdisk is down, requiring recovery for many
- * I/O requests.
- */
- plex_degraded,
-
- /*
- * A plex which is really up, but which has a reborn
- * subdisk which we don't completely trust, and
- * which we don't want to read if we can avoid it
- */
- plex_flaky,
-
- /*
- * A plex entry which is completely up. All subdisks
- * are up.
- */
- plex_up,
-
- plex_laststate = plex_up /* last value, for table dimensions */
-};
-
-/* subdisk states */
-enum sdstate {
- /* An empty entry, not a subdisk at all. */
- sd_unallocated,
-
- /*
- * A subdisk entry which has not been created
- * completely. Some fields may be empty.
- */
- sd_uninit,
-
- /* The subdisk has been referenced by a plex */
- sd_referenced,
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, but the disk hasn't
- * been updated.
- */
- sd_init,
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, and the disk has been
- * updated, but there is no data on the disk.
- */
- sd_empty,
-
- /*
- * A subdisk entry which has been created completely and
- * which is currently being initialized
- */
- sd_initializing,
-
- /*
- * A subdisk entry which has been initialized,
- * but which can't come up because it would
- * cause inconsistencies.
- */
- sd_initialized,
-
- /* *** The following states represent invalid data */
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, the config on disk has been
- * updated, and the data was valid, but since then the
- * drive has been taken down, and as a result updates
- * have been missed.
- */
- sd_obsolete,
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, the disk has been updated,
- * and the data was valid, but since then the drive
- * has been crashed and updates have been lost.
- */
- sd_stale,
-
- /* *** The following states represent valid, inaccessible data */
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, the disk has been updated,
- * and the data was valid, but since then the drive
- * has gone down. No attempt has been made to write
- * to the subdisk since the crash, so the data is valid.
- */
- sd_crashed,
-
- /*
- * A subdisk entry which was up, which contained
- * valid data, and which was taken down by the
- * administrator. The data is valid.
- */
- sd_down,
-
- /*
- * *** This is invalid data (the subdisk previously had
- * a numerically lower state), but it is currently in the
- * process of being revived. We can write but not read.
- */
- sd_reviving,
-
- /*
- * *** The following states represent accessible subdisks
- * with valid data
- */
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, the disk has been updated,
- * and the data was valid, but since then the drive
- * has gone down and up again. No updates were lost,
- * but it is possible that the subdisk has been
- * damaged. We won't read from this subdisk if we
- * have a choice. If this is the only subdisk which
- * covers this address space in the plex, we set its
- * state to sd_up under these circumstances, so this
- * status implies that there is another subdisk to
- * fulfil the request.
- */
- sd_reborn,
-
- /*
- * A subdisk entry which has been created completely.
- * All fields are correct, the disk has been updated,
- * and the data is valid.
- */
- sd_up,
-
- sd_laststate = sd_up /* last value, for table dimensions */
-};
-
-enum drivestate {
- drive_unallocated,
- /* present but unused. Must be 0 */
-
- drive_referenced,
- /* just mentioned in some other config entry */
-
- drive_down,
- /* not accessible */
-
- drive_up,
- /* up and running */
-
- drive_laststate = drive_up /* last value, for table dimensions */
-};
-
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
diff --git a/sys/dev/vinum/vinumutil.c b/sys/dev/vinum/vinumutil.c
deleted file mode 100644
index f63bbd7..0000000
--- a/sys/dev/vinum/vinumutil.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumutil.c,v 1.17 2003/04/28 02:54:43 grog Exp $
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/* This file contains utility routines used both in kernel and user context */
-
-#include <dev/vinum/vinumhdr.h>
-#include <dev/vinum/statetexts.h>
-#ifndef _KERNEL
-#include <stdio.h>
-#include <string.h>
-extern jmp_buf command_fail; /* return on a failed command */
-#endif
-
-static char numeric_state[32]; /* temporary buffer for ASCII conversions */
-#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
-/* Return drive state as a string */
-char *
-drive_state(enum drivestate state)
-{
- if (((unsigned) state) >= STATECOUNT(drive)) {
- sprintf(numeric_state, "Invalid state %d", (int) state);
- return numeric_state;
- } else
- return drivestatetext[state];
-}
-
-/* Return volume state as a string */
-char *
-volume_state(enum volumestate state)
-{
- if (((unsigned) state) >= STATECOUNT(vol)) {
- sprintf(numeric_state, "Invalid state %d", (int) state);
- return numeric_state;
- } else
- return volstatetext[state];
-}
-
-/* Return plex state as a string */
-char *
-plex_state(enum plexstate state)
-{
- if (((unsigned) state) >= STATECOUNT(plex)) {
- sprintf(numeric_state, "Invalid state %d", (int) state);
- return numeric_state;
- } else
- return plexstatetext[state];
-}
-
-/* Return plex organization as a string */
-char *
-plex_org(enum plexorg org)
-{
- switch (org) {
- case plex_disorg: /* disorganized */
- return "disorg";
- break;
-
- case plex_concat: /* concatenated plex */
- return "concat";
- break;
-
- case plex_striped: /* striped plex */
- return "striped";
- break;
-
- case plex_raid4: /* RAID-4 plex */
- return "raid4";
-
- case plex_raid5: /* RAID-5 plex */
- return "raid5";
- break;
-
- default:
- sprintf(numeric_state, "Invalid org %d", (int) org);
- return numeric_state;
- }
-}
-
-/* Return sd state as a string */
-char *
-sd_state(enum sdstate state)
-{
- if (((unsigned) state) >= STATECOUNT(sd)) {
- sprintf(numeric_state, "Invalid state %d", (int) state);
- return numeric_state;
- } else
- return sdstatetext[state];
-}
-
-/* Now convert in the other direction */
-/*
- * These are currently used only internally,
- * so we don't do too much error checking
- */
-enum drivestate
-DriveState(char *text)
-{
- int i;
- for (i = 0; i < STATECOUNT(drive); i++)
- if (strcmp(text, drivestatetext[i]) == 0) /* found it */
- return (enum drivestate) i;
- return -1;
-}
-
-enum sdstate
-SdState(char *text)
-{
- int i;
- for (i = 0; i < STATECOUNT(sd); i++)
- if (strcmp(text, sdstatetext[i]) == 0) /* found it */
- return (enum sdstate) i;
- return -1;
-}
-
-enum plexstate
-PlexState(char *text)
-{
- int i;
- for (i = 0; i < STATECOUNT(plex); i++)
- if (strcmp(text, plexstatetext[i]) == 0) /* found it */
- return (enum plexstate) i;
- return -1;
-}
-
-enum volumestate
-VolState(char *text)
-{
- int i;
- for (i = 0; i < STATECOUNT(vol); i++)
- if (strcmp(text, volstatetext[i]) == 0) /* found it */
- return (enum volumestate) i;
- return -1;
-}
-
-/*
- * Take a number with an optional scale factor and convert
- * it to a number of bytes.
- *
- * The scale factors are:
- *
- * s sectors (of 512 bytes)
- * b blocks (of 512 bytes). This unit is deprecated,
- * because it's confusing, but maintained to avoid
- * confusing Veritas users.
- * k kilobytes (1024 bytes)
- * m megabytes (of 1024 * 1024 bytes)
- * g gigabytes (of 1024 * 1024 * 1024 bytes)
- */
-u_int64_t
-sizespec(char *spec)
-{
- u_int64_t size;
- char *s;
- int sign = 1; /* -1 if negative */
-
- size = 0;
- if (spec != NULL) { /* we have a parameter */
- s = spec;
- if (*s == '-') { /* negative, */
- sign = -1;
- s++; /* skip */
- }
- if ((*s >= '0') && (*s <= '9')) { /* it's numeric */
- while ((*s >= '0') && (*s <= '9')) /* it's numeric */
- size = size * 10 + *s++ - '0'; /* convert it */
- switch (*s) {
- case '\0':
- return size * sign;
-
- case 'B':
- case 'b':
- case 'S':
- case 's':
- return size * sign * 512;
-
- case 'K':
- case 'k':
- return size * sign * 1024;
-
- case 'M':
- case 'm':
- return size * sign * 1024 * 1024;
-
- case 'G':
- case 'g':
- return size * sign * 1024 * 1024 * 1024;
- }
- }
-#ifdef _KERNEL
- throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
-#else
- fprintf(stderr, "Invalid length specification: %s", spec);
- longjmp(command_fail, 1);
-#endif
- }
-#ifdef _KERNEL
- throw_rude_remark(EINVAL, "Missing length specification");
-#else
- fprintf(stderr, "Missing length specification");
- longjmp(command_fail, 1);
-#endif
- /* NOTREACHED */
- return -1;
-}
-
-#ifdef _KERNEL
-#define FOOTYPE struct cdev *
-#else
-#define FOOTYPE dev_t
-#endif
-/*
- * Extract the volume number from a device number. Check that it's
- * the correct type, and that it isn't one of the superdevs.
- */
-int
-Volno(FOOTYPE dev)
-{
- int volno = minor(dev);
-
- if (OBJTYPE(dev) != VINUM_VOLUME_TYPE)
- return -1;
- else
- volno = ((volno & 0x3fff0000) >> 8) | (volno & 0xff);
- if ((volno == VINUM_SUPERDEV_VOL)
- || (volno == VINUM_DAEMON_VOL))
- return -1;
- else
- return volno;
-}
-
-/*
- * Extract a plex number from a device number.
- * Don't check the major number, but check the
- * type. Return -1 for invalid types.
- */
-int
-Plexno(FOOTYPE dev)
-{
- int plexno = minor(dev);
-
- if (OBJTYPE(dev) != VINUM_PLEX_TYPE)
- return -1;
- else
- return ((plexno & 0x3fff0000) >> 8) | (plexno & 0xff);
-}
-
-/*
- * Extract a subdisk number from a device number.
- * Don't check the major number, but check the
- * type. Return -1 for invalid types.
- */
-int
-Sdno(FOOTYPE dev)
-{
- int sdno = minor(dev);
-
- /*
- * Care: VINUM_SD_TYPE is 2 or 3, which is why we use < instead of
- * !=. It's not clear that this makes any sense abstracting it to
- * this level.
- */
- if (OBJTYPE(dev) < VINUM_SD_TYPE)
- return -1;
- else
-/*
- * Note that the number we return includes the low-order bit of the
- * type field. This gives us twice as many potential subdisks as
- * plexes or volumes.
- */
- return ((sdno & 0x7fff0000) >> 8) | (sdno & 0xff);
-}
diff --git a/sys/dev/vinum/vinumutil.h b/sys/dev/vinum/vinumutil.h
deleted file mode 100644
index 2efa42c..0000000
--- a/sys/dev/vinum/vinumutil.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumutil.h,v 1.1 2001/05/22 04:07:22 grog Exp grog $
- * $FreeBSD$
- */
-
-/*
- * Functions defined in vinumutil.c, which is used both in userland
- * and in the kernel.
- */
-char *drive_state(enum drivestate);
-char *volume_state(enum volumestate);
-char *plex_state(enum plexstate);
-char *plex_org(enum plexorg);
-char *sd_state(enum sdstate);
-enum drivestate DriveState(char *text);
-enum sdstate SdState(char *text);
-enum plexstate PlexState(char *text);
-enum volumestate VolState(char *text);
diff --git a/sys/dev/vinum/vinumvar.h b/sys/dev/vinum/vinumvar.h
deleted file mode 100644
index 8e7edd8..0000000
--- a/sys/dev/vinum/vinumvar.h
+++ /dev/null
@@ -1,395 +0,0 @@
-/*-
- * Copyright (c) 1997, 1998, 1999
- * Nan Yang Computer Services Limited. All rights reserved.
- *
- * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
- *
- * Written by Greg Lehey
- *
- * This software is distributed under the so-called ``Berkeley
- * License'':
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Nan Yang Computer
- * Services Limited.
- * 4. Neither the name of the Company nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * This software is provided ``as is'', and any express or implied
- * warranties, including, but not limited to, the implied warranties of
- * merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall the company or contributors be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential
- * damages (including, but not limited to, procurement of substitute
- * goods or services; loss of use, data, or profits; or business
- * interruption) however caused and on any theory of liability, whether
- * in contract, strict liability, or tort (including negligence or
- * otherwise) arising in any way out of the use of this software, even if
- * advised of the possibility of such damage.
- *
- * $Id: vinumvar.h,v 1.33 2003/05/23 01:09:23 grog Exp grog $
- * $FreeBSD$
- */
-
-#include <sys/time.h>
-#include <dev/vinum/vinumstate.h>
-#include <sys/mutex.h>
-
-/* Directory for device nodes. */
-#define VINUM_DIR "/dev/vinum"
-
-/*
- * Some configuration maxima. They're an enum because
- * we can't define global constants. Sorry about that.
- *
- * These aren't as bad as they look: most of them are soft limits.
- */
-
-#define VINUMROOT
-enum constants {
- /*
- * Current version of the data structures. This
- * is used to ensure synchronization between
- * kernel module and userland vinum(8).
- */
- VINUMVERSION = 1,
- VINUM_HEADER = 512, /* size of header on disk */
- MAXCONFIGLINE = 1024, /* maximum size of a single config line */
- MINVINUMSLICE = 1048576, /* minimum size of a slice */
-
- ROUND_ROBIN_READPOL = -1, /* round robin read policy */
-
- /*
- * Type field in high-order two bits of minor
- * number. Subdisks are in fact both type 2 and
- * type 3, giving twice the number of subdisks.
- * This causes some ugliness in the code.
- */
- VINUM_VOLUME_TYPE = 0,
- VINUM_PLEX_TYPE = 1,
- VINUM_SD_TYPE = 2,
- VINUM_SD2_TYPE = 3,
-
-
- /*
- * Define a minor device number.
- * This is not used directly; instead, it's
- * called by the other macros.
- */
-#define VINUMMINOR(o,t) ((o & 0xff) | ((o & 0x3fff00) << 8) | (t << VINUM_TYPE_SHIFT))
-
- VINUM_TYPE_SHIFT = 30,
- VINUM_MAXVOL = 0x3ffffd, /* highest numbered volume */
-
- /*
- * The super device and the daemon device are
- * magic: they're the two highest-numbered
- * volumes.
- */
- VINUM_SUPERDEV_VOL = 0x3ffffe,
- VINUM_DAEMON_VOL = 0x3fffff,
- VINUM_MAXPLEX = 0x3fffff,
- VINUM_MAXSD = 0x7fffff,
-
-#define VINUM_SUPERDEV_MINOR VINUMMINOR (VINUM_SUPERDEV_VOL, VINUM_VOLUME_TYPE)
-#define VINUM_DAEMON_MINOR VINUMMINOR (VINUM_DAEMON_VOL, VINUM_VOLUME_TYPE)
-
- /*
- * Mask for the number part of each object.
- * Plexes and volumes are the same, subdisks use
- * the low-order bit of the type field and thus
- * have twice the number.
- */
-
- MAJORDEV_SHIFT = 8,
-
- MAXPLEX = 8, /* maximum number of plexes in a volume */
- MAXSD = 256, /* maximum number of subdisks in a plex */
- MAXDRIVENAME = 32, /* maximum length of a device name */
- MAXSDNAME = 64, /* maximum length of a subdisk name */
- MAXPLEXNAME = 64, /* maximum length of a plex name */
- MAXVOLNAME = 64, /* maximum length of a volume name */
- MAXNAME = 64, /* maximum length of any name */
-
-
-#define OBJTYPE(x) ((minor(x) >> VINUM_TYPE_SHIFT) & 3)
-
- /* extract device type */
-#define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 3)
-
-#define VINUM_SUPERDEV_NAME VINUM_DIR"/control" /* normal super device */
-#define VINUM_DAEMON_DEV_NAME VINUM_DIR"/controld" /* super device for daemon only */
-
- /*
- * the number of object entries to cater for initially, and also the
- * value by which they are incremented. It doesn't take long
- * to extend them, so theoretically we could start with 1 of each, but
- * it's untidy to allocate such small areas. These values are
- * probably too small.
- */
-
- INITIAL_DRIVES = 4,
- INITIAL_VOLUMES = 4,
- INITIAL_PLEXES = 8,
- INITIAL_SUBDISKS = 16,
- INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
- INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
- INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
- PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
- PLEX_LOCKS = 256, /* number of locks to allocate to a plex */
- PLEXMUTEXES = 32,
- MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
- DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */
- VINUMHOSTNAMELEN = 32, /* host name field in label */
-};
-
-/*
- * Slice header
- *
- * Vinum drives start with this structure:
- *
- *\ Sector
- * |--------------------------------------|
- * | PDP-11 memorial boot block | 0
- * |--------------------------------------|
- * | Disk label, maybe | 1
- * |--------------------------------------|
- * | Slice definition (vinum_hdr) | 8
- * |--------------------------------------|
- * | |
- * | Configuration info, first copy | 9
- * | |
- * |--------------------------------------|
- * | |
- * | Configuration info, second copy | 9 + size of config
- * | |
- * |--------------------------------------|
- */
-
-/* Sizes and offsets of our information */
-enum {
- VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
- VINUMHEADERLEN = 512, /* size of vinum label */
- VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
- MAXCONFIG = 65536, /* and size of config copy */
- DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
-};
-
-/*
- * hostname is 256 bytes long, but we don't need to shlep
- * multiple copies in vinum. We use the host name just
- * to identify this system, and 32 bytes should be ample
- * for that purpose
- */
-
-struct vinum_label {
- char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
- char name[MAXDRIVENAME]; /* our name of the drive */
- struct timeval date_of_birth; /* the time it was created */
- struct timeval last_update; /* and the time of last update */
- /*
- * total size in bytes of the drive. This value
- * includes the headers.
- */
- off_t drive_size;
-};
-
-struct vinum_hdr {
- uint64_t magic; /* we're long on magic numbers */
-#define VINUM_MAGIC 22322600044678729LL /* should be this */
-#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
- /*
- * Size in bytes of each copy of the
- * configuration info. This must be a multiple
- * of the sector size.
- */
- int config_length;
- struct vinum_label label; /* unique label */
-};
-
-/* Information returned from read_drive_label */
-enum drive_label_info {
- DL_CANT_OPEN, /* invalid partition */
- DL_NOT_OURS, /* valid partition, but no vinum label */
- DL_DELETED_LABEL, /* valid partition, deleted label found */
- DL_WRONG_DRIVE, /* drive name doesn't match */
- DL_OURS /* valid partition and label found */
-};
-
-/* kinds of plex organization */
-enum plexorg {
- plex_disorg, /* disorganized */
- plex_concat, /* concatenated plex */
- plex_striped, /* striped plex */
- plex_raid4, /* RAID4 plex */
- plex_raid5 /* RAID5 plex */
-};
-
-/* Recognize plex organizations */
-#define isstriped(p) (p->organization >= plex_striped) /* RAID 1, 4 or 5 */
-#define isparity(p) (p->organization >= plex_raid4) /* RAID 4 or 5 */
-
-/* Address range definitions, for locking volumes */
-struct rangelock {
- daddr_t stripe; /* address + 1 of the range being locked */
- struct buf *bp; /* user's buffer pointer */
-};
-
-struct drive_freelist { /* sorted list of free space on drive */
- u_int64_t offset; /* offset of entry */
- u_int64_t sectors; /* and length in sectors */
-};
-
-/*
- * Include the structure definitions shared
- * between userland and kernel.
- */
-
-#ifdef _KERNEL
-#include <dev/vinum/vinumobj.h>
-#undef _KERNEL
-#include <dev/vinum/vinumobj.h>
-#define _KERNEL
-#else
-#include <dev/vinum/vinumobj.h>
-#endif
-
-/*
- * Table expansion. Expand table, which contains oldcount
- * entries of type element, by increment entries, and change
- * oldcount accordingly
- */
-#ifdef VINUMDEBUG
-#define EXPAND(table, element, oldcount, increment) \
-{ \
- expand_table ((void **) &table, \
- oldcount * sizeof (element), \
- (oldcount + increment) * sizeof (element), \
- __FILE__, \
- __LINE__ ); \
- oldcount += increment; \
- }
-#else
-#define EXPAND(table, element, oldcount, increment) \
-{ \
- expand_table ((void **) &table, \
- oldcount * sizeof (element), \
- (oldcount + increment) * sizeof (element)); \
- oldcount += increment; \
- }
-#endif
-
-/* Information on vinum's memory usage */
-struct meminfo {
- int mallocs; /* number of malloced blocks */
- int total_malloced; /* total amount malloced */
- int highwater; /* maximum number of mallocs */
- struct mc *malloced; /* pointer to kernel table */
-};
-
-#define MCFILENAMELEN 16
-struct mc {
- struct timeval time;
- int seq;
- int size;
- short line;
- caddr_t address;
- char file[MCFILENAMELEN];
-};
-
-/*
- * These enums are used by the state transition
- * routines. They're in bit map format:
- *
- * Bit 0: Other plexes in the volume are down
- * Bit 1: Other plexes in the volume are up
- * Bit 2: The current plex is up
- * Maybe they should be local to
- * state.c
- */
-enum volplexstate {
- volplex_onlyusdown = 0, /* 0: we're the only plex, and we're down */
- volplex_alldown, /* 1: another plex is down, and so are we */
- volplex_otherup, /* 2: another plex is up */
- volplex_otherupdown, /* 3: other plexes are up and down */
- volplex_onlyus, /* 4: we're up and alone */
- volplex_onlyusup, /* 5: only we are up, others are down */
- volplex_allup, /* 6: all plexes are up */
- volplex_someup /* 7: some plexes are up, including us */
-};
-
-/* state map for plex */
-enum sdstates {
- sd_emptystate = 1,
- sd_downstate = 2, /* SD is down */
- sd_crashedstate = 4, /* SD is crashed */
- sd_obsoletestate = 8, /* SD is obsolete */
- sd_stalestate = 16, /* SD is stale */
- sd_rebornstate = 32, /* SD is reborn */
- sd_upstate = 64, /* SD is up */
- sd_initstate = 128, /* SD is initializing */
- sd_initializedstate = 256, /* SD is initialized */
- sd_otherstate = 512, /* SD is in some other state */
-};
-
-/*
- * This is really just a parameter to pass to
- * set_<foo>_state, but since it needs to be known
- * in the external definitions, we need to define
- * it here
- */
-enum setstateflags {
- setstate_none = 0, /* no flags */
- setstate_force = 1, /* force the state change */
- setstate_configuring = 2, /* we're currently configuring, don't save */
-};
-
-/* Operations for parityops to perform. */
-enum parityop {
- checkparity,
- rebuildparity,
- rebuildandcheckparity, /* rebuildparity with the -v option */
-};
-
-/*
- * When doing round-robin reads from a multi-plex volume, switch to the
- * next plex if the difference of the last read sector and the next sector
- * to be read is this many sectors.
- */
-#define ROUNDROBIN_SWITCH 128 /* 64k */
-
-#ifdef VINUMDEBUG
-/* Debugging stuff */
-enum debugflags {
- DEBUG_ADDRESSES = 1, /* show buffer information during requests */
- DEBUG_NUMOUTPUT = 2, /* show the value of vp->v_numoutput */
- DEBUG_RESID = 4, /* go into debugger in complete_rqe */
- DEBUG_LASTREQS = 8, /* keep a circular buffer of last requests */
- DEBUG_REVIVECONFLICT = 16, /* print info about revive conflicts */
- DEBUG_EOFINFO = 32, /* print info about EOF detection */
- DEBUG_MEMFREE = 64, /* keep info about Frees */
- DEBUG_BIGDRIVE = 128, /* pretend our drives are 100 times the size */
- DEBUG_REMOTEGDB = 256, /* go into remote gdb */
- DEBUG_WARNINGS = 512, /* log various relatively harmless warnings */
- DEBUG_LOCKREQS = 1024, /* log locking requests */
-};
-
-#ifdef _KERNEL
-#ifdef __i386__
-#define longjmp LongJmp /* test our longjmps */
-#endif
-#endif
-#endif
-/* Local Variables: */
-/* fill-column: 50 */
-/* End: */
OpenPOWER on IntegriCloud