diff options
author | phk <phk@FreeBSD.org> | 2004-11-04 09:57:21 +0000 |
---|---|---|
committer | phk <phk@FreeBSD.org> | 2004-11-04 09:57:21 +0000 |
commit | 27e302a86b65c2d82a3601b7d1f56add77a4316b (patch) | |
tree | 624c21e78226c47c48f9ebeceb36ef79e0336468 | |
parent | e5715b2cc150e2463e9765903b2ba151e4540913 (diff) | |
download | FreeBSD-src-27e302a86b65c2d82a3601b7d1f56add77a4316b.zip FreeBSD-src-27e302a86b65c2d82a3601b7d1f56add77a4316b.tar.gz |
Remove unused vinum files.
26 files changed, 0 insertions, 12139 deletions
diff --git a/sys/dev/vinum/COPYRIGHT b/sys/dev/vinum/COPYRIGHT deleted file mode 100644 index f0295e6..0000000 --- a/sys/dev/vinum/COPYRIGHT +++ /dev/null @@ -1,37 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $FreeBSD$ - */ diff --git a/sys/dev/vinum/makestatetext b/sys/dev/vinum/makestatetext deleted file mode 100755 index c5a7da2..0000000 --- a/sys/dev/vinum/makestatetext +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh -# Make statetexts.h from vinumstate.h -# $FreeBSD$ -# $Id: makestatetext,v 1.7 1999/12/29 07:24:54 grog Exp grog $ -infile=vinumstate.h -ofile=statetexts.h -echo >$ofile "/* Created by $0 on" `date`. "Do not edit */" -echo >>$ofile -cat >> $ofile <<FOO -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called \`\`Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. 
Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided \`\`as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - */ - -FOO - -echo >>$ofile "/* Drive state texts */" -echo >>$ofile "char *drivestatetext [] = - { " -egrep -e 'drive_[A-z0-9]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile -cat <<FOO >> $ofile - }; - -/* Subdisk state texts */ -char *sdstatetext [] = - { -FOO -egrep -e 'sd_[A-z0-9]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile -cat <<FOO >> $ofile - }; - -/* Plex state texts */ -char *plexstatetext [] = - { -FOO -egrep -e 'plex_[A-z0-9]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile -cat <<FOO >> $ofile - }; - -/* Volume state texts */ -char *volstatetext [] = - { -FOO -egrep -e 'volume_[A-z0-9]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile -cat <<FOO >> $ofile - }; -FOO diff --git a/sys/dev/vinum/request.h b/sys/dev/vinum/request.h deleted file mode 100644 index 600130f..0000000 --- a/sys/dev/vinum/request.h +++ /dev/null @@ -1,273 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. 
- * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. 
- * - * $Id: request.h,v 1.22 2003/04/24 04:37:08 grog Exp $ - * $FreeBSD$ - */ - -/* Information needed to set up a transfer */ - -enum xferinfo { - XFR_NORMAL_READ = 1, - XFR_NORMAL_WRITE = 2, /* write request in normal mode */ - XFR_RECOVERY_READ = 4, - XFR_DEGRADED_WRITE = 8, - XFR_PARITYLESS_WRITE = 0x10, - XFR_NO_PARITY_STRIPE = 0x20, /* parity stripe is not available */ - XFR_DATA_BLOCK = 0x40, /* data block in request */ - XFR_PARITY_BLOCK = 0x80, /* parity block in request */ - XFR_BAD_SUBDISK = 0x100, /* this subdisk is dead */ - XFR_MALLOCED = 0x200, /* this buffer is malloced */ -#ifdef VINUMDEBUG - XFR_PHASE2 = 0x800, /* documentation only: 2nd phase write */ -#endif - XFR_REVIVECONFLICT = 0x1000, /* possible conflict with a revive operation */ - XFR_BUFLOCKED = 0x2000, /* BUF_LOCK performed on this buffer */ - XFR_COPYBUF = 0x4000, /* data buffer was copied */ - /* operations that need a parity block */ - XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE), - /* operations that use the group parameters */ - XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ), - /* operations that that use the data parameters */ - XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE), - /* operations requiring read before write */ - XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE), - /* operations that need a malloced buffer */ - XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE) -}; - -/* - * Describe one low-level request, part of a - * high-level request. This is an extended - * struct buf buffer, and the first element - * *must* be a struct buf. We pass this - * structure to the I/O routines instead of a - * struct buf in order to be able to locate the - * high-level request when it completes. - * - * All offsets and lengths are in sectors. 
- */ - -struct rqelement { - struct buf b; /* buf structure */ - struct rqgroup *rqg; /* pointer to our group */ - /* Information about the transfer */ - daddr_t sdoffset; /* offset in subdisk */ - int useroffset; /* offset in user buffer of normal data */ - /* - * dataoffset and datalen refer to "individual" data - * transfers which involve only this drive (normal read, - * parityless write) and also degraded write. - * - * groupoffset and grouplen refer to the other "group" - * operations (normal write, recovery read) which involve - * more than one drive. Both the offsets are relative to - * the start of the local buffer. - */ - int dataoffset; /* offset in buffer of the normal data */ - int groupoffset; /* offset in buffer of group data */ - short datalen; /* length of normal data (sectors) */ - short grouplen; /* length of group data (sectors) */ - short buflen; /* total buffer length to allocate */ - short flags; /* really enum xferinfo (see above) */ - /* Ways to find other components */ - short sdno; /* subdisk number */ - short driveno; /* drive number */ - struct timeval launchtime; /* time of launch, for info function */ -}; - -/* - * A group of requests built to satisfy an I/O - * transfer on a single plex. - */ -struct rqgroup { - struct rqgroup *next; /* pointer to next group */ - struct request *rq; /* pointer to the request */ - short count; /* number of requests in this group */ - short active; /* and number active */ - short plexno; /* index of plex */ - int badsdno; /* index of bad subdisk or -1 */ - enum xferinfo flags; /* description of transfer */ - struct rangelock *lock; /* lock for this transfer */ - daddr_t lockbase; /* and lock address */ - struct rqelement rqe[0]; /* and the elements of this request */ -}; - -/* - * Describe one high-level request and the - * work we have to do to satisfy it. 
- */ -struct request { - struct buf *bp; /* pointer to the high-level request */ - caddr_t save_data; /* for copied write buffers */ - enum xferinfo flags; - union { - int volno; /* volume index */ - int plexno; /* or plex index */ - } volplex; - int error; /* current error indication */ - int sdno; /* reviving subdisk (XFR_REVIVECONFLICT) */ - short isplex; /* set if this is a plex request */ - short active; /* number of subrequests still active */ - struct rqgroup *rqg; /* pointer to the first group of requests */ - struct rqgroup *lrqg; /* and to the last group of requests */ - struct request *next; /* link of waiting requests */ -}; - -/* - * Extended buffer header for subdisk I/O. Includes - * a pointer to the user I/O request. - */ -struct sdbuf { - struct buf b; /* our buffer */ - struct buf *bp; /* and pointer to parent */ - short driveno; /* drive index */ - short sdno; /* and subdisk index */ -}; - -/* - * Values returned by rqe and friends. Be careful - * with these: they are in order of increasing - * seriousness. Some routines check for - * > REQUEST_RECOVERED to indicate a failed request. 
XXX - */ -enum requeststatus { - REQUEST_OK, /* request built OK */ - REQUEST_RECOVERED, /* request OK, but involves RAID5 recovery */ - REQUEST_DEGRADED, /* parts of request failed */ - REQUEST_EOF, /* parts of request failed: outside plex */ - REQUEST_DOWN, /* all of request failed: subdisk(s) down */ - REQUEST_ENOMEM /* all of request failed: ran out of memory */ -}; - -#ifdef VINUMDEBUG -/* Trace entry for request info (DEBUG_LASTREQS) */ -enum rqinfo_type { - loginfo_unused, /* never been used */ - loginfo_user_bp, /* this is the bp when strategy is called */ - loginfo_user_bpl, /* and this is the bp at launch time */ - loginfo_rqe, /* user RQE */ - loginfo_iodone, /* iodone */ - loginfo_raid5_data, /* write RAID-5 data block */ - loginfo_raid5_parity, /* write RAID-5 parity block */ - loginfo_sdio, /* subdisk I/O */ - loginfo_sdiol, /* subdisk I/O launch */ - loginfo_sdiodone, /* subdisk iodone */ - loginfo_lockwait, /* wait for range lock */ - loginfo_lock, /* lock range */ - loginfo_unlock, /* unlock range */ -}; - -/* - * This is the rangelock structure with an added - * buffer pointer and plex number. We don't need - * the plex number for the locking protocol, but - * it does help a lot when logging. 
- */ -struct rangelockinfo { - daddr_t stripe; /* address + 1 of the range being locked */ - struct buf *bp; /* user's buffer pointer */ - int plexno; -}; - -union rqinfou { /* info to pass to logrq */ - struct buf *bp; - struct rqelement *rqe; /* address of request, for correlation */ - struct rangelockinfo *lockinfo; -}; - -struct rqinfo { - enum rqinfo_type type; /* kind of event */ - struct timeval timestamp; /* time it happened */ - struct buf *bp; /* point to user buffer */ - int devmajor; /* major and minor device info */ - int devminor; - union { - struct buf b; /* yup, the *whole* buffer header */ - struct rqelement rqe; /* and the whole rqe */ - struct rangelock lockinfo; - } info; -}; - -#define RQINFO_SIZE 128 /* number of info slots in buffer */ - -void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp); -#endif - -/* Structures for the daemon */ - -/* types of request to the daemon */ -enum daemonrq { - daemonrq_none, /* dummy to catch bugs */ - daemonrq_ioerror, /* error occurred on I/O */ - daemonrq_saveconfig, /* save configuration */ - daemonrq_return, /* return to userland */ - daemonrq_ping, /* show sign of life */ - daemonrq_init, /* initialize a plex */ - daemonrq_revive, /* revive a subdisk */ - daemonrq_closedrive, /* close a drive */ -}; - -/* info field for daemon requests */ -union daemoninfo { /* and the request information */ - struct request *rq; /* for daemonrq_ioerror */ - struct sd *sd; /* for daemonrq_revive */ - struct plex *plex; /* for daemonrq_init */ - struct drive *drive; /* for daemonrq_closedrive */ - int nothing; /* for passing NULL */ -}; - -struct daemonq { - struct daemonq *next; /* pointer to next element in queue */ - enum daemonrq type; /* type of request */ - int privateinuse; /* private element, being used */ - union daemoninfo info; /* and the request information */ -}; - -void queue_daemon_request(enum daemonrq type, union daemoninfo info); - -extern int daemon_options; - -enum daemon_option { - 
daemon_verbose = 1, /* talk about what we're doing */ - daemon_stopped = 2, - daemon_noupdate = 4, /* don't update the disk config, for recovery */ -}; - -void freerq(struct request *rq); -void unlockrange(int plexno, struct rangelock *); -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/statetexts.h b/sys/dev/vinum/statetexts.h deleted file mode 100644 index 88cfc17..0000000 --- a/sys/dev/vinum/statetexts.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Created by ./makestatetext on Wed Jan 5 10:05:30 CST 2000. Do not edit */ - -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $FreeBSD$ - */ - -/* Drive state texts */ -char *drivestatetext[] = -{ - "unallocated", - "referenced", - "down", - "up", -}; - -/* Subdisk state texts */ -char *sdstatetext[] = -{ - "unallocated", - "uninit", - "referenced", - "init", - "empty", - "initializing", - "initialized", - "obsolete", - "stale", - "crashed", - "down", - "reviving", - "reborn", - "up", -}; - -/* Plex state texts */ -char *plexstatetext[] = -{ - "unallocated", - "referenced", - "init", - "faulty", - "down", - "initializing", - "corrupt", - "degraded", - "flaky", - "up", -}; - -/* Volume state texts */ -char *volstatetext[] = -{ - "unallocated", - "uninit", - "down", - "up", -}; diff --git a/sys/dev/vinum/vinum.c b/sys/dev/vinum/vinum.c deleted file mode 100644 index 5fb990d..0000000 --- a/sys/dev/vinum/vinum.c +++ /dev/null @@ -1,542 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. 
- * - * $Id: vinum.c,v 1.44 2003/05/23 00:50:55 grog Exp grog $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#define STATIC static /* nothing while we're testing */ - -#include <dev/vinum/vinumhdr.h> -#include <sys/sysproto.h> /* for sync(2) */ -#ifdef VINUMDEBUG -#include <sys/reboot.h> -int debug = 0; /* debug flags */ -extern int total_malloced; -extern int malloccount; -extern struct mc malloced[]; -#endif -#include <dev/vinum/request.h> - -struct cdevsw vinum_cdevsw = { - .d_version = D_VERSION, - .d_open = vinumopen, - .d_close = vinumclose, - .d_read = physread, - .d_write = physwrite, - .d_ioctl = vinumioctl, - .d_strategy = vinumstrategy, - .d_name = "vinum", - .d_flags = D_DISK | D_NEEDGIANT -}; - -/* Called by main() during pseudo-device attachment. */ -void vinumattach(void *); -STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused); -STATIC void vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev); - -struct _vinum_conf vinum_conf; /* configuration information */ - -struct cdev *vinum_daemon_dev; -struct cdev *vinum_super_dev; - -static eventhandler_tag dev_clone_tag; - -/* - * Mutexes for plex synchronization. Ideally each plex - * should have its own mutex, but the fact that the plex - * struct can move makes that very complicated. Instead, - * have plexes use share these mutexes based on modulo plex - * number. - */ -struct mtx plexmutex[PLEXMUTEXES]; - -/* - * Called by main() during pseudo-device attachment. All we need - * to do is allocate enough space for devices to be configured later, and - * add devsw entries. 
- */ -void -vinumattach(void *dummy) -{ - char *envp; - int i; -#define MUTEXNAMELEN 16 - char mutexname[MUTEXNAMELEN]; -#if PLEXMUTEXES > 10000 -#error Increase size of MUTEXNAMELEN -#endif -/* modload should prevent multiple loads, so this is worth a panic */ - if ((vinum_conf.flags & VF_LOADED) != 0) - panic("vinum: already loaded"); - - log(LOG_INFO, "vinum: loaded\n"); -#ifdef VINUMDEBUG - vinum_conf.flags |= VF_LOADED | VF_HASDEBUG; /* we're loaded now, and we support debug */ -#else - vinum_conf.flags |= VF_LOADED; /* we're loaded now */ -#endif - - daemonq = NULL; /* initialize daemon's work queue */ - dqend = NULL; - - vinum_daemon_dev = make_dev(&vinum_cdevsw, - VINUM_DAEMON_MINOR, - UID_ROOT, - GID_WHEEL, - S_IRUSR | S_IWUSR, - "vinum/controld"); - vinum_super_dev = make_dev(&vinum_cdevsw, - VINUM_SUPERDEV_MINOR, - UID_ROOT, - GID_WHEEL, - S_IRUSR | S_IWUSR, - "vinum/control"); - - vinum_conf.version = VINUMVERSION; /* note what version we are */ - - /* allocate space: drives... */ - DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES); - CHECKALLOC(DRIVE, "vinum: no memory\n"); - bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES); - vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */ - vinum_conf.drives_used = 0; /* and number in use */ - - /* volumes, ... */ - VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES); - CHECKALLOC(VOL, "vinum: no memory\n"); - bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES); - vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */ - vinum_conf.volumes_used = 0; /* and number in use */ - - /* plexes, ... 
*/ - PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES); - CHECKALLOC(PLEX, "vinum: no memory\n"); - bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES); - vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */ - vinum_conf.plexes_used = 0; /* and number in use */ - - for (i = 0; i < PLEXMUTEXES; i++) { - snprintf(mutexname, MUTEXNAMELEN, "vinumplex%d", i); - mtx_init(&plexmutex[i], mutexname, "plex", MTX_DEF); - } - - /* and subdisks */ - SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS); - CHECKALLOC(SD, "vinum: no memory\n"); - bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS); - vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */ - vinum_conf.subdisks_used = 0; /* and number in use */ - dev_clone_tag = EVENTHANDLER_REGISTER(dev_clone, vinum_clone, 0, 1000); - - /* - * See if the loader has passed us any of the autostart - * options. - */ - envp = NULL; - if ((envp = getenv("vinum.autostart")) != NULL) { /* start all drives now */ - vinum_scandisk(NULL); - freeenv(envp); - } else if ((envp = getenv("vinum.drives")) != NULL) { - vinum_scandisk(envp); - freeenv(envp); - } -} - -/* - * Check if we have anything open. If confopen is != 0, - * that goes for the super device as well, otherwise - * only for volumes. - * - * Return 0 if not inactive, 1 if inactive. - */ -int -vinum_inactive(int confopen) -{ - int i; - int can_do = 1; /* assume we can do it */ - - if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */ - return 0; /* can't do it while we're open */ - lock_config(); - for (i = 0; i < vinum_conf.volumes_allocated; i++) { - if ((VOL[i].state > volume_down) - && (VOL[i].flags & VF_OPEN)) { /* volume is open */ - can_do = 0; - break; - } - } - unlock_config(); - return can_do; -} - -/* - * Free all structures. - * If cleardrive is 0, save the configuration; otherwise - * remove the configuration from the drive. 
- * - * Before coming here, ensure that no volumes are open. - */ -void -free_vinum(int cleardrive) -{ - int i; - int drives_allocated = vinum_conf.drives_allocated; - - while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN)) - == (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */ - queue_daemon_request(daemonrq_return, (union daemoninfo) 0); /* stop the daemon */ - tsleep(&vinumclose, PUSER, "vstop", 1); /* and wait for it */ - } - if (DRIVE != NULL) { - if (cleardrive) { /* remove the vinum config */ - for (i = 0; i < drives_allocated; i++) - remove_drive(i); /* remove the drive */ - } else { /* keep the config */ - for (i = 0; i < drives_allocated; i++) - free_drive(&DRIVE[i]); /* close files and things */ - } - Free(DRIVE); - } - if (SD != NULL) { - for (i = 0; i < vinum_conf.subdisks_allocated; i++) { - struct sd *sd = &SD[i]; - - if (sd->state != sd_unallocated) - free_sd(i); - } - Free(SD); - } - if (PLEX != NULL) { - for (i = 0; i < vinum_conf.plexes_allocated; i++) { - struct plex *plex = &PLEX[i]; - - if (plex->state != plex_unallocated) /* we have real data there */ - free_plex(i); - } - Free(PLEX); - } - if (VOL != NULL) { - for (i = 0; i < vinum_conf.volumes_allocated; i++) { - struct volume *volume = &VOL[i]; - - if (volume->state != volume_unallocated) - free_volume(i); - } - Free(VOL); - } - bzero(&vinum_conf, sizeof(vinum_conf)); - vinum_conf.version = VINUMVERSION; /* reinstate version number */ -} - -STATIC int -vinum_modevent(module_t mod, modeventtype_t type, void *unused) -{ - struct sync_args dummyarg = - {0}; - int i; - - switch (type) { - case MOD_LOAD: - vinumattach(NULL); - return 0; /* OK */ - case MOD_UNLOAD: - if (!vinum_inactive(1)) /* is anything open? 
*/ - return EBUSY; /* yes, we can't do it */ - vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */ - sync(curthread, &dummyarg); /* write out buffers */ - free_vinum(0); /* clean up */ -#ifdef VINUMDEBUG - if (total_malloced) { - int i; -#ifdef INVARIANTS - int *poke; -#endif - - for (i = 0; i < malloccount; i++) { - if (debug & DEBUG_WARNINGS) /* want to hear about them */ - log(LOG_WARNING, - "vinum: exiting with %d bytes malloced from %s:%d\n", - malloced[i].size, - malloced[i].file, - malloced[i].line); -#ifdef INVARIANTS - poke = &((int *) malloced[i].address) - [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */ - if (*poke == 0xdeadc0de) /* already freed */ - log(LOG_ERR, - "vinum: exiting with malloc table inconsistency at %p from %s:%d\n", - malloced[i].address, - malloced[i].file, - malloced[i].line); -#endif - Free(malloced[i].address); - } - } -#endif - destroy_dev(vinum_daemon_dev); /* daemon device */ - destroy_dev(vinum_super_dev); - for (i = 0; i < PLEXMUTEXES; i++) - mtx_destroy(&plexmutex[i]); - log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */ - EVENTHANDLER_DEREGISTER(dev_clone, dev_clone_tag); - return 0; - default: - return EOPNOTSUPP; - break; - } - return 0; -} - -static moduledata_t vinum_mod = -{ - "vinum", - (modeventhand_t) vinum_modevent, - 0 -}; -DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE); - -/* ARGSUSED */ -/* Open a vinum object */ -int -vinumopen(struct cdev *dev, - int flags, - int fmt, - struct thread *td) -{ - int error; - unsigned int index; - struct volume *vol; - struct plex *plex; - struct sd *sd; - int devminor; /* minor number */ - - devminor = minor(dev); - error = 0; - /* First, decide what we're looking at */ - switch (DEVTYPE(dev)) { - case VINUM_VOLUME_TYPE: - /* - * The super device and daemon device are the last two - * volume numbers, so check for them first. 
- */ - if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */ - ||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */ - error = suser(td); /* are we root? */ - - if (error == 0) { /* yes, can do */ - if (devminor == VINUM_DAEMON_MINOR) /* daemon device */ - vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */ - else /* superdev */ - vinum_conf.flags |= VF_OPEN; /* we're open */ - } - return error; - } - /* Must be a real volume. Check. */ - index = Volno(dev); - if (index >= vinum_conf.volumes_allocated) - return ENXIO; /* no such device */ - vol = &VOL[index]; - - switch (vol->state) { - case volume_unallocated: - case volume_uninit: - return ENXIO; - - case volume_up: - vol->flags |= VF_OPEN; /* note we're open */ - return 0; - - case volume_down: - return EIO; - - default: - return EINVAL; - } - - case VINUM_PLEX_TYPE: - index = Plexno(dev); /* get plex index in vinum_conf */ - if (index >= vinum_conf.plexes_allocated) - return ENXIO; /* no such device */ - plex = &PLEX[index]; - - switch (plex->state) { - case plex_unallocated: - return ENXIO; - - case plex_referenced: - return EINVAL; - - default: - plex->flags |= VF_OPEN; /* note we're open */ - return 0; - } - - case VINUM_SD_TYPE: - case VINUM_SD2_TYPE: - index = Sdno(dev); /* get the subdisk number */ - if (index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */ - return ENXIO; /* no such device */ - sd = &SD[index]; - - /* - * Opening a subdisk is always a special operation, so - * we ignore the state as long as it represents a real - * subdisk. 
- */ - switch (sd->state) { - case sd_unallocated: - return ENXIO; - - case sd_uninit: - case sd_referenced: - return EINVAL; - - default: - sd->flags |= VF_OPEN; /* note we're open */ - return 0; - } - } - return 0; /* to keep the compiler happy */ -} - -/* ARGSUSED */ -int -vinumclose(struct cdev *dev, - int flags, - int fmt, - struct thread *td) -{ - unsigned int index; - struct volume *vol; - int devminor; - - devminor = minor(dev); - /* First, decide what we're looking at */ - switch (DEVTYPE(dev)) { - case VINUM_VOLUME_TYPE: - /* - * The super device and daemon device are the last two - * volume numbers, so check for them first. - */ - if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */ - ||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */ - /* - * don't worry about whether we're root: - * nobody else would get this far. - */ - if (devminor == VINUM_SUPERDEV_MINOR) /* normal superdev */ - vinum_conf.flags &= ~VF_OPEN; /* no longer open */ - else { /* the daemon device */ - vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */ - if (vinum_conf.flags & VF_STOPPING) /* we're trying to stop, */ - wakeup(&vinumclose); /* we can continue now */ - } - return 0; - } - /* Real volume */ - index = Volno(dev); - if (index >= vinum_conf.volumes_allocated) - return ENXIO; /* no such device */ - vol = &VOL[index]; - - switch (vol->state) { - case volume_unallocated: - case volume_uninit: - return ENXIO; - - case volume_up: - vol->flags &= ~VF_OPEN; /* reset our flags */ - return 0; - - case volume_down: - return EIO; - - default: - return EINVAL; - } - - case VINUM_PLEX_TYPE: - if (Volno(dev) >= vinum_conf.volumes_allocated) - return ENXIO; - index = Plexno (dev); - if (index >= vinum_conf.plexes_allocated) /* no such plex */ - return ENXIO; - PLEX [index].flags &= ~VF_OPEN; /* no longer open */ - return 0; - - case VINUM_SD_TYPE: - if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */ - (Plexno(dev) >= 
vinum_conf.plexes_allocated)) /* or no such plex */ - return ENXIO; /* no such device */ - index = Sdno (dev); - if (index >= vinum_conf.subdisks_allocated) /* no such sd */ - return ENXIO; - SD [index].flags &= ~VF_OPEN; /* no longer open */ - return 0; - - - default: - return ENODEV; /* don't know what to do with these */ - } -} - -void -vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev) -{ - struct volume *vol; - int i; - - if (*dev != NULL) - return; - if (strncmp(name, "vinum/", sizeof("vinum/") - 1) != 0) - return; - - name += sizeof("vinum/") - 1; - if ((i = find_volume(name, 0)) == -1) - return; - - vol = &VOL[i]; - *dev = vol->dev; -} - - -/* Local Variables: */ -/* fill-column: 60 */ -/* End: */ diff --git a/sys/dev/vinum/vinumconfig.c b/sys/dev/vinum/vinumconfig.c deleted file mode 100644 index 989af88..0000000 --- a/sys/dev/vinum/vinumconfig.c +++ /dev/null @@ -1,2166 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. 
Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumconfig.c,v 1.41 2003/05/23 00:57:34 grog Exp grog $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#define STATIC static - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -#define MAXTOKEN 64 /* maximum number of tokens in a line */ - -/* - * We can afford the luxury of global variables here, - * since start_config ensures that these functions - * are single-threaded. 
- */ - -/* These are indices in vinum_conf of the last-mentioned of each kind of object */ -static int current_drive; /* note the last drive we mention, for - * some defaults */ -static int current_plex; /* and the same for the last plex */ -static int current_volume; /* and the last volme */ -static struct _ioctl_reply *ioctl_reply; /* struct to return via ioctl */ - - -/* These values are used by most of these routines, so set them as globals */ -static char *token[MAXTOKEN]; /* pointers to individual tokens */ -static int tokens; /* number of tokens */ - -#define TOCONS 0x01 -#define TOTTY 0x02 -#define TOLOG 0x04 - -struct putchar_arg { - int flags; - struct tty *tty; -}; - -#define MSG_MAX 1024 /* maximum length of a formatted message */ -/* - * Format an error message and return to the user - * in the reply. CARE: This routine is designed - * to be called only from the configuration - * routines, so it assumes it's the owner of the - * configuration lock, and unlocks it on exit. - */ -void -throw_rude_remark(int error, char *msg,...) 
-{ - int retval; - va_list ap; - char *text; - static int finishing; /* don't recurse */ - int was_finishing; - - if ((vinum_conf.flags & VF_LOCKED) == 0) /* bug catcher */ - panic("throw_rude_remark: called without config lock"); - va_start(ap, msg); - if ((ioctl_reply != NULL) /* we're called from the user */ - &&(!(vinum_conf.flags & VF_READING_CONFIG))) { /* and not reading from disk: return msg */ - /* - * We can't just format to ioctl_reply, since it - * may contain our input parameters - */ - text = Malloc(MSG_MAX); - if (text == NULL) { - log(LOG_ERR, "vinum: can't allocate error message buffer\n"); - printf("vinum: "); - vprintf(msg, ap); /* print to the console */ - printf("\n"); - } else { - retval = kvprintf(msg, NULL, (void *) text, 10, ap); - text[retval] = '\0'; /* delimit */ - strlcpy(ioctl_reply->msg, text, sizeof(ioctl_reply->msg)); - ioctl_reply->error = error; /* first byte is the error number */ - Free(text); - } - } else { - printf("vinum: "); - vprintf(msg, ap); /* print to the console */ - printf("\n"); - } - va_end(ap); - - if (vinum_conf.flags & VF_READING_CONFIG) { /* go through to the bitter end, */ - if ((vinum_conf.flags & VF_READING_CONFIG) /* we're reading from disk, */ - &&((daemon_options & daemon_noupdate) == 0)) { - log(LOG_NOTICE, "Disabling configuration updates\n"); - daemon_options |= daemon_noupdate; - } - return; - } - /* - * We have a problem here: we want to unlock the - * configuration, which implies tidying up, but - * if we find an error while tidying up, we - * could recurse for ever. Use this kludge to - * only try once. - */ - was_finishing = finishing; - finishing = 1; - finish_config(was_finishing); /* unlock anything we may be holding */ - finishing = was_finishing; - longjmp(command_fail, error); -} - -/* - * Check a volume to see if the plex is already assigned to it. 
- * Return index in volume->plex, or -1 if not assigned - */ -int -my_plex(int volno, int plexno) -{ - int i; - struct volume *vol; - - vol = &VOL[volno]; /* point to volno */ - for (i = 0; i < vol->plexes; i++) - if (vol->plex[i] == plexno) - return i; - return -1; /* not found */ -} - -/* - * Check a plex to see if the subdisk is already assigned to it. - * Return index in plex->sd, or -1 if not assigned - */ -int -my_sd(int plexno, int sdno) -{ - int i; - struct plex *plex; - - plex = &PLEX[plexno]; - for (i = 0; i < plex->subdisks; i++) - if (plex->sdnos[i] == sdno) - return i; - return -1; /* not found */ -} - -/* Add plex to the volume if possible */ -int -give_plex_to_volume(int volno, int plexno, int preferme) -{ - struct volume *vol; - int i; - int volplexno; - - /* - * It's not an error for the plex to already - * belong to the volume, but we need to check a - * number of things to make sure it's done right. - * Some day. - */ - volplexno = my_plex(volno, plexno); - vol = &VOL[volno]; /* point to volume */ - if (volplexno < 0) { - if (vol->plexes == MAXPLEX) /* all plexes allocated */ - throw_rude_remark(ENOSPC, - "Too many plexes for volume %s", - vol->name); - else if ((vol->plexes > 0) /* we have other plexes */ - &&((vol->flags & VF_CONFIG_SETUPSTATE) == 0)) /* and we're not setting up state */ - invalidate_subdisks(&PLEX[plexno], sd_stale); /* make our subdisks invalid */ - vol->plex[vol->plexes] = plexno; /* this one */ - vol->plexes++; /* add another plex */ - PLEX[plexno].volno = volno; /* note the number of our volume */ - - /* Find out how big our volume is */ - for (i = 0; i < vol->plexes; i++) - vol->size = max(vol->size, PLEX[vol->plex[i]].length); - volplexno = vol->plexes - 1; /* number of plex in volume */ - } - if (preferme) { - if (vol->preferred_plex >= 0) /* already had a facourite, */ - printf("vinum: changing preferred plex for %s from %s to %s\n", - vol->name, - PLEX[vol->plex[vol->preferred_plex]].name, - PLEX[plexno].name); - 
vol->preferred_plex = volplexno; - } - return volplexno; -} - -/* - * Add subdisk to a plex if possible - */ -int -give_sd_to_plex(int plexno, int sdno) -{ - int i; - struct plex *plex; - struct sd *sd; - - /* - * It's not an error for the sd to already - * belong to the plex, but we need to check a - * number of things to make sure it's done right. - * Some day. - */ - i = my_sd(plexno, sdno); - if (i >= 0) /* does it already belong to us? */ - return i; /* that's it */ - - plex = &PLEX[plexno]; /* point to the plex */ - sd = &SD[sdno]; /* and the subdisk */ - - /* Do we have an offset? Otherwise put it after the last one */ - if (sd->plexoffset < 0) { /* no offset specified */ - if (plex->subdisks > 0) { - struct sd *lastsd = &SD[plex->sdnos[plex->subdisks - 1]]; /* last subdisk */ - - if (plex->organization == plex_concat) /* concat, */ - sd->plexoffset = lastsd->sectors + lastsd->plexoffset; /* starts here */ - else /* striped, RAID-4 or RAID-5 */ - sd->plexoffset = plex->stripesize * plex->subdisks; /* starts here */ - } else /* first subdisk */ - sd->plexoffset = 0; /* start at the beginning */ - } - if (plex->subdisks == MAXSD) { /* we already have our maximum */ - if (sd->state == sd_unallocated) /* haven't finished allocating the sd, */ - free_sd(sdno); /* free it to return drive space */ - throw_rude_remark(ENOSPC, /* crap out */ - "Can't add %s to %s: plex full", - sd->name, - plex->name); - } - plex->subdisks++; /* another entry */ - if (plex->subdisks >= plex->subdisks_allocated) /* need more space */ - EXPAND(plex->sdnos, int, plex->subdisks_allocated, INITIAL_SUBDISKS_IN_PLEX); - - /* Adjust size of plex and volume. 
*/ - if (isparity(plex)) /* RAID-4 or RAID-5 */ - plex->length = (plex->subdisks - 1) * sd->sectors; /* size is one disk short */ - else - plex->length += sd->sectors; /* plex gets this much bigger */ - if (plex->volno >= 0) /* we have a volume */ - VOL[plex->volno].size = max(VOL[plex->volno].size, plex->length); /* adjust its size */ - - /* - * We need to check that the subdisks don't overlap, - * but we can't do that until a point where we *must* - * know the size of all the subdisks. That's not - * here. But we need to sort them by offset - */ - for (i = 0; i < plex->subdisks - 1; i++) { - if (sd->plexoffset < SD[plex->sdnos[i]].plexoffset) { /* it fits before this one */ - /* First move any remaining subdisks by one */ - int j; - - for (j = plex->subdisks - 1; j > i; j--) /* move up one at a time */ - plex->sdnos[j] = plex->sdnos[j - 1]; - plex->sdnos[i] = sdno; - sd->plexsdno = i; /* note where we are in the subdisk */ - return i; - } - } - - /* - * The plex doesn't have any subdisk with a - * larger offset. Insert it here. - */ - plex->sdnos[i] = sdno; - sd->plexsdno = i; /* note where we are in the subdisk */ - sd->plexno = plex->plexno; /* and who we belong to */ - return i; -} - -/* - * Add a subdisk to drive if possible. The - * pointer to the drive must already be stored in - * the sd structure, but the drive doesn't know - * about the subdisk yet. 
- */ -void -give_sd_to_drive(int sdno) -{ - struct sd *sd; /* pointer to subdisk */ - struct drive *drive; /* and drive */ - int fe; /* index in free list */ - int sfe; /* and index of subdisk when assigning max */ - - sd = &SD[sdno]; /* point to sd */ - drive = &DRIVE[sd->driveno]; /* and drive */ - - if (drive->state != drive_up) { - update_sd_state(sdno); /* that crashes the subdisk */ - return; - } - sd->sectorsize = drive->sectorsize; /* get sector size from drive */ - if (drive->flags & VF_HOTSPARE) /* the drive is a hot spare, */ - throw_rude_remark(ENOSPC, - "Can't place %s on hot spare drive %s", - sd->name, - drive->label.name); - if ((drive->sectors_available == 0) /* no space left */ - ||(sd->sectors > drive->sectors_available)) { /* or too big, */ - sd->driveoffset = -1; /* don't be confusing */ - free_sd(sd->sdno); - throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name); - return; /* in case we come back here */ - } - drive->subdisks_used++; /* one more subdisk */ - - if (sd->sectors == 0) { /* take the largest chunk */ - sfe = 0; /* to keep the compiler happy */ - for (fe = 0; fe < drive->freelist_entries; fe++) { - if (drive->freelist[fe].sectors >= sd->sectors) { /* more space here */ - sd->sectors = drive->freelist[fe].sectors; /* take it */ - sd->driveoffset = drive->freelist[fe].offset; - sfe = fe; /* and note the index for later */ - } - } - if (sd->sectors == 0) { /* no luck, */ - sd->driveoffset = -1; /* don't be confusing */ - free_sd(sd->sdno); - throw_rude_remark(ENOSPC, /* give up */ - "No space for %s on %s", - sd->name, - drive->label.name); - } - if (sfe < (drive->freelist_entries - 1)) /* not the last one, */ - bcopy(&drive->freelist[sfe + 1], - &drive->freelist[sfe], - (drive->freelist_entries - sfe) * sizeof(struct drive_freelist)); - drive->freelist_entries--; /* one less entry */ - drive->sectors_available -= sd->sectors; /* and note how much less space we have */ - } else if (sd->driveoffset < 0) { /* no 
offset specified, find one */ - for (fe = 0; fe < drive->freelist_entries; fe++) { - if (drive->freelist[fe].sectors >= sd->sectors) { /* it'll fit here */ - sd->driveoffset = drive->freelist[fe].offset; - if (sd->sectors == drive->freelist[fe].sectors) { /* used up the entire entry */ - if (fe < (drive->freelist_entries - 1)) /* not the last one, */ - bcopy(&drive->freelist[fe + 1], - &drive->freelist[fe], - (drive->freelist_entries - fe) * sizeof(struct drive_freelist)); - drive->freelist_entries--; /* one less entry */ - } else { - drive->freelist[fe].sectors -= sd->sectors; /* this much less space */ - drive->freelist[fe].offset += sd->sectors; /* this much further on */ - } - drive->sectors_available -= sd->sectors; /* and note how much less space we have */ - break; - } - } - if (sd->driveoffset < 0) - /* - * Didn't find anything. Although the drive has - * enough space, it's too fragmented - */ - { - free_sd(sd->sdno); - throw_rude_remark(ENOSPC, "No space for %s on %s", sd->name, drive->label.name); - } - } else { /* specific offset */ - /* - * For a specific offset to work, the space must be - * entirely in a single freelist entry. Look for it. - */ - u_int64_t sdend = sd->driveoffset + sd->sectors; /* end of our subdisk */ - for (fe = 0; fe < drive->freelist_entries; fe++) { - u_int64_t dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of entry */ - if (dend >= sdend) { /* fits before here */ - if (drive->freelist[fe].offset > sd->driveoffset) { /* starts after the beginning of sd area */ - sd->driveoffset = -1; /* don't be confusing */ - set_sd_state(sd->sdno, sd_down, setstate_force); - throw_rude_remark(ENOSPC, - "No space for %s on drive %s at offset %lld", - sd->name, - drive->label.name, - sd->driveoffset); - return; - } - /* - * We've found the space, and we can allocate it. - * We don't need to say that to the subdisk, which - * already knows about it. We need to tell it to - * the free list, though. 
We have four possibilities: - * - * 1. The subdisk exactly eats up the entry. That's the - * same as above. - * 2. The subdisk starts at the beginning and leaves space - * at the end. - * 3. The subdisk starts after the beginning and leaves - * space at the end as well: we end up with another - * fragment. - * 4. The subdisk leaves space at the beginning and finishes - * at the end. - */ - drive->sectors_available -= sd->sectors; /* note how much less space we have */ - if (sd->driveoffset == drive->freelist[fe].offset) { /* 1 or 2 */ - if (sd->sectors == drive->freelist[fe].sectors) { /* 1: used up the entire entry */ - if (fe < (drive->freelist_entries - 1)) /* not the last one, */ - bcopy(&drive->freelist[fe + 1], - &drive->freelist[fe], - (drive->freelist_entries - fe) * sizeof(struct drive_freelist)); - drive->freelist_entries--; /* one less entry */ - } else { /* 2: space at the end */ - drive->freelist[fe].sectors -= sd->sectors; /* this much less space */ - drive->freelist[fe].offset += sd->sectors; /* this much further on */ - } - } else { /* 3 or 4 */ - drive->freelist[fe].sectors = sd->driveoffset - drive->freelist[fe].offset; - if (dend > sdend) { /* 3: space at the end as well */ - if (fe < (drive->freelist_entries - 1)) /* not the last one */ - bcopy(&drive->freelist[fe], /* move the rest down */ - &drive->freelist[fe + 1], - (drive->freelist_entries - fe) * sizeof(struct drive_freelist)); - drive->freelist_entries++; /* one less entry */ - drive->freelist[fe + 1].offset = sdend; /* second entry starts after sd */ - drive->freelist[fe + 1].sectors = dend - sdend; /* and is this long */ - } - } - break; - } - } - } - drive->opencount++; /* one more subdisk attached */ -} - -/* Get an empty drive entry from the drive table */ -int -get_empty_drive(void) -{ - int driveno; - struct drive *drive; - - /* first see if we have one which has been deallocated */ - for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { - if (DRIVE[driveno].state 
== drive_unallocated) /* bingo */ - break; - } - - if (driveno >= vinum_conf.drives_allocated) /* we've used all our allocation */ - EXPAND(DRIVE, struct drive, vinum_conf.drives_allocated, INITIAL_DRIVES); - - /* got a drive entry. Make it pretty */ - drive = &DRIVE[driveno]; - bzero(drive, sizeof(struct drive)); - drive->driveno = driveno; /* put number in structure */ - drive->flags |= VF_NEWBORN; /* newly born drive */ - drive->dev = NULL; - strcpy(drive->devicename, "unknown"); /* and make the name ``unknown'' */ - return driveno; /* return the index */ -} - -/* - * Find the named drive in vinum_conf.drive, - * return the index in vinum_conf.drive. - * Don't mark the drive as allocated (XXX SMP) - * If create != 0, create an entry if it doesn't exist - */ -/* XXX check if we have it open from attach */ -int -find_drive(const char *name, int create) -{ - int driveno; - struct drive *drive; - - if (name != NULL) { - for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { - drive = &DRIVE[driveno]; /* point to drive */ - if ((drive->label.name[0] != '\0') /* it has a name */ - &&(strcmp(drive->label.name, name) == 0) /* and it's this one */ - &&(drive->state > drive_unallocated)) /* and it's a real one: found */ - return driveno; - } - } - /* the drive isn't in the list. Add it if he wants */ - if (create == 0) /* don't want to create */ - return -1; /* give up */ - - driveno = get_empty_drive(); - drive = &DRIVE[driveno]; - if (name != NULL) - strlcpy(drive->label.name, /* put in its name */ - name, - sizeof(drive->label.name)); - drive->state = drive_referenced; /* in use, nothing worthwhile there */ - return driveno; /* return the index */ -} - -/* - * Find a drive given its device name. - * devname must be valid. - * Otherwise the same as find_drive above. 
- */ -int -find_drive_by_name(const char *devname, int create) -{ - int driveno; - struct drive *drive; - - for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { - drive = &DRIVE[driveno]; /* point to drive */ - if ((strcmp(drive->devicename, devname) == 0) /* it's this device */ - &&(drive->state > drive_unallocated)) /* and it's a real one: found */ - return driveno; - } - - /* the drive isn't in the list. Add it if he wants */ - if (create == 0) /* don't want to create */ - return -1; /* give up */ - - driveno = get_empty_drive(); - drive = &DRIVE[driveno]; - bcopy(devname, /* put in its name */ - drive->devicename, - min(sizeof(drive->devicename), - strlen(devname))); - drive->state = drive_referenced; /* in use, nothing worthwhile there */ - return driveno; /* return the index */ -} - -/* Find an empty subdisk in the subdisk table */ -int -get_empty_sd(void) -{ - int sdno; - struct sd *sd; - - /* first see if we have one which has been deallocated */ - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { - if (SD[sdno].state == sd_unallocated) /* bingo */ - break; - } - if (sdno >= vinum_conf.subdisks_allocated) - /* - * We've run out of space. sdno is pointing - * where we want it, but at the moment we - * don't have the space. Get it. - * - * XXX We should check for overflow here. We - * shouldn't allocate more than VINUM_MAXSD - * subdisks (currently at least a quarter of a - * million). 
- */ - EXPAND(SD, struct sd, vinum_conf.subdisks_allocated, INITIAL_SUBDISKS); - - /* initialize some things */ - sd = &SD[sdno]; /* point to it */ - bzero(sd, sizeof(struct sd)); /* initialize */ - sd->flags |= VF_NEWBORN; /* newly born subdisk */ - sd->plexno = -1; /* no plex */ - sd->sectors = -1; /* no space */ - sd->driveno = -1; /* no drive */ - sd->plexoffset = -1; /* and no offsets */ - sd->driveoffset = -1; - return sdno; /* return the index */ -} - -/* return a drive to the free pool */ -void -free_drive(struct drive *drive) -{ - LOCKDRIVE(drive); - if (drive->flags & VF_OPEN) /* it's open, */ - close_locked_drive(drive); /* close it */ - if (drive->freelist) - Free(drive->freelist); - if (drive->dev != NULL) - dev_rel(drive->dev); - bzero(drive, sizeof(struct drive)); /* this also sets drive_unallocated */ - unlockdrive(drive); -} - -/* - * Find the named subdisk in vinum_conf.sd. - * - * If create != 0, create an entry if it doesn't exist - * - * Return index in vinum_conf.sd - */ -int -find_subdisk(const char *name, int create) -{ - int sdno; - struct sd *sd; - - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { - if (strcmp(SD[sdno].name, name) == 0) /* found it */ - return sdno; - } - - /* the subdisk isn't in the list. 
Add it if he wants */ - if (create == 0) /* don't want to create */ - return -1; /* give up */ - - /* Allocate one and insert the name */ - sdno = get_empty_sd(); - sd = &SD[sdno]; - bcopy(name, sd->name, min(sizeof(sd->name), strlen(name))); /* put in its name */ - return sdno; /* return the pointer */ -} - -/* Return space to a drive */ -void -return_drive_space(int driveno, int64_t offset, int length) -{ - struct drive *drive; - int fe; /* free list entry */ - u_int64_t sdend; /* end of our subdisk */ - u_int64_t dend; /* end of our freelist entry */ - - drive = &DRIVE[driveno]; - if (drive->state == drive_up) { - sdend = offset + length; /* end of our subdisk */ - - /* Look for where to return the sd address space */ - for (fe = 0; - (fe < drive->freelist_entries) && (drive->freelist[fe].offset < offset); - fe++); - /* - * Now we are pointing to the last entry, the first - * with a higher offset than the subdisk, or both. - */ - if ((fe > 1) /* not the first entry */ - &&((fe == drive->freelist_entries) /* gone past the end */ - ||(drive->freelist[fe].offset > offset))) /* or past the block were looking for */ - fe--; /* point to the block before */ - dend = drive->freelist[fe].offset + drive->freelist[fe].sectors; /* end of the entry */ - - /* - * At this point, we are pointing to the correct - * place in the free list. A number of possibilities - * exist: - * - * 1. The block to be freed starts at the end of the - * block to which we are pointing. This has two - * subcases: - * - * a. The block to be freed ends at the beginning - * of the following block. Merge the three - * areas into a single block. - * - * b. The block is shorter than the space between - * the current block and the next one. Enlarge - * the current block. - * - * 2. The block to be freed starts after the end - * of the block. Again, we have two cases: - * - * a. It ends before the start of the following block. - * Create a new free block. - * - * b. 
It ends at the start of the following block. - * Enlarge the following block downwards. - * - * When there is only one free space block, and the - * space to be returned is before it, the pointer is - * to a non-existent zeroth block. XXX check this - */ - if (offset == dend) { /* Case 1: it starts at the end of this block */ - if ((fe < drive->freelist_entries - 1) /* we're not the last block in the free list */ - /* and the subdisk ends at the start of the next block */ - &&(sdend == drive->freelist[fe + 1].offset)) { - drive->freelist[fe].sectors /* 1a: merge all three blocks */ - = drive->freelist[fe + 1].sectors; - if (fe < drive->freelist_entries - 2) /* still more blocks after next */ - bcopy(&drive->freelist[fe + 2], /* move down one */ - &drive->freelist[fe + 1], - (drive->freelist_entries - 2 - fe) - * sizeof(struct drive_freelist)); - drive->freelist_entries--; /* one less entry in the free list */ - } else /* 1b: just enlarge this block */ - drive->freelist[fe].sectors += length; - } else { /* Case 2 */ - if (offset > dend) /* it starts after this block */ - fe++; /* so look at the next block */ - if ((fe < drive->freelist_entries) /* we're not the last block in the free list */ - /* and the subdisk ends at the start of this block: case 4 */ - &&(sdend == drive->freelist[fe].offset)) { - drive->freelist[fe].offset = offset; /* it starts where the sd was */ - drive->freelist[fe].sectors += length; /* and it's this much bigger */ - } else { /* case 3: non-contiguous */ - if (fe < drive->freelist_entries) /* not after the last block, */ - bcopy(&drive->freelist[fe], /* move the rest up one entry */ - &drive->freelist[fe + 1], - (drive->freelist_entries - fe) - * sizeof(struct drive_freelist)); - drive->freelist_entries++; /* one less entry */ - drive->freelist[fe].offset = offset; /* this entry represents the sd */ - drive->freelist[fe].sectors = length; - } - } - drive->sectors_available += length; /* the sectors are now available */ - } -} - -/* - * Free 
an allocated sd entry. - * This performs memory management only. remove() - * is responsible for checking relationships. - */ -void -free_sd(int sdno) -{ - struct sd *sd; - - sd = &SD[sdno]; - if ((sd->driveno >= 0) /* we have a drive, */ - &&(sd->sectors > 0)) /* and some space on it */ - return_drive_space(sd->driveno, /* return the space */ - sd->driveoffset, - sd->sectors); - if (sd->plexno >= 0) - PLEX[sd->plexno].subdisks--; /* one less subdisk */ - /* - * If we come here as the result of a - * configuration error, we may not yet have - * created a device entry for the subdisk. - */ - if (sd->dev) - destroy_dev(sd->dev); - bzero(sd, sizeof(struct sd)); /* and clear it out */ - sd->state = sd_unallocated; - vinum_conf.subdisks_used--; /* one less sd */ -} - -/* Find an empty plex in the plex table */ -int -get_empty_plex(void) -{ - int plexno; - struct plex *plex; /* if we allocate one */ - - /* first see if we have one which has been deallocated */ - for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) { - if (PLEX[plexno].state == plex_unallocated) /* bingo */ - break; /* and get out of here */ - } - - if (plexno >= vinum_conf.plexes_allocated) - EXPAND(PLEX, struct plex, vinum_conf.plexes_allocated, INITIAL_PLEXES); - - /* Found a plex. Give it an sd structure */ - plex = &PLEX[plexno]; /* this one is ours */ - bzero(plex, sizeof(struct plex)); /* polish it up */ - plex->sdnos = (int *) Malloc(sizeof(int) * INITIAL_SUBDISKS_IN_PLEX); /* allocate sd table */ - CHECKALLOC(plex->sdnos, "vinum: Can't allocate plex subdisk table"); - bzero(plex->sdnos, (sizeof(int) * INITIAL_SUBDISKS_IN_PLEX)); /* do we need this? 
*/ - plex->flags |= VF_NEWBORN; /* newly born plex */ - plex->subdisks = 0; /* no subdisks in use */ - plex->subdisks_allocated = INITIAL_SUBDISKS_IN_PLEX; /* and we have space for this many */ - plex->organization = plex_disorg; /* and it's not organized */ - plex->volno = -1; /* no volume yet */ - return plexno; /* return the index */ -} - -/* - * Find the named plex in vinum_conf.plex - * - * If create != 0, create an entry if it doesn't exist - * return index in vinum_conf.plex - */ -int -find_plex(const char *name, int create) -{ - int plexno; - struct plex *plex; - - for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) { - if (strcmp(PLEX[plexno].name, name) == 0) /* found it */ - return plexno; - } - - /* the plex isn't in the list. Add it if he wants */ - if (create == 0) /* don't want to create */ - return -1; /* give up */ - - /* Allocate one and insert the name */ - plexno = get_empty_plex(); - plex = &PLEX[plexno]; /* point to it */ - bcopy(name, plex->name, min(sizeof(plex->name), strlen(name))); /* put in its name */ - return plexno; /* return the pointer */ -} - -/* - * Free an allocated plex entry - * and its associated memory areas - */ -void -free_plex(int plexno) -{ - struct plex *plex; - - plex = &PLEX[plexno]; - if (plex->sdnos) - Free(plex->sdnos); - if (plex->lock) - Free(plex->lock); - if (plex->dev) - destroy_dev(plex->dev); - bzero(plex, sizeof(struct plex)); /* and clear it out */ - plex->state = plex_unallocated; -} - -/* Find an empty volume in the volume table */ -int -get_empty_volume(void) -{ - int volno; - struct volume *vol; - int i; - - /* first see if we have one which has been deallocated */ - for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) { - if (VOL[volno].state == volume_unallocated) /* bingo */ - break; - } - - if (volno >= vinum_conf.volumes_allocated) - EXPAND(VOL, struct volume, vinum_conf.volumes_allocated, INITIAL_VOLUMES); - - /* Now initialize fields */ - vol = &VOL[volno]; - bzero(vol, 
sizeof(struct volume)); - vol->flags |= VF_NEWBORN | VF_CREATED; /* newly born volume */ - vol->preferred_plex = ROUND_ROBIN_READPOL; /* round robin */ - for (i = 0; i < MAXPLEX; i++) /* mark the plexes missing */ - vol->plex[i] = -1; - return volno; /* return the index */ -} - -/* - * Find the named volume in vinum_conf.volume. - * - * If create != 0, create an entry if it doesn't exist - * return the index in vinum_conf - */ -int -find_volume(const char *name, int create) -{ - int volno; - struct volume *vol; - - for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) { - if (strcmp(VOL[volno].name, name) == 0) /* found it */ - return volno; - } - - /* the volume isn't in the list. Add it if he wants */ - if (create == 0) /* don't want to create */ - return -1; /* give up */ - - /* Allocate one and insert the name */ - volno = get_empty_volume(); - vol = &VOL[volno]; - bcopy(name, vol->name, min(sizeof(vol->name), strlen(name))); /* put in its name */ - vol->blocksize = DEV_BSIZE; /* block size of this volume */ - return volno; /* return the pointer */ -} - -/* - * Free an allocated volume entry - * and its associated memory areas - */ -void -free_volume(int volno) -{ - struct volume *vol; - - vol = &VOL[volno]; - if (vol->dev) - destroy_dev(vol->dev); - bzero(vol, sizeof(struct volume)); /* and clear it out */ - vol->state = volume_unallocated; -} - -/* - * Handle a drive definition. We store the information in the global variable - * drive, so we don't need to allocate. 
- * - * If we find an error, print a message and return - */ -void -config_drive(int update) -{ - enum drive_label_info partition_status; /* info about the partition */ - int parameter; - int driveno; /* index of drive in vinum_conf */ - struct drive *drive; /* and pointer to it */ - int otherdriveno; /* index of possible second drive */ - int sdno; - - if (tokens < 2) /* not enough tokens */ - throw_rude_remark(EINVAL, "Drive has no name\n"); - driveno = find_drive(token[1], 1); /* allocate a drive to initialize */ - drive = &DRIVE[driveno]; /* and get a pointer */ - if (update && ((drive->flags & VF_NEWBORN) == 0)) /* this drive exists already */ - return; /* don't do anything */ - drive->flags &= ~VF_NEWBORN; /* no longer newly born */ - - if (drive->state != drive_referenced) { /* we already know this drive */ - /* - * XXX Check which definition is more up-to-date. Give - * preference for the definition on its own drive. - */ - return; /* XXX */ - } - for (parameter = 2; parameter < tokens; parameter++) { /* look at the other tokens */ - switch (get_keyword(token[parameter], &keyword_set)) { - case kw_device: - parameter++; - otherdriveno = find_drive_by_name(token[parameter], 0); /* see if it exists already */ - if (otherdriveno >= 0) { /* yup, */ - drive->state = drive_unallocated; /* deallocate the drive */ - throw_rude_remark(EEXIST, /* and complain */ - "Drive %s would have same device as drive %s", - token[1], - DRIVE[otherdriveno].label.name); - } - if (drive->devicename[0] == '/') { /* we know this drive... 
*/ - if (strcmp(drive->devicename, token[parameter])) /* different name */ - close_drive(drive); /* close it if it's open */ - else /* no change */ - break; - } - /* open the device and get the configuration */ - bcopy(token[parameter], /* insert device information */ - drive->devicename, - min(sizeof(drive->devicename), - strlen(token[parameter]))); - partition_status = read_drive_label(drive, 1); - switch (partition_status) { - case DL_CANT_OPEN: /* not our kind */ - close_drive(drive); - if (drive->lasterror == EFTYPE) /* wrong kind of partition */ - throw_rude_remark(drive->lasterror, - "Drive %s has invalid partition type", - drive->label.name); - else /* I/O error of some kind */ - throw_rude_remark(drive->lasterror, - "Can't initialize drive %s", - drive->label.name); - break; - - case DL_WRONG_DRIVE: /* valid drive, not the name we expected */ - if (vinum_conf.flags & VF_FORCECONFIG) { /* but we'll accept that */ - bcopy(token[1], drive->label.name, sizeof(drive->label.name)); - break; - } - close_drive(drive); - /* - * There's a potential race condition here: - * the rude remark refers to a field in an - * unallocated drive, which potentially could - * be reused. This works because we're the only - * thread accessing the config at the moment. - */ - drive->state = drive_unallocated; /* throw it away completely */ - throw_rude_remark(drive->lasterror, - "Incorrect drive name %s specified for drive %s", - token[1], - drive->label.name); - break; - - case DL_DELETED_LABEL: /* it was a drive, but we deleted it */ - case DL_NOT_OURS: /* nothing to do with the rest */ - case DL_OURS: - break; - } - /* - * read_drive_label overwrites the device name. 
- * If we get here, we can have the drive, - * so put it back again - */ - bcopy(token[parameter], - drive->devicename, - min(sizeof(drive->devicename), - strlen(token[parameter]))); - break; - - case kw_state: - parameter++; /* skip the keyword */ - if (vinum_conf.flags & VF_READING_CONFIG) - drive->state = DriveState(token[parameter]); /* set the state */ - break; - - case kw_hotspare: /* this drive is a hot spare */ - drive->flags |= VF_HOTSPARE; - break; - - default: - close_drive(drive); - throw_rude_remark(EINVAL, - "Drive %s, invalid keyword: %s", - token[1], - token[parameter]); - } - } - - if (drive->devicename[0] != '/') { - drive->state = drive_unallocated; /* deallocate the drive */ - throw_rude_remark(EINVAL, "No device name for %s", drive->label.name); - } - vinum_conf.drives_used++; /* passed all hurdles: one more in use */ - /* - * If we're replacing a drive, it could be that - * we already have subdisks referencing this - * drive. Note where they should be and change - * their state to obsolete. - */ - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { - if ((SD[sdno].state > sd_referenced) - && (SD[sdno].driveno == driveno)) { - give_sd_to_drive(sdno); - if (SD[sdno].state > sd_stale) - SD[sdno].state = sd_stale; - } - } -} - -/* - * Handle a subdisk definition. We store the - * information in the global variable sd, so we - * don't need to allocate. - * - * On error throw a message back to the caller. 
- */ -void -config_subdisk(int update) -{ - int parameter; - int sdno; /* index of sd in vinum_conf */ - struct sd *sd; /* and pointer to it */ - u_int64_t size; - int detached = 0; /* set to 1 if this is a detached subdisk */ - int sdindex = -1; /* index in plexes subdisk table */ - enum sdstate state = sd_unallocated; /* state to set, if specified */ - int autosize = 0; /* set if we autosize in give_sd_to_drive */ - int namedsdno; /* index of another with this name */ - char partition = 0; /* partition of external subdisk */ - - sdno = get_empty_sd(); /* allocate an SD to initialize */ - sd = &SD[sdno]; /* and get a pointer */ - - for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */ - switch (get_keyword(token[parameter], &keyword_set)) { - /* - * If we have a 'name' parameter, it must - * come first, because we're too lazy to tidy - * up dangling refs if it comes later. - */ - case kw_name: - namedsdno = find_subdisk(token[++parameter], 0); /* find an existing sd with this name */ - if (namedsdno >= 0) { /* got one */ - if (SD[namedsdno].state == sd_referenced) { /* we've been told about this one */ - if (parameter > 2) - throw_rude_remark(EINVAL, - "sd %s: name parameter must come first\n", /* no go */ - token[parameter]); - else { - int i; - struct plex *plex; /* for tidying up dangling references */ - - *sd = SD[namedsdno]; /* copy from the referenced one */ - SD[namedsdno].state = sd_unallocated; /* and deallocate the referenced one */ - plex = &PLEX[sd->plexno]; /* now take a look at our plex */ - for (i = 0; i < plex->subdisks; i++) { /* look for the pointer */ - if (plex->sdnos[i] == namedsdno) /* pointing to the old subdisk */ - plex->sdnos[i] = sdno; /* bend it to point here */ - } - } - } - if (update) /* are we updating? 
*/ - return; /* that's OK, nothing more to do */ - else - throw_rude_remark(EINVAL, "Duplicate subdisk %s", token[parameter]); - } else - bcopy(token[parameter], - sd->name, - min(sizeof(sd->name), strlen(token[parameter]))); - break; - - case kw_detached: - detached = 1; - break; - - case kw_plexoffset: - size = sizespec(token[++parameter]); - if ((size == -1) /* unallocated */ - &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */ - break; /* invalid sd; just ignore it */ - if ((size % DEV_BSIZE) != 0) - throw_rude_remark(EINVAL, - "sd %s, bad plex offset alignment: %lld", - sd->name, - (long long) size); - else - sd->plexoffset = size / DEV_BSIZE; - break; - - case kw_driveoffset: - size = sizespec(token[++parameter]); - if ((size == -1) /* unallocated */ - &&(vinum_conf.flags & VF_READING_CONFIG)) /* reading from disk */ - break; /* invalid sd; just ignore it */ - if ((size % DEV_BSIZE) != 0) - throw_rude_remark(EINVAL, - "sd %s, bad drive offset alignment: %lld", - sd->name, - (long long) size); - else - sd->driveoffset = size / DEV_BSIZE; - break; - - case kw_len: - if (get_keyword(token[++parameter], &keyword_set) == kw_max) /* select maximum size from drive */ - size = 0; /* this is how we say it :-) */ - else - size = sizespec(token[parameter]); - if ((size % DEV_BSIZE) != 0) - throw_rude_remark(EINVAL, "sd %s, length %d not multiple of sector size", sd->name, size); - else - sd->sectors = size / DEV_BSIZE; - /* - * We have a problem with autosizing: we need to - * give the drive to the plex before we give it - * to the drive, in order to be clean if we give - * up in the middle, but at this time the size hasn't - * been set. Note that we have to fix up after - * giving the subdisk to the drive. 
- */ - if (size == 0) - autosize = 1; /* note that we're autosizing */ - break; - - case kw_drive: - sd->driveno = find_drive(token[++parameter], 1); /* insert drive information */ - break; - - case kw_plex: - sd->plexno = find_plex(token[++parameter], 1); /* insert plex information */ - break; - - /* - * Set the state. We can't do this directly, - * because give_sd_to_plex may change it - */ - case kw_state: - parameter++; /* skip the keyword */ - if (vinum_conf.flags & VF_READING_CONFIG) - state = SdState(token[parameter]); /* set the state */ - break; - - case kw_partition: - parameter++; /* skip the keyword */ - if ((strlen(token[parameter]) != 1) - || (token[parameter][0] < 'a') - || (token[parameter][0] > 'h')) - throw_rude_remark(EINVAL, - "%s: invalid partition %c", - sd->name, - token[parameter][0]); - else - partition = token[parameter][0]; - break; - - case kw_retryerrors: - sd->flags |= VF_RETRYERRORS; - break; - - default: - throw_rude_remark(EINVAL, "%s: invalid keyword: %s", sd->name, token[parameter]); - } - } - - /* Check we have a drive name */ - if (sd->driveno < 0) { /* didn't specify a drive */ - sd->driveno = current_drive; /* set to the current drive */ - if (sd->driveno < 0) /* no current drive? */ - throw_rude_remark(EINVAL, "Subdisk %s is not associated with a drive", sd->name); - } - if (DRIVE[sd->driveno].state != drive_up) - sd->state = sd_crashed; - - if (autosize != 0) /* need to find a size, */ - give_sd_to_drive(sdno); /* do it before the plex */ - - /* Check for a plex name */ - if ((sd->plexno < 0) /* didn't specify a plex */ - &&(!detached)) /* and didn't say not to, */ - sd->plexno = current_plex; /* set to the current plex */ - - if (sd->plexno >= 0) - sdindex = give_sd_to_plex(sd->plexno, sdno); /* now tell the plex that it has this sd */ - - sd->sdno = sdno; /* point to our entry in the table */ - - /* Does the subdisk have a name? 
If not, give it one */ - if (sd->name[0] == '\0') { /* no name */ - char sdsuffix[8]; /* form sd name suffix here */ - - /* Do we have a plex name? */ - if (sdindex >= 0) /* we have a plex */ - strlcpy(sd->name, /* take it from there */ - PLEX[sd->plexno].name, - sizeof(sd->name)); - else { /* no way */ - if (sd->state == sd_unallocated) { /* haven't finished allocating the sd, */ - if (autosize != 0) { /* but we might have allocated drive space */ - vinum_conf.subdisks_used++; /* ugly hack needed for free_sd() */ - free_sd(sdno); /* free it to return drive space */ - } else { /* just clear it */ - bzero(sd, sizeof(struct sd)); - sd->state = sd_unallocated; - } - } - throw_rude_remark(EINVAL, "Unnamed sd is not associated with a plex"); - } - sprintf(sdsuffix, ".s%d", sdindex); /* form the suffix */ - strlcat(sd->name, sdsuffix, sizeof(sd->name)); /* and add it to the name */ - } - /* do we have complete info for this subdisk? */ - if (sd->sectors < 0) - throw_rude_remark(EINVAL, "sd %s has no length spec", sd->name); - - if (sd->dev == NULL) - /* - * sdno can (at least theoretically) overflow - * into the low order bit of the type field. - * This gives rise to a subdisk with type - * VINUM_SD2_TYPE. This is a feature, not a - * bug. - */ - sd->dev = make_dev(&vinum_cdevsw, - VINUMMINOR(sdno, VINUM_SD_TYPE), - UID_ROOT, - GID_OPERATOR, - S_IRUSR | S_IWUSR | S_IRGRP, - "vinum/sd/%s", - sd->name); - if (state != sd_unallocated) /* we had a specific state to set */ - sd->state = state; /* do it now */ - else if (sd->state == sd_unallocated) /* no, nothing set yet, */ - sd->state = sd_empty; /* must be empty */ - if (autosize == 0) /* no autoconfig, do the drive now */ - give_sd_to_drive(sdno); - vinum_conf.subdisks_used++; /* one more in use */ -} - -/* - * Handle a plex definition. 
- */ -void -config_plex(int update) -{ - int parameter; - int plexno; /* index of plex in vinum_conf */ - struct plex *plex; /* and pointer to it */ - int pindex = MAXPLEX; /* index in volume's plex list */ - int detached = 0; /* don't give it to a volume */ - int namedplexno; - enum plexstate state = plex_init; /* state to set at end */ - int preferme; /* set if we want to be preferred access */ - int stripesize; - - stripesize = 0; - current_plex = -1; /* forget the previous plex */ - preferme = 0; /* nothing special yet */ - plexno = get_empty_plex(); /* allocate a plex */ - plex = &PLEX[plexno]; /* and point to it */ - plex->plexno = plexno; /* and back to the config */ - - for (parameter = 1; parameter < tokens; parameter++) { /* look at the other tokens */ - switch (get_keyword(token[parameter], &keyword_set)) { - /* - * If we have a 'name' parameter, it must - * come first, because we're too lazy to tidy - * up dangling refs if it comes later. - */ - case kw_name: - namedplexno = find_plex(token[++parameter], 0); /* find an existing plex with this name */ - if (namedplexno >= 0) { /* plex exists already, */ - if (PLEX[namedplexno].state == plex_referenced) { /* we've been told about this one */ - if (parameter > 2) /* we've done other things first, */ - throw_rude_remark(EINVAL, - "plex %s: name parameter must come first\n", /* no go */ - token[parameter]); - else { - int i; - struct volume *vol; /* for tidying up dangling references */ - - *plex = PLEX[namedplexno]; /* get the info */ - PLEX[namedplexno].state = plex_unallocated; /* and deallocate the other one */ - vol = &VOL[plex->volno]; /* point to the volume */ - for (i = 0; i < MAXPLEX; i++) { /* for each plex */ - if (vol->plex[i] == namedplexno) - vol->plex[i] = plexno; /* bend the pointer */ - } - } - break; /* use this one */ - } - if (update) /* are we updating? 
*/ - return; /* yes: that's OK, just return */ - else - throw_rude_remark(EINVAL, "Duplicate plex %s", token[parameter]); - } else - bcopy(token[parameter], /* put in the name */ - plex->name, - min(MAXPLEXNAME, strlen(token[parameter]))); - break; - - case kw_detached: - detached = 1; - break; - - case kw_org: /* plex organization */ - switch (get_keyword(token[++parameter], &keyword_set)) { - case kw_concat: - plex->organization = plex_concat; - break; - - case kw_striped: - { - plex->organization = plex_striped; - - if (++parameter >= tokens) /* No stripe size specified. */ - stripesize = 0; - else - stripesize = sizespec(token[parameter]); - - break; - } - - case kw_raid4: - { - plex->organization = plex_raid4; - - if (++parameter >= tokens) /* No stripe size specified. */ - stripesize = 0; - else - stripesize = sizespec(token[parameter]); - - break; - } - - case kw_raid5: - { - plex->organization = plex_raid5; - - if (++parameter >= tokens) /* No stripe size specified. */ - stripesize = 0; - else - stripesize = sizespec(token[parameter]); - - break; - } - - default: - throw_rude_remark(EINVAL, "Invalid plex organization"); - } - if (isstriped(plex)) { - if (stripesize == 0) /* didn't specify a valid stripe size */ - throw_rude_remark(EINVAL, "Need a stripe size parameter"); - else if (stripesize % DEV_BSIZE != 0) - throw_rude_remark(EINVAL, "plex %s: stripe size %d not a multiple of sector size", - plex->name, - stripesize); - else - plex->stripesize = stripesize / DEV_BSIZE; - } - break; - - /* - * We're the preferred plex of our volume. - * Unfortunately, we don't know who our - * volume is yet. Note that we want to be - * preferred, and actually do it after we - * get a volume. 
- */ - case kw_preferred: - preferme = 1; - break; - - case kw_volume: - plex->volno = find_volume(token[++parameter], 1); /* insert a pointer to the volume */ - break; - - case kw_sd: /* add a subdisk */ - { - int sdno; - - sdno = find_subdisk(token[++parameter], 1); /* find a subdisk */ - SD[sdno].plexoffset = sizespec(token[++parameter]); /* get the offset */ - give_sd_to_plex(plexno, sdno); /* and insert it there */ - break; - } - - case kw_state: - parameter++; /* skip the keyword */ - if (vinum_conf.flags & VF_READING_CONFIG) - state = PlexState(token[parameter]); /* set the state */ - break; - - default: - throw_rude_remark(EINVAL, "plex %s, invalid keyword: %s", - plex->name, - token[parameter]); - } - } - - if (plex->organization == plex_disorg) - throw_rude_remark(EINVAL, "No plex organization specified"); - - if ((plex->volno < 0) /* we don't have a volume */ - &&(!detached)) /* and we wouldn't object */ - plex->volno = current_volume; - - if (plex->volno >= 0) - pindex = give_plex_to_volume(plex->volno, /* Now tell the volume that it has this plex */ - plexno, - preferme); - - /* Does the plex have a name? If not, give it one */ - if (plex->name[0] == '\0') { /* no name */ - char plexsuffix[8]; /* form plex name suffix here */ - /* Do we have a volume name? 
*/ - if (plex->volno >= 0) /* we have a volume */ - strlcpy(plex->name, /* take it from there */ - VOL[plex->volno].name, - sizeof(plex->name)); - else /* no way */ - throw_rude_remark(EINVAL, "Unnamed plex is not associated with a volume"); - sprintf(plexsuffix, ".p%d", pindex); /* form the suffix */ - strlcat(plex->name, plexsuffix, sizeof(plex->name)); /* and add it to the name */ - } - if (isstriped(plex)) { - plex->lock = (struct rangelock *) - Malloc(PLEX_LOCKS * sizeof(struct rangelock)); - CHECKALLOC(plex->lock, "vinum: Can't allocate lock table\n"); - bzero((char *) plex->lock, PLEX_LOCKS * sizeof(struct rangelock)); - plex->lockmtx = &plexmutex[plexno % PLEXMUTEXES]; /* use this mutex for locking */ - } - /* Note the last plex we configured */ - current_plex = plexno; - plex->state = state; /* set whatever state we chose */ - vinum_conf.plexes_used++; /* one more in use */ - if (plex->dev == NULL) - plex->dev = make_dev(&vinum_cdevsw, - VINUMMINOR(plexno, VINUM_PLEX_TYPE), - UID_ROOT, - GID_OPERATOR, - S_IRUSR | S_IWUSR | S_IRGRP, - "vinum/plex/%s", - plex->name); -} - -/* - * Handle a volume definition. 
- * If we find an error, print a message, deallocate the nascent volume, and return - */ -void -config_volume(int update) -{ - int parameter; - int volno; - struct volume *vol; /* collect volume info here */ - int i; - - if (tokens < 2) /* not enough tokens */ - throw_rude_remark(EINVAL, "Volume has no name"); - current_volume = -1; /* forget the previous volume */ - volno = find_volume(token[1], 1); /* allocate a volume to initialize */ - vol = &VOL[volno]; /* and get a pointer */ - if (update && ((vol->flags & VF_CREATED) == 0)) /* this volume exists already */ - return; /* don't do anything */ - vol->flags &= ~VF_CREATED; /* it exists now */ - - for (parameter = 2; parameter < tokens; parameter++) { /* look at all tokens */ - switch (get_keyword(token[parameter], &keyword_set)) { - case kw_plex: - { - int plexno; /* index of this plex */ - int myplexno; /* and index if it's already ours */ - - plexno = find_plex(token[++parameter], 1); /* find a plex */ - if (plexno < 0) /* couldn't */ - break; /* we've already had an error message */ - myplexno = my_plex(volno, plexno); /* does it already belong to us? 
*/ - if (myplexno > 0) /* yes, shouldn't get it again */ - throw_rude_remark(EINVAL, - "Plex %s already belongs to volume %s", - token[parameter], - vol->name); - else if (++vol->plexes > 8) /* another entry */ - throw_rude_remark(EINVAL, - "Too many plexes for volume %s", - vol->name); - vol->plex[vol->plexes - 1] = plexno; - PLEX[plexno].state = plex_referenced; /* we know something about it */ - PLEX[plexno].volno = volno; /* and this volume references it */ - } - break; - - case kw_readpol: - switch (get_keyword(token[++parameter], &keyword_set)) { /* decide what to do */ - case kw_round: - vol->preferred_plex = ROUND_ROBIN_READPOL; /* default */ - break; - - case kw_prefer: - { - int myplexno; /* index of this plex */ - - myplexno = find_plex(token[++parameter], 1); /* find a plex */ - if (myplexno < 0) { /* couldn't */ - printf("vinum: couldn't find preferred plex %s for %s\n", - token[parameter], - vol->name); - break; /* we've already had an error message */ - } - myplexno = my_plex(volno, myplexno); /* does it already belong to us? */ - if (myplexno > 0) /* yes */ - vol->preferred_plex = myplexno; /* just note the index */ - else if (++vol->plexes > 8) /* another entry */ - throw_rude_remark(EINVAL, "Too many plexes"); - else { /* space for the new plex */ - vol->plex[vol->plexes - 1] = myplexno; /* add it to our list */ - vol->preferred_plex = vol->plexes - 1; /* and note the index */ - } - } - break; - - default: - throw_rude_remark(EINVAL, "Invalid read policy"); - } - - case kw_setupstate: - vol->flags |= VF_CONFIG_SETUPSTATE; /* set the volume up later on */ - break; - - case kw_state: - parameter++; /* skip the keyword */ - if (vinum_conf.flags & VF_READING_CONFIG) - vol->state = VolState(token[parameter]); /* set the state */ - break; - - /* - * XXX experimental ideas. These are not - * documented, and will not be until I - * decide they're worth keeping. 
- */ - case kw_writethrough: /* set writethrough mode */ - vol->flags |= VF_WRITETHROUGH; - break; - - case kw_writeback: /* set writeback mode */ - vol->flags &= ~VF_WRITETHROUGH; - break; - - default: - throw_rude_remark(EINVAL, "volume %s, invalid keyword: %s", - vol->name, - token[parameter]); - } - } - current_volume = volno; /* note last referred volume */ - vol->volno = volno; /* also note in volume */ - - /* - * Before we can actually use the volume, we need - * a volume label. We could start to fake one here, - * but it will be a lot easier when we have some - * to copy from the drives, so defer it until we - * set up the configuration. XXX - */ - if (vol->state == volume_unallocated) - vol->state = volume_down; /* now ready to bring up at the end */ - - /* Find out how big our volume is */ - for (i = 0; i < vol->plexes; i++) - vol->size = max(vol->size, PLEX[vol->plex[i]].length); - vinum_conf.volumes_used++; /* one more in use */ - if (vol->dev == NULL) - vol->dev = make_dev(&vinum_cdevsw, - VINUMMINOR(volno, VINUM_VOLUME_TYPE), - UID_ROOT, - GID_OPERATOR, - S_IRUSR | S_IWUSR | S_IRGRP, - "vinum/%s", - vol->name); -} - -/* - * Parse a config entry. CARE! This destroys the original contents of the - * config entry, which we don't really need after this. More specifically, it - * places \0 characters at the end of each token. - * - * Return 0 if all is well, otherwise EINVAL for invalid keyword, - * or ENOENT if 'read' command doesn't find any drives. 
- */ -int -parse_config(char *cptr, struct keywordset *keyset, int update) -{ - int status; - - status = 0; /* until proven otherwise */ - tokens = tokenize(cptr, token, MAXTOKEN); /* chop up into tokens */ - - if (tokens <= 0) /* screwed up or empty line */ - return tokens; /* give up */ - else if (tokens == MAXTOKEN) /* too many */ - throw_rude_remark(E2BIG, - "Configuration error for %s: too many parameters", - token[1]); - - if (token[0][0] == '#') /* comment line */ - return 0; - - switch (get_keyword(token[0], keyset)) { /* decide what to do */ - case kw_drive: - config_drive(update); - break; - - case kw_subdisk: - config_subdisk(update); - break; - - case kw_plex: - config_plex(update); - break; - - case kw_volume: - config_volume(update); - break; - - /* Anything else is invalid in this context */ - default: - throw_rude_remark(EINVAL, /* should we die? */ - "Invalid configuration information: %s", - token[0]); - } - return status; -} - -/* - * parse a line handed in from userland via ioctl. - * This differs only by the error reporting mechanism: - * we return the error indication in the reply to the - * ioctl, so we need to set a global static pointer in - * this file. 
This technique works because we have - * ensured that configuration is performed in a single- - * threaded manner - */ -int -parse_user_config(char *cptr, struct keywordset *keyset) -{ - int status; - - ioctl_reply = (struct _ioctl_reply *) cptr; - status = parse_config(cptr, keyset, 0); - ioctl_reply = NULL; /* don't do this again */ - return status; -} - -/* Remove an object */ -void -remove(struct vinum_ioctl_msg *msg) -{ - struct vinum_ioctl_msg message = *msg; /* make a copy to hand on */ - - ioctl_reply = (struct _ioctl_reply *) msg; /* reinstate the address to reply to */ - ioctl_reply->error = 0; /* no error, */ - ioctl_reply->msg[0] = '\0'; /* no message */ - - switch (message.type) { - case drive_object: - remove_drive_entry(message.index, message.force); - updateconfig(0); - return; - - case sd_object: - remove_sd_entry(message.index, message.force, message.recurse); - updateconfig(0); - return; - - case plex_object: - remove_plex_entry(message.index, message.force, message.recurse); - updateconfig(0); - return; - - case volume_object: - remove_volume_entry(message.index, message.force, message.recurse); - updateconfig(0); - return; - - default: - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "Invalid object type"); - } -} - -/* Remove a drive. 
*/ -void -remove_drive_entry(int driveno, int force) -{ - struct drive *drive = &DRIVE[driveno]; - int sdno; - - if ((driveno > vinum_conf.drives_allocated) /* not a valid drive */ - ||(drive->state == drive_unallocated)) { /* or nothing there */ - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "No such drive"); - } else if (drive->opencount > 0) { /* we have subdisks */ - if (force) { /* do it at any cost */ - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { - if ((SD[sdno].state != sd_unallocated) /* subdisk is allocated */ - &&(SD[sdno].driveno == driveno)) /* and it belongs to this drive */ - remove_sd_entry(sdno, force, 0); - } - remove_drive(driveno); /* now remove it */ - vinum_conf.drives_used--; /* one less drive */ - } else - ioctl_reply->error = EBUSY; /* can't do that */ - } else { - remove_drive(driveno); /* just remove it */ - vinum_conf.drives_used--; /* one less drive */ - } -} - -/* remove a subdisk */ -void -remove_sd_entry(int sdno, int force, int recurse) -{ - struct sd *sd = &SD[sdno]; - - if ((sdno > vinum_conf.subdisks_allocated) /* not a valid sd */ - ||(sd->state == sd_unallocated)) { /* or nothing there */ - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "No such subdisk"); - } else if (sd->flags & VF_OPEN) /* we're open */ - ioctl_reply->error = EBUSY; /* no getting around that */ - else if (sd->plexno >= 0) { /* we have a plex */ - if (force) { /* do it at any cost */ - struct plex *plex = &PLEX[sd->plexno]; /* point to our plex */ - int mysdno; - - for (mysdno = 0; /* look for ourselves */ - mysdno < plex->subdisks && &SD[plex->sdnos[mysdno]] != sd; - mysdno++); - if (mysdno == plex->subdisks) /* didn't find it */ - log(LOG_ERR, - "Error removing subdisk %s: not found in plex %s\n", - SD[mysdno].name, - plex->name); - else { /* remove the subdisk from plex */ - if (mysdno < (plex->subdisks - 1)) /* not the last subdisk */ - bcopy(&plex->sdnos[mysdno + 1], - &plex->sdnos[mysdno], - (plex->subdisks - 1 - 
mysdno) * sizeof(int)); - plex->subdisks--; - sd->plexno = -1; /* disown the subdisk */ - } - - /* - * Removing a subdisk from a striped or - * RAID-4 or RAID-5 plex really tears the - * hell out of the structure, and it needs - * to be reinitialized. - */ - if (plex->organization != plex_concat) /* not concatenated, */ - set_plex_state(plex->plexno, plex_faulty, setstate_force); /* need to reinitialize */ - log(LOG_INFO, "vinum: removing %s\n", sd->name); - free_sd(sdno); - } else - ioctl_reply->error = EBUSY; /* can't do that */ - } else { - log(LOG_INFO, "vinum: removing %s\n", sd->name); - free_sd(sdno); - } -} - -/* remove a plex */ -void -remove_plex_entry(int plexno, int force, int recurse) -{ - struct plex *plex = &PLEX[plexno]; - int sdno; - - if ((plexno > vinum_conf.plexes_allocated) /* not a valid plex */ - ||(plex->state == plex_unallocated)) { /* or nothing there */ - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "No such plex"); - } else if (plex->flags & VF_OPEN) { /* we're open */ - ioctl_reply->error = EBUSY; /* no getting around that */ - return; - } - if (plex->subdisks) { - if (force) { /* do it anyway */ - if (recurse) { /* remove all below */ - int sds = plex->subdisks; - for (sdno = 0; sdno < sds; sdno++) - free_sd(plex->sdnos[sdno]); /* free all subdisks */ - } else { /* just tear them out */ - int sds = plex->subdisks; - for (sdno = 0; sdno < sds; sdno++) - SD[plex->sdnos[sdno]].plexno = -1; /* no plex any more */ - } - } else { /* can't do it without force */ - ioctl_reply->error = EBUSY; /* can't do that */ - return; - } - } - if (plex->volno >= 0) { /* we are part of a volume */ - if (force) { /* do it at any cost */ - struct volume *vol = &VOL[plex->volno]; - int myplexno; - - for (myplexno = 0; myplexno < vol->plexes; myplexno++) - if (vol->plex[myplexno] == plexno) /* found it */ - break; - if (myplexno == vol->plexes) /* didn't find it. Huh? 
*/ - log(LOG_ERR, - "Error removing plex %s: not found in volume %s\n", - plex->name, - vol->name); - if (myplexno < (vol->plexes - 1)) /* not the last plex in the list */ - bcopy(&vol->plex[myplexno + 1], - &vol->plex[myplexno], - vol->plexes - 1 - myplexno); - vol->plexes--; - } else { - ioctl_reply->error = EBUSY; /* can't do that */ - return; - } - } - log(LOG_INFO, "vinum: removing %s\n", plex->name); - free_plex(plexno); - vinum_conf.plexes_used--; /* one less plex */ -} - -/* remove a volume */ -void -remove_volume_entry(int volno, int force, int recurse) -{ - struct volume *vol = &VOL[volno]; - int plexno; - - if ((volno > vinum_conf.volumes_allocated) /* not a valid volume */ - ||(vol->state == volume_unallocated)) { /* or nothing there */ - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "No such volume"); - } else if (vol->flags & VF_OPEN) /* we're open */ - ioctl_reply->error = EBUSY; /* no getting around that */ - else if (vol->plexes) { - if (recurse && force) { /* remove all below */ - int plexes = vol->plexes; - -/* for (plexno = plexes - 1; plexno >= 0; plexno--) */ - for (plexno = 0; plexno < plexes; plexno++) - remove_plex_entry(vol->plex[plexno], force, recurse); - log(LOG_INFO, "vinum: removing %s\n", vol->name); - free_volume(volno); - vinum_conf.volumes_used--; /* one less volume */ - } else - ioctl_reply->error = EBUSY; /* can't do that */ - } else { - log(LOG_INFO, "vinum: removing %s\n", vol->name); - free_volume(volno); - vinum_conf.volumes_used--; /* one less volume */ - } -} - -/* Currently called only from ioctl */ -void -update_sd_config(int sdno, int diskconfig) -{ - if (!diskconfig) - set_sd_state(sdno, sd_up, setstate_configuring); - SD[sdno].flags &= ~VF_NEWBORN; -} - -void -update_plex_config(int plexno, int diskconfig) -{ - u_int64_t size; - int sdno; - struct plex *plex = &PLEX[plexno]; - enum plexstate state = plex_up; /* state we want the plex in */ - int remainder; /* size of fractional stripe at end */ - int 
added_plex; /* set if we add a plex to a volume */ - int required_sds; /* number of subdisks we need */ - struct sd *sd; - struct volume *vol; - int data_sds = 0; /* number of sds carrying data */ - - if (plex->state < plex_init) /* not a real plex, */ - return; - added_plex = 0; - if (plex->volno >= 0) { /* we have a volume */ - vol = &VOL[plex->volno]; - - /* - * If we're newly born, - * and the volume isn't, - * and it has other plexes, - * and we didn't read this mess from disk, - * we were added later. - */ - if ((plex->flags & VF_NEWBORN) - && ((vol->flags & VF_NEWBORN) == 0) - && (vol->plexes > 0) - && (diskconfig == 0)) { - added_plex = 1; - state = plex_down; /* so take ourselves down */ - } - } - /* - * Check that our subdisks make sense. For - * striped plexes, we need at least two - * subdisks, and for RAID-4 and RAID-5 plexes we - * need at least three subdisks. In each case - * they must all be the same size. - */ - if (plex->organization == plex_striped) { - data_sds = plex->subdisks; - required_sds = 2; - } else if (isparity(plex)) { /* RAID 4 or 5 */ - data_sds = plex->subdisks - 1; - required_sds = 3; - } else - required_sds = 0; - if (required_sds > 0) { /* striped, RAID-4 or RAID-5 */ - if (plex->subdisks < required_sds) { - log(LOG_ERR, - "vinum: plex %s does not have at least %d subdisks\n", - plex->name, - required_sds); - state = plex_faulty; - } - /* - * Now see if the plex size is a multiple of - * the stripe size. If not, trim off the end - * of each subdisk and return it to the drive. - */ - if (plex->length > 0) { - if (data_sds > 0) { - if (plex->stripesize > 0) { - remainder = (int) (plex->length /* are we exact? 
*/ - % ((u_int64_t) plex->stripesize * data_sds)); - if (remainder) { /* no */ - log(LOG_INFO, "vinum: removing %d blocks of partial stripe at the end of %s\n", - remainder, - plex->name); - plex->length -= remainder; /* shorten the plex */ - remainder /= data_sds; /* spread the remainder amongst the sds */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */ - return_drive_space(sd->driveno, /* return the space */ - sd->driveoffset + sd->sectors - remainder, - remainder); - sd->sectors -= remainder; /* and shorten it */ - } - } - } else /* no data sds, */ - plex->length = 0; /* reset length */ - } - } - } - size = 0; - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; - if (isstriped(plex) - && (sdno > 0) - && (sd->sectors != SD[plex->sdnos[sdno - 1]].sectors)) { - log(LOG_ERR, "vinum: %s must have equal sized subdisks\n", plex->name); - state = plex_down; - } - size += sd->sectors; - if (added_plex) /* we were added later */ - sd->state = sd_stale; /* stale until proven otherwise */ - if (plex->sectorsize != 0) { - if (sd->sectorsize != plex->sectorsize) /* incompatible sector sizes? */ - printf("vinum: incompatible sector sizes. " - "%s has %d bytes, %s has %d bytes. Ignored.\n", - sd->name, - sd->sectorsize, - plex->name, - plex->sectorsize); - } else /* not set yet, */ - plex->sectorsize = sd->sectorsize; - } - - if (plex->subdisks) { /* plex has subdisks, calculate size */ - /* - * XXX We shouldn't need to calculate the size any - * more. 
Check this some time - */ - if (isparity(plex)) - size = size / plex->subdisks * (plex->subdisks - 1); /* less space for RAID-4 and RAID-5 */ - if (plex->length != size) - log(LOG_INFO, - "Correcting length of %s: was %lld, is %lld\n", - plex->name, - (long long) plex->length, - (long long) size); - plex->length = size; - } else { /* no subdisks, */ - plex->length = 0; /* no size */ - state = plex_down; /* take it down */ - } - update_plex_state(plexno); /* set the state */ - plex->flags &= ~VF_NEWBORN; -} - -void -update_volume_config(int volno) -{ - struct volume *vol = &VOL[volno]; - struct plex *plex; - int plexno; - - if (vol->state != volume_unallocated) - /* - * Recalculate the size of the volume, - * which might change if the original - * plexes were not a multiple of the - * stripe size. - */ - { - vol->size = 0; - for (plexno = 0; plexno < vol->plexes; plexno++) { - plex = &PLEX[vol->plex[plexno]]; - vol->size = max(plex->length, vol->size); /* maximum size */ - plex->volplexno = plexno; /* note it in the plex */ - if (vol->sectorsize != 0) { - if (plex->sectorsize != vol->sectorsize) /* incompatible sector sizes? */ - printf("vinum: incompatible sector sizes. " - "%s has %d, %s has %d. Ignored.\n", - plex->name, - plex->sectorsize, - vol->name, - vol->sectorsize); - } else /* not set yet, */ - vol->sectorsize = plex->sectorsize; - } - } - vol->flags &= ~VF_NEWBORN; /* no longer newly born */ -} - -/* - * Update the global configuration. This is - * called after configuration changes. - * - * diskconfig is != 0 if we're reading in a config - * from disk. In this case, we don't try to bring - * the devices up, though we will bring them down - * if there's some error which got missed when - * writing to disk. 
- */ -void -updateconfig(int diskconfig) -{ - int plexno; - int volno; - - for (plexno = 0; plexno < vinum_conf.plexes_allocated; plexno++) - update_plex_config(plexno, diskconfig); - - for (volno = 0; volno < vinum_conf.volumes_allocated; volno++) { - if (VOL[volno].state > volume_uninit) { - VOL[volno].flags &= ~VF_CONFIG_SETUPSTATE; /* no more setupstate */ - update_volume_state(volno); - update_volume_config(volno); - } - } - save_config(); -} - -/* - * Start manual changes to the configuration and lock out - * others who may wish to do so. - * XXX why do we need this and lock_config too? - */ -int -start_config(int force) -{ - int error; - - current_drive = -1; /* note the last drive we mention, for - * some defaults */ - current_plex = -1; /* and the same for the last plex */ - current_volume = -1; /* and the last volume */ - while ((vinum_conf.flags & VF_CONFIGURING) != 0) { - vinum_conf.flags |= VF_WILL_CONFIGURE; - if ((error = tsleep(&vinum_conf, PRIBIO | PCATCH, "vincfg", 0)) != 0) - return error; - } - /* - * We need two flags here: VF_CONFIGURING - * tells other processes to hold off (this - * function), and VF_CONFIG_INCOMPLETE - * tells the state change routines not to - * propagate incrememntal state changes - */ - vinum_conf.flags |= VF_CONFIGURING | VF_CONFIG_INCOMPLETE; - if (force) - vinum_conf.flags |= VF_FORCECONFIG; /* overwrite differently named drives */ - current_drive = -1; /* reset the defaults */ - current_plex = -1; /* and the same for the last plex */ - current_volume = -1; /* and the last volme */ - return 0; -} - -/* - * Update the config if update is 1, and unlock - * it. We won't update the configuration if we - * are called in a recursive loop via throw_rude_remark. 
- */ -void -finish_config(int update) -{ - /* we've finished our config */ - vinum_conf.flags &= ~(VF_CONFIG_INCOMPLETE | VF_READING_CONFIG | VF_FORCECONFIG); - if (update) - updateconfig(0); /* so update things */ - else - updateconfig(1); /* do some updates only */ - vinum_conf.flags &= ~VF_CONFIGURING; /* and now other people can take a turn */ - if ((vinum_conf.flags & VF_WILL_CONFIGURE) != 0) { - vinum_conf.flags &= ~VF_WILL_CONFIGURE; - wakeup_one(&vinum_conf); - } -} -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumdaemon.c b/sys/dev/vinum/vinumdaemon.c deleted file mode 100644 index 121357d..0000000 --- a/sys/dev/vinum/vinumdaemon.c +++ /dev/null @@ -1,283 +0,0 @@ -/* daemon.c: kernel part of Vinum daemon */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumdaemon.c,v 1.8 2000/01/03 05:22:03 grog Exp grog $ - */ - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -#ifdef VINUMDEBUG -#include <sys/reboot.h> -#endif - -/* declarations */ -void recover_io(struct request *rq); - -int daemon_options = 0; /* options */ -int daemonpid; /* PID of daemon */ -struct daemonq *daemonq; /* daemon's work queue */ -struct daemonq *dqend; /* and the end of the queue */ - -/* - * We normally call Malloc to get a queue element. In interrupt - * context, we can't guarantee that we'll get one, since we're not - * allowed to wait. If malloc fails, use one of these elements. 
- */ - -#define INTQSIZE 4 -struct daemonq intq[INTQSIZE]; /* queue elements for interrupt context */ -struct daemonq *intqp; /* and pointer in it */ - -void -vinum_daemon(void) -{ - int s; - struct daemonq *request; - - PROC_LOCK(curproc); - curproc->p_flag |= P_SYSTEM; /* we're a system process */ - mtx_lock_spin(&sched_lock); - curproc->p_sflag |= PS_INMEM; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(curproc); - daemon_save_config(); /* start by saving the configuration */ - daemonpid = curproc->p_pid; /* mark our territory */ - while (1) { - tsleep(&vinum_daemon, PRIBIO, "vinum", 0); /* wait for something to happen */ - - /* - * It's conceivable that, as the result of an - * I/O error, we'll be out of action long - * enough that another daemon gets started. - * That's OK, just give up gracefully. - */ - if (curproc->p_pid != daemonpid) { /* we've been ousted in our sleep */ - if (daemon_options & daemon_verbose) - log(LOG_INFO, "vinum: abdicating\n"); - return; - } - while (daemonq != NULL) { /* we have work to do, */ - s = splhigh(); /* don't get interrupted here */ - request = daemonq; /* get the request */ - daemonq = daemonq->next; /* and detach it */ - if (daemonq == NULL) /* got to the end, */ - dqend = NULL; /* no end any more */ - splx(s); - - switch (request->type) { - /* - * We had an I/O error on a request. Go through the - * request and try to salvage it - */ - case daemonrq_ioerror: - if (daemon_options & daemon_verbose) { - struct request *rq = request->info.rq; - - log(LOG_WARNING, - "vinum: recovering I/O request: %p\n%s dev %d.%d, offset 0x%llx, length %ld\n", - rq, - rq->bp->b_iocmd == BIO_READ ? "Read" : "Write", - major(rq->bp->b_dev), - minor(rq->bp->b_dev), - (long long)rq->bp->b_blkno, - rq->bp->b_bcount); - } - recover_io(request->info.rq); /* the failed request */ - break; - - /* - * Write the config to disk. We could end up with - * quite a few of these in a row. 
Only honour the - * last one - */ - case daemonrq_saveconfig: - if ((daemonq == NULL) /* no more requests */ - ||(daemonq->type != daemonrq_saveconfig)) { /* or the next isn't the same */ - if (((daemon_options & daemon_noupdate) == 0) /* we're allowed to do it */ - &&((vinum_conf.flags & VF_READING_CONFIG) == 0)) { /* and we're not building the config now */ - /* - * We obviously don't want to save a - * partial configuration. Less obviously, - * we don't need to do anything if we're - * asked to write the config when we're - * building it up, because we save it at - * the end. - */ - if (daemon_options & daemon_verbose) - log(LOG_INFO, "vinum: saving config\n"); - daemon_save_config(); /* save it */ - } - } - break; - - case daemonrq_return: /* been told to stop */ - if (daemon_options & daemon_verbose) - log(LOG_INFO, "vinum: stopping\n"); - daemon_options |= daemon_stopped; /* note that we've stopped */ - Free(request); - while (daemonq != NULL) { /* backed up requests, */ - request = daemonq; /* get the request */ - daemonq = daemonq->next; /* and detach it */ - Free(request); /* then free it */ - } - wakeup(&vinumclose); /* and wake any waiting vinum(8)s */ - return; - - case daemonrq_ping: /* tell the caller we're here */ - if (daemon_options & daemon_verbose) - log(LOG_INFO, "vinum: ping reply\n"); - wakeup(&vinum_finddaemon); /* wake up the caller */ - break; - - case daemonrq_closedrive: /* close a drive */ - close_drive(request->info.drive); /* do it */ - break; - - case daemonrq_init: /* initialize a plex */ - /* XXX */ - case daemonrq_revive: /* revive a subdisk */ - /* XXX */ - /* FALLTHROUGH */ - default: - log(LOG_WARNING, "Invalid request\n"); - break; - } - if (request->privateinuse) /* one of ours, */ - request->privateinuse = 0; /* no longer in use */ - else - Free(request); /* return it */ - } - } -} - -/* - * Recover a failed I/O operation. 
- * - * The correct way to do this is to examine the request and determine - * how to recover each individual failure. In the case of a write, - * this could be as simple as doing nothing: the defective drives may - * already be down, and there may be nothing else to do. In case of - * a read, it will be necessary to retry if there are alternative - * copies of the data. - * - * The easy way (here) is just to reissue the request. This will take - * a little longer, but nothing like as long as the failure will have - * taken. - * - */ -void -recover_io(struct request *rq) -{ - /* - * This should read: - * - * vinumstrategy(rq->bp); - * - * Negotiate with phk to get it fixed. - */ - DEV_STRATEGY(rq->bp); /* reissue the command */ -} - -/* Functions called to interface with the daemon */ - -/* queue a request for the daemon */ -void -queue_daemon_request(enum daemonrq type, union daemoninfo info) -{ - int s; - - struct daemonq *qelt = (struct daemonq *) Malloc(sizeof(struct daemonq)); - - if (qelt == NULL) { /* malloc failed, we're prepared for that */ - /* - * Take one of our spares. Give up if it's still in use; the only - * message we're likely to get here is a 'drive failed' message, - * and that'll come by again if we miss it. - */ - if (intqp->privateinuse) /* still in use? */ - return; /* yes, give up */ - qelt = intqp++; - if (intqp == &intq[INTQSIZE]) /* got to the end, */ - intqp = intq; /* wrap around */ - qelt->privateinuse = 1; /* it's ours, and it's in use */ - } else - qelt->privateinuse = 0; - - qelt->next = NULL; /* end of the chain */ - qelt->type = type; - qelt->info = info; - s = splhigh(); - if (daemonq) { /* something queued already */ - dqend->next = qelt; - dqend = qelt; - } else { /* queue is empty, */ - daemonq = qelt; /* this is the whole queue */ - dqend = qelt; - } - splx(s); - wakeup(&vinum_daemon); /* and give the dæmon a kick */ -} - -/* - * see if the daemon is running. 
Return 0 (no error) - * if it is, ESRCH otherwise - */ -int -vinum_finddaemon() -{ - int result; - - if (daemonpid != 0) { /* we think we have a daemon, */ - queue_daemon_request(daemonrq_ping, (union daemoninfo) 0); /* queue a ping */ - result = tsleep(&vinum_finddaemon, PUSER, "reap", 2 * hz); - if (result == 0) /* yup, the daemon's up and running */ - return 0; - } - /* no daemon, or we couldn't talk to it: start it */ - vinum_daemon(); /* start the daemon */ - return 0; -} - -int -vinum_setdaemonopts(int options) -{ - daemon_options = options; - return 0; -} diff --git a/sys/dev/vinum/vinumext.h b/sys/dev/vinum/vinumext.h deleted file mode 100644 index ed2e6a5..0000000 --- a/sys/dev/vinum/vinumext.h +++ /dev/null @@ -1,261 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumext.h,v 1.33 2003/05/23 00:57:48 grog Exp $ - * $FreeBSD$ - */ - -/* vinumext.h: external definitions */ - -/* *sigh* We still need this at the moment. */ -#ifdef _KERNEL -extern struct _vinum_conf vinum_conf; /* configuration information */ -extern struct mtx plexmutex[]; /* mutexes for plexes to use */ -#else -extern struct __vinum_conf vinum_conf; /* configuration information */ -#endif - -#ifdef VINUMDEBUG -extern int debug; /* debug flags */ -#endif - -/* Physical read and write drive */ -#define read_drive(a, b, c, d) driveio (a, b, c, d, BIO_READ) -#define write_drive(a, b, c, d) driveio (a, b, c, d, BIO_WRITE) - -#define CHECKALLOC(ptr, msg) \ - if (ptr == NULL) \ - { \ - printf (msg); \ - longjmp (command_fail, -1); \ - } -#ifndef _KERNEL -struct vnode; -struct thread; -#endif - -#ifdef _KERNEL -int vinum_inactive(int); -void free_vinum(int); -int give_sd_to_plex(int plexno, int sdno); -void give_sd_to_drive(int sdno); -int give_plex_to_volume(int, int, int); -struct drive *check_drive(char *); -enum drive_label_info read_drive_label(struct drive *, int); -int parse_config(char *, struct keywordset *, int); -int parse_user_config(char *cptr, struct keywordset *keyset); -u_int64_t sizespec(char 
*spec); -int volume_index(struct volume *volume); -int plex_index(struct plex *plex); -int sd_index(struct sd *sd); -int drive_index(struct drive *drive); -int my_plex(int volno, int plexno); -int my_sd(int plexno, int sdno); -int get_empty_drive(void); -int find_drive(const char *name, int create); -int find_drive_by_name(const char *devname, int create); -int get_empty_sd(void); -int find_subdisk(const char *name, int create); -void return_drive_space(int driveno, int64_t offset, int length); -void free_sd(int sdno); -void free_volume(int volno); -int get_empty_plex(void); -int find_plex(const char *name, int create); -void free_plex(int plexno); -int get_empty_volume(void); -int find_volume(const char *name, int create); -void config_subdisk(int); -void config_plex(int); -void config_volume(int); -void config_drive(int); -void updateconfig(int); -void update_sd_config(int sdno, int kernelstate); -void update_plex_config(int plexno, int kernelstate); -void update_volume_config(int volno); -void update_config(void); -void drive_io_done(struct buf *); -void save_config(void); -void daemon_save_config(void); -void write_config(char *, int); -int start_config(int); -void finish_config(int); -void remove(struct vinum_ioctl_msg *msg); -void remove_drive_entry(int driveno, int force); -void remove_sd_entry(int sdno, int force, int recurse); -void remove_plex_entry(int plexno, int force, int recurse); -void remove_volume_entry(int volno, int force, int recurse); - -void checkdiskconfig(char *); -int open_drive(struct drive *, struct thread *, int); -void close_drive(struct drive *drive); -void close_locked_drive(struct drive *drive); -int driveio(struct drive *, char *, size_t, off_t, int); -int set_drive_parms(struct drive *drive); -int init_drive(struct drive *, int); -/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */ -void throw_rude_remark(int, char *,...); - -void format_config(char *config, int len); -void checkkernel(char *op); -void 
free_drive(struct drive *drive); -void down_drive(struct drive *drive); -void remove_drive(int driveno); - -int vinum_scandisk(char *drivename); - -/* I/O */ -d_open_t vinumopen; -d_close_t vinumclose; -d_strategy_t vinumstrategy; -d_ioctl_t vinumioctl; - -int vinum_super_ioctl(struct cdev *, u_long, caddr_t); -int vinumstart(struct buf *bp, int reviveok); -int launch_requests(struct request *rq, int reviveok); -void sdio(struct buf *bp); - -/* XXX Do we need this? */ -int vinumpart(struct cdev *); - -extern jmp_buf command_fail; /* return here if config fails */ - -#ifdef VINUMDEBUG -/* Memory allocation and request tracing */ -void vinum_meminfo(caddr_t data); -int vinum_mallocinfo(caddr_t data); -int vinum_rqinfo(caddr_t data); -void LongJmp(jmp_buf, int); -char *basename(char *); -#endif - -#ifdef VINUMDEBUG -void expand_table(void **, int, int, char *, int); -#else -void expand_table(void **, int, int); -#endif - -struct disklabel; -struct request; -struct rqgroup *allocrqg(struct request *rq, int elements); -void deallocrqg(struct rqgroup *rqg); - -/* Device number decoding */ -int Volno(struct cdev *x); -int Plexno(struct cdev *x); -int Sdno(struct cdev *x); - -/* State transitions */ -int set_drive_state(int driveno, enum drivestate state, enum setstateflags flags); -int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags); -enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend); -int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags); -int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags); -void update_sd_state(int sdno); -void forceup(int plexno); -void update_plex_state(int plexno); -void update_volume_state(int volno); -void invalidate_subdisks(struct plex *, enum sdstate); -void start_object(struct vinum_ioctl_msg *); -void stop_object(struct vinum_ioctl_msg *); -void setstate(struct vinum_ioctl_msg *msg); -void setstate_by_force(struct 
vinum_ioctl_msg *msg); -void vinum_label(int); -int vinum_writedisklabel(struct volume *, struct disklabel *); -int initsd(int, int); -struct buf *parityrebuild(struct plex *, u_int64_t, int, enum parityop, struct rangelock **, off_t *); -enum requeststatus sddownstate(struct request *rq); - -int restart_plex(int plexno); -int revive_read(struct sd *sd); -int revive_block(int sdno); -void parityops(struct vinum_ioctl_msg *); - -/* Auxiliary functions */ -enum sdstates sdstatemap(struct plex *plex); -enum volplexstate vpstate(struct plex *plex); -#endif - -struct drive *validdrive(int driveno, struct _ioctl_reply *); -struct sd *validsd(int sdno, struct _ioctl_reply *); -struct plex *validplex(int plexno, struct _ioctl_reply *); -struct volume *validvol(int volno, struct _ioctl_reply *); -void resetstats(struct vinum_ioctl_msg *msg); - -/* Locking */ -#ifdef VINUMDEBUG -int lockdrive(struct drive *drive, char *, int); -#else -int lockdrive(struct drive *drive); -#endif -void unlockdrive(struct drive *drive); -int lockvol(struct volume *vol); -void unlockvol(struct volume *vol); -int lockplex(struct plex *plex); -void unlockplex(struct plex *plex); -struct rangelock *lockrange(daddr_t stripe, struct buf *bp, struct plex *plex); -int lock_config(void); -void unlock_config(void); - -/* Dæmon */ - -void vinum_daemon(void); -int vinum_finddaemon(void); -int vinum_setdaemonopts(int); -extern struct daemonq *daemonq; /* daemon's work queue */ -extern struct daemonq *dqend; /* and the end of the queue */ -extern struct cdevsw vinum_cdevsw; - -#undef Free /* defined in some funny net stuff */ -#ifdef _KERNEL -#ifdef VINUMDEBUG -#define Malloc(x) MMalloc ((x), __FILE__, __LINE__) /* show where we came from */ -#define Free(x) FFree ((x), __FILE__, __LINE__) /* show where we came from */ -caddr_t MMalloc(int size, char *, int); -void FFree(void *mem, char *, int); -#define LOCKDRIVE(d) lockdrive (d, __FILE__, __LINE__) -#else -#define Malloc(x) malloc((x), M_DEVBUF, \ - 
curthread->td_intr_nesting_level == 0? M_WAITOK: M_NOWAIT) -#define Free(x) free((x), M_DEVBUF) -#define LOCKDRIVE(d) lockdrive (d) -#endif -#else -#define Malloc(x) malloc ((x)) /* just the size */ -#define Free(x) free ((x)) /* just the address */ -#endif - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumhdr.h b/sys/dev/vinum/vinumhdr.h deleted file mode 100644 index 4117821..0000000 --- a/sys/dev/vinum/vinumhdr.h +++ /dev/null @@ -1,81 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - */ - -/* Header files used by all modules */ -/* - * $Id: vinumhdr.h,v 1.19 2001/05/22 04:07:22 grog Exp grog $ - * $FreeBSD$ - */ - -#include <sys/param.h> -#ifdef _KERNEL -#include "opt_vinum.h" -#include <sys/systm.h> -#include <sys/kdb.h> -#include <sys/kernel.h> -#include <sys/proc.h> -#include <sys/conf.h> -#include <sys/mount.h> -#include <sys/vnode.h> -#include <sys/sysctl.h> -#endif -#include <sys/errno.h> -#include <sys/time.h> -#include <sys/bio.h> -#include <sys/buf.h> -#include <sys/malloc.h> -#include <sys/uio.h> -#include <sys/namei.h> -#include <sys/stat.h> -#include <sys/disk.h> -#include <sys/disklabel.h> -#include <sys/syslog.h> -#include <sys/fcntl.h> -#include <sys/queue.h> -#ifdef _KERNEL -#include <machine/setjmp.h> -#include <machine/stdarg.h> -#else -#include <setjmp.h> -#include <stdarg.h> -#endif -#include <vm/vm.h> -#include <dev/vinum/vinumvar.h> -#include <dev/vinum/vinumio.h> -#include <dev/vinum/vinumkw.h> -#include <dev/vinum/vinumext.h> -#include <dev/vinum/vinumutil.h> -#include <machine/cpu.h> diff --git a/sys/dev/vinum/vinuminterrupt.c b/sys/dev/vinum/vinuminterrupt.c deleted file mode 100644 index d031ef9..0000000 --- a/sys/dev/vinum/vinuminterrupt.c +++ /dev/null @@ -1,473 +0,0 @@ -/* vinuminterrupt.c: bottom half of the driver */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. 
- * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. 
- * - * $Id: vinuminterrupt.c,v 1.41 2003/08/24 17:55:56 obrien Exp $ - */ - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> -#include <sys/resourcevar.h> - -void complete_raid5_write(struct rqelement *); -void complete_rqe(struct buf *bp); -void sdio_done(struct buf *bp); - -/* - * Take a completed buffer, transfer the data back if - * it's a read, and complete the high-level request - * if this is the last subrequest. - * - * The bp parameter is in fact a struct rqelement, which - * includes a couple of extras at the end. - */ -void -complete_rqe(struct buf *bp) -{ - struct rqelement *rqe; - struct request *rq; - struct rqgroup *rqg; - struct buf *ubp; /* user buffer */ - struct drive *drive; - struct sd *sd; - char *gravity; /* for error messages */ - - rqe = (struct rqelement *) bp; /* point to the element element that completed */ - rqg = rqe->rqg; /* and the request group */ - rq = rqg->rq; /* and the complete request */ - ubp = rq->bp; /* user buffer */ - -#ifdef VINUMDEBUG - if (debug & DEBUG_LASTREQS) - logrq(loginfo_iodone, (union rqinfou) rqe, ubp); -#endif - drive = &DRIVE[rqe->driveno]; - drive->active--; /* one less outstanding I/O on this drive */ - vinum_conf.active--; /* one less outstanding I/O globally */ - if ((drive->active == (DRIVE_MAXACTIVE - 1)) /* we were at the drive limit */ - ||(vinum_conf.active == VINUM_MAXACTIVE)) /* or the global limit */ - wakeup(&launch_requests); /* let another one at it */ - if ((bp->b_io.bio_flags & BIO_ERROR) != 0) { /* transfer in error */ - gravity = ""; - sd = &SD[rqe->sdno]; - - if (bp->b_error != 0) /* did it return a number? */ - rq->error = bp->b_error; /* yes, put it in. */ - else if (rq->error == 0) /* no: do we have one already? 
*/ - rq->error = EIO; /* no: catchall "I/O error" */ - sd->lasterror = rq->error; - if (bp->b_iocmd == BIO_READ) { /* read operation */ - if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) { - gravity = " fatal"; - set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */ - } - log(LOG_ERR, - "%s:%s read error, block %lld for %ld bytes\n", - gravity, - sd->name, - (long long)bp->b_blkno, - bp->b_bcount); - } else { /* write operation */ - if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) { - gravity = "fatal "; - set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */ - } - log(LOG_ERR, - "%s:%s write error, block %lld for %ld bytes\n", - gravity, - sd->name, - (long long)bp->b_blkno, - bp->b_bcount); - } - log(LOG_ERR, - "%s: user buffer block %lld for %ld bytes\n", - sd->name, - (long long)ubp->b_blkno, - ubp->b_bcount); - if (rq->error == ENXIO) { /* the drive's down too */ - log(LOG_ERR, - "%s: fatal drive I/O error, block %lld for %ld bytes\n", - DRIVE[rqe->driveno].label.name, - (long long)bp->b_blkno, - bp->b_bcount); - DRIVE[rqe->driveno].lasterror = rq->error; - set_drive_state(rqe->driveno, /* take the drive down */ - drive_down, - setstate_force); - } - } - /* Now update the statistics */ - if (bp->b_iocmd == BIO_READ) { /* read operation */ - DRIVE[rqe->driveno].reads++; - DRIVE[rqe->driveno].bytes_read += bp->b_bcount; - SD[rqe->sdno].reads++; - SD[rqe->sdno].bytes_read += bp->b_bcount; - PLEX[rqe->rqg->plexno].reads++; - PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount; - if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */ - VOL[PLEX[rqe->rqg->plexno].volno].reads++; - VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount; - } - } else { /* write operation */ - DRIVE[rqe->driveno].writes++; - DRIVE[rqe->driveno].bytes_written += bp->b_bcount; - SD[rqe->sdno].writes++; - SD[rqe->sdno].bytes_written += bp->b_bcount; - PLEX[rqe->rqg->plexno].writes++; - 
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount; - if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */ - VOL[PLEX[rqe->rqg->plexno].volno].writes++; - VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount; - } - } - if (rqg->flags & XFR_RECOVERY_READ) { /* recovery read, */ - int *sdata; /* source */ - int *data; /* and group data */ - int length; /* and count involved */ - int count; /* loop counter */ - struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */ - - /* XOR destination is the user data */ - sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */ - data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */ - length = urqe->grouplen * (DEV_BSIZE / sizeof(int)); /* and number of ints */ - - for (count = 0; count < length; count++) - data[count] ^= sdata[count]; - - /* - * In a normal read, we will normally read directly - * into the user buffer. This doesn't work if - * we're also doing a recovery, so we have to - * copy it - */ - if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */ - char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */ - char *dst; - - dst = (char *) ubp->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */ - length = rqe->datalen << DEV_BSHIFT; /* and count involved */ - bcopy(src, dst, length); /* move it */ - } - } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 4/5 group write operation */ - &&(rqg->active == 1)) /* and this is the last active request */ - complete_raid5_write(rqe); - /* - * This is the earliest place where we can be - * sure that the request has really finished, - * since complete_raid5_write can issue new - * requests. - */ - rqg->active--; /* this request now finished */ - if (rqg->active == 0) { /* request group finished, */ - rq->active--; /* one less */ - if (rqg->lock) { /* got a lock? 
*/ - unlockrange(rqg->plexno, rqg->lock); /* yes, free it */ - rqg->lock = 0; - } - } - if (rq->active == 0) { /* request finished, */ -#ifdef VINUMDEBUG - if (debug & DEBUG_RESID) { - if (ubp->b_resid != 0) /* still something to transfer? */ - kdb_enter("resid"); - } -#endif - - if (rq->error) { /* did we have an error? */ - if (rq->isplex) { /* plex operation, */ - ubp->b_io.bio_flags |= BIO_ERROR; /* yes, propagate to user */ - ubp->b_error = rq->error; - } else /* try to recover */ - queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq); /* let the daemon complete */ - } else { - ubp->b_resid = 0; /* completed our transfer */ - if (rq->isplex == 0) /* volume request, */ - VOL[rq->volplex.volno].active--; /* another request finished */ - if (rq->flags & XFR_COPYBUF) { - Free(ubp->b_data); - ubp->b_data = rq->save_data; - } - bufdone(ubp); /* top level buffer completed */ - freerq(rq); /* return the request storage */ - } - } -} - -/* Free a request block and anything hanging off it */ -void -freerq(struct request *rq) -{ - struct rqgroup *rqg; - struct rqgroup *nrqg; /* next in chain */ - int rqno; - - for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */ - if (rqg->lock) /* got a lock? 
*/ - unlockrange(rqg->plexno, rqg->lock); /* yes, free it */ - for (rqno = 0; rqno < rqg->count; rqno++) { - if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */ - &&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */ - Free(rqg->rqe[rqno].b.b_data); /* free it */ - if (rqg->rqe[rqno].flags & XFR_BUFLOCKED) { /* locked this buffer, */ - BUF_UNLOCK(&rqg->rqe[rqno].b); /* unlock it again */ - BUF_LOCKFREE(&rqg->rqe[rqno].b); - } - } - nrqg = rqg->next; /* note the next one */ - Free(rqg); /* and free this one */ - } - Free(rq); /* free the request itself */ -} - -/* I/O on subdisk completed */ -void -sdio_done(struct buf *bp) -{ - struct sdbuf *sbp; - - sbp = (struct sdbuf *) bp; - if (sbp->b.b_io.bio_flags & BIO_ERROR) { /* had an error */ - sbp->bp->b_io.bio_flags |= BIO_ERROR; /* propagate upwards */ - sbp->bp->b_error = sbp->b.b_error; - } -#ifdef VINUMDEBUG - if (debug & DEBUG_LASTREQS) - logrq(loginfo_sdiodone, (union rqinfou) bp, bp); -#endif - sbp->bp->b_resid = sbp->b.b_resid; /* copy the resid field */ - /* Now update the statistics */ - if (bp->b_iocmd == BIO_READ) { /* read operation */ - DRIVE[sbp->driveno].reads++; - DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount; - SD[sbp->sdno].reads++; - SD[sbp->sdno].bytes_read += sbp->b.b_bcount; - } else { /* write operation */ - DRIVE[sbp->driveno].writes++; - DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount; - SD[sbp->sdno].writes++; - SD[sbp->sdno].bytes_written += sbp->b.b_bcount; - } - bufdone(sbp->bp); /* complete the caller's I/O */ - BUF_UNLOCK(&sbp->b); - BUF_LOCKFREE(&sbp->b); - Free(sbp); -} - -/* Start the second phase of a RAID-4 or RAID-5 group write operation. 
*/ -void -complete_raid5_write(struct rqelement *rqe) -{ - int *sdata; /* source */ - int *pdata; /* and parity block data */ - int length; /* and count involved */ - int count; /* loop counter */ - int rqno; /* request index */ - int rqoffset; /* offset of request data from parity data */ - struct buf *ubp; /* user buffer header */ - struct request *rq; /* pointer to our request */ - struct rqgroup *rqg; /* and to the request group */ - struct rqelement *prqe; /* point to the parity block */ - struct drive *drive; /* drive to access */ - - rqg = rqe->rqg; /* and to our request group */ - rq = rqg->rq; /* point to our request */ - ubp = rq->bp; /* user's buffer header */ - prqe = &rqg->rqe[0]; /* point to the parity block */ - - /* - * If we get to this function, we have normal or - * degraded writes, or a combination of both. We do - * the same thing in each case: we perform an - * exclusive or to the parity block. The only - * difference is the origin of the data and the - * address range. - */ - if (rqe->flags & XFR_DEGRADED_WRITE) { /* do the degraded write stuff */ - pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */ - bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */ - - /* Now get what data we need from each block */ - for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */ - rqe = &rqg->rqe[rqno]; /* this request */ - sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */ - length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */ - - /* - * Add the data block to the parity block. Before - * we started the request, we zeroed the parity - * block, so the result of adding all the other - * blocks and the block we want to write will be - * the correct parity block. 
- */ - for (count = 0; count < length; count++) - pdata[count] ^= sdata[count]; - if ((rqe->flags & XFR_MALLOCED) /* the buffer was malloced, */ - &&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */ - Free(rqe->b.b_data); /* free it now */ - rqe->flags &= ~XFR_MALLOCED; - } - } - } - if (rqg->flags & XFR_NORMAL_WRITE) { /* do normal write stuff */ - /* Get what data we need from each block */ - for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */ - rqe = &rqg->rqe[rqno]; /* this request */ - if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE)) - == (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */ - sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */ - rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */ - pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */ - length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */ - - /* - * "remove" the old data block - * from the parity block - */ - if ((pdata < ((int *) prqe->b.b_data)) - || (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount))) - || (sdata < ((int *) rqe->b.b_data)) - || (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount)))) - panic("complete_raid5_write: bounds overflow"); - for (count = 0; count < length; count++) - pdata[count] ^= sdata[count]; - - /* "add" the new data block */ - sdata = (int *) (&ubp->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */ - if ((sdata < ((int *) ubp->b_data)) - || (&sdata[length] > ((int *) (ubp->b_data + ubp->b_bcount)))) - panic("complete_raid5_write: bounds overflow"); - for (count = 0; count < length; count++) - pdata[count] ^= sdata[count]; - - /* Free the malloced buffer */ - if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */ - Free(rqe->b.b_data); /* free it */ - rqe->flags &= ~XFR_MALLOCED; - } else - 
panic("complete_raid5_write: malloc conflict"); - - if ((rqe->b.b_iocmd == BIO_READ) /* this was a read */ - &&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */ - rqe->b.b_flags &= ~B_DONE; /* start a new request */ - rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */ - rqe->b.b_iodone = complete_rqe; /* call us here when done */ - rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */ - rqe->b.b_data = &ubp->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */ - rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */ - rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim more */ - rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */ - rqe->b.b_blkno += rqe->dataoffset; /* point to the correct block */ - rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT; - rqe->b.b_iooffset = rqe->b.b_offset; - rqg->active++; /* another active request */ - drive = &DRIVE[rqe->driveno]; /* drive to access */ - - /* We can't sleep here, so we just increment the counters. */ - drive->active++; - if (drive->active >= drive->maxactive) - drive->maxactive = drive->active; - vinum_conf.active++; - if (vinum_conf.active >= vinum_conf.maxactive) - vinum_conf.maxactive = vinum_conf.active; -#ifdef VINUMDEBUG - if (debug & DEBUG_ADDRESSES) - log(LOG_DEBUG, - " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n", - rqe->b.b_iocmd == BIO_READ ? 
"Read" : "Write", - major(rqe->b.b_dev), - minor(rqe->b.b_dev), - rqe->sdno, - (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), - (long long)rqe->b.b_blkno, - rqe->b.b_bcount); - if (debug & DEBUG_LASTREQS) - logrq(loginfo_raid5_data, (union rqinfou) rqe, ubp); -#endif - DEV_STRATEGY(&rqe->b); - } - } - } - } - /* Finally, write the parity block */ - rqe = &rqg->rqe[0]; - rqe->b.b_flags &= ~B_DONE; /* we're not done */ - rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */ - rqe->b.b_iodone = complete_rqe; /* call us here when done */ - rqg->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */ - rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */ - rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT; - rqe->b.b_iooffset = rqe->b.b_offset; - rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim we have more */ - rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */ - rqg->active++; /* another active request */ - drive = &DRIVE[rqe->driveno]; /* drive to access */ - - /* We can't sleep here, so we just increment the counters. */ - drive->active++; - if (drive->active >= drive->maxactive) - drive->maxactive = drive->active; - vinum_conf.active++; - if (vinum_conf.active >= vinum_conf.maxactive) - vinum_conf.maxactive = vinum_conf.active; - -#ifdef VINUMDEBUG - if (debug & DEBUG_ADDRESSES) - log(LOG_DEBUG, - " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n", - rqe->b.b_iocmd == BIO_READ ? 
"Read" : "Write", - major(rqe->b.b_dev), - minor(rqe->b.b_dev), - rqe->sdno, - (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), - (long long)rqe->b.b_blkno, - rqe->b.b_bcount); - if (debug & DEBUG_LASTREQS) - logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubp); -#endif - DEV_STRATEGY(&rqe->b); -} - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumio.c b/sys/dev/vinum/vinumio.c deleted file mode 100644 index d7d554d..0000000 --- a/sys/dev/vinum/vinumio.c +++ /dev/null @@ -1,918 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumio.c,v 1.39 2003/05/23 00:59:53 grog Exp grog $ - * $FreeBSD$ - */ - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -static char *sappend(char *txt, char *s); -static int drivecmp(const void *va, const void *vb); - -/* - * Open the device associated with the drive, and - * set drive's vp. Return an error number. - */ -int -open_drive(struct drive *drive, struct thread *td, int verbose) -{ - struct cdevsw *dsw; /* pointer to cdevsw entry */ - - if (drive->flags & VF_OPEN) /* open already, */ - return EBUSY; /* don't do it again */ - - drive->dev = getdiskbyname(drive->devicename); - if (drive->dev == NULL) /* didn't find anything */ - return ENOENT; - dev_ref(drive->dev); - - drive->dev->si_iosize_max = DFLTPHYS; - dsw = devsw(drive->dev); - if (dsw == NULL) /* sanity, should not happen */ - drive->lasterror = ENOENT; - else if ((dsw->d_flags & D_DISK) == 0) - drive->lasterror = ENOTBLK; - else { - DROP_GIANT(); - drive->lasterror = (dsw->d_open) (drive->dev, FWRITE | FREAD, 0, td); - PICKUP_GIANT(); - } - - if (drive->lasterror != 0) { /* failed */ - drive->state = drive_down; /* just force it down */ - if (verbose) - log(LOG_WARNING, - "vinum open_drive %s: failed with error %d\n", - drive->devicename, drive->lasterror); - } else - drive->flags |= VF_OPEN; /* we're open now */ - - return drive->lasterror; -} - -/* - * Set some variables in the drive struct in more - * convenient form. 
Return error indication. - */ -int -set_drive_parms(struct drive *drive) -{ - drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */ - drive->secsperblock = drive->blocksize /* number of sectors per block */ - / drive->sectorsize; - - /* Now update the label part */ - bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */ - microtime(&drive->label.date_of_birth); /* and current time */ - drive->label.drive_size = drive->mediasize; /* size of the drive in bytes */ -#ifdef VINUMDEBUG - if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */ - drive->label.drive_size *= 100; -#endif - - /* number of sectors available for subdisks */ - drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART; - - /* - * Bug in 3.0 as of January 1998: you can open - * non-existent slices. They have a length of 0. - */ - if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */ - set_drive_state(drive->driveno, drive_down, setstate_force); - drive->lasterror = ENOSPC; - return ENOSPC; - } - drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */ - drive->freelist = (struct drive_freelist *) - Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist)); - if (drive->freelist == NULL) /* can't malloc, dammit */ - return ENOSPC; - drive->freelist_entries = 1; /* just (almost) the complete drive */ - drive->freelist[0].offset = DATASTART; /* starts here */ - drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */ - if (drive->label.name[0] != '\0') /* got a name */ - set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */ - else /* we know about it, but that's all */ - drive->state = drive_referenced; - return 0; -} - -/* - * Initialize a drive: open the device and add - * device information. 
- */ -int -init_drive(struct drive *drive, int verbose) -{ - - drive->lasterror = open_drive(drive, curthread, verbose); /* open the drive */ - if (drive->lasterror) - return drive->lasterror; - - DROP_GIANT(); - drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev, - DIOCGSECTORSIZE, - (caddr_t) & drive->sectorsize, - FREAD, - curthread); - if (drive->lasterror == 0) - drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev, - DIOCGMEDIASIZE, - (caddr_t) & drive->mediasize, - FREAD, - curthread); - PICKUP_GIANT(); - if (drive->lasterror) { - if (verbose) - log(LOG_ERR, - "vinum: Can't get drive dimensions for %s: error %d\n", - drive->devicename, - drive->lasterror); - close_drive(drive); - return drive->lasterror; - } - return set_drive_parms(drive); /* set various odds and ends */ -} - -/* Close a drive if it's open. */ -void -close_drive(struct drive *drive) -{ - LOCKDRIVE(drive); /* keep the daemon out */ - if (drive->flags & VF_OPEN) - close_locked_drive(drive); /* and close it */ - if (drive->state > drive_down) /* if it's up */ - drive->state = drive_down; /* make sure it's down */ - unlockdrive(drive); -} - -/* - * Real drive close code, called with drive already locked. - * We have also checked that the drive is open. No errors. - */ -void -close_locked_drive(struct drive *drive) -{ - int error; - - /* - * If we can't access the drive, we can't flush - * the queues, which spec_close() will try to - * do. Get rid of them here first. - */ - DROP_GIANT(); - error = (*devsw(drive->dev)->d_close) (drive->dev, FWRITE | FREAD, 0, NULL); - PICKUP_GIANT(); - drive->flags &= ~VF_OPEN; /* no longer open */ - if (drive->lasterror == 0) - drive->lasterror = error; -} - -/* - * Remove drive from the configuration. - * Caller must ensure that it isn't active. 
- */ -void -remove_drive(int driveno) -{ - struct drive *drive = &vinum_conf.drive[driveno]; - struct vinum_hdr *vhdr; /* buffer for header */ - int error; - - if (drive->state > drive_referenced) { /* real drive */ - if (drive->state == drive_up) { - vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */ - CHECKALLOC(vhdr, "Can't allocate memory"); - error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); - if (error) - drive->lasterror = error; - else { - vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */ - write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); - } - Free(vhdr); - } - free_drive(drive); /* close it and free resources */ - save_config(); /* and save the updated configuration */ - } -} - -/* - * Transfer drive data. Usually called from one of these defines; - * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ) - * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) - * - * length and offset are in bytes, but must be multiples of sector - * size. The function *does not check* for this condition, and - * truncates ruthlessly. - * Return error number. 
- */ -int -driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag) -{ - int error; - struct buf *bp; - - error = 0; /* to keep the compiler happy */ - while (length) { /* divide into small enough blocks */ - int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */ - - bp = geteblk(len); /* get a buffer header */ - bp->b_flags = 0; - bp->b_iocmd = flag; - bp->b_dev = drive->dev; /* device */ - bp->b_blkno = offset / drive->sectorsize; /* block number */ - bp->b_offset = offset; - bp->b_iooffset = offset; - bp->b_saveaddr = bp->b_data; - bp->b_data = buf; - bp->b_bcount = len; - DEV_STRATEGY(bp); /* initiate the transfer */ - error = bufwait(bp); - bp->b_data = bp->b_saveaddr; - bp->b_flags |= B_INVAL | B_AGE; - bp->b_ioflags &= ~BIO_ERROR; - brelse(bp); - if (error) - break; - length -= len; /* update pointers */ - buf += len; - offset += len; - } - return error; -} - -/* - * Check a drive for a vinum header. If found, - * update the drive information. We come here - * with a partially populated drive structure - * which includes the device name. - * - * Return information on what we found. - * - * This function is called from two places: check_drive, - * which wants to find out whether the drive is a - * Vinum drive, and config_drive, which asserts that - * it is a vinum drive. In the first case, we don't - * print error messages (verbose==0), in the second - * we do (verbose==1). 
- */ -enum drive_label_info -read_drive_label(struct drive *drive, int verbose) -{ - int error; - int result; /* result of our search */ - struct vinum_hdr *vhdr; /* and as header */ - - error = init_drive(drive, 0); /* find the drive */ - if (error) /* find the drive */ - return DL_CANT_OPEN; /* not ours */ - - vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */ - CHECKALLOC(vhdr, "Can't allocate memory"); - - drive->state = drive_up; /* be optimistic */ - error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); - if (vhdr->magic == VINUM_MAGIC) { /* ours! */ - if (drive->label.name[0] /* we have a name for this drive */ - &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */ - drive->lasterror = EINVAL; - result = DL_WRONG_DRIVE; /* it's the wrong drive */ - drive->state = drive_unallocated; /* put it back, it's not ours */ - } else - result = DL_OURS; - /* - * We copy the drive anyway so that we have - * the correct name in the drive info. This - * may not be the name specified - */ - drive->label = vhdr->label; /* put in the label information */ - } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */ - result = DL_DELETED_LABEL; /* and return the info */ - else - result = DL_NOT_OURS; /* we could have it, but we don't yet */ - Free(vhdr); /* that's all. */ - return result; -} - -/* - * Check a drive for a vinum header. If found, - * read configuration information from the drive and - * incorporate the data into the configuration. - * - * Return drive number. 
- */ -struct drive * -check_drive(char *devicename) -{ - int driveno; - int i; - struct drive *drive; - - driveno = find_drive_by_name(devicename, 1); /* if entry doesn't exist, create it */ - drive = &vinum_conf.drive[driveno]; /* and get a pointer */ - - if (drive->state >= drive_down) /* up or down, we know it */ - return drive; - if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */ - for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */ - if ((i != driveno) /* not this drive */ - &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */ - &&(strcmp(DRIVE[i].label.name, - DRIVE[driveno].label.name) == 0)) { /* and it has the same name */ - struct drive *mydrive = &DRIVE[i]; - - if (mydrive->devicename[0] == '/') { /* we know a device name for it */ - /* - * set an error, but don't take the - * drive down: that would cause unneeded - * error messages. - */ - drive->lasterror = EEXIST; - break; - } else { /* it's just a place holder, */ - int sdno; - - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */ - if ((SD[sdno].driveno == i) /* it's pointing to this one, */ - &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */ - SD[sdno].driveno = drive->driveno; /* point to the one we found */ - update_sd_state(sdno); /* and update its state */ - } - } - bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */ - } - } - } - return drive; - } else { /* not ours, */ - close_drive(drive); - free_drive(drive); /* get rid of it */ - return NULL; - } -} - -static char * -sappend(char *txt, char *s) -{ - while ((*s++ = *txt++) != 0); - return s - 1; -} - -void -format_config(char *config, int len) -{ - int i; - int j; - char *s = config; - char *configend = &config[len]; - - bzero(config, len); - - /* First write the volume configuration */ - for (i = 0; i < vinum_conf.volumes_allocated; i++) { - struct volume *vol; - - vol = &vinum_conf.volume[i]; - 
if ((vol->state > volume_uninit) - && (vol->name[0] != '\0')) { /* paranoia */ - snprintf(s, - configend - s, - "volume %s state %s", - vol->name, - volume_state(vol->state)); - while (*s) - s++; /* find the end */ - s = sappend("\n", s); - } - } - - /* Then the plex configuration */ - for (i = 0; i < vinum_conf.plexes_allocated; i++) { - struct plex *plex; - struct volume *vol; - - plex = &vinum_conf.plex[i]; - if ((plex->state > plex_referenced) - && (plex->name[0] != '\0')) { /* paranoia */ - snprintf(s, - configend - s, - "plex name %s state %s org %s ", - plex->name, - plex_state(plex->state), - plex_org(plex->organization)); - while (*s) - s++; /* find the end */ - if (isstriped(plex)) { - snprintf(s, - configend - s, - "%ds ", - (int) plex->stripesize); - while (*s) - s++; /* find the end */ - } - if (plex->volno >= 0) { /* we have a volume */ - vol = &VOL[plex->volno]; - snprintf(s, - configend - s, - "vol %s ", - vol->name); - while (*s) - s++; /* find the end */ - if ((vol->preferred_plex >= 0) /* has a preferred plex */ - &&vol->plex[vol->preferred_plex] == i) /* and it's us */ - snprintf(s, configend - s, "preferred "); - while (*s) - s++; /* find the end */ - } - for (j = 0; j < plex->subdisks; j++) { - snprintf(s, - configend - s, - " sd %s", - vinum_conf.sd[plex->sdnos[j]].name); - } - s = sappend("\n", s); - } - } - - /* And finally the subdisk configuration */ - for (i = 0; i < vinum_conf.subdisks_allocated; i++) { - struct sd *sd; - char *drivename; - - sd = &SD[i]; - if ((sd->state != sd_referenced) - && (sd->state != sd_unallocated) - && (sd->name[0] != '\0')) { /* paranoia */ - drivename = vinum_conf.drive[sd->driveno].label.name; - /* - * XXX We've seen cases of dead subdisks - * which don't have a drive. If we let them - * through here, the drive name is null, so - * they get the drive named 'plex'. - * - * This is a breakage limiter, not a fix. 
- */ - if (drivename[0] == '\0') - drivename = "*invalid*"; - snprintf(s, - configend - s, - "sd name %s drive %s len %llus driveoffset %llus state %s", - sd->name, - drivename, - (unsigned long long) sd->sectors, - (unsigned long long) sd->driveoffset, - sd_state(sd->state)); - while (*s) - s++; /* find the end */ - if (sd->plexno >= 0) - snprintf(s, - configend - s, - " plex %s plexoffset %llds", - vinum_conf.plex[sd->plexno].name, - (long long) sd->plexoffset); - else - snprintf(s, configend - s, " detached"); - while (*s) - s++; /* find the end */ - if (sd->flags & VF_RETRYERRORS) { - snprintf(s, configend - s, " retryerrors"); - while (*s) - s++; /* find the end */ - } - snprintf(s, configend - s, " \n"); - while (*s) - s++; /* find the end */ - } - } - if (s > &config[len - 2]) - panic("vinum: configuration data overflow"); -} - -/* - * issue a save config request to the dæmon. The actual work - * is done in process context by daemon_save_config. - */ -void -save_config(void) -{ - queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) 0); -} - -/* - * Write the configuration to all vinum slices. This - * is performed by the daemon only. 
- */ -void -daemon_save_config(void) -{ - int error; - int written_config; /* set when we first write the config to disk */ - int driveno; - struct drive *drive; /* point to current drive info */ - struct vinum_hdr *vhdr; /* and as header */ - char *config; /* point to config data */ - - /* don't save the configuration while we're still working on it */ - if (vinum_conf.flags & VF_CONFIGURING) - return; - written_config = 0; /* no config written yet */ - /* Build a volume header */ - vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */ - CHECKALLOC(vhdr, "Can't allocate config data"); - vhdr->magic = VINUM_MAGIC; /* magic number */ - vhdr->config_length = MAXCONFIG; /* length of following config info */ - - config = Malloc(MAXCONFIG); /* get space for the config data */ - CHECKALLOC(config, "Can't allocate config data"); - - format_config(config, MAXCONFIG); - error = 0; /* no errors yet */ - for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { - drive = &vinum_conf.drive[driveno]; /* point to drive */ - if (drive->state > drive_referenced) { - LOCKDRIVE(drive); /* don't let it change */ - - /* - * First, do some drive consistency checks. Some - * of these are kludges, others require a process - * context and couldn't be done before. 
- */ - if ((drive->devicename[0] == '\0') - || (drive->label.name[0] == '\0')) { - unlockdrive(drive); - free_drive(drive); /* get rid of it */ - break; - } - if (((drive->flags & VF_OPEN) == 0) /* drive not open */ - &&(drive->state > drive_down)) { /* and it thinks it's not down */ - unlockdrive(drive); - set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */ - continue; - } - if ((drive->state == drive_down) /* it's down */ - &&(drive->flags & VF_OPEN)) { /* but open, */ - unlockdrive(drive); - close_drive(drive); /* close it */ - } else if (drive->state > drive_down) { - microtime(&drive->label.last_update); /* time of last update is now */ - bcopy((char *) &drive->label, /* and the label info from the drive structure */ - (char *) &vhdr->label, - sizeof(vhdr->label)); - if ((drive->state != drive_unallocated) - && (drive->state != drive_referenced)) { /* and it's a real drive */ - error = write_drive(drive, - (char *) vhdr, - VINUMHEADERLEN, - VINUM_LABEL_OFFSET); - if (error == 0) /* first config copy */ - error = write_drive(drive, - config, - MAXCONFIG, - VINUM_CONFIG_OFFSET); - if (error == 0) - error = write_drive(drive, /* second copy */ - config, - MAXCONFIG, - VINUM_CONFIG_OFFSET + MAXCONFIG); - unlockdrive(drive); - if (error) { - log(LOG_ERR, - "vinum: Can't write config to %s, error %d\n", - drive->devicename, - error); - set_drive_state(drive->driveno, drive_down, setstate_force); - } else - written_config = 1; /* we've written it on at least one drive */ - } - } else /* not worth looking at, */ - unlockdrive(drive); /* just unlock it again */ - } - } - Free(vhdr); - Free(config); -} - -/* - * Search disks on system for vinum slices and add - * them to the configuuration if they're not - * there already. devicename is a blank-separate - * list of device names. If not provided, use - * sysctl to get a list of all disks on the - * system. - * - * Return an error indication. 
- */ -int -vinum_scandisk(char *devicename) -{ - struct drive *volatile drive; - volatile int driveno; - int firstdrive; /* first drive in this list */ - volatile int gooddrives; /* number of usable drives found */ - int firsttime; /* set if we have never configured before */ - int error; - char *config_text; /* read the config info from disk into here */ - char *volatile cptr; /* pointer into config information */ - char *eptr; /* end pointer into config information */ - char *config_line; /* copy the config line to */ - volatile int status; - int *drivelist; /* list of drive indices */ - char *partname; /* for creating partition names */ - char *cp; /* pointer to start of disk name */ - char *ep; /* and to first char after name */ - char *np; /* name pointer in naem we build */ - size_t alloclen; - int malloced; - int partnamelen; /* length of partition name */ - int drives; - int goodpart; /* good vinum drives on this disk */ - - malloced = 0; /* devicename not malloced */ - if (devicename == NULL) { /* no devices specified, */ - /* get a list of all disks in the system */ - /* Get size of disk list */ - error = kernel_sysctlbyname(&thread0, "kern.disks", NULL, - NULL, NULL, 0, &alloclen); - if (error) { - log(LOG_ERR, "vinum: can't get disk list: %d\n", error); - return EINVAL; - } - devicename = Malloc(alloclen); - if (devicename == NULL) { - printf("vinum: can't allocate memory for drive list"); - return ENOMEM; - } else - malloced = 1; - /* Now get the list of disks */ - kernel_sysctlbyname(&thread0, "kern.disks", devicename, - &alloclen, NULL, 0, NULL); - } - status = 0; /* success indication */ - vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */ - partname = Malloc(MAXPATHLEN); /* extract name of disk here */ - if (partname == NULL) { - printf("vinum_scandisk: can't allocate memory for drive name"); - return ENOMEM; - } - gooddrives = 0; /* number of usable drives found */ - firstdrive = vinum_conf.drives_used; /* the first drive */ - 
firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */ - - /* allocate a drive pointer list */ - drives = 256; /* should be enough for most cases */ - drivelist = (int *) Malloc(drives * sizeof(int)); - CHECKALLOC(drivelist, "Can't allocate memory"); - error = lock_config(); /* make sure we're alone here */ - if (error) - return error; - error = setjmp(command_fail); /* come back here on error */ - if (error) /* longjmped out */ - return error; - - /* Open all drives and find which was modified most recently */ - for (cp = devicename; *cp; cp = ep) { - char part; /* UNIX partition */ -#ifdef __i386__ - int slice; -#endif - - while (*cp == ' ') - cp++; /* find start of name */ - if (*cp == '\0') /* done, */ - break; - ep = cp; - while (*ep && (*ep != ' ')) /* find end of name */ - ep++; - - np = partname; /* start building up a name here */ - if (*cp != '/') { /* name doesn't start with /, */ - strcpy(np, "/dev/"); /* assume /dev */ - np += strlen("/dev/"); - } - memcpy(np, cp, ep - cp); /* put in name */ - np += ep - cp; /* and point past */ - - goodpart = 0; /* no partitions on this disk yet */ - partnamelen = MAXPATHLEN + np - partname; /* remaining length in partition name */ -#ifdef __i386__ - /* first try the partition table */ - for (slice = 1; slice < 5; slice++) - for (part = 'a'; part < 'i'; part++) { - if (part != 'c') { /* don't do the c partition */ - snprintf(np, - partnamelen, - "s%d%c", - slice, - part); - drive = check_drive(partname); /* try to open it */ - if (drive) { /* got something, */ - if (drive->flags & VF_CONFIGURED) /* already read this config, */ - log(LOG_WARNING, - "vinum: already read config from %s\n", /* say so */ - drive->label.name); - else { - if (gooddrives == drives) /* ran out of entries */ - EXPAND(drivelist, int, drives, drives); /* double the size */ - drivelist[gooddrives] = drive->driveno; /* keep the drive index */ - drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ - gooddrives++; - goodpart++; - 
} - } - } - } -#endif - /* - * If the machine doesn't have a BIOS - * partition table, try normal devices. - */ - if (goodpart == 0) { /* didn't find anything, */ - for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */ - if (part != 'c') { /* don't do the c partition */ - snprintf(np, - partnamelen, - "%c", - part); - drive = check_drive(partname); /* try to open it */ - if (drive) { /* got something, */ - if (drive->flags & VF_CONFIGURED) /* already read this config, */ - log(LOG_WARNING, - "vinum: already read config from %s\n", /* say so */ - drive->label.name); - else { - if (gooddrives == drives) /* ran out of entries */ - EXPAND(drivelist, int, drives, drives); /* double the size */ - drivelist[gooddrives] = drive->driveno; /* keep the drive index */ - drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ - gooddrives++; - goodpart++; - } - } - } - } - } - Free(partname); - - if (gooddrives == 0) { - if (firsttime) - log(LOG_WARNING, "vinum: no drives found\n"); - else - log(LOG_INFO, "vinum: no additional drives found\n"); - if (malloced) - Free(devicename); - unlock_config(); - return ENOENT; - } - /* - * We now have at least one drive open. Sort - * them in order of config time and merge the - * config info with what we have already. 
- */ - qsort(drivelist, gooddrives, sizeof(int), drivecmp); - config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */ - CHECKALLOC(config_text, "Can't allocate memory"); - config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */ - CHECKALLOC(config_line, "Can't allocate memory"); - for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */ - drive = &DRIVE[drivelist[driveno]]; /* point to the drive */ - - if (firsttime && (driveno == 0)) /* we've never configured before, */ - log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename); - else - log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename); - - if (drive->state == drive_up) - /* Read in both copies of the configuration information */ - error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET); - else { - error = EIO; - printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state)); - } - - if (error != 0) { - log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error); - free_drive(drive); /* give it back */ - status = error; - } - /* - * At this point, check that the two copies - * are the same, and do something useful if - * not. In particular, consider which is - * newer, and what this means for the - * integrity of the data on the drive. 
- */ - else { - vinum_conf.drives_used++; /* another drive in use */ - /* Parse the configuration, and add it to the global configuration */ - for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */ - volatile int parse_status; /* return value from parse_config */ - - for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */ - *eptr++ = *cptr++; - *eptr = '\0'; /* and delimit */ - if (setjmp(command_fail) == 0) { /* come back here on error and continue */ - parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */ - /* - * parse_config recognizes referenced - * drives and builds a drive entry for - * them. This may expand the drive - * table, thus invalidating the pointer. - */ - drive = &DRIVE[drivelist[driveno]]; /* point to the drive */ - - if (parse_status < 0) { /* error in config */ - /* - * This config should have been parsed - * in user space. If we run into - * problems here, something serious is - * afoot. Complain and let the user - * snarf the config to see what's - * wrong. - */ - log(LOG_ERR, - "vinum: Config error on %s, aborting integration\n", - drive->devicename); - free_drive(drive); /* give it back */ - status = EINVAL; - } - } - while (*cptr == '\n') - cptr++; /* skip to next line */ - } - } - drive->flags |= VF_CONFIGURED; /* this drive's configuration is complete */ - } - - Free(config_line); - Free(config_text); - Free(drivelist); - vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */ - if (status != 0) - printf("vinum: couldn't read configuration"); - else - updateconfig(VF_READING_CONFIG); /* update from disk config */ - if (malloced) - Free(devicename); - unlock_config(); - return status; -} - -/* - * Compare the modification dates of the drives, for qsort. - * Return 1 if a < b, 0 if a == b, 01 if a > b: in other - * words, sort backwards. 
- */ -int -drivecmp(const void *va, const void *vb) -{ - const struct drive *a = &DRIVE[*(const int *) va]; - const struct drive *b = &DRIVE[*(const int *) vb]; - - if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) - && (a->label.last_update.tv_usec == b->label.last_update.tv_usec)) - return 0; - else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec) - || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) - && (a->label.last_update.tv_usec > b->label.last_update.tv_usec))) - return -1; - else - return 1; -} -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumio.h b/sys/dev/vinum/vinumio.h deleted file mode 100644 index bf5134a..0000000 --- a/sys/dev/vinum/vinumio.h +++ /dev/null @@ -1,154 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumio.h,v 1.23 2003/05/04 05:25:46 grog Exp grog $ - * $FreeBSD$ - */ - -#define L 'F' /* ID letter of our ioctls */ - -#define MAX_IOCTL_REPLY 1024 - -#ifdef VINUMDEBUG -struct debuginfo { - int changeit; - int param; -}; - -#endif - -enum objecttype { - drive_object, - sd_object, - plex_object, - volume_object, - invalid_object -}; - -/* - * The state to set with VINUM_SETSTATE. Since each object has a - * different set of states, we need to translate later. 
- */ -enum objectstate { - object_down, - object_initializing, - object_initialized, - object_up -}; - -/* - * This structure is used for modifying objects - * (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH, - * VINUM_DETACH, VINUM_REPLACE - */ -struct vinum_ioctl_msg { - int index; - enum objecttype type; - enum objectstate state; /* state to set (VINUM_SETSTATE) */ - enum parityop op; /* for parity ops */ - int force; /* do it even if it doesn't make sense */ - int recurse; /* recurse (VINUM_REMOVE) */ - int verify; /* verify (initsd, rebuildparity) */ - int otherobject; /* superordinate object (attach), - * replacement object (replace) */ - int rename; /* rename object (attach) */ - int64_t offset; /* offset of subdisk (for attach) */ - int blocksize; /* size of block to revive (bytes) */ -}; - -/* VINUM_CREATE returns a buffer of this kind */ -struct _ioctl_reply { - int error; - char msg[MAX_IOCTL_REPLY]; -}; - -struct vinum_rename_msg { - int index; - int recurse; /* rename subordinate objects too */ - enum objecttype type; - char newname[MAXNAME]; /* new name to give to object */ -}; - -/* ioctl requests */ -#define BUFSIZE 1024 /* size of buffer, including continuations */ -#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */ -#define VINUM_GETCONFIG _IOR(L, 65, struct __vinum_conf) /* get global config */ -#define VINUM_DRIVECONFIG _IOWR(L, 66, struct _drive) /* get drive config */ -#define VINUM_SDCONFIG _IOWR(L, 67, struct _sd) /* get subdisk config */ -#define VINUM_PLEXCONFIG _IOWR(L, 68, struct _plex) /* get plex config */ -#define VINUM_VOLCONFIG _IOWR(L, 69, struct _volume) /* get volume config */ -#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct _sd) /* get sd config for plex (plex, sdno) */ -#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */ -#define VINUM_SAVECONFIG _IOW(L, 72, int) /* write config to disk */ -#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* 
trash config on disk */ -#define VINUM_INIT _IOC(0, L, 74, 0) /* read config from disk */ -#define VINUM_READCONFIG _IOC(IOC_IN | IOC_OUT, L, 75, BUFSIZE) /* read config from disk */ -#ifdef VINUMDEBUG -#define VINUM_DEBUG _IOWR(L, 127, struct debuginfo) /* call the debugger from ioctl () */ -#endif - -/* - * Start an object. Pass two integers: - * msg [0] index in vinum_conf.<object> - * msg [1] type of object (see below) - * - * Return ioctl_reply - */ -#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */ -#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */ -#define VINUM_STARTCONFIG _IOW(L, 78, int) /* start a configuration operation */ -#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */ -#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */ -#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */ -#define VINUM_REMOVE _IOWR(L, 83, struct _ioctl_reply) /* remove an object */ -#define VINUM_READPOL _IOWR(L, 84, struct _ioctl_reply) /* set read policy */ -#define VINUM_SETSTATE_FORCE _IOC(IOC_IN | IOC_OUT, L, 85, MAX_IOCTL_REPLY) /* diddle object state */ -#define VINUM_RESETSTATS _IOWR(L, 86, struct _ioctl_reply) /* reset object stats */ -#define VINUM_ATTACH _IOWR(L, 87, struct _ioctl_reply) /* attach an object */ -#define VINUM_DETACH _IOWR(L, 88, struct _ioctl_reply) /* remove an object */ - -#define VINUM_RENAME _IOWR(L, 89, struct _ioctl_reply) /* rename an object */ -#define VINUM_REPLACE _IOWR(L, 90, struct _ioctl_reply) /* replace an object */ - -#ifdef VINUMDEBUG -#define VINUM_RQINFO _IOWR(L, 91, struct rqinfo) /* get request info [i] from trace buffer */ -#endif - -#define VINUM_DAEMON _IOC(0, L, 92, 0) /* perform the kernel part of Vinum daemon */ -#define VINUM_FINDDAEMON _IOC(0, L, 93, 0) /* check for presence of Vinum daemon */ -#define VINUM_SETDAEMON _IOW(L, 94, int) /* set daemon flags */ 
-#define VINUM_GETDAEMON _IOR(L, 95, int) /* get daemon flags */ -#define VINUM_PARITYOP _IOWR(L, 96, struct _ioctl_reply) /* check/rebuild RAID-4/5 parity */ -#define VINUM_MOVE _IOWR(L, 98, struct _ioctl_reply) /* move an object */ diff --git a/sys/dev/vinum/vinumioctl.c b/sys/dev/vinum/vinumioctl.c deleted file mode 100644 index 235b125..0000000 --- a/sys/dev/vinum/vinumioctl.c +++ /dev/null @@ -1,960 +0,0 @@ -/* - * XXX replace all the checks on object validity with - * calls to valid<object> - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumioctl.c,v 1.23 2003/05/23 01:02:22 grog Exp grog $ - */ - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -#ifdef VINUMDEBUG -#include <sys/reboot.h> -#endif - -void attachobject(struct vinum_ioctl_msg *); -void detachobject(struct vinum_ioctl_msg *); -void renameobject(struct vinum_rename_msg *); -void replaceobject(struct vinum_ioctl_msg *); -void moveobject(struct vinum_ioctl_msg *); -void setreadpol(struct vinum_ioctl_msg *); - -jmp_buf command_fail; /* return on a failed command */ - -/* ioctl routine */ -int -vinumioctl(struct cdev *dev, - u_long cmd, - caddr_t data, - int flag, - struct thread *td) -{ - unsigned int objno; - struct sd *sd; - struct plex *plex; - struct volume *vol; - - /* First, decide what we're looking at */ - if ((minor(dev) == VINUM_SUPERDEV_MINOR) - || (minor(dev) == VINUM_DAEMON_MINOR)) - return vinum_super_ioctl(dev, cmd, data); - else /* real device */ - switch (DEVTYPE(dev)) { - case VINUM_SD_TYPE: - case VINUM_SD2_TYPE: /* second half of sd namespace */ - objno = Sdno(dev); - - sd = &SD[objno]; - - switch (cmd) { - case DIOCGSECTORSIZE: - *(u_int *) data = sd->sectorsize; - return 0; - - case DIOCGMEDIASIZE: - *(u_int64_t *) data = sd->sectors * sd->sectorsize; - return 0; - - /* - * We don't have this stuff on hardware, - * so just pretend to do it so that - * utilities don't get upset. 
- */ - case DIOCWDINFO: /* write partition info */ - case DIOCSDINFO: /* set partition info */ - return 0; /* not a titty */ - - default: - return ENOTTY; /* not my kind of ioctl */ - } - - return 0; /* pretend we did it */ - - case VINUM_PLEX_TYPE: - objno = Plexno(dev); - - plex = &PLEX[objno]; - - switch (cmd) { - case DIOCGSECTORSIZE: - *(u_int64_t *) data = plex->sectorsize; - return 0; - - case DIOCGMEDIASIZE: - *(u_int64_t *) data = plex->length * plex->sectorsize; - return 0; - - /* - * We don't have this stuff on hardware, - * so just pretend to do it so that - * utilities don't get upset. - */ - case DIOCWDINFO: /* write partition info */ - case DIOCSDINFO: /* set partition info */ - return 0; /* not a titty */ - - default: - return ENOTTY; /* not my kind of ioctl */ - } - - return 0; /* pretend we did it */ - - case VINUM_VOLUME_TYPE: - objno = Volno(dev); - - if ((unsigned) objno >= (unsigned) vinum_conf.volumes_allocated) /* not a valid volume */ - return ENXIO; - vol = &VOL[objno]; - if (vol->state != volume_up) /* not up, */ - return EIO; /* I/O error */ - - switch (cmd) { - case DIOCGSECTORSIZE: - *(u_int *) data = vol->sectorsize; - return 0; - - case DIOCGMEDIASIZE: - *(u_int64_t *) data = vol->size * vol->sectorsize; - return 0; - - /* - * We don't have this stuff on hardware, - * so just pretend to do it so that - * utilities don't get upset. 
- */ - case DIOCWDINFO: /* write partition info */ - case DIOCSDINFO: /* set partition info */ - return 0; /* not a titty */ - - default: - return ENOTTY; /* not my kind of ioctl */ - } - break; - } - return 0; /* XXX */ -} - -/* Handle ioctls for the super device */ -int -vinum_super_ioctl(struct cdev *dev, - u_long cmd, - caddr_t data) -{ - int error = 0; - unsigned int index; /* for transferring config info */ - unsigned int sdno; /* for transferring config info */ - int fe; /* free list element number */ - struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */ - - ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */ - if (error) /* bombed out */ - return 0; /* the reply will contain meaningful info */ - switch (cmd) { -#ifdef VINUMDEBUG - case VINUM_DEBUG: - if (((struct debuginfo *) data)->changeit) /* change debug settings */ - debug = (((struct debuginfo *) data)->param); - else { - if (debug & DEBUG_REMOTEGDB) - boothowto |= RB_GDB; /* serial debug line */ - else - boothowto &= ~RB_GDB; /* local ddb */ - kdb_enter("vinum debug"); - } - ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ - ioctl_reply->error = 0; - return 0; -#endif - - case VINUM_CREATE: /* create a vinum object */ - error = lock_config(); /* get the config for us alone */ - if (error) /* can't do it, */ - return error; /* give up */ - error = setjmp(command_fail); /* come back here on error */ - if (error == 0) /* first time, */ - ioctl_reply->error = parse_user_config((char *) data, /* update the config */ - &keyword_set); - else if (ioctl_reply->error == 0) { /* longjmp, but no error status */ - ioctl_reply->error = EINVAL; /* note that something's up */ - ioctl_reply->msg[0] = '\0'; /* no message? 
*/ - } - unlock_config(); - return 0; /* must be 0 to return the real error info */ - - case VINUM_GETCONFIG: /* get the configuration information */ - bcopy(&vinum_conf, data, sizeof(vinum_conf)); - return 0; - - /* start configuring the subsystem */ - case VINUM_STARTCONFIG: - return start_config(*(int *) data); /* just lock it. Parameter is 'force' */ - - /* - * Move the individual parts of the config to user space. - * - * Specify the index of the object in the first word of data, - * and return the object there - */ - case VINUM_DRIVECONFIG: - index = *(int *) data; /* get the index */ - if (index >= (unsigned) vinum_conf.drives_allocated) /* can't do it */ - return ENXIO; /* bang */ - bcopy(&DRIVE[index], data, sizeof(struct _drive)); /* copy the config item out */ - return 0; - - case VINUM_SDCONFIG: - index = *(int *) data; /* get the index */ - if (index >= (unsigned) vinum_conf.subdisks_allocated) /* can't do it */ - return ENXIO; /* bang */ - bcopy(&SD[index], data, sizeof(struct _sd)); /* copy the config item out */ - return 0; - - case VINUM_PLEXCONFIG: - index = *(int *) data; /* get the index */ - if (index >= (unsigned) vinum_conf.plexes_allocated) /* can't do it */ - return ENXIO; /* bang */ - bcopy(&PLEX[index], data, sizeof(struct _plex)); /* copy the config item out */ - return 0; - - case VINUM_VOLCONFIG: - index = *(int *) data; /* get the index */ - if (index >= (unsigned) vinum_conf.volumes_allocated) /* can't do it */ - return ENXIO; /* bang */ - bcopy(&VOL[index], data, sizeof(struct _volume)); /* copy the config item out */ - return 0; - - case VINUM_PLEXSDCONFIG: - index = *(int *) data; /* get the plex index */ - sdno = ((int *) data)[1]; /* and the sd index */ - if ((index >= (unsigned) vinum_conf.plexes_allocated) /* plex doesn't exist */ - ||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */ - return ENXIO; /* bang */ - bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */ - data, - sizeof(struct 
_sd)); - return 0; - - /* - * We get called in two places: one from the - * userland config routines, which call us - * to complete the config and save it. This - * call supplies the value 0 as a parameter. - * - * The other place is from the user "saveconfig" - * routine, which can only work if we're *not* - * configuring. In this case, supply parameter 1. - */ - case VINUM_SAVECONFIG: - if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */ - if (*(int *) data == 0) /* finish config */ - finish_config(1); /* finish the configuration and update it */ - else - return EBUSY; /* can't do it now */ - } - save_config(); /* save configuration to disk */ - return 0; - - case VINUM_RELEASECONFIG: /* release the config */ - if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */ - finish_config(0); /* finish the configuration, don't change it */ - save_config(); /* save configuration to disk */ - } else - error = EINVAL; /* release what config? */ - return error; - - case VINUM_READCONFIG: - if (((char *) data)[0] == '\0') - ioctl_reply->error = vinum_scandisk(NULL); /* built your own list */ - else - ioctl_reply->error = vinum_scandisk((char *) data); - if (ioctl_reply->error == ENOENT) { - if (vinum_conf.drives_used > 0) - strcpy(ioctl_reply->msg, "no additional drives found"); - else - strcpy(ioctl_reply->msg, "no drives found"); - } else if (ioctl_reply->error) - strcpy(ioctl_reply->msg, "can't read configuration information, see log file"); - return 0; /* must be 0 to return the real error info */ - - case VINUM_INIT: - ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ - ioctl_reply->error = 0; - return 0; - - case VINUM_RESETCONFIG: - if (vinum_inactive(0)) { /* if the volumes are not active */ - /* - * Note the open count. We may be called from v, so we'll be open. 
- * Keep the count so we don't underflow - */ - free_vinum(1); /* clean up everything */ - log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n"); - ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ - ioctl_reply->error = 0; - return 0; - } - return EBUSY; - - case VINUM_SETSTATE: - setstate((struct vinum_ioctl_msg *) data); /* set an object state */ - return 0; - - /* - * Set state by force, without changing - * anything else. - */ - case VINUM_SETSTATE_FORCE: - setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */ - return 0; - -#ifdef VINUMDEBUG - case VINUM_MEMINFO: - vinum_meminfo(data); - return 0; - - case VINUM_MALLOCINFO: - return vinum_mallocinfo(data); - - case VINUM_RQINFO: - return vinum_rqinfo(data); -#endif - - case VINUM_REMOVE: - remove((struct vinum_ioctl_msg *) data); /* remove an object */ - return 0; - - case VINUM_GETFREELIST: /* get a drive free list element */ - index = *(int *) data; /* get the drive index */ - fe = ((int *) data)[1]; /* and the free list element */ - if ((index >= (unsigned) vinum_conf.drives_allocated) /* plex doesn't exist */ - ||(DRIVE[index].state == drive_unallocated)) - return ENODEV; - if (fe >= DRIVE[index].freelist_entries) /* no such entry */ - return ENOENT; - bcopy(&DRIVE[index].freelist[fe], - data, - sizeof(struct drive_freelist)); - return 0; - - case VINUM_RESETSTATS: - resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */ - return 0; - - /* attach an object to a superordinate object */ - case VINUM_ATTACH: - attachobject((struct vinum_ioctl_msg *) data); - return 0; - - /* detach an object from a superordinate object */ - case VINUM_DETACH: - detachobject((struct vinum_ioctl_msg *) data); - return 0; - - /* rename an object */ - case VINUM_RENAME: - renameobject((struct vinum_rename_msg *) data); - return 0; - - /* replace an object */ - case VINUM_REPLACE: - replaceobject((struct vinum_ioctl_msg *) data); - return 0; - - case 
VINUM_DAEMON: - vinum_daemon(); /* perform the daemon */ - return 0; - - case VINUM_FINDDAEMON: /* check for presence of daemon */ - return vinum_finddaemon(); - return 0; - - case VINUM_SETDAEMON: /* set daemon flags */ - return vinum_setdaemonopts(*(int *) data); - - case VINUM_GETDAEMON: /* get daemon flags */ - *(int *) data = daemon_options; - return 0; - - case VINUM_PARITYOP: /* check/rebuild RAID-4/5 parity */ - parityops((struct vinum_ioctl_msg *) data); - return 0; - - /* move an object */ - case VINUM_MOVE: - moveobject((struct vinum_ioctl_msg *) data); - return 0; - - case VINUM_READPOL: - setreadpol((struct vinum_ioctl_msg *) data); - return 0; - - default: - /* FALLTHROUGH */ - break; - } - return 0; /* to keep the compiler happy */ -} - -/* - * The following four functions check the supplied - * object index and return a pointer to the object - * if it exists. Otherwise they longjump out via - * throw_rude_remark. - */ -struct drive * -validdrive(int driveno, struct _ioctl_reply *reply) -{ - if ((driveno < vinum_conf.drives_allocated) - && (DRIVE[driveno].state > drive_referenced)) - return &DRIVE[driveno]; - strcpy(reply->msg, "No such drive"); - reply->error = ENOENT; - return NULL; -} - -struct sd * -validsd(int sdno, struct _ioctl_reply *reply) -{ - if ((sdno < vinum_conf.subdisks_allocated) - && (SD[sdno].state > sd_referenced)) - return &SD[sdno]; - strcpy(reply->msg, "No such subdisk"); - reply->error = ENOENT; - return NULL; -} - -struct plex * -validplex(int plexno, struct _ioctl_reply *reply) -{ - if ((plexno < vinum_conf.plexes_allocated) - && (PLEX[plexno].state > plex_referenced)) - return &PLEX[plexno]; - strcpy(reply->msg, "No such plex"); - reply->error = ENOENT; - return NULL; -} - -struct volume * -validvol(int volno, struct _ioctl_reply *reply) -{ - if ((volno < vinum_conf.volumes_allocated) - && (VOL[volno].state > volume_uninit)) - return &VOL[volno]; - strcpy(reply->msg, "No such volume"); - reply->error = ENOENT; - return NULL; 
-} - -/* reset an object's stats */ -void -resetstats(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - - switch (msg->type) { - case drive_object: - if (msg->index < vinum_conf.drives_allocated) { - struct drive *drive = &DRIVE[msg->index]; - if (drive->state > drive_referenced) { - drive->reads = 0; /* number of reads on this drive */ - drive->writes = 0; /* number of writes on this drive */ - drive->bytes_read = 0; /* number of bytes read */ - drive->bytes_written = 0; /* number of bytes written */ - reply->error = 0; - return; - } - reply->error = EINVAL; - return; - } - case sd_object: - if (msg->index < vinum_conf.subdisks_allocated) { - struct sd *sd = &SD[msg->index]; - if (sd->state > sd_referenced) { - sd->reads = 0; /* number of reads on this subdisk */ - sd->writes = 0; /* number of writes on this subdisk */ - sd->bytes_read = 0; /* number of bytes read */ - sd->bytes_written = 0; /* number of bytes written */ - reply->error = 0; - return; - } - reply->error = EINVAL; - return; - } - break; - - case plex_object: - if (msg->index < vinum_conf.plexes_allocated) { - struct plex *plex = &PLEX[msg->index]; - if (plex->state > plex_referenced) { - plex->reads = 0; - plex->writes = 0; /* number of writes on this plex */ - plex->bytes_read = 0; /* number of bytes read */ - plex->bytes_written = 0; /* number of bytes written */ - plex->recovered_reads = 0; /* number of recovered read operations */ - plex->degraded_writes = 0; /* number of degraded writes */ - plex->parityless_writes = 0; /* number of parityless writes */ - plex->multiblock = 0; /* requests that needed more than one block */ - plex->multistripe = 0; /* requests that needed more than one stripe */ - reply->error = 0; - return; - } - reply->error = EINVAL; - return; - } - break; - - case volume_object: - if (msg->index < vinum_conf.volumes_allocated) { - struct volume *vol = &VOL[msg->index]; - if (vol->state > volume_uninit) { - vol->bytes_read = 0; /* 
number of bytes read */ - vol->bytes_written = 0; /* number of bytes written */ - vol->reads = 0; /* number of reads on this volume */ - vol->writes = 0; /* number of writes on this volume */ - vol->recovered_reads = 0; /* reads recovered from another plex */ - reply->error = 0; - return; - } - reply->error = EINVAL; - return; - } - case invalid_object: /* can't get this */ - reply->error = EINVAL; - return; - } -} - -/* attach an object to a superior object */ -void -attachobject(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - int sdno; - struct sd *sd; - struct plex *plex; - struct volume *vol; - - switch (msg->type) { - case drive_object: /* you can't attach a drive to anything */ - case volume_object: /* nor a volume */ - case invalid_object: /* "this can't happen" */ - reply->error = EINVAL; - reply->msg[0] = '\0'; /* vinum(8) doesn't do this */ - return; - - case sd_object: - sd = validsd(msg->index, reply); - if (sd == NULL) /* not a valid subdisk */ - return; - plex = validplex(msg->otherobject, reply); - if (plex) { - /* - * We should be more intelligent about this. - * We should be able to reattach a dead - * subdisk, but if we want to increase the total - * number of subdisks, we have a lot of reshuffling - * to do. 
XXX - */ - if ((plex->organization != plex_concat) /* can't attach to striped and RAID-4/5 */ - &&(!msg->force)) { /* without using force */ - reply->error = EINVAL; /* no message, the user should check */ - strcpy(reply->msg, "Can't attach to this plex organization"); - } else if (sd->plexno >= 0) { /* already belong to a plex */ - reply->error = EBUSY; /* no message, the user should check */ - sprintf(reply->msg, "%s is already attached to %s", - sd->name, - sd[sd->plexno].name); - reply->msg[0] = '\0'; - } else { - sd->plexoffset = msg->offset; /* this is where we want it */ - set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */ - give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */ - update_sd_config(sd->sdno, 0); - save_config(); - if (sd->state == sd_reviving) - reply->error = EAGAIN; /* need to revive it */ - else - reply->error = 0; - } - } - break; - - case plex_object: - plex = validplex(msg->index, reply); /* get plex */ - if (plex == NULL) - return; - vol = validvol(msg->otherobject, reply); /* and volume information */ - if (vol) { - if (vol->plexes == MAXPLEX) { /* we have too many already */ - reply->error = ENOSPC; /* nowhere to put it */ - strcpy(reply->msg, "Too many plexes"); - } else if (plex->volno >= 0) { /* the plex has an owner */ - reply->error = EBUSY; /* no message, the user should check */ - sprintf(reply->msg, "%s is already attached to %s", - plex->name, - VOL[plex->volno].name); - } else { - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; - - if (sd->state > sd_down) /* real subdisk, vaguely accessible */ - set_sd_state(plex->sdnos[sdno], sd_stale, setstate_force); /* make it stale */ - } - set_plex_state(plex->plexno, plex_up, setstate_none); /* update plex state */ - give_plex_to_volume(msg->otherobject, msg->index, 0); /* and give it to the volume */ - update_plex_config(plex->plexno, 0); - save_config(); - reply->error = 0; /* all went well */ - } - } - } -} 
- -/* detach an object from a superior object */ -void -detachobject(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - struct sd *sd; - struct plex *plex; - struct volume *vol; - int sdno; - int plexno; - - switch (msg->type) { - case drive_object: /* you can't detach a drive from anything */ - case volume_object: /* nor a volume */ - case invalid_object: /* "this can't happen" */ - reply->error = EINVAL; - reply->msg[0] = '\0'; /* vinum(8) doesn't do this */ - return; - - case sd_object: - sd = validsd(msg->index, reply); - if (sd == NULL) - return; - if (sd->plexno < 0) { /* doesn't belong to a plex */ - reply->error = ENOENT; - strcpy(reply->msg, "Subdisk is not attached"); - return; - } else { /* valid plex number */ - plex = &PLEX[sd->plexno]; - if ((!msg->force) /* don't force things */ - &&((plex->state == plex_up) /* and the plex is up */ - ||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */ - reply->error = EBUSY; /* we need this sd */ - reply->msg[0] = '\0'; - return; - } - sd->plexno = -1; /* anonymous sd */ - if (plex->subdisks == 1) { /* this was the only subdisk */ - Free(plex->sdnos); /* free the subdisk array */ - plex->sdnos = NULL; /* and note the fact */ - plex->subdisks_allocated = 0; /* no subdisk space */ - } else { - for (sdno = 0; sdno < plex->subdisks; sdno++) { - if (plex->sdnos[sdno] == msg->index) /* found our subdisk */ - break; - } - if (sdno < (plex->subdisks - 1)) /* not the last one, compact */ - bcopy(&plex->sdnos[sdno + 1], - &plex->sdnos[sdno], - (plex->subdisks - 1 - sdno) * sizeof(int)); - } - plex->subdisks--; - if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1)) - /* this subdisk is named after the plex */ - { - bcopy(sd->name, - &sd->name[3], - min(strlen(sd->name) + 1, MAXSDNAME - 3)); - bcopy("ex-", sd->name, 3); - sd->name[MAXSDNAME - 1] = '\0'; - } - update_plex_config(plex->plexno, 0); - if (isstriped(plex)) /* we've just mutilated our 
plex, */ - set_plex_state(plex->plexno, - plex_down, - setstate_force | setstate_configuring); - if (plex->volno >= 0) /* plex attached to volume, */ - update_volume_config(plex->volno); - save_config(); - reply->error = 0; - } - return; - - case plex_object: - plex = validplex(msg->index, reply); /* get plex */ - if (plex == NULL) - return; - if (plex->volno >= 0) { - int volno = plex->volno; - - vol = &VOL[volno]; - if ((!msg->force) /* don't force things */ - &&((vol->state == volume_up) /* and the volume is up */ - &&(vol->plexes == 1))) { /* and this is the last plex */ - /* - * XXX As elsewhere, check whether we will lose - * mapping by removing this plex - */ - reply->error = EBUSY; /* we need this plex */ - reply->msg[0] = '\0'; - return; - } - plex->volno = -1; /* anonymous plex */ - for (plexno = 0; plexno < vol->plexes; plexno++) { - if (vol->plex[plexno] == msg->index) /* found our plex */ - break; - } - if (plexno < (vol->plexes - 1)) /* not the last one, compact */ - bcopy(&vol->plex[plexno + 1], - &vol->plex[plexno], - (vol->plexes - 1 - plexno) * sizeof(int)); - vol->plexes--; - vol->last_plex_read = 0; /* don't go beyond the end */ - if (!bcmp(vol->name, plex->name, strlen(vol->name) + 1)) - /* this plex is named after the volume */ - { - /* First, check if the subdisks are the same */ - if (msg->recurse) { - int sdno; - - for (sdno = 0; sdno < plex->subdisks; sdno++) { - struct sd *sd = &SD[plex->sdnos[sdno]]; - - if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1)) - /* subdisk is named after the plex */ - { - bcopy(sd->name, - &sd->name[3], - min(strlen(sd->name) + 1, MAXSDNAME - 3)); - bcopy("ex-", sd->name, 3); - sd->name[MAXSDNAME - 1] = '\0'; - } - } - } - bcopy(plex->name, - &plex->name[3], - min(strlen(plex->name) + 1, MAXPLEXNAME - 3)); - bcopy("ex-", plex->name, 3); - plex->name[MAXPLEXNAME - 1] = '\0'; - } - update_volume_config(volno); - save_config(); - reply->error = 0; - } else { - reply->error = ENOENT; - strcpy(reply->msg, 
"Plex is not attached"); - } - } -} - -void -renameobject(struct vinum_rename_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - struct drive *drive; - struct sd *sd; - struct plex *plex; - struct volume *vol; - - switch (msg->type) { - case drive_object: /* you can't attach a drive to anything */ - if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */ - reply->error = EEXIST; - reply->msg[0] = '\0'; - return; - } - drive = validdrive(msg->index, reply); - if (drive) { - bcopy(msg->newname, drive->label.name, MAXDRIVENAME); - save_config(); - reply->error = 0; - } - return; - - case sd_object: /* you can't attach a subdisk to anything */ - if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */ - reply->error = EEXIST; - reply->msg[0] = '\0'; - return; - } - sd = validsd(msg->index, reply); - if (sd) { - bcopy(msg->newname, sd->name, MAXSDNAME); - update_sd_config(sd->sdno, 0); - save_config(); - reply->error = 0; - } - return; - - case plex_object: /* you can't attach a plex to anything */ - if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */ - reply->error = EEXIST; - reply->msg[0] = '\0'; - return; - } - plex = validplex(msg->index, reply); - if (plex) { - bcopy(msg->newname, plex->name, MAXPLEXNAME); - update_plex_config(plex->plexno, 0); - save_config(); - reply->error = 0; - } - return; - - case volume_object: /* you can't attach a volume to anything */ - if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */ - reply->error = EEXIST; - reply->msg[0] = '\0'; - return; - } - vol = validvol(msg->index, reply); - if (vol) { - bcopy(msg->newname, vol->name, MAXVOLNAME); - update_volume_config(msg->index); - save_config(); - reply->error = 0; - } - return; - - case invalid_object: - reply->error = EINVAL; - reply->msg[0] = '\0'; - } -} - -/* - * Replace one object with another. - * Currently only for drives. 
- * message->index is the drive number of the old drive - * message->otherobject is the drive number of the new drive - */ -void -replaceobject(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - - reply->error = ENODEV; /* until I know how to do this */ - strcpy(reply->msg, "replace not implemented yet"); -/* save_config (); */ -} - -void -moveobject(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - struct drive *drive; - struct sd *sd; - - /* Check that our objects are valid (i.e. they exist) */ - drive = validdrive(msg->index, (struct _ioctl_reply *) msg); - if (drive == NULL) - return; - sd = validsd(msg->otherobject, (struct _ioctl_reply *) msg); - if (sd == NULL) - return; - if (sd->driveno == msg->index) /* sd already belongs to drive */ - return; - - if (sd->state > sd_stale) - set_sd_state(sd->sdno, sd_stale, setstate_force); /* make the subdisk stale */ - else - sd->state = sd_empty; - if (sd->plexno >= 0) /* part of a plex, */ - update_plex_state(sd->plexno); /* update its state */ - - /* Return the space on the old drive */ - if ((sd->driveno >= 0) /* we have a drive, */ - &&(sd->sectors > 0)) /* and some space on it */ - return_drive_space(sd->driveno, /* return the space */ - sd->driveoffset, - sd->sectors); - - /* Reassign the old subdisk */ - sd->driveno = msg->index; - sd->driveoffset = -1; /* let the drive decide where to put us */ - give_sd_to_drive(sd->sdno); - reply->error = 0; -} - -void -setreadpol(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *reply = (struct _ioctl_reply *) msg; - struct volume *vol; - struct plex *plex; - int myplexno = -1; - - /* Check that our objects are valid (i.e. 
they exist) */ - vol = validvol(msg->index, reply); - if (vol == NULL) - return; - - /* If a plex was specified, check that is is valid */ - if (msg->otherobject >= 0) { - plex = validplex(msg->otherobject, reply); - if (vol == NULL) - return; - - /* Is it attached to this volume? */ - myplexno = my_plex(msg->index, msg->otherobject); - if (myplexno < 0) { - strcpy(reply->msg, "Plex is not attached to volume"); - reply->error = ENOENT; - return; - } - } - lock_config(); - vol->preferred_plex = myplexno; - save_config(); - unlock_config(); - reply->error = 0; -} - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumkw.h b/sys/dev/vinum/vinumkw.h deleted file mode 100644 index d7bc7a5..0000000 --- a/sys/dev/vinum/vinumkw.h +++ /dev/null @@ -1,152 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumkw.h,v 1.20 2003/05/07 03:32:09 grog Exp grog $ - * $FreeBSD$ - */

/*
 * Command keywords that vinum knows.  These include both user-level
 * and kernel-level stuff
 */

/*
 * Our complete vocabulary.  The names of the commands are
 * the same as the identifier without the kw_ at the beginning
 * (i.e. kw_create defines the "create" keyword).  Preprocessor
 * magic in parser.c does the rest.
 *
 * To add a new word: put it in the table below and one of the
 * lists in vinumparser.c (probably keywords).
 *
 * Several identifiers are aliases: they are assigned the value of an
 * earlier enumerator (kw_l, kw_subdisk, kw_vol, kw_length, kw_size).
 */
enum keyword {
    kw_create,
    kw_modify,
    kw_list,
    kw_l = kw_list,
    kw_ld,                                  /* list drive */
    kw_ls,                                  /* list subdisk */
    kw_lp,                                  /* list plex */
    kw_lv,                                  /* list volume */
    kw_set,
    kw_rm,
    kw_mv,                                  /* move object */
    kw_move,                                /* synonym for mv */
    kw_start,
    kw_stop,
    kw_makedev,                             /* make /dev/vinum devices */
    kw_setdaemon,                           /* set daemon flags */
    kw_getdaemon,                           /* get daemon flags */
    kw_help,
    kw_drive,
    kw_partition,
    kw_sd,
    kw_subdisk = kw_sd,
    kw_plex,
    kw_volume,
    kw_vol = kw_volume,
    kw_read,
    kw_readpol,
    kw_org,
    kw_name,
    kw_concat,
    kw_striped,
    kw_raid4,
    kw_raid5,
    kw_driveoffset,
    kw_plexoffset,
    kw_len,
    kw_length = kw_len,
    kw_size = kw_len,
    kw_state,
    kw_setupstate,
    kw_d,                                   /* flag names */
    kw_f,
    kw_r,
    kw_s,
    kw_v,
    kw_w,
    kw_round,                               /* round robin */
    /*
     * The first of these is a volume attribute ("prefer plex"), and the
     * second is a plex attribute ("preferred" means that the volume
     * prefers this plex).
     */
    kw_prefer,                              /* prefer plex */
    kw_preferred,                           /* preferred plex */
    kw_device,
    kw_init,
    kw_resetconfig,
    kw_writethrough,
    kw_writeback,
    kw_replace,
    kw_resetstats,
    kw_attach,
    kw_detach,
    kw_rename,
    kw_printconfig,
    kw_saveconfig,
    kw_hotspare,
    kw_detached,
    kw_debug,                               /* go into debugger */
    kw_stripe,
    kw_mirror,
    kw_info,
    kw_quit,
    kw_max,
    kw_setstate,
    kw_checkparity,
    kw_rebuildparity,
    kw_dumpconfig,
    kw_retryerrors,
    kw_invalid_keyword = -1
};

/* One vocabulary entry: the keyword text and the enumerator it stands for */
struct _keywords {
    char *name;
    enum keyword keyword;
};

/* A keyword table together with its size */
struct keywordset {
    int size;
    struct _keywords *k;
};

extern struct _keywords keywords[];
extern struct _keywords flag_keywords[];

extern struct keywordset keyword_set;
extern struct keywordset flag_set;

/* Parser functions */

enum keyword get_keyword(char *, struct keywordset *);
int tokenize(char *, char *[], int);
diff --git a/sys/dev/vinum/vinumlock.c b/sys/dev/vinum/vinumlock.c deleted file mode 100644 index
f1a2ea3..0000000 --- a/sys/dev/vinum/vinumlock.c +++ /dev/null @@ -1,266 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumlock.c,v 1.19 2003/05/23 01:07:18 grog Exp $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -/* Lock a drive, wait if it's in use */ -#ifdef VINUMDEBUG -int -lockdrive(struct drive *drive, char *file, int line) -#else -int -lockdrive(struct drive *drive) -#endif -{ - int error; - - /* XXX get rid of drive->flags |= VF_LOCKING; */ - if ((drive->flags & VF_LOCKED) /* it's locked */ - &&(drive->pid == curproc->p_pid)) { /* by us! */ -#ifdef VINUMDEBUG - log(LOG_WARNING, - "vinum lockdrive: already locking %s from %s:%d, called from %s:%d\n", - drive->label.name, - drive->lockfilename, - drive->lockline, - basename(file), - line); -#else - log(LOG_WARNING, - "vinum lockdrive: already locking %s\n", - drive->label.name); -#endif - return 0; - } - while ((drive->flags & VF_LOCKED) != 0) { - /* - * There are problems sleeping on a unique identifier, - * since the drive structure can move, and the unlock - * function can be called after killing the drive. - * Solve this by waiting on this function; the number - * of conflicts is negligible. 
- */ - if ((error = tsleep(&lockdrive, - PRIBIO, - "vindrv", - 0)) != 0) - return error; - } - drive->flags |= VF_LOCKED; - drive->pid = curproc->p_pid; /* it's a panic error if curproc is null */ -#ifdef VINUMDEBUG - bcopy(basename(file), drive->lockfilename, 15); - drive->lockfilename[15] = '\0'; /* truncate if necessary */ - drive->lockline = line; -#endif - return 0; -} - -/* Unlock a drive and let the next one at it */ -void -unlockdrive(struct drive *drive) -{ - drive->flags &= ~VF_LOCKED; - /* we don't reset pid: it's of hysterical interest */ - wakeup(&lockdrive); -} - -/* Lock a stripe of a plex, wait if it's in use */ -struct rangelock * -lockrange(daddr_t stripe, struct buf *bp, struct plex *plex) -{ - struct rangelock *lock; - struct rangelock *pos; /* position of first free lock */ - int foundlocks; /* number of locks found */ - - /* - * We could get by without counting the number - * of locks we find, but we have a linear search - * through a table which in most cases will be - * empty. It's faster to stop when we've found - * all the locks that are there. This is also - * the reason why we put pos at the beginning - * instead of the end, though it requires an - * extra test. - */ - pos = NULL; - foundlocks = 0; - - /* - * we can't use 0 as a valid address, so - * increment all addresses by 1. 
- */ - stripe++; - mtx_lock(plex->lockmtx); - - /* Wait here if the table is full */ - while (plex->usedlocks == PLEX_LOCKS) /* all in use */ - msleep(&plex->usedlocks, plex->lockmtx, PRIBIO, "vlock", 0); - -#ifdef DIAGNOSTIC - if (plex->usedlocks >= PLEX_LOCKS) - panic("lockrange: Too many locks in use"); -#endif - - lock = plex->lock; /* pointer in lock table */ - if (plex->usedlocks > 0) /* something locked, */ - /* Search the lock table for our stripe */ - for (; lock < &plex->lock[PLEX_LOCKS] - && foundlocks < plex->usedlocks; - lock++) { - if (lock->stripe) { /* in use */ - foundlocks++; /* found another one in use */ - if ((lock->stripe == stripe) /* it's our stripe */ - &&(lock->bp != bp)) { /* but not our request */ -#ifdef VINUMDEBUG - if (debug & DEBUG_LOCKREQS) { - struct rangelockinfo lockinfo; - - lockinfo.stripe = stripe; - lockinfo.bp = bp; - lockinfo.plexno = plex->plexno; - logrq(loginfo_lockwait, (union rqinfou) &lockinfo, bp); - } -#endif - plex->lockwaits++; /* waited one more time */ - msleep(lock, plex->lockmtx, PRIBIO, "vrlock", 0); - lock = &plex->lock[-1]; /* start again */ - foundlocks = 0; - pos = NULL; - } - } else if (pos == NULL) /* still looking for somewhere? */ - pos = lock; /* a place to put this one */ - } - /* - * This untidy looking code ensures that we'll - * always end up pointing to the first free lock - * entry, thus minimizing the number of - * iterations necessary. - */ - if (pos == NULL) /* didn't find one on the way, */ - pos = lock; /* use the one we're pointing to */ - - /* - * The address range is free, and we're pointing - * to the first unused entry. Make it ours. 
- */ - pos->stripe = stripe; - pos->bp = bp; - plex->usedlocks++; /* one more lock */ - mtx_unlock(plex->lockmtx); -#ifdef VINUMDEBUG - if (debug & DEBUG_LOCKREQS) { - struct rangelockinfo lockinfo; - - lockinfo.stripe = stripe; - lockinfo.bp = bp; - lockinfo.plexno = plex->plexno; - logrq(loginfo_lock, (union rqinfou) &lockinfo, bp); - } -#endif - return pos; -} - -/* Unlock a volume and let the next one at it */ -void -unlockrange(int plexno, struct rangelock *lock) -{ - struct plex *plex; - - plex = &PLEX[plexno]; -#ifdef DIAGNOSTIC - if (lock < &plex->lock[0] || lock >= &plex->lock[PLEX_LOCKS]) - panic("vinum: rangelock %p on plex %d invalid, not between %p and %p", - lock, - plexno, - &plex->lock[0], - &plex->lock[PLEX_LOCKS]); -#endif -#ifdef VINUMDEBUG - if (debug & DEBUG_LOCKREQS) { - struct rangelockinfo lockinfo; - - lockinfo.stripe = lock->stripe; - lockinfo.bp = lock->bp; - lockinfo.plexno = plex->plexno; - logrq(loginfo_lockwait, (union rqinfou) &lockinfo, lock->bp); - } -#endif - lock->stripe = 0; /* no longer used */ - plex->usedlocks--; /* one less lock */ - if (plex->usedlocks == PLEX_LOCKS - 1) /* we were full, */ - wakeup(&plex->usedlocks); /* get a waiter if one's there */ - wakeup((void *) lock); -} - -/* Get a lock for the global config. Wait if it's not available. */ -int -lock_config(void) -{ - int error; - - while ((vinum_conf.flags & VF_LOCKED) != 0) { - vinum_conf.flags |= VF_LOCKING; - if ((error = tsleep(&vinum_conf, PRIBIO, "vincfg", 0)) != 0) - return error; - } - vinum_conf.flags |= VF_LOCKED; - return 0; -} - -/* Unlock global config and wake up any waiters. 
*/ -void -unlock_config(void) -{ - vinum_conf.flags &= ~VF_LOCKED; - if ((vinum_conf.flags & VF_LOCKING) != 0) { - vinum_conf.flags &= ~VF_LOCKING; - wakeup(&vinum_conf); - } -} -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinummemory.c b/sys/dev/vinum/vinummemory.c deleted file mode 100644 index 43e1937..0000000 --- a/sys/dev/vinum/vinummemory.c +++ /dev/null @@ -1,290 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. 
- * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinummemory.c,v 1.31 2003/05/23 01:08:36 grog Exp $ - */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/vinum/vinumhdr.h>

#ifdef VINUMDEBUG
#include <dev/vinum/request.h>
extern struct rqinfo rqinfo[];
extern struct rqinfo *rqip;
int rqinfo_size = RQINFO_SIZE;              /* for debugger */

#undef longjmp                              /* this was defined as LongJmp */
#define strrchr rindex
#ifdef __i386__                             /* check for validity */
/*
 * Checking wrapper round longjmp: panic if the saved stack pointer,
 * frame pointer or return address in buf is below 0xc0000000,
 * otherwise do the longjmp.
 */
void
LongJmp(jmp_buf buf, int retval)
{
/*
 * longjmp is not documented, not even jmp_buf.
 * This is what's in i386/i386/support.s:
 * ENTRY(longjmp)
 *    movl    4(%esp),%eax
 *    movl    (%eax),%ebx     restore ebx
 *    movl    4(%eax),%esp    restore esp
 *    movl    8(%eax),%ebp    restore ebp
 *    movl    12(%eax),%esi   restore esi
 *    movl    16(%eax),%edi   restore edi
 *    movl    20(%eax),%edx   get rta
 *    movl    %edx,(%esp)     put in return frame
 *    xorl    %eax,%eax       return(1);
 *    incl    %eax
 *    ret
 *
 * from which we deduce the structure of jmp_buf:
 */
    struct JmpBuf {
        int jb_ebx;
        int jb_esp;
        int jb_ebp;
        int jb_esi;
        int jb_edi;
        int jb_eip;
    };

    struct JmpBuf *jb = (struct JmpBuf *) buf;

    if ((jb->jb_esp < 0xc0000000)
        || (jb->jb_ebp < 0xc0000000)
        || (jb->jb_eip < 0xc0000000))
        panic("Invalid longjmp");
    longjmp(buf, retval);
}

#else                                       /* not i386 */
#define LongJmp longjmp                     /* just use the kernel function */
#endif                                      /* i386 */

/*
 * find the base name of a path name: returns a pointer into file, just
 * after the last '/', or file itself if it contains no '/'.
 */
char *
basename(char *file)
{
    char *f = strrchr(file, '/');           /* chop off dirname if present */

    if (f == NULL)
        return file;
    else
        return ++f;                         /* skip the / */
}
#endif                                      /* VINUMDEBUG */

/*
 * Grow a table: if newsize is greater than oldsize, allocate a zeroed
 * area of newsize bytes, copy the first oldsize bytes of the existing
 * table (if there is one) into it, free the old table and store the new
 * pointer in *table.  Does nothing if newsize is not greater than
 * oldsize.  In the debug build, file and line identify the caller for
 * the allocation trace.
 */
#ifdef VINUMDEBUG
void
expand_table(void **table, int oldsize, int newsize, char *file, int line)
#else
void
expand_table(void **table, int oldsize, int newsize)
#endif
{
    if (newsize > oldsize) {
        int *temp;
        int s;

        s = splhigh();
#ifdef VINUMDEBUG
        temp = (int *) MMalloc(newsize, file, line); /* allocate a new table */
#else
        temp = (int *) Malloc(newsize);     /* allocate a new table */
#endif
        CHECKALLOC(temp, "vinum: Can't expand table\n");
        bzero((char *) temp, newsize);      /* clean it all out */
        if (*table != NULL) {               /* already something there, */
            bcopy((char *) *table, (char *) temp, oldsize); /* copy it to the new table */
#ifdef VINUMDEBUG
            FFree(*table, file, line);
#else
            Free(*table);
#endif
        }
        *table = temp;
        splx(s);
    }
}

#ifdef VINUMDEBUG
#define MALLOCENTRIES 16384
int malloccount = 0;                        /* number of entries in use in malloced[] */
int highwater = 0;                          /* highest index ever allocated */
-struct mc malloced[MALLOCENTRIES]; - -#define FREECOUNT 64 -int freecount = FREECOUNT; /* for debugger */ -int lastfree = 0; -struct mc freeinfo[FREECOUNT]; - -int total_malloced; -static int mallocseq = 0; - -caddr_t -MMalloc(int size, char *file, int line) -{ - int s; - caddr_t result; - int i; - - if (malloccount >= MALLOCENTRIES) { /* too many */ - log(LOG_ERR, "vinum: can't allocate table space to trace memory allocation"); - return 0; /* can't continue */ - } - /* Wait for malloc if we can */ - result = malloc(size, - M_DEVBUF, - curthread->td_intr_nesting_level == 0 ? M_WAITOK : M_NOWAIT); - if (result == NULL) - log(LOG_ERR, "vinum: can't allocate %d bytes from %s:%d\n", size, file, line); - else { - s = splhigh(); - for (i = 0; i < malloccount; i++) { - if (((result + size) > malloced[i].address) - && (result < malloced[i].address + malloced[i].size)) /* overlap */ - kdb_enter("Malloc overlap"); - } - if (result) { - char *f = basename(file); - - i = malloccount++; - total_malloced += size; - microtime(&malloced[i].time); - malloced[i].seq = mallocseq++; - malloced[i].size = size; - malloced[i].line = line; - malloced[i].address = result; - strlcpy(malloced[i].file, f, MCFILENAMELEN); - } - if (malloccount > highwater) - highwater = malloccount; - splx(s); - } - return result; -} - -void -FFree(void *mem, char *file, int line) -{ - int s; - int i; - - s = splhigh(); - for (i = 0; i < malloccount; i++) { - if ((caddr_t) mem == malloced[i].address) { /* found it */ - bzero(mem, malloced[i].size); /* XXX */ - free(mem, M_DEVBUF); - malloccount--; - total_malloced -= malloced[i].size; - if (debug & DEBUG_MEMFREE) { /* keep track of recent frees */ - char *f = strrchr(file, '/'); /* chop off dirname if present */ - - if (f == NULL) - f = file; - else - f++; /* skip the / */ - - microtime(&freeinfo[lastfree].time); - freeinfo[lastfree].seq = malloced[i].seq; - freeinfo[lastfree].size = malloced[i].size; - freeinfo[lastfree].line = line; - 
freeinfo[lastfree].address = mem; - bcopy(f, freeinfo[lastfree].file, MCFILENAMELEN); - if (++lastfree == FREECOUNT) - lastfree = 0; - } - if (i < malloccount) /* more coming after */ - bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc)); - splx(s); - return; - } - } - splx(s); - log(LOG_ERR, - "Freeing unallocated data at 0x%p from %s, line %d\n", - mem, - file, - line); - kdb_enter("Free"); -} - -void -vinum_meminfo(caddr_t data) -{ - struct meminfo *m = (struct meminfo *) data; - - m->mallocs = malloccount; - m->total_malloced = total_malloced; - m->malloced = malloced; - m->highwater = highwater; -} - -int -vinum_mallocinfo(caddr_t data) -{ - struct mc *m = (struct mc *) data; - unsigned int ent = m->seq; /* index of entry to return */ - - if (ent >= malloccount) - return ENOENT; - m->address = malloced[ent].address; - m->size = malloced[ent].size; - m->line = malloced[ent].line; - m->seq = malloced[ent].seq; - strlcpy(m->file, malloced[ent].file, MCFILENAMELEN); - return 0; -} - -/* - * return the nth request trace buffer entry. This - * is indexed back from the current entry (which - * has index 0) - */ -int -vinum_rqinfo(caddr_t data) -{ - struct rqinfo *rq = (struct rqinfo *) data; - int ent = *(int *) data; /* 1st word is index */ - int lastent = rqip - rqinfo; /* entry number of current entry */ - - if (ent >= RQINFO_SIZE) /* out of the table */ - return ENOENT; - if ((ent = lastent - ent - 1) < 0) - ent += RQINFO_SIZE; /* roll over backwards */ - bcopy(&rqinfo[ent], rq, sizeof(struct rqinfo)); - return 0; -} -#endif diff --git a/sys/dev/vinum/vinumobj.h b/sys/dev/vinum/vinumobj.h deleted file mode 100644 index d6a4d87..0000000 --- a/sys/dev/vinum/vinumobj.h +++ /dev/null @@ -1,321 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. 
- * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumobj.h,v 1.7 2003/05/23 01:08:58 grog Exp $ - * $FreeBSD$ - */ - -/* - * Definitions of Vinum objects: drive, subdisk, plex and volume. 
- * This file is included both by userland programs and by kernel code. - * The userland structures are a subset of the kernel structures, and - * all userland fields are at the beginning, so that a simple copy in - * the length of the userland structure will be sufficient. In order - * to perform this copy, vinumioctl must know both structures, so it - * includes this file again with _KERNEL reset. - */

#ifndef _KERNEL
/*
 * Flags for all objects.  Most of them only apply
 * to specific objects, but we currently have
 * space for all in any 32 bit flags word.
 */
enum objflags {
    VF_LOCKED = 1,                          /* somebody has locked access to this object */
    VF_LOCKING = 2,                         /* we want access to this object */
    VF_OPEN = 4,                            /* object has openers */
    VF_WRITETHROUGH = 8,                    /* volume: write through */
    VF_INITED = 0x10,                       /* unit has been initialized */
    VF_WLABEL = 0x20,                       /* label area is writable */
    VF_LABELLING = 0x40,                    /* unit is currently being labelled */
    VF_WANTED = 0x80,                       /* someone is waiting to obtain a lock */
    VF_RAW = 0x100,                         /* raw volume (no file system) */
    VF_LOADED = 0x200,                      /* module is loaded */
    VF_CONFIGURING = 0x400,                 /* somebody is changing the config */
    VF_WILL_CONFIGURE = 0x800,              /* somebody wants to change the config */
    VF_CONFIG_INCOMPLETE = 0x1000,          /* haven't finished changing the config */
    VF_CONFIG_SETUPSTATE = 0x2000,          /* set a volume up if all plexes are empty */
    VF_READING_CONFIG = 0x4000,             /* we're reading config database from disk */
    VF_FORCECONFIG = 0x8000,                /* configure drives even with different names */
    VF_NEWBORN = 0x10000,                   /* for objects: we've just created it */
    VF_CONFIGURED = 0x20000,                /* for drives: we read the config */
    VF_STOPPING = 0x40000,                  /* for vinum_conf: stop on last close */
    VF_DAEMONOPEN = 0x80000,                /* the daemon has us open (only superdev) */
    VF_CREATED = 0x100000,                  /* for volumes: freshly created, more than new */
    VF_HOTSPARE = 0x200000,                 /* for drives: use as hot spare */
    VF_RETRYERRORS = 0x400000,              /* don't down subdisks on I/O errors */
    VF_HASDEBUG = 0x800000,                 /* set if we support debug */
};

#endif

/*
 * Global configuration information for the vinum subsystem.  The
 * structure tag, and the tags of the object structures it points to,
 * depend on whether _KERNEL is defined.
 */
#ifdef _KERNEL
struct _vinum_conf
#else
struct __vinum_conf
#endif
{
    int version;                            /* version of structures */
#ifdef _KERNEL
    /* Pointers to vinum structures */
    struct drive *drive;
    struct sd *sd;
    struct plex *plex;
    struct volume *volume;
#else
    /* Pointers to vinum structures */
    struct _drive *drive;
    struct _sd *sd;
    struct _plex *plex;
    struct _volume *volume;
#endif

    /* the number allocated of each object */
    int drives_allocated;
    int subdisks_allocated;
    int plexes_allocated;
    int volumes_allocated;

    /* and the number currently in use */
    /*
     * Note that drives_used is not valid during drive recognition
     * (vinum_scandisk and friends).  Many invalid drives are added and
     * later removed; the count isn't correct until we leave
     * vinum_scandisk.
     */
    int drives_used;
    int subdisks_used;
    int plexes_used;
    int volumes_used;

    int flags;                              /* see above */

#define VINUM_MAXACTIVE 30000               /* maximum number of active requests */
    int active;                             /* current number of requests outstanding */
    int maxactive;                          /* maximum number of requests ever outstanding */
#ifdef _KERNEL
#ifdef VINUMDEBUG
    struct request *lastrq;
    struct buf *lastbuf;
#endif
#endif
};

/* Use these defines to simplify code */
#define DRIVE vinum_conf.drive
#define SD vinum_conf.sd
#define PLEX vinum_conf.plex
#define VOL vinum_conf.volume
#define VFLAGS vinum_conf.flags

-/* - * A drive corresponds to a disk slice.
We use a different term to show - * the difference in usage: it doesn't have to be a slice, and could - * theoretically be a complete, unpartitioned disk - */ - -#ifdef _KERNEL -struct drive -#else -struct _drive -#endif -{ - char devicename[MAXDRIVENAME]; /* name of the slice it's on */ - struct vinum_label label; /* and the label information */ - enum drivestate state; /* current state */ - int flags; /* flags */ - int subdisks_allocated; /* number of entries in sd */ - int subdisks_used; /* and the number used */ - int blocksize; /* size of fs blocks */ - int pid; /* of locker */ - u_int64_t sectors_available; /* number of sectors still available */ - int secsperblock; - int lasterror; /* last error on drive */ - int driveno; /* index of drive in vinum_conf */ - int opencount; /* number of up subdisks */ - u_int64_t reads; /* number of reads on this drive */ - u_int64_t writes; /* number of writes on this drive */ - u_int64_t bytes_read; /* number of bytes read */ - u_int64_t bytes_written; /* number of bytes written */ -#define DRIVE_MAXACTIVE 30000 /* maximum number of active requests */ - int active; /* current number of requests outstanding */ - int maxactive; /* maximum number of requests ever outstanding */ - int freelist_size; /* number of entries alloced in free list */ - int freelist_entries; /* number of entries used in free list */ - struct drive_freelist *freelist; /* sorted list of free space on drive */ -#ifdef _KERNEL - u_int sectorsize; - off_t mediasize; - struct cdev *dev; /* device information */ -#ifdef VINUMDEBUG - char lockfilename[16]; /* name of file from which we were locked */ - int lockline; /* and the line number */ -#endif -#endif -}; - -#ifdef _KERNEL -struct sd -#else -struct _sd -#endif -{ - char name[MAXSDNAME]; /* name of subdisk */ - enum sdstate state; /* state */ - int flags; - int lasterror; /* last error occurred */ - /* offsets in blocks */ - int64_t driveoffset; /* offset on drive */ - /* - * plexoffset is the offset from 
the beginning - * of the plex to the very first part of the - * subdisk, in sectors. For striped, RAID-4 and - * RAID-5 plexes, only the first stripe is - * located at this offset - */ - int64_t plexoffset; /* offset in plex */ - u_int64_t sectors; /* and length in sectors */ - int sectorsize; /* sector size for DIOCGSECTORSIZE */ - int plexno; /* index of plex, if it belongs */ - int driveno; /* index of the drive on which it is located */ - int sdno; /* our index in vinum_conf */ - int plexsdno; /* and our number in our plex */ - /* (undefined if no plex) */ - u_int64_t reads; /* number of reads on this subdisk */ - u_int64_t writes; /* number of writes on this subdisk */ - u_int64_t bytes_read; /* number of bytes read */ - u_int64_t bytes_written; /* number of bytes written */ - /* revive parameters */ - u_int64_t revived; /* block number of current revive request */ - int revive_blocksize; /* revive block size (bytes) */ - int revive_interval; /* and time to wait between transfers */ - pid_t reviver; /* PID of reviving process */ - /* init parameters */ - u_int64_t initialized; /* block number of current init request */ - int init_blocksize; /* init block size (bytes) */ - int init_interval; /* and time to wait between transfers */ -#ifdef _KERNEL - struct request *waitlist; /* list of requests waiting on revive op */ - struct cdev *dev; /* associated device */ -#endif -}; - -#ifdef _KERNEL -struct plex -#else -struct _plex -#endif -{ - enum plexorg organization; /* Plex organization */ - enum plexstate state; /* and current state */ - u_int64_t length; /* total length of plex (sectors) */ - int flags; - int stripesize; /* size of stripe or raid band, in sectors */ - int sectorsize; /* sector size for DIOCGSECTORSIZE */ - int subdisks; /* number of associated subdisks */ - int subdisks_allocated; /* number of subdisks allocated space for */ - int *sdnos; /* list of component subdisks */ - int plexno; /* index of plex in vinum_conf */ - int volno; /* index of 
volume */ - int volplexno; /* number of plex in volume */ - /* Statistics */ - u_int64_t reads; /* number of reads on this plex */ - u_int64_t writes; /* number of writes on this plex */ - u_int64_t bytes_read; /* number of bytes read */ - u_int64_t bytes_written; /* number of bytes written */ - u_int64_t recovered_reads; /* number of recovered read operations */ - u_int64_t degraded_writes; /* number of degraded writes */ - u_int64_t parityless_writes; /* number of parityless writes */ - u_int64_t multiblock; /* requests that needed more than one block */ - u_int64_t multistripe; /* requests that needed more than one stripe */ - int sddowncount; /* number of subdisks down */ - /* Lock information */ - int usedlocks; /* number currently in use */ - int lockwaits; /* and number of waits for locks */ - off_t checkblock; /* block number for parity op */ - char name[MAXPLEXNAME]; /* name of plex */ -#ifdef _KERNEL - struct rangelock *lock; /* ranges of locked addresses */ - struct mtx *lockmtx; /* lock mutex, one of plexmutex [] */ - daddr_t last_addr; /* last address read from this plex */ - struct cdev *dev; /* associated device */ -#endif -}; - -#ifdef _KERNEL -struct volume -#else -struct _volume -#endif -{ - char name[MAXVOLNAME]; /* name of volume */ - enum volumestate state; /* current state */ - int plexes; /* number of plexes */ - int preferred_plex; /* index of plex to read from, - * -1 for round-robin */ - /* - * index of plex used for last read, for - * round-robin. 
- */ - int last_plex_read; - int volno; /* volume number */ - int flags; /* status and configuration flags */ - int openflags; /* flags supplied to last open(2) */ - u_int64_t size; /* size of volume */ - int blocksize; /* logical block size */ - int sectorsize; /* sector size for DIOCGSECTORSIZE */ - int active; /* number of outstanding requests active */ - int subops; /* and the number of suboperations */ - /* Statistics */ - u_int64_t bytes_read; /* number of bytes read */ - u_int64_t bytes_written; /* number of bytes written */ - u_int64_t reads; /* number of reads on this volume */ - u_int64_t writes; /* number of writes on this volume */ - u_int64_t recovered_reads; /* reads recovered from another plex */ - /* - * Unlike subdisks in the plex, space for the - * plex pointers is static. - */ - int plex[MAXPLEX]; /* index of plexes */ -#ifdef _KERNEL - struct cdev *dev; /* associated device */ -#endif -}; diff --git a/sys/dev/vinum/vinumparser.c b/sys/dev/vinum/vinumparser.c deleted file mode 100644 index 49da34b..0000000 --- a/sys/dev/vinum/vinumparser.c +++ /dev/null @@ -1,236 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumparser.c,v 1.25 2003/05/07 03:33:28 grog Exp grog $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * This file contains the parser for the configuration routines. It's used - * both in the kernel and in the user interface program, thus the separate file. - */ - -/* - * Go through a text and split up into text tokens. These are either non-blank - * sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or - * " characters may be escaped by \, which otherwise has no special meaning. - * - * Delimit by following with a \0, and return pointers to the starts at token []. - * Return the number of tokens found as the return value. - * - * This method has the restriction that a closing " or ' must be followed by - * grey space. 
- * - * Error conditions are end of line before end of quote, or no space after - * a closing quote. In this case, tokenize() returns -1. - */ - -#include <sys/param.h> -#include <dev/vinum/vinumkw.h> -#ifdef _KERNEL -#include <sys/systm.h> -#include <sys/conf.h> -#include <machine/setjmp.h> -/* All this mess for a single struct definition */ -#include <sys/uio.h> -#include <sys/namei.h> -#include <sys/mount.h> - -#include <dev/vinum/vinumvar.h> -#include <dev/vinum/vinumio.h> -#include <dev/vinum/vinumext.h> -#define iswhite(c) ((c == ' ') || (c == '\t')) /* check for white space */ -#else /* userland */ -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <string.h> -#define iswhite isspace /* use the ctype macro */ -#endif - -/* enum keyword is defined in vinumvar.h */ - -#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */ -#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */ -#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x} - -/* Normal keywords. These are all the words that vinum knows. 
*/ -struct _keywords keywords[] = -{keypair(drive), - keypair(partition), - keypair(sd), - keypair(subdisk), - keypair(plex), - keypair(volume), - keypair(vol), - keypair(setupstate), - keypair(readpol), - keypair(org), - keypair(name), - keypair(writethrough), - keypair(writeback), - keypair(device), - keypair(concat), - keypair(raid4), - keypair(raid5), - keypair(striped), - keypair(plexoffset), - keypair(driveoffset), - keypair(length), - keypair(len), - keypair(size), - keypair(state), - keypair(round), - keypair(prefer), - keypair(preferred), - keypair(rename), - keypair(detached), -#ifndef _KERNEL /* for vinum(8) only */ - keypair(debug), - keypair(stripe), - keypair(mirror), -#endif - keypair(attach), - keypair(detach), - keypair(printconfig), - keypair(saveconfig), - keypair(replace), - keypair(create), - keypair(read), - keypair(modify), - keypair(list), - keypair(l), - keypair(ld), - keypair(ls), - keypair(lp), - keypair(lv), - keypair(info), - keypair(set), - keypair(rm), - keypair(mv), - keypair(move), - keypair(init), - keypair(resetconfig), - keypair(start), - keypair(stop), - keypair(makedev), - keypair(help), - keypair(quit), - keypair(setdaemon), - keypair(getdaemon), - keypair(max), - keypair(replace), - keypair(readpol), - keypair(resetstats), - keypair(setstate), - keypair(checkparity), - keypair(rebuildparity), - keypair(dumpconfig), - keypair(retryerrors) -}; -struct keywordset keyword_set = KEYWORDSET(keywords); - -#ifndef _KERNEL -struct _keywords flag_keywords[] = -{flagkeypair(f), - flagkeypair(d), - flagkeypair(v), - flagkeypair(s), - flagkeypair(r), - flagkeypair(w) -}; -struct keywordset flag_set = KEYWORDSET(flag_keywords); - -#endif - -/* - * Take a blank separated list of tokens and turn it into a list of - * individual nul-delimited strings. Build a list of pointers at - * token, which must have enough space for the tokens. Return the - * number of tokens, or -1 on error (typically a missing string - * delimiter). 
- */ -int -tokenize(char *cptr, char *token[], int maxtoken) -{ - char delim; /* delimiter for searching for the partner */ - int tokennr; /* index of this token */ - - for (tokennr = 0; tokennr < maxtoken;) { - while (iswhite(*cptr)) - cptr++; /* skip initial white space */ - if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */ - return tokennr; /* return number of tokens found */ - delim = *cptr; - token[tokennr] = cptr; /* point to it */ - tokennr++; /* one more */ - if (tokennr == maxtoken) /* run off the end? */ - return tokennr; - if ((delim == '\'') || (delim == '"')) { /* delimitered */ - for (;;) { - cptr++; - if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */ - cptr++; /* move on past */ - if (!iswhite(*cptr)) /* error, no space after closing quote */ - return -1; - *cptr++ = '\0'; /* delimit */ - } else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */ - return -1; - } - } else { /* not quoted */ - while ((*cptr != '\0') && (!iswhite(*cptr)) && (*cptr != '\n')) - cptr++; - if (*cptr != '\0') /* not end of the line, */ - *cptr++ = '\0'; /* delimit and move to the next */ - } - } - return maxtoken; /* can't get here */ -} - -/* Find a keyword and return an index */ -enum keyword -get_keyword(char *name, struct keywordset *keywordset) -{ - int i; - struct _keywords *keywords = keywordset->k; /* point to the keywords */ - if (name != NULL) { /* parameter exists */ - for (i = 0; i < keywordset->size; i++) - if (!strcmp(name, keywords[i].name)) - return (enum keyword) keywords[i].keyword; - } - return kw_invalid_keyword; -} diff --git a/sys/dev/vinum/vinumraid5.c b/sys/dev/vinum/vinumraid5.c deleted file mode 100644 index 4bdd64f..0000000 --- a/sys/dev/vinum/vinumraid5.c +++ /dev/null @@ -1,700 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Cybernet Corporation and Nan Yang Computer Services Limited. - * All rights reserved. - * - * This software was developed as part of the NetMAX project. 
- * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Cybernet Corporation - * and Nan Yang Computer Services Limited - * 4. Neither the name of the Companies nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. 
- * - * $Id: vinumraid5.c,v 1.23 2003/02/08 03:32:45 grog Exp $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> -#include <sys/resourcevar.h> - -/* - * Parameters which describe the current transfer. - * These are only used for calculation, but they - * need to be passed to other functions, so it's - * tidier to put them in a struct - */ -struct metrics { - daddr_t stripebase; /* base address of stripe (1st subdisk) */ - int stripeoffset; /* offset in stripe */ - int stripesectors; /* total sectors to transfer in this stripe */ - daddr_t sdbase; /* offset in subdisk of stripe base */ - int sdcount; /* number of disks involved in this transfer */ - daddr_t diskstart; /* remember where this transfer starts */ - int psdno; /* number of parity subdisk */ - int badsdno; /* number of down subdisk, if there is one */ - int firstsdno; /* first data subdisk number */ - /* These correspond to the fields in rqelement, sort of */ - int useroffset; - /* - * Initial offset and length values for the first - * data block - */ - int initoffset; /* start address of block to transfer */ - short initlen; /* length in sectors of data transfer */ - /* Define a normal operation */ - int dataoffset; /* start address of block to transfer */ - int datalen; /* length in sectors of data transfer */ - /* Define a group operation */ - int groupoffset; /* subdisk offset of group operation */ - int grouplen; /* length in sectors of group operation */ - /* Define a normal write operation */ - int writeoffset; /* subdisk offset of normal write */ - int writelen; /* length in sectors of write operation */ - enum xferinfo flags; /* to check what we're doing */ - int rqcount; /* number of elements in request */ -}; - -enum requeststatus bre5(struct request *rq, - int plexno, - daddr_t * diskstart, - daddr_t diskend); -void complete_raid5_write(struct rqelement *); -enum requeststatus build_rq_buffer(struct rqelement *rqe, struct 
plex *plex); -void setrqebounds(struct rqelement *rqe, struct metrics *mp); - -/* - * define the low-level requests needed to perform - * a high-level I/O operation for a specific plex - * 'plexno'. - * - * Return 0 if all subdisks involved in the - * request are up, 1 if some subdisks are not up, - * and -1 if the request is at least partially - * outside the bounds of the subdisks. - * - * Modify the pointer *diskstart to point to the - * end address. On read, return on the first bad - * subdisk, so that the caller - * (build_read_request) can try alternatives. - * - * On entry to this routine, the prq structures - * are not assigned. The assignment is performed - * by expandrq(). Strictly speaking, the elements - * rqe->sdno of all entries should be set to -1, - * since 0 (from bzero) is a valid subdisk number. - * We avoid this problem by initializing the ones - * we use, and not looking at the others (index >= - * prq->requests). - */ -enum requeststatus -bre5(struct request *rq, - int plexno, - daddr_t * diskaddr, - daddr_t diskend) -{ - struct metrics m; /* most of the information */ - struct sd *sd; - struct plex *plex; - struct buf *bp; /* user's bp */ - struct rqgroup *rqg; /* the request group that we will create */ - struct rqelement *rqe; /* point to this request information */ - int rsectors; /* sectors remaining in this stripe */ - int mysdno; /* another sd index in loops */ - int rqno; /* request number */ - - rqg = NULL; /* shut up, damn compiler */ - m.diskstart = *diskaddr; /* start of transfer */ - bp = rq->bp; /* buffer pointer */ - plex = &PLEX[plexno]; /* point to the plex */ - - - while (*diskaddr < diskend) { /* until we get it all sorted out */ - if (*diskaddr >= plex->length) /* beyond the end of the plex */ - return REQUEST_EOF; /* can't continue */ - - m.badsdno = -1; /* no bad subdisk yet */ - - /* Part A: Define the request */ - /* - * First, calculate some sizes: - * The offset of the start address from - * the start of the stripe. 
- */ - m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1)); - - /* - * The plex-relative address of the - * start of the stripe. - */ - m.stripebase = *diskaddr - m.stripeoffset; - - /* subdisk containing the parity stripe */ - if (plex->organization == plex_raid5) - m.psdno = plex->subdisks - 1 - - (*diskaddr / (plex->stripesize * (plex->subdisks - 1))) - % plex->subdisks; - else /* RAID-4 */ - m.psdno = plex->subdisks - 1; - - /* - * The number of the subdisk in which - * the start is located. - */ - m.firstsdno = m.stripeoffset / plex->stripesize; - if (m.firstsdno >= m.psdno) /* at or past parity sd */ - m.firstsdno++; /* increment it */ - - /* - * The offset from the beginning of - * the stripe on this subdisk. - */ - m.initoffset = m.stripeoffset % plex->stripesize; - - /* The offset of the stripe start relative to this subdisk */ - m.sdbase = m.stripebase / (plex->subdisks - 1); - - m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */ - - /* - * The number of sectors to transfer in the - * current (first) subdisk. - */ - m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */ - plex->stripesize - m.initoffset); /* and the amount left in this block */ - - /* - * The number of sectors to transfer in this stripe - * is the minumum of the amount remaining to transfer - * and the amount left in this stripe. - */ - m.stripesectors = min(diskend - *diskaddr, - plex->stripesize * (plex->subdisks - 1) - m.stripeoffset); - - /* The number of data subdisks involved in this request */ - m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize; - - /* Part B: decide what kind of transfer this will be. - - * start and end addresses of the transfer in - * the current block. - * - * There are a number of different kinds of - * transfer, each of which relates to a - * specific subdisk: - * - * 1. Normal read. 
All participating subdisks - * are up, and the transfer can be made - * directly to the user buffer. The bounds - * of the transfer are described by - * m.dataoffset and m.datalen. We have - * already calculated m.initoffset and - * m.initlen, which define the parameters - * for the first data block. - * - * 2. Recovery read. One participating - * subdisk is down. To recover data, all - * the other subdisks, including the parity - * subdisk, must be read. The data is - * recovered by exclusive-oring all the - * other blocks. The bounds of the - * transfer are described by m.groupoffset - * and m.grouplen. - * - * 3. A read request may request reading both - * available data (normal read) and - * non-available data (recovery read). - * This can be a problem if the address - * ranges of the two reads do not coincide: - * in this case, the normal read needs to - * be extended to cover the address range - * of the recovery read, and must thus be - * performed out of malloced memory. - * - * 4. Normal write. All the participating - * subdisks are up. The bounds of the - * transfer are described by m.dataoffset - * and m.datalen. Since these values - * differ for each block, we calculate the - * bounds for the parity block - * independently as the maximum of the - * individual blocks and store these values - * in m.writeoffset and m.writelen. This - * write proceeds in four phases: - * - * i. Read the old contents of each block - * and the parity block. - * ii. ``Remove'' the old contents from - * the parity block with exclusive or. - * iii. ``Insert'' the new contents of the - * block in the parity block, again - * with exclusive or. - * - * iv. Write the new contents of the data - * blocks and the parity block. The data - * block transfers can be made directly from - * the user buffer. - * - * 5. Degraded write where the data block is - * not available. The bounds of the - * transfer are described by m.groupoffset - * and m.grouplen. 
This requires the - * following steps: - * - * i. Read in all the other data blocks, - * excluding the parity block. - * - * ii. Recreate the parity block from the - * other data blocks and the data to be - * written. - * - * iii. Write the parity block. - * - * 6. Parityless write, a write where the - * parity block is not available. This is - * in fact the simplest: just write the - * data blocks. This can proceed directly - * from the user buffer. The bounds of the - * transfer are described by m.dataoffset - * and m.datalen. - * - * 7. Combination of degraded data block write - * and normal write. In this case the - * address ranges of the reads may also - * need to be extended to cover all - * participating blocks. - * - * All requests in a group transfer transfer - * the same address range relative to their - * subdisk. The individual transfers may - * vary, but since our group of requests is - * all in a single slice, we can define a - * range in which they all fall. - * - * In the following code section, we determine - * which kind of transfer we will perform. If - * there is a group transfer, we also decide - * its bounds relative to the subdisks. At - * the end, we have the following values: - * - * m.flags indicates the kinds of transfers - * we will perform. - * m.initoffset indicates the offset of the - * beginning of any data operation relative - * to the beginning of the stripe base. - * m.initlen specifies the length of any data - * operation. - * m.dataoffset contains the same value as - * m.initoffset. - * m.datalen contains the same value as - * m.initlen. Initially dataoffset and - * datalen describe the parameters for the - * first data block; while building the data - * block requests, they are updated for each - * block. - * m.groupoffset indicates the offset of any - * group operation relative to the beginning - * of the stripe base. - * m.grouplen specifies the length of any - * group operation. 
- * m.writeoffset indicates the offset of a - * normal write relative to the beginning of - * the stripe base. This value differs from - * m.dataoffset in that it applies to the - * entire operation, and not just the first - * block. - * m.writelen specifies the total span of a - * normal write operation. writeoffset and - * writelen are used to define the parity - * block. - */ - m.groupoffset = 0; /* assume no group... */ - m.grouplen = 0; /* until we know we have one */ - m.writeoffset = m.initoffset; /* start offset of transfer */ - m.writelen = 0; /* nothing to write yet */ - m.flags = 0; /* no flags yet */ - rsectors = m.stripesectors; /* remaining sectors to examine */ - m.dataoffset = m.initoffset; /* start at the beginning of the transfer */ - m.datalen = m.initlen; - - if (m.sdcount > 1) { - plex->multiblock++; /* more than one block for the request */ - /* - * If we have two transfers that don't overlap, - * (one at the end of the first block, the other - * at the beginning of the second block), - * it's cheaper to split them. - */ - if (rsectors < plex->stripesize) { - m.sdcount = 1; /* just one subdisk */ - m.stripesectors = m.initlen; /* and just this many sectors */ - rsectors = m.initlen; /* and in the loop counter */ - } - } - if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? 
*/ - m.badsdno = m.psdno; /* note that it's down */ - if (bp->b_iocmd == BIO_READ) { /* read operation */ - for (mysdno = m.firstsdno; rsectors > 0; mysdno++) { - if (mysdno == m.psdno) /* ignore parity on read */ - mysdno++; - if (mysdno == plex->subdisks) /* wraparound */ - mysdno = 0; - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - - if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */ - if (m.badsdno >= 0) /* we had one already, */ - return REQUEST_DOWN; /* we can't take a second */ - m.badsdno = mysdno; /* got the first */ - m.groupoffset = m.dataoffset; /* define the bounds */ - m.grouplen = m.datalen; - m.flags |= XFR_RECOVERY_READ; /* we need recovery */ - plex->recovered_reads++; /* count another one */ - } else - m.flags |= XFR_NORMAL_READ; /* normal read */ - - /* Update the pointers for the next block */ - m.dataoffset = 0; /* back to the start of the stripe */ - rsectors -= m.datalen; /* remaining sectors to examine */ - m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */ - } - } else { /* write operation */ - for (mysdno = m.firstsdno; rsectors > 0; mysdno++) { - if (mysdno == m.psdno) /* parity stripe, we've dealt with that */ - mysdno++; - if (mysdno == plex->subdisks) /* wraparound */ - mysdno = 0; - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - - sd = &SD[plex->sdnos[mysdno]]; - if (sd->state != sd_up) { - enum requeststatus s; - - s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ - if (s && (m.badsdno >= 0)) { /* second bad disk, */ - int sdno; - /* - * If the parity disk is down, there's - * no recovery. We make all involved - * subdisks stale. Otherwise, we - * should be able to recover, but it's - * like pulling teeth. Fix it later. 
- */ - for (sdno = 0; sdno < m.sdcount; sdno++) { - struct sd *sd = &SD[plex->sdnos[sdno]]; - if (sd->state >= sd_reborn) /* sort of up, */ - set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */ - } - return s; /* and crap out */ - } - m.badsdno = mysdno; /* note which one is bad */ - m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */ - plex->degraded_writes++; /* count another one */ - m.groupoffset = m.dataoffset; /* define the bounds */ - m.grouplen = m.datalen; - } else { - m.flags |= XFR_NORMAL_WRITE; /* normal write operation */ - if (m.writeoffset > m.dataoffset) { /* move write operation lower */ - m.writelen = max(m.writeoffset + m.writelen, - m.dataoffset + m.datalen) - - m.dataoffset; - m.writeoffset = m.dataoffset; - } else - m.writelen = max(m.writeoffset + m.writelen, - m.dataoffset + m.datalen) - - m.writeoffset; - } - - /* Update the pointers for the next block */ - m.dataoffset = 0; /* back to the start of the stripe */ - rsectors -= m.datalen; /* remaining sectors to examine */ - m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */ - } - if (m.badsdno == m.psdno) { /* got a bad parity block, */ - struct sd *psd = &SD[plex->sdnos[m.psdno]]; - - if (psd->state == sd_down) - set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */ - else if (psd->state == sd_crashed) - set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */ - m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */ - m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */ - plex->parityless_writes++; /* count another one */ - } - } - - /* reset the initial transfer values */ - m.dataoffset = m.initoffset; /* start at the beginning of the transfer */ - m.datalen = m.initlen; - - /* decide how many requests we need */ - if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)) - /* doing a recovery read or degraded write, */ - m.rqcount = plex->subdisks; /* all subdisks */ - else if (m.flags & 
XFR_NORMAL_WRITE) /* normal write, */ - m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */ - else /* parityless write or normal read */ - m.rqcount = m.sdcount; /* just the data blocks */ - - /* Part C: build the requests */ - rqg = allocrqg(rq, m.rqcount); /* get a request group */ - if (rqg == NULL) { /* malloc failed */ - bp->b_error = ENOMEM; - bp->b_ioflags |= BIO_ERROR; - return REQUEST_ENOMEM; - } - rqg->plexno = plexno; - rqg->flags = m.flags; - rqno = 0; /* index in the request group */ - - /* 1: PARITY BLOCK */ - /* - * Are we performing an operation which requires parity? In that case, - * work out the parameters and define the parity block. - * XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE - */ - if (m.flags & XFR_PARITYOP) { /* need parity */ - rqe = &rqg->rqe[rqno]; /* point to element */ - sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */ - rqe->rqg = rqg; /* point back to group */ - rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */ - &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuf */ - setrqebounds(rqe, &m); /* set up the bounds of the transfer */ - rqe->sdno = sd->sdno; /* subdisk number */ - rqe->driveno = sd->driveno; - if (build_rq_buffer(rqe, plex)) /* build the buffer */ - return REQUEST_ENOMEM; /* can't do it */ - rqe->b.b_iocmd = BIO_READ; /* we must read first */ - m.sdcount++; /* adjust the subdisk count */ - rqno++; /* and point to the next request */ - } - /* - * 2: DATA BLOCKS - * Now build up requests for the blocks required - * for individual transfers - */ - for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) { - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - if (mysdno == plex->subdisks) /* got to the end, */ - mysdno = 0; /* wrap around */ - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - - rqe = &rqg->rqe[rqno]; /* point to 
element */ - sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */ - rqe->rqg = rqg; /* point to group */ - if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */ - rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */ - else - rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */ - if (mysdno == m.badsdno) { /* this is the bad subdisk */ - rqg->badsdno = rqno; /* note which one */ - rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */ - /* - * we can't read or write from/to it, - * but we don't need to malloc - */ - rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE); - } - setrqebounds(rqe, &m); /* set up the bounds of the transfer */ - rqe->useroffset = m.useroffset; /* offset in user buffer */ - rqe->sdno = sd->sdno; /* subdisk number */ - rqe->driveno = sd->driveno; - if (build_rq_buffer(rqe, plex)) /* build the buffer */ - return REQUEST_ENOMEM; /* can't do it */ - if ((m.flags & XFR_PARITYOP) /* parity operation, */ - &&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */ - rqe->b.b_iocmd = BIO_READ; /* we must read first */ - - /* Now update pointers for the next block */ - *diskaddr += m.datalen; /* skip past what we've done */ - m.stripesectors -= m.datalen; /* deduct from what's left */ - m.useroffset += m.datalen; /* and move on in the user buffer */ - m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */ - m.dataoffset = 0; /* start at the beginning of next block */ - } - - /* - * 3: REMAINING BLOCKS FOR RECOVERY - * Finally, if we have a recovery operation, build - * up transfers for the other subdisks. Follow the - * subdisks around until we get to where we started. - * These requests use only the group parameters. 
- */ - if ((rqno < m.rqcount) /* haven't done them all already */ - &&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) { - for (; rqno < m.rqcount; rqno++, mysdno++) { - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - if (mysdno == plex->subdisks) /* got to the end, */ - mysdno = 0; /* wrap around */ - if (mysdno == m.psdno) /* parity, */ - mysdno++; /* we've given already */ - - rqe = &rqg->rqe[rqno]; /* point to element */ - sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */ - rqe->rqg = rqg; /* point to group */ - - rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */ - rqe->dataoffset = 0; /* for tidiness' sake */ - rqe->groupoffset = 0; /* group starts at the beginining */ - rqe->datalen = 0; - rqe->grouplen = m.grouplen; - rqe->buflen = m.grouplen; - rqe->flags = (m.flags | XFR_MALLOCED) /* transfer flags without data op stuf */ - &~XFR_DATAOP; - rqe->sdno = sd->sdno; /* subdisk number */ - rqe->driveno = sd->driveno; - if (build_rq_buffer(rqe, plex)) /* build the buffer */ - return REQUEST_ENOMEM; /* can't do it */ - rqe->b.b_iocmd = BIO_READ; /* we must read first */ - } - } - /* - * We need to lock the address range before - * doing anything. We don't have to be - * performing a recovery operation: somebody - * else could be doing so, and the results could - * influence us. Note the fact here, we'll perform - * the lock in launch_requests. - */ - rqg->lockbase = m.stripebase; - if (*diskaddr < diskend) /* didn't finish the request on this stripe */ - plex->multistripe++; /* count another one */ - } - return REQUEST_OK; -} - -/* - * Helper function for rqe5: adjust the bounds of - * the transfers to minimize the buffer - * allocation. - * - * Each request can handle two of three different - * data ranges: - * - * 1. The range described by the parameters - * dataoffset and datalen, for normal read or - * parityless write. - * 2. 
The range described by the parameters - * groupoffset and grouplen, for recovery read - * and degraded write. - * 3. For normal write, the range depends on the - * kind of block. For data blocks, the range - * is defined by dataoffset and datalen. For - * parity blocks, it is defined by writeoffset - * and writelen. - * - * In order not to allocate more memory than - * necessary, this function adjusts the bounds - * parameter for each request to cover just the - * minimum necessary for the function it performs. - * This will normally vary from one request to the - * next. - * - * Things are slightly different for the parity - * block. In this case, the bounds defined by - * mp->writeoffset and mp->writelen also play a - * rôle. Select this case by setting the - * parameter forparity != 0. - */ -void -setrqebounds(struct rqelement *rqe, struct metrics *mp) -{ - /* parity block of a normal write */ - if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) - == (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) { /* case 3 */ - if (rqe->flags & XFR_DEGRADED_WRITE) { /* also degraded write */ - /* - * With a combined normal and degraded write, we - * will zero out the area of the degraded write - * in the second phase, so we don't need to read - * it in. Unfortunately, we need a way to tell - * build_request_buffer the size of the buffer, - * and currently that's the length of the read. - * As a result, we read everything, even the stuff - * that we're going to nuke. 
- * FIXME XXX - */ - if (mp->groupoffset < mp->writeoffset) { /* group operation starts lower */ - rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ - rqe->dataoffset = mp->writeoffset - mp->groupoffset; /* data starts here */ - rqe->groupoffset = 0; /* and the group at the beginning */ - } else { /* individual data starts first */ - rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */ - rqe->dataoffset = 0; /* individual data starts at the beginning */ - rqe->groupoffset = mp->groupoffset - mp->writeoffset; /* group starts here */ - } - rqe->datalen = mp->writelen; - rqe->grouplen = mp->grouplen; - } else { /* just normal write (case 3) */ - rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */ - rqe->dataoffset = 0; /* degradation starts at the beginning */ - rqe->groupoffset = 0; /* for tidiness' sake */ - rqe->datalen = mp->writelen; - rqe->grouplen = 0; - } - } else if (rqe->flags & XFR_DATAOP) { /* data operation (case 1 or 3) */ - if (rqe->flags & XFR_GROUPOP) { /* also a group operation (case 2) */ - if (mp->groupoffset < mp->dataoffset) { /* group operation starts lower */ - rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ - rqe->dataoffset = mp->dataoffset - mp->groupoffset; /* data starts here */ - rqe->groupoffset = 0; /* and the group at the beginning */ - } else { /* individual data starts first */ - rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */ - rqe->dataoffset = 0; /* individual data starts at the beginning */ - rqe->groupoffset = mp->groupoffset - mp->dataoffset; /* group starts here */ - } - rqe->datalen = mp->datalen; - rqe->grouplen = mp->grouplen; - } else { /* just data operation (case 1) */ - rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */ - rqe->dataoffset = 0; /* degradation starts at the beginning */ - rqe->groupoffset = 0; /* for tidiness' sake */ - rqe->datalen = mp->datalen; - rqe->grouplen = 0; - } - } else { /* just 
group operations (case 2) */ - rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ - rqe->dataoffset = 0; /* for tidiness' sake */ - rqe->groupoffset = 0; /* group starts at the beginining */ - rqe->datalen = 0; - rqe->grouplen = mp->grouplen; - } - rqe->buflen = max(rqe->dataoffset + rqe->datalen, /* total buffer length */ - rqe->groupoffset + rqe->grouplen); -} -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumrequest.c b/sys/dev/vinum/vinumrequest.c deleted file mode 100644 index 0915f8c..0000000 --- a/sys/dev/vinum/vinumrequest.c +++ /dev/null @@ -1,1125 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumrequest.c,v 1.69 2003/10/18 17:57:48 phk Exp $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> -#include <sys/resourcevar.h> - -enum requeststatus bre(struct request *rq, - int plexno, - daddr_t * diskstart, - daddr_t diskend); -enum requeststatus bre5(struct request *rq, - int plexno, - daddr_t * diskstart, - daddr_t diskend); -enum requeststatus build_read_request(struct request *rq, int volplexno); -enum requeststatus build_write_request(struct request *rq); -enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex); -int find_alternate_sd(struct request *rq); -int check_range_covered(struct request *); -void complete_rqe(struct buf *bp); -void complete_raid5_write(struct rqelement *); -int abortrequest(struct request *rq, int error); -void sdio_done(struct buf *bp); -int vinum_bounds_check(struct buf *bp, struct volume *vol); -caddr_t allocdatabuf(struct rqelement *rqe); -void freedatabuf(struct rqelement *rqe); - -#ifdef VINUMDEBUG -struct rqinfo rqinfo[RQINFO_SIZE]; -struct rqinfo *rqip = rqinfo; - -void -logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp) -{ - int s = splhigh(); - - 
microtime(&rqip->timestamp); /* when did this happen? */ - rqip->type = type; - rqip->bp = ubp; /* user buffer */ - switch (type) { - case loginfo_user_bp: - case loginfo_user_bpl: - case loginfo_sdio: /* subdisk I/O */ - case loginfo_sdiol: /* subdisk I/O launch */ - case loginfo_sdiodone: /* subdisk I/O complete */ - bcopy(info.bp, &rqip->info.b, sizeof(struct buf)); - rqip->devmajor = major(info.bp->b_dev); - rqip->devminor = minor(info.bp->b_dev); - break; - - case loginfo_iodone: - case loginfo_rqe: - case loginfo_raid5_data: - case loginfo_raid5_parity: - bcopy(info.rqe, &rqip->info.rqe, sizeof(struct rqelement)); - rqip->devmajor = major(info.rqe->b.b_dev); - rqip->devminor = minor(info.rqe->b.b_dev); - break; - - case loginfo_lockwait: - case loginfo_lock: - case loginfo_unlock: - bcopy(info.lockinfo, &rqip->info.lockinfo, sizeof(struct rangelock)); - - break; - - case loginfo_unused: - break; - } - rqip++; - if (rqip >= &rqinfo[RQINFO_SIZE]) /* wrap around */ - rqip = rqinfo; - splx(s); -} - -#endif - -void -vinumstrategy(struct bio *biop) -{ - struct buf *bp = (struct buf *) biop; - int volno; - struct volume *vol = NULL; - - switch (DEVTYPE(bp->b_dev)) { - case VINUM_SD_TYPE: - case VINUM_SD2_TYPE: - sdio(bp); - return; - - default: - bp->b_error = EIO; /* I/O error */ - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return; - - case VINUM_VOLUME_TYPE: /* volume I/O */ - volno = Volno(bp->b_dev); - vol = &VOL[volno]; - if (vol->state != volume_up) { /* can't access this volume */ - bp->b_error = EIO; /* I/O error */ - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return; - } - if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */ - bufdone(bp); - return; - } - /* FALLTHROUGH */ - /* - * Plex I/O is pretty much the same as volume I/O - * for a single plex. 
Indicate this by passing a NULL - * pointer (set above) for the volume - */ - case VINUM_PLEX_TYPE: - bp->b_resid = bp->b_bcount; /* transfer everything */ - vinumstart(bp, 0); - return; - } -} - -/* - * Start a transfer. Return -1 on error, 0 if OK, - * 1 if we need to retry. Parameter reviveok is - * set when doing transfers for revives: it allows - * transfers to be started immediately when a - * revive is in progress. During revive, normal - * transfers are queued if they share address - * space with a currently active revive operation. - */ -int -vinumstart(struct buf *bp, int reviveok) -{ - int plexno; - int maxplex; /* maximum number of plexes to handle */ - struct volume *vol; - struct request *rq; /* build up our request here */ - enum requeststatus status; - -#ifdef VINUMDEBUG - if (debug & DEBUG_LASTREQS) - logrq(loginfo_user_bp, (union rqinfou) bp, bp); -#endif - - if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */ - bp->b_error = EINVAL; /* invalid size */ - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return -1; - } - rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */ - if (rq == NULL) { /* can't do it */ - bp->b_error = ENOMEM; /* can't get memory */ - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return -1; - } - bzero(rq, sizeof(struct request)); - - /* - * Note the volume ID. This can be NULL, which - * the request building functions use as an - * indication for single plex I/O. 
- */ - rq->bp = bp; /* and the user buffer struct */ - - if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */ - rq->volplex.volno = Volno(bp->b_dev); /* get the volume number */ - vol = &VOL[rq->volplex.volno]; /* and point to it */ - vol->active++; /* one more active request */ - maxplex = vol->plexes; /* consider all its plexes */ - } else { - vol = NULL; /* no volume */ - rq->volplex.plexno = Plexno(bp->b_dev); /* point to the plex */ - rq->isplex = 1; /* note that it's a plex */ - maxplex = 1; /* just the one plex */ - } - - if (bp->b_iocmd == BIO_READ) { - /* - * This is a read request. Decide - * which plex to read from. - * - * There's a potential race condition here, - * since we're not locked, and we could end - * up multiply incrementing the round-robin - * counter. This doesn't have any serious - * effects, however. - */ - if (vol != NULL) { - plexno = vol->preferred_plex; /* get the plex to use */ - if (plexno < 0) { /* round robin */ - for (plexno = 0; plexno < vol->plexes; plexno++) - if (abs(bp->b_blkno - PLEX[vol->plex[plexno]].last_addr) <= ROUNDROBIN_SWITCH) - break; - if (plexno >= vol->plexes) { - vol->last_plex_read++; - if (vol->last_plex_read >= vol->plexes) - vol->last_plex_read = 0; - plexno = vol->last_plex_read; - } else { - vol->last_plex_read = plexno; - }; - PLEX[vol->plex[plexno]].last_addr = bp->b_blkno; - } - status = build_read_request(rq, plexno); /* build a request */ - } else { - daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */ - status = bre(rq, /* build a request list */ - rq->volplex.plexno, - &diskaddr, - diskaddr + (bp->b_bcount / DEV_BSIZE)); - } - - if (status > REQUEST_RECOVERED) { /* can't satisfy it */ - if (status == REQUEST_DOWN) { /* not enough subdisks */ - bp->b_error = EIO; /* I/O error */ - bp->b_io.bio_flags |= BIO_ERROR; - } - bufdone(bp); - freerq(rq); - return -1; - } - return launch_requests(rq, reviveok); /* now start the requests if we can */ - } else - /* - * This is a write 
operation. We write to all plexes. If this is - * a RAID-4 or RAID-5 plex, we must also update the parity stripe. - */ - { - if (vol != NULL) { - if ((vol->plexes > 0) /* multiple plex */ - ||(isparity((&PLEX[vol->plex[0]])))) { /* or RAID-[45], */ - rq->save_data = bp->b_data; /* save the data buffer address */ - bp->b_data = Malloc(bp->b_bcount); - bcopy(rq->save_data, bp->b_data, bp->b_bcount); /* make a copy */ - rq->flags |= XFR_COPYBUF; /* and note that we did it */ - } - status = build_write_request(rq); - } else { /* plex I/O */ - daddr_t diskstart; - - diskstart = bp->b_blkno; /* start offset of transfer */ - status = bre(rq, - Plexno(bp->b_dev), - &diskstart, - bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */ - } - if (status > REQUEST_RECOVERED) { /* can't satisfy it */ - if (status == REQUEST_DOWN) { /* not enough subdisks */ - bp->b_error = EIO; /* I/O error */ - bp->b_io.bio_flags |= BIO_ERROR; - } - if (rq->flags & XFR_COPYBUF) { - Free(bp->b_data); - bp->b_data = rq->save_data; - } - bufdone(bp); - freerq(rq); - return -1; - } - return launch_requests(rq, reviveok); /* now start the requests if we can */ - } -} - -/* - * Call the low-level strategy routines to - * perform the requests in a struct request - */ -int -launch_requests(struct request *rq, int reviveok) -{ - struct rqgroup *rqg; - int rqno; /* loop index */ - struct rqelement *rqe; /* current element */ - struct drive *drive; - int rcount; /* request count */ - - /* - * First find out whether we're reviving, and - * the request contains a conflict. If so, we - * hang the request off plex->waitlist of the - * first plex we find which is reviving. 
- */ - - if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */ - &&(!reviveok)) { /* and we don't want to do it now, */ - struct sd *sd; - struct request *waitlist; /* point to the waitlist */ - - sd = &SD[rq->sdno]; - if (sd->waitlist != NULL) { /* something there already, */ - waitlist = sd->waitlist; - while (waitlist->next != NULL) /* find the end */ - waitlist = waitlist->next; - waitlist->next = rq; /* hook our request there */ - } else - sd->waitlist = rq; /* hook our request at the front */ - -#ifdef VINUMDEBUG - if (debug & DEBUG_REVIVECONFLICT) - log(LOG_DEBUG, - "Revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n", - rq->sdno, - rq, - rq->bp->b_iocmd == BIO_READ ? "Read" : "Write", - major(rq->bp->b_dev), - minor(rq->bp->b_dev), - (intmax_t) rq->bp->b_blkno, - rq->bp->b_bcount); -#endif - return 0; /* and get out of here */ - } - rq->active = 0; /* nothing yet */ -#ifdef VINUMDEBUG - /* XXX This is probably due to a bug */ - if (rq->rqg == NULL) { /* no request */ - log(LOG_ERR, "vinum: null rqg\n"); - abortrequest(rq, EINVAL); - return -1; - } -#endif -#ifdef VINUMDEBUG - if (debug & DEBUG_ADDRESSES) - log(LOG_DEBUG, - "Request: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n", - rq, - rq->bp->b_iocmd == BIO_READ ? "Read" : "Write", - major(rq->bp->b_dev), - minor(rq->bp->b_dev), - (intmax_t) rq->bp->b_blkno, - rq->bp->b_bcount); - vinum_conf.lastrq = rq; - vinum_conf.lastbuf = rq->bp; - if (debug & DEBUG_LASTREQS) - logrq(loginfo_user_bpl, (union rqinfou) rq->bp, rq->bp); -#endif - - /* - * We used to have an splbio() here anyway, out - * of superstition. With the division of labour - * below (first count the requests, then issue - * them), it looks as if we don't need this - * splbio() protection. In fact, as dillon - * points out, there's a race condition - * incrementing and decrementing rq->active and - * rqg->active. This splbio() didn't help - * there, because the device strategy routine - * can sleep. 
Solve this by putting shorter - * duration locks on the code. - */ - /* - * This loop happens without any participation - * of the bottom half, so it requires no - * protection. - */ - for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */ - rqg->active = rqg->count; /* they're all active */ - for (rqno = 0; rqno < rqg->count; rqno++) { - rqe = &rqg->rqe[rqno]; - if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */ - rqg->active--; /* one less active request */ - } - if (rqg->active) /* we have at least one active request, */ - rq->active++; /* one more active request group */ - } - - /* - * Now fire off the requests. In this loop the - * bottom half could be completing requests - * before we finish. We avoid splbio() - * protection by ensuring we don't tread in the - * same places that the bottom half does. - */ - for (rqg = rq->rqg; rqg != NULL;) { /* through the whole request chain */ - if (rqg->lockbase >= 0) /* this rqg needs a lock first */ - rqg->lock = lockrange(rqg->lockbase, rqg->rq->bp, &PLEX[rqg->plexno]); - rcount = rqg->count; - for (rqno = 0; rqno < rcount;) { - rqe = &rqg->rqe[rqno]; - - /* - * Point to next rqg before the bottom half - * changes the structures. - */ - if (++rqno >= rcount) - rqg = rqg->next; - if ((rqe->flags & XFR_BAD_SUBDISK) == 0) { /* this subdisk is good, */ - drive = &DRIVE[rqe->driveno]; /* look at drive */ - drive->active++; - if (drive->active >= drive->maxactive) - drive->maxactive = drive->active; - vinum_conf.active++; - if (vinum_conf.active >= vinum_conf.maxactive) - vinum_conf.maxactive = vinum_conf.active; - -#ifdef VINUMDEBUG - if (debug & DEBUG_ADDRESSES) - log(LOG_DEBUG, - " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%jx, length %ld\n", - rqe->b.b_iocmd == BIO_READ ? 
"Read" : "Write", - major(rqe->b.b_dev), - minor(rqe->b.b_dev), - rqe->sdno, - (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), - (intmax_t) rqe->b.b_blkno, - rqe->b.b_bcount); - if (debug & DEBUG_LASTREQS) { - microtime(&rqe->launchtime); /* time we launched this request */ - logrq(loginfo_rqe, (union rqinfou) rqe, rq->bp); - } -#endif - /* fire off the request */ - rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT; - rqe->b.b_iooffset = rqe->b.b_offset; - DEV_STRATEGY(&rqe->b); - } - } - } - return 0; -} - -/* - * define the low-level requests needed to perform a - * high-level I/O operation for a specific plex 'plexno'. - * - * Return REQUEST_OK if all subdisks involved in the request are up, - * REQUEST_DOWN if some subdisks are not up, and REQUEST_EOF if the - * request is at least partially outside the bounds of the subdisks. - * - * Modify the pointer *diskstart to point to the end address. On - * read, return on the first bad subdisk, so that the caller - * (build_read_request) can try alternatives. - * - * On entry to this routine, the rqg structures are not assigned. The - * assignment is performed by expandrq(). Strictly speaking, the - * elements rqe->sdno of all entries should be set to -1, since 0 - * (from bzero) is a valid subdisk number. We avoid this problem by - * initializing the ones we use, and not looking at the others (index - * >= rqg->requests). 
- */ -enum requeststatus -bre(struct request *rq, - int plexno, - daddr_t * diskaddr, - daddr_t diskend) -{ - int sdno; - struct sd *sd; - struct rqgroup *rqg; - struct buf *bp; /* user's bp */ - struct plex *plex; - enum requeststatus status; /* return value */ - daddr_t plexoffset; /* offset of transfer in plex */ - daddr_t stripebase; /* base address of stripe (1st subdisk) */ - daddr_t stripeoffset; /* offset in stripe */ - daddr_t blockoffset; /* offset in stripe on subdisk */ - struct rqelement *rqe; /* point to this request information */ - daddr_t diskstart = *diskaddr; /* remember where this transfer starts */ - enum requeststatus s; /* temp return value */ - - bp = rq->bp; /* buffer pointer */ - status = REQUEST_OK; /* return value: OK until proven otherwise */ - plex = &PLEX[plexno]; /* point to the plex */ - - switch (plex->organization) { - case plex_concat: - sd = NULL; /* (keep compiler quiet) */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; - if (*diskaddr < sd->plexoffset) /* we must have a hole, */ - status = REQUEST_DEGRADED; /* note the fact */ - if (*diskaddr < (sd->plexoffset + sd->sectors)) { /* the request starts in this subdisk */ - rqg = allocrqg(rq, 1); /* space for the request */ - if (rqg == NULL) { /* malloc failed */ - bp->b_error = ENOMEM; - bp->b_io.bio_flags |= BIO_ERROR; - return REQUEST_ENOMEM; - } - rqg->plexno = plexno; - - rqe = &rqg->rqe[0]; /* point to the element */ - rqe->rqg = rqg; /* group */ - rqe->sdno = sd->sdno; /* put in the subdisk number */ - plexoffset = *diskaddr; /* start offset in plex */ - rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */ - rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */ - rqe->dataoffset = 0; - rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */ - sd->sectors - rqe->sdoffset); - rqe->groupoffset = 0; /* no groups for concatenated plexes */ - rqe->grouplen = 0; - 
rqe->buflen = rqe->datalen; /* buffer length is data buffer length */ - rqe->flags = 0; - rqe->driveno = sd->driveno; - if (sd->state != sd_up) { /* *now* we find the sd is down */ - s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ - if (s == REQUEST_DOWN) { /* down? */ - rqe->flags = XFR_BAD_SUBDISK; /* yup */ - if (rq->bp->b_iocmd == BIO_READ) /* read request, */ - return REQUEST_DEGRADED; /* give up here */ - /* - * If we're writing, don't give up - * because of a bad subdisk. Go - * through to the bitter end, but note - * which ones we can't access. - */ - status = REQUEST_DEGRADED; /* can't do it all */ - } - } - *diskaddr += rqe->datalen; /* bump the address */ - if (build_rq_buffer(rqe, plex)) { /* build the buffer */ - deallocrqg(rqg); - bp->b_error = ENOMEM; - bp->b_io.bio_flags |= BIO_ERROR; - return REQUEST_ENOMEM; /* can't do it */ - } - } - if (*diskaddr == diskend) /* we're finished, */ - break; /* get out of here */ - } - /* - * We've got to the end of the plex. Have we got to the end of - * the transfer? It would seem that having an offset beyond the - * end of the subdisk is an error, but in fact it can happen if - * the volume has another plex of different size. There's a valid - * question as to why you would want to do this, but currently - * it's allowed. - * - * In a previous version, I returned REQUEST_DOWN here. I think - * REQUEST_EOF is more appropriate now. - */ - if (diskend > sd->sectors + sd->plexoffset) /* pointing beyond EOF? */ - status = REQUEST_EOF; - break; - - case plex_striped: - { - while (*diskaddr < diskend) { /* until we get it all sorted out */ - if (*diskaddr >= plex->length) /* beyond the end of the plex */ - return REQUEST_EOF; /* can't continue */ - - /* The offset of the start address from the start of the stripe. */ - stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks); - - /* The plex-relative address of the start of the stripe. 
*/ - stripebase = *diskaddr - stripeoffset; - - /* The number of the subdisk in which the start is located. */ - sdno = stripeoffset / plex->stripesize; - - /* The offset from the beginning of the stripe on this subdisk. */ - blockoffset = stripeoffset % plex->stripesize; - - sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */ - rqg = allocrqg(rq, 1); /* space for the request */ - if (rqg == NULL) { /* malloc failed */ - bp->b_error = ENOMEM; - bp->b_io.bio_flags |= BIO_ERROR; - return REQUEST_ENOMEM; - } - rqg->plexno = plexno; - - rqe = &rqg->rqe[0]; /* point to the element */ - rqe->rqg = rqg; - rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */ - rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */ - rqe->dataoffset = 0; - rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */ - plex->stripesize - blockoffset); /* and the amount left in this stripe */ - rqe->groupoffset = 0; /* no groups for striped plexes */ - rqe->grouplen = 0; - rqe->buflen = rqe->datalen; /* buffer length is data buffer length */ - rqe->flags = 0; - rqe->sdno = sd->sdno; /* put in the subdisk number */ - rqe->driveno = sd->driveno; - - if (sd->state != sd_up) { /* *now* we find the sd is down */ - s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ - if (s == REQUEST_DOWN) { /* down? */ - rqe->flags = XFR_BAD_SUBDISK; /* yup */ - if (rq->bp->b_iocmd == BIO_READ) /* read request, */ - return REQUEST_DEGRADED; /* give up here */ - /* - * If we're writing, don't give up - * because of a bad subdisk. Go through - * to the bitter end, but note which - * ones we can't access. - */ - status = REQUEST_DEGRADED; /* can't do it all */ - } - } - /* - * It would seem that having an offset - * beyond the end of the subdisk is an - * error, but in fact it can happen if the - * volume has another plex of different - * size. 
There's a valid question as to why - * you would want to do this, but currently - * it's allowed. - */ - if (rqe->sdoffset + rqe->datalen > sd->sectors) { /* ends beyond the end of the subdisk? */ - rqe->datalen = sd->sectors - rqe->sdoffset; /* truncate */ -#ifdef VINUMDEBUG - if (debug & DEBUG_EOFINFO) { /* tell on the request */ - log(LOG_DEBUG, - "vinum: EOF on plex %s, sd %s offset %x (user offset 0x%jx)\n", - plex->name, - sd->name, - (u_int) sd->sectors, - (intmax_t) bp->b_blkno); - log(LOG_DEBUG, - "vinum: stripebase %#jx, stripeoffset %#jx, blockoffset %#jx\n", - (intmax_t) stripebase, - (intmax_t) stripeoffset, - (intmax_t) blockoffset); - } -#endif - } - if (build_rq_buffer(rqe, plex)) { /* build the buffer */ - deallocrqg(rqg); - bp->b_error = ENOMEM; - bp->b_io.bio_flags |= BIO_ERROR; - return REQUEST_ENOMEM; /* can't do it */ - } - *diskaddr += rqe->datalen; /* look at the remainder */ - if ((*diskaddr < diskend) /* didn't finish the request on this stripe */ - &&(*diskaddr < plex->length)) { /* and there's more to come */ - plex->multiblock++; /* count another one */ - if (sdno == plex->subdisks - 1) /* last subdisk, */ - plex->multistripe++; /* another stripe as well */ - } - } - } - break; - - /* - * RAID-4 and RAID-5 are complicated enough to have their own - * function. - */ - case plex_raid4: - case plex_raid5: - status = bre5(rq, plexno, diskaddr, diskend); - break; - - default: - log(LOG_ERR, "vinum: invalid plex type %d in bre\n", plex->organization); - status = REQUEST_DOWN; /* can't access it */ - } - - return status; -} - -/* - * Build up a request structure for reading volumes. - * This function is not needed for plex reads, since there's - * no recovery if a plex read can't be satisified. 
- */ -enum requeststatus -build_read_request(struct request *rq, /* request */ - int plexindex) -{ /* index in the volume's plex table */ - struct buf *bp; - daddr_t startaddr; /* offset of previous part of transfer */ - daddr_t diskaddr; /* offset of current part of transfer */ - daddr_t diskend; /* and end offset of transfer */ - int plexno; /* plex index in vinum_conf */ - struct rqgroup *rqg; /* point to the request we're working on */ - struct volume *vol; /* volume in question */ - int recovered = 0; /* set if we recover a read */ - enum requeststatus status = REQUEST_OK; - int plexmask; /* bit mask of plexes, for recovery */ - - bp = rq->bp; /* buffer pointer */ - diskaddr = bp->b_blkno; /* start offset of transfer */ - diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */ - rqg = &rq->rqg[plexindex]; /* plex request */ - vol = &VOL[rq->volplex.volno]; /* point to volume */ - - while (diskaddr < diskend) { /* build up request components */ - startaddr = diskaddr; - status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */ - switch (status) { - case REQUEST_OK: - continue; - - case REQUEST_RECOVERED: - /* - * XXX FIXME if we have more than one plex, and we can - * satisfy the request from another, don't use the - * recovered request, since it's more expensive. - */ - recovered = 1; - break; - - case REQUEST_ENOMEM: - return status; - /* - * If we get here, our request is not complete. Try - * to fill in the missing parts from another plex. - * This can happen multiple times in this function, - * and we reinitialize the plex mask each time, since - * we could have a hole in our plexes. 
- */ - case REQUEST_EOF: - case REQUEST_DOWN: /* can't access the plex */ - case REQUEST_DEGRADED: /* can't access the plex */ - plexmask = ((1 << vol->plexes) - 1) /* all plexes in the volume */ - &~(1 << plexindex); /* except for the one we were looking at */ - for (plexno = 0; plexno < vol->plexes; plexno++) { - if (plexmask == 0) /* no plexes left to try */ - return REQUEST_DOWN; /* failed */ - diskaddr = startaddr; /* start at the beginning again */ - if (plexmask & (1 << plexno)) { /* we haven't tried this plex yet */ - bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */ - if (diskaddr > startaddr) { /* we satisfied another part */ - recovered = 1; /* we recovered from the problem */ - status = REQUEST_OK; /* don't complain about it */ - break; - } - } - } - if (diskaddr == startaddr) /* didn't get any further, */ - return status; - } - if (recovered) - vol->recovered_reads += recovered; /* adjust our recovery count */ - } - return status; -} - -/* - * Build up a request structure for writes. - * Return 0 if all subdisks involved in the request are up, 1 if some - * subdisks are not up, and -1 if the request is at least partially - * outside the bounds of the subdisks. - */ -enum requeststatus -build_write_request(struct request *rq) -{ /* request */ - struct buf *bp; - daddr_t diskstart; /* offset of current part of transfer */ - daddr_t diskend; /* and end offset of transfer */ - int plexno; /* plex index in vinum_conf */ - struct volume *vol; /* volume in question */ - enum requeststatus status; - - bp = rq->bp; /* buffer pointer */ - vol = &VOL[rq->volplex.volno]; /* point to volume */ - diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */ - status = REQUEST_DOWN; /* assume the worst */ - for (plexno = 0; plexno < vol->plexes; plexno++) { - diskstart = bp->b_blkno; /* start offset of transfer */ - /* - * Build requests for the plex. 
- * We take the best possible result here (min, - * not max): we're happy if we can write at all - */ - status = min(status, bre(rq, - vol->plex[plexno], - &diskstart, - diskend)); - } - return status; -} - -/* Fill in the struct buf part of a request element. */ -enum requeststatus -build_rq_buffer(struct rqelement *rqe, struct plex *plex) -{ - struct sd *sd; /* point to subdisk */ - struct volume *vol; - struct buf *bp; - struct buf *ubp; /* user (high level) buffer header */ - - vol = &VOL[rqe->rqg->rq->volplex.volno]; - sd = &SD[rqe->sdno]; /* point to subdisk */ - bp = &rqe->b; - ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */ - - /* Initialize the buf struct */ - /* copy these flags from user bp */ - bp->b_flags = ubp->b_flags & (B_NOCACHE | B_ASYNC); - bp->b_io.bio_flags = 0; - bp->b_iocmd = ubp->b_iocmd; -#ifdef VINUMDEBUG - if (rqe->flags & XFR_BUFLOCKED) /* paranoia */ - panic("build_rq_buffer: rqe already locked"); /* XXX remove this when we're sure */ -#endif - BUF_LOCKINIT(bp); /* get a lock for the buffer */ - BUF_LOCK(bp, LK_EXCLUSIVE, NULL); /* and lock it */ - BUF_KERNPROC(bp); - rqe->flags |= XFR_BUFLOCKED; - bp->b_iodone = complete_rqe; - /* - * You'd think that we wouldn't need to even - * build the request buffer for a dead subdisk, - * but in some cases we need information like - * the user buffer address. Err on the side of - * generosity and supply what we can. That - * obviously doesn't include drive information - * when the drive is dead. 
- */ - if ((rqe->flags & XFR_BAD_SUBDISK) == 0) /* subdisk is accessible, */ - bp->b_dev = DRIVE[rqe->driveno].dev; /* drive device */ - bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */ - bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */ - bp->b_resid = bp->b_bcount; /* and it's still all waiting */ - bp->b_bufsize = bp->b_bcount; /* and buffer size */ - bp->b_rcred = FSCRED; /* we have the file system credentials */ - bp->b_wcred = FSCRED; /* we have the file system credentials */ - - if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */ - bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */ - if (bp->b_data == NULL) { /* failed */ - abortrequest(rqe->rqg->rq, ENOMEM); - return REQUEST_ENOMEM; /* no memory */ - } - } else - /* - * Point directly to user buffer data. This means - * that we don't need to do anything when we have - * finished the transfer - */ - bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE; - /* - * On a recovery read, we perform an XOR of - * all blocks to the user buffer. To make - * this work, we first clean out the buffer - */ - if ((rqe->flags & (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) - == (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) { /* bad subdisk of a recovery read */ - int length = rqe->grouplen << DEV_BSHIFT; /* and count involved */ - char *data = (char *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* destination */ - - bzero(data, length); /* clean it out */ - } - return 0; -} - -/* - * Abort a request: free resources and complete the - * user request with the specified error - */ -int -abortrequest(struct request *rq, int error) -{ - struct buf *bp = rq->bp; /* user buffer */ - - bp->b_error = error; - freerq(rq); /* free everything we're doing */ - bp->b_io.bio_flags |= BIO_ERROR; - return error; /* and give up */ -} - -/* - * Check that our transfer will cover the - * complete address space of the user request. 
- * - * Return 1 if it can, otherwise 0 - */ -int -check_range_covered(struct request *rq) -{ - return 1; -} - -/* Perform I/O on a subdisk */ -void -sdio(struct buf *bp) -{ - int s; /* spl */ - struct sd *sd; - struct sdbuf *sbp; - daddr_t endoffset; - struct drive *drive; - -#ifdef VINUMDEBUG - if (debug & DEBUG_LASTREQS) - logrq(loginfo_sdio, (union rqinfou) bp, bp); -#endif - sd = &SD[Sdno(bp->b_dev)]; /* point to the subdisk */ - drive = &DRIVE[sd->driveno]; - - if (drive->state != drive_up) { - if (sd->state >= sd_crashed) { - if (bp->b_iocmd == BIO_WRITE) /* writing, */ - set_sd_state(sd->sdno, sd_stale, setstate_force); - else - set_sd_state(sd->sdno, sd_crashed, setstate_force); - } - bp->b_error = EIO; - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return; - } - /* - * We allow access to any kind of subdisk as long as we can expect - * to get the I/O performed. - */ - if (sd->state < sd_empty) { /* nothing to talk to, */ - bp->b_error = EIO; - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return; - } - /* Get a buffer */ - sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf)); - if (sbp == NULL) { - bp->b_error = ENOMEM; - bp->b_io.bio_flags |= BIO_ERROR; - bufdone(bp); - return; - } - bzero(sbp, sizeof(struct sdbuf)); /* start with nothing */ - sbp->b.b_flags = bp->b_flags; - sbp->b.b_iocmd = bp->b_iocmd; - sbp->b.b_bufsize = bp->b_bcount; /* buffer size */ - sbp->b.b_bcount = bp->b_bcount; /* number of bytes to transfer */ - sbp->b.b_resid = bp->b_resid; /* and amount waiting */ - sbp->b.b_dev = DRIVE[sd->driveno].dev; /* device */ - sbp->b.b_data = bp->b_data; /* data buffer */ - sbp->b.b_blkno = bp->b_blkno + sd->driveoffset; - sbp->b.b_iodone = sdio_done; /* come here on completion */ - BUF_LOCKINIT(&sbp->b); /* get a lock for the buffer */ - BUF_LOCK(&sbp->b, LK_EXCLUSIVE, NULL); /* and lock it */ - BUF_KERNPROC(&sbp->b); - sbp->bp = bp; /* note the address of the original header */ - sbp->sdno = sd->sdno; /* note for statistics */ - 
sbp->driveno = sd->driveno; - endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */ - if (endoffset > sd->sectors) { /* beyond the end */ - sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */ - if (sbp->b.b_bcount <= 0) { /* nothing to transfer */ - bp->b_resid = bp->b_bcount; /* nothing transferred */ - bufdone(bp); - BUF_UNLOCK(&sbp->b); - BUF_LOCKFREE(&sbp->b); - Free(sbp); - return; - } - } -#ifdef VINUMDEBUG - if (debug & DEBUG_ADDRESSES) - log(LOG_DEBUG, - " %s dev %d.%d, sd %d, offset 0x%jx, devoffset 0x%jx, length %ld\n", - sbp->b.b_iocmd == BIO_READ ? "Read" : "Write", - major(sbp->b.b_dev), - minor(sbp->b.b_dev), - sbp->sdno, - (intmax_t) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset), - (intmax_t) sbp->b.b_blkno, - sbp->b.b_bcount); -#endif - s = splbio(); -#ifdef VINUMDEBUG - if (debug & DEBUG_LASTREQS) - logrq(loginfo_sdiol, (union rqinfou) &sbp->b, &sbp->b); -#endif - sbp->b.b_offset = sbp->b.b_blkno << DEV_BSHIFT; - sbp->b.b_iooffset = sbp->b.b_offset; - DEV_STRATEGY(&sbp->b); - splx(s); -} - -/* - * Simplified version of bounds_check_with_label - * Determine the size of the transfer, and make sure it is - * within the boundaries of the partition. Adjust transfer - * if needed, and signal errors or early completion. - * - * Volumes are simpler than disk slices: they only contain - * one component (though we call them a, b and c to make - * system utilities happy), and they always take up the - * complete space of the "partition". - * - * I'm still not happy with this: why should the label be - * protected? If it weren't so damned difficult to write - * one in the first pleace (because it's protected), it wouldn't - * be a problem. 
- */ -int -vinum_bounds_check(struct buf *bp, struct volume *vol) -{ - int64_t maxsize = vol->size; /* size of the partition (sectors) */ - int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */ - -#ifdef LABELSECTOR - /* Would this transfer overwrite the disk label? */ - if (bp->b_blkno <= LABELSECTOR /* starts before or at the label */ -#if LABELSECTOR != 0 - && bp->b_blkno + size > LABELSECTOR /* and finishes after */ -#endif - && (bp->b_iocmd == BIO_WRITE) /* and it's a write */ - &&(!vol->flags & (VF_WLABEL | VF_LABELLING))) { /* and we're not allowed to write the label */ - bp->b_error = EROFS; /* read-only */ - bp->b_io.bio_flags |= BIO_ERROR; - return -1; - } -#endif - if (size == 0) /* no transfer specified, */ - return 0; /* treat as EOF */ - /* beyond partition? */ - if (bp->b_blkno < 0 /* negative start */ - || bp->b_blkno + size > maxsize) { /* or goes beyond the end of the partition */ - /* if exactly at end of disk, return an EOF */ - if (bp->b_blkno == maxsize) { - bp->b_resid = bp->b_bcount; - return 0; - } - /* or truncate if part of it fits */ - size = maxsize - bp->b_blkno; - if (size <= 0) { /* nothing to transfer */ - bp->b_error = EINVAL; - bp->b_io.bio_flags |= BIO_ERROR; - return -1; - } - bp->b_bcount = size << DEV_BSHIFT; - } - return 1; -} - -/* - * Allocate a request group and hook - * it in in the list for rq - */ -struct rqgroup * -allocrqg(struct request *rq, int elements) -{ - struct rqgroup *rqg; /* the one we're going to allocate */ - int size = sizeof(struct rqgroup) + elements * sizeof(struct rqelement); - - rqg = (struct rqgroup *) Malloc(size); - if (rqg != NULL) { /* malloc OK, */ - if (rq->rqg) /* we already have requests */ - rq->lrqg->next = rqg; /* hang it off the end */ - else /* first request */ - rq->rqg = rqg; /* at the start */ - rq->lrqg = rqg; /* this one is the last in the list */ - - bzero(rqg, size); /* no old junk */ - rqg->rq = rq; /* point back to the parent request */ - 
rqg->count = elements; /* number of requests in the group */ - rqg->lockbase = -1; /* no lock required yet */ - } - return rqg; -} - -/* - * Deallocate a request group out of a chain. We do - * this by linear search: the chain is short, this - * almost never happens, and currently it can only - * happen to the first member of the chain. - */ -void -deallocrqg(struct rqgroup *rqg) -{ - struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */ - - if (rqg->lock) /* got a lock? */ - unlockrange(rqg->plexno, rqg->lock); /* yes, free it */ - if (rqgc == rqg) /* we're first in line */ - rqg->rq->rqg = rqg->next; /* unhook ourselves */ - else { - while ((rqgc->next != NULL) /* find the group */ - &&(rqgc->next != rqg)) - rqgc = rqgc->next; - if (rqgc->next == NULL) - log(LOG_ERR, - "vinum deallocrqg: rqg %p not found in request %p\n", - rqg->rq, - rqg); - else - rqgc->next = rqg->next; /* make the chain jump over us */ - } - Free(rqg); -} - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumrevive.c b/sys/dev/vinum/vinumrevive.c deleted file mode 100644 index 59a91a9..0000000 --- a/sys/dev/vinum/vinumrevive.c +++ /dev/null @@ -1,620 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumrevive.c,v 1.19 2003/05/08 04:34:47 grog Exp grog $ - */ - -#include <sys/cdefs.h> - -__FBSDID("$FreeBSD$"); -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -/* - * Revive a block of a subdisk. Return an error - * indication. EAGAIN means successful copy, but - * that more blocks remain to be copied. EINVAL - * means that the subdisk isn't associated with a - * plex (which means a programming error if we get - * here at all; FIXME). 
- */ - -int -revive_block(int sdno) -{ - int s; /* priority level */ - struct sd *sd; - struct plex *plex; - struct volume *vol; - struct buf *bp; - int error = EAGAIN; - int size; /* size of revive block, bytes */ - daddr_t plexblkno; /* lblkno in plex */ - int psd; /* parity subdisk number */ - u_int64_t stripe; /* stripe number */ - int paritysd = 0; /* set if this is the parity stripe */ - struct rangelock *lock; /* for locking */ - daddr_t stripeoffset; /* offset in stripe */ - - plexblkno = 0; /* to keep the compiler happy */ - sd = &SD[sdno]; - lock = NULL; - if (sd->plexno < 0) /* no plex? */ - return EINVAL; - plex = &PLEX[sd->plexno]; /* point to plex */ - if (plex->volno >= 0) - vol = &VOL[plex->volno]; - else - vol = NULL; - - if ((sd->revive_blocksize == 0) /* no block size */ - ||(sd->revive_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */ - sd->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE; - else if (sd->revive_blocksize > MAX_REVIVE_BLOCKSIZE) - sd->revive_blocksize = MAX_REVIVE_BLOCKSIZE; - size = min(sd->revive_blocksize >> DEV_BSHIFT, sd->sectors - sd->revived) << DEV_BSHIFT; - sd->reviver = curproc->p_pid; /* note who last had a bash at it */ - - /* Now decide where to read from */ - switch (plex->organization) { - case plex_concat: - plexblkno = sd->revived + sd->plexoffset; /* corresponding address in plex */ - break; - - case plex_striped: - stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */ - if (stripeoffset + (size >> DEV_BSHIFT) > plex->stripesize) - size = (plex->stripesize - stripeoffset) << DEV_BSHIFT; - plexblkno = sd->plexoffset /* base */ - + (sd->revived - stripeoffset) * plex->subdisks /* offset to beginning of stripe */ - + stripeoffset; /* offset from beginning of stripe */ - break; - - case plex_raid4: - case plex_raid5: - stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */ - plexblkno = sd->plexoffset /* base */ - + (sd->revived - stripeoffset) 
* (plex->subdisks - 1) /* offset to beginning of stripe */ - +stripeoffset; /* offset from beginning of stripe */ - stripe = (sd->revived / plex->stripesize); /* stripe number */ - - /* Make sure we don't go beyond the end of the band. */ - size = min(size, (plex->stripesize - stripeoffset) << DEV_BSHIFT); - if (plex->organization == plex_raid4) - psd = plex->subdisks - 1; /* parity subdisk for this stripe */ - else - psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */ - paritysd = plex->sdnos[psd] == sdno; /* note if it's the parity subdisk */ - - /* - * Now adjust for the strangenesses - * in RAID-4 and RAID-5 striping. - */ - if (sd->plexsdno > psd) /* beyond the parity stripe, */ - plexblkno -= plex->stripesize; /* one stripe less */ - else if (paritysd) - plexblkno -= plex->stripesize * sd->plexsdno; /* go back to the beginning of the band */ - break; - - case plex_disorg: /* to keep the compiler happy */ - break; /* to keep the pedants happy */ - } - - if (paritysd) { /* we're reviving a parity block, */ - bp = parityrebuild(plex, sd->revived, size, rebuildparity, &lock, NULL); /* do the grunt work */ - if (bp == NULL) /* no buffer space */ - return ENOMEM; /* chicken out */ - } else { /* data block */ - s = splbio(); - bp = geteblk(size); /* Get a buffer */ - splx(s); - if (bp == NULL) - return ENOMEM; - - /* - * Amount to transfer: block size, unless it - * would overlap the end. - */ - bp->b_bcount = size; - bp->b_resid = bp->b_bcount; - bp->b_blkno = plexblkno; /* start here */ - if (isstriped(plex)) /* we need to lock striped plexes */ - lock = lockrange(plexblkno << DEV_BSHIFT, bp, plex); /* lock it */ - if (vol != NULL) /* it's part of a volume, */ - /* - * First, read the data from the volume. We - * don't care which plex, that's bre's job. 
- */ - bp->b_dev = VOL[plex->volno].dev; /* create the device number */ - else /* it's an unattached plex */ - bp->b_dev = PLEX[sd->plexno].dev; /* create the device number */ - - bp->b_iocmd = BIO_READ; /* either way, read it */ - bp->b_flags = 0; - vinumstart(bp, 1); - bufwait(bp); - } - - if (bp->b_ioflags & BIO_ERROR) { - error = bp->b_error; - if (lock) /* we took a lock, */ - unlockrange(sd->plexno, lock); /* give it back */ - } else - /* Now write to the subdisk */ - { - bp->b_dev = SD[sdno].dev; /* create the device number */ - bp->b_flags &= ~B_DONE; /* no longer done */ - bp->b_ioflags = 0; - bp->b_iocmd = BIO_WRITE; - bp->b_resid = bp->b_bcount; - bp->b_blkno = sd->revived; /* write it to here */ - sdio(bp); /* perform the I/O */ - bufwait(bp); - if (bp->b_ioflags & BIO_ERROR) - error = bp->b_error; - else { - sd->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */ - if (sd->revived >= sd->sectors) { /* finished */ - sd->revived = 0; - set_sd_state(sdno, sd_up, setstate_force); /* bring the sd up */ - log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state)); - save_config(); /* and save the updated configuration */ - error = 0; /* we're done */ - } - } - if (lock) /* we took a lock, */ - unlockrange(sd->plexno, lock); /* give it back */ - while (sd->waitlist) { /* we have waiting requests */ -#ifdef VINUMDEBUG - struct request *rq = sd->waitlist; - - if (debug & DEBUG_REVIVECONFLICT) - log(LOG_DEBUG, - "Relaunch revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n", - rq->sdno, - rq, - rq->bp->b_iocmd == BIO_READ ? "Read" : "Write", - major(rq->bp->b_dev), - minor(rq->bp->b_dev), - (intmax_t) rq->bp->b_blkno, - rq->bp->b_bcount); -#endif - launch_requests(sd->waitlist, 1); /* do them now */ - sd->waitlist = sd->waitlist->next; /* and move on to the next */ - } - } - if (bp->b_qindex == 0) { /* not on a queue, */ - bp->b_flags |= B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - brelse(bp); /* is this kosher? 
*/ - } - return error; -} - -/* - * Check or rebuild the parity blocks of a RAID-4 - * or RAID-5 plex. - * - * The variables plex->checkblock and - * plex->rebuildblock represent the - * subdisk-relative address of the stripe we're - * looking at, not the plex-relative address. We - * store it in the plex and not as a local - * variable because this function could be - * stopped, and we don't want to repeat the part - * we've already done. This is also the reason - * why we don't initialize it here except at the - * end. It gets initialized with the plex on - * creation. - * - * Each call to this function processes at most - * one stripe. We can't loop in this function, - * because we're unstoppable, so we have to be - * called repeatedly from userland. - */ -void -parityops(struct vinum_ioctl_msg *data) -{ - int plexno; - struct plex *plex; - int size; /* I/O transfer size, bytes */ - int stripe; /* stripe number in plex */ - int psd; /* parity subdisk number */ - struct rangelock *lock; /* lock on stripe */ - struct _ioctl_reply *reply; - off_t pstripe; /* pointer to our stripe counter */ - struct buf *pbp; - off_t errorloc; /* offset of parity error */ - enum parityop op; /* operation to perform */ - - plexno = data->index; - op = data->op; - pbp = NULL; - reply = (struct _ioctl_reply *) data; - reply->error = EAGAIN; /* expect to repeat this call */ - plex = &PLEX[plexno]; - if (!isparity(plex)) { /* not RAID-4 or RAID-5 */ - reply->error = EINVAL; - return; - } else if (plex->state < plex_flaky) { - reply->error = EIO; - strcpy(reply->msg, "Plex is not completely accessible\n"); - return; - } - pstripe = data->offset; - stripe = pstripe / plex->stripesize; /* stripe number */ - psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */ - size = min(DEFAULT_REVIVE_BLOCKSIZE, /* one block at a time */ - plex->stripesize << DEV_BSHIFT); - - pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */ - if 
(pbp == NULL) { /* no buffer space */ - reply->error = ENOMEM; - return; /* chicken out */ - } - /* - * Now we have a result in the data buffer of - * the parity buffer header, which we have kept. - * Decide what to do with it. - */ - reply->msg[0] = '\0'; /* until shown otherwise */ - if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */ - if ((op == rebuildparity) - || (op == rebuildandcheckparity)) { - pbp->b_iocmd = BIO_WRITE; - pbp->b_resid = pbp->b_bcount; - sdio(pbp); /* write the parity block */ - bufwait(pbp); - } - if (((op == checkparity) - || (op == rebuildandcheckparity)) - && (errorloc != -1)) { - if (op == checkparity) - reply->error = EIO; - sprintf(reply->msg, - "Parity incorrect at offset 0x%jx\n", - (intmax_t) errorloc); - } - if (reply->error == EAGAIN) { /* still OK, */ - plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */ - if (plex->checkblock >= SD[plex->sdnos[0]].sectors) { /* finished */ - plex->checkblock = 0; - reply->error = 0; - } - } - } - if (pbp->b_ioflags & BIO_ERROR) - reply->error = pbp->b_error; - pbp->b_flags |= B_INVAL; - pbp->b_ioflags &= ~BIO_ERROR; - brelse(pbp); - unlockrange(plexno, lock); -} - -/* - * Rebuild a parity stripe. Return pointer to - * parity bp. On return, - * - * 1. The band is locked. The caller must unlock - * the band and release the buffer header. - * - * 2. All buffer headers except php have been - * released. The caller must release pbp. - * - * 3. For checkparity and rebuildandcheckparity, - * the parity is compared with the current - * parity block. If it's different, the - * offset of the error is returned to - * errorloc. The caller can set the value of - * the pointer to NULL if this is called for - * rebuilding parity. - * - * pstripe is the subdisk-relative base address of - * the data to be reconstructed, size is the size - * of the transfer in bytes. 
- */ -struct buf * -parityrebuild(struct plex *plex, - u_int64_t pstripe, - int size, - enum parityop op, - struct rangelock **lockp, - off_t * errorloc) -{ - int error; - int s; - int sdno; - u_int64_t stripe; /* stripe number */ - int *parity_buf; /* buffer address for current parity block */ - int *newparity_buf; /* and for new parity block */ - int mysize; /* I/O transfer size for this transfer */ - int isize; /* mysize in ints */ - int i; - int psd; /* parity subdisk number */ - int newpsd; /* and "subdisk number" of new parity */ - struct buf **bpp; /* pointers to our bps */ - struct buf *pbp; /* buffer header for parity stripe */ - int *sbuf; - int bufcount; /* number of buffers we need */ - - stripe = pstripe / plex->stripesize; /* stripe number */ - psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */ - parity_buf = NULL; /* to keep the compiler happy */ - error = 0; - - /* - * It's possible that the default transfer size - * we chose is not a factor of the stripe size. - * We *must* limit this operation to a single - * stripe, at least for RAID-5 rebuild, since - * the parity subdisk changes between stripes, - * so in this case we need to perform a short - * transfer. Set variable mysize to reflect - * this. - */ - mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT); - isize = mysize / (sizeof(int)); /* number of ints in the buffer */ - bufcount = plex->subdisks + 1; /* sd buffers plus result buffer */ - newpsd = plex->subdisks; - bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */ - - /* First, build requests for all subdisks */ - for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */ - if ((sdno != psd) || (op != rebuildparity)) { - /* Get a buffer header and initialize it. 
*/ - s = splbio(); - bpp[sdno] = geteblk(mysize); /* Get a buffer */ - if (bpp[sdno] == NULL) { - while (sdno-- > 0) { /* release the ones we got */ - bpp[sdno]->b_flags |= B_INVAL; - brelse(bpp[sdno]); /* give back our resources */ - } - splx(s); - printf("vinum: can't allocate buffer space for parity op.\n"); - return NULL; /* no bpps */ - } - splx(s); - if (sdno == psd) - parity_buf = (int *) bpp[sdno]->b_data; - if (sdno == newpsd) /* the new one? */ - bpp[sdno]->b_dev = SD[plex->sdnos[psd]].dev; /* write back to the parity SD */ - else - bpp[sdno]->b_dev = SD[plex->sdnos[sdno]].dev; /* device number */ - bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */ - bpp[sdno]->b_flags = 0; - bpp[sdno]->b_bcount = mysize; - bpp[sdno]->b_resid = bpp[sdno]->b_bcount; - bpp[sdno]->b_blkno = pstripe; /* transfer from here */ - } - } - - /* Initialize result buffer */ - pbp = bpp[newpsd]; - newparity_buf = (int *) bpp[newpsd]->b_data; - bzero(newparity_buf, mysize); - - /* - * Now lock the stripe with the first non-parity - * bp as locking bp. - */ - *lockp = lockrange(pstripe * plex->stripesize * (plex->subdisks - 1), - bpp[psd ? 0 : 1], - plex); - - /* - * Then issue requests for all subdisks in - * parallel. Don't transfer the parity stripe - * if we're rebuilding parity, unless we also - * want to check it. - */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */ - if ((sdno != psd) || (op != rebuildparity)) { - sdio(bpp[sdno]); - } - } - - /* - * Next, wait for the requests to complete. - * We wait in the order in which they were - * issued, which isn't necessarily the order in - * which they complete, but we don't have a - * convenient way of doing the latter, and the - * delay is minimal. 
- */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - if ((sdno != psd) || (op != rebuildparity)) { - bufwait(bpp[sdno]); - if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */ - error = bpp[sdno]->b_error; - else if (sdno != psd) { /* update parity */ - sbuf = (int *) bpp[sdno]->b_data; - for (i = 0; i < isize; i++) - ((int *) newparity_buf)[i] ^= sbuf[i]; /* xor in the buffer */ - } - } - if (sdno != psd) { /* release all bps except parity */ - bpp[sdno]->b_flags |= B_INVAL; - brelse(bpp[sdno]); /* give back our resources */ - } - } - - /* - * If we're checking, compare the calculated - * and the read parity block. If they're - * different, return the plex-relative offset; - * otherwise return -1. - */ - if ((op == checkparity) - || (op == rebuildandcheckparity)) { - *errorloc = -1; /* no error yet */ - for (i = 0; i < isize; i++) { - if (parity_buf[i] != newparity_buf[i]) { - *errorloc = (off_t) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1) - + i * sizeof(int); - break; - } - } - bpp[psd]->b_flags |= B_INVAL; - brelse(bpp[psd]); /* give back our resources */ - } - /* release our resources */ - Free(bpp); - if (error) { - pbp->b_ioflags |= BIO_ERROR; - pbp->b_error = error; - } - return pbp; -} - -/* - * Initialize a subdisk by writing zeroes to the - * complete address space. If verify is set, - * check each transfer for correctness. - * - * Each call to this function writes (and maybe - * checks) a single block. - */ -int -initsd(int sdno, int verify) -{ - int s; /* priority level */ - struct sd *sd; - struct plex *plex; - struct volume *vol; - struct buf *bp; - int error; - int size; /* size of init block, bytes */ - daddr_t plexblkno; /* lblkno in plex */ - int verified; /* set when we're happy with what we wrote */ - - error = 0; - plexblkno = 0; /* to keep the compiler happy */ - sd = &SD[sdno]; - if (sd->plexno < 0) /* no plex? 
*/ - return EINVAL; - plex = &PLEX[sd->plexno]; /* point to plex */ - if (plex->volno >= 0) - vol = &VOL[plex->volno]; - else - vol = NULL; - - if (sd->init_blocksize == 0) { - sd->init_blocksize = DEFAULT_REVIVE_BLOCKSIZE; - } else if (sd->init_blocksize > MAX_REVIVE_BLOCKSIZE) - sd->init_blocksize = MAX_REVIVE_BLOCKSIZE; - - size = min(sd->init_blocksize >> DEV_BSHIFT, sd->sectors - sd->initialized) << DEV_BSHIFT; - - verified = 0; - while (!verified) { /* until we're happy with it, */ - s = splbio(); - bp = geteblk(size); /* Get a buffer */ - splx(s); - if (bp == NULL) - return ENOMEM; - - bp->b_bcount = size; - bp->b_resid = bp->b_bcount; - bp->b_blkno = sd->initialized; /* write it to here */ - bzero(bp->b_data, bp->b_bcount); - bp->b_dev = SD[sdno].dev; /* create the device number */ - bp->b_iocmd = BIO_WRITE; - sdio(bp); /* perform the I/O */ - bufwait(bp); - if (bp->b_ioflags & BIO_ERROR) - error = bp->b_error; - if (bp->b_qindex == 0) { /* not on a queue, */ - bp->b_flags |= B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - brelse(bp); /* is this kosher? */ - } - if ((error == 0) && verify) { /* check that it got there */ - s = splbio(); - bp = geteblk(size); /* get a buffer */ - if (bp == NULL) { - splx(s); - error = ENOMEM; - } else { - bp->b_bcount = size; - bp->b_resid = bp->b_bcount; - bp->b_blkno = sd->initialized; /* read from here */ - bp->b_dev = SD[sdno].dev; /* create the device number */ - bp->b_iocmd = BIO_READ; /* read it back */ - splx(s); - sdio(bp); - bufwait(bp); - /* - * XXX Bug fix code. This is hopefully no - * longer needed (21 February 2000). 
- */ - if (bp->b_ioflags & BIO_ERROR) - error = bp->b_error; - else if ((*bp->b_data != 0) /* first word spammed */ - ||(bcmp(bp->b_data, &bp->b_data[1], bp->b_bcount - 1))) { /* or one of the others */ - printf("vinum: init error on %s, offset 0x%llx sectors\n", - sd->name, - (long long) sd->initialized); - verified = 0; - } else - verified = 1; - if (bp->b_qindex == 0) { /* not on a queue, */ - bp->b_flags |= B_INVAL; - bp->b_ioflags &= ~BIO_ERROR; - brelse(bp); /* is this kosher? */ - } - } - } else - verified = 1; - } - if (error == 0) { /* did it, */ - sd->initialized += size >> DEV_BSHIFT; /* moved this much further down */ - if (sd->initialized >= sd->sectors) { /* finished */ - sd->initialized = 0; - set_sd_state(sdno, sd_initialized, setstate_force); /* bring the sd up */ - log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state)); - save_config(); /* and save the updated configuration */ - } else /* more to go, */ - error = EAGAIN; /* ya'll come back, see? */ - } - return error; -} - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumstate.c b/sys/dev/vinum/vinumstate.c deleted file mode 100644 index 94b45c1..0000000 --- a/sys/dev/vinum/vinumstate.c +++ /dev/null @@ -1,1095 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. 
- * - * $Id: vinumstate.c,v 2.21 2003/04/28 02:54:43 grog Exp $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/request.h> - -/* Update drive state */ -/* Return 1 if the state changes, otherwise 0 */ -int -set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags) -{ - struct drive *drive = &DRIVE[driveno]; - int oldstate = drive->state; - int sdno; - - if (drive->state == drive_unallocated) /* no drive to do anything with, */ - return 0; - - if (newstate == oldstate) /* don't change it if it's not different */ - return 1; /* all OK */ - if ((newstate == drive_down) /* the drive's going down */ - &&(!(flags & setstate_force)) - && (drive->opencount != 0)) /* we can't do it */ - return 0; /* don't do it */ - drive->state = newstate; /* set the state */ - if (drive->label.name[0] != '\0') /* we have a name, */ - log(LOG_INFO, - "vinum: drive %s is %s\n", - drive->label.name, - drive_state(drive->state)); - if (drive->state != oldstate) { /* state has changed */ - for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */ - if ((SD[sdno].state >= sd_referenced) - && (SD[sdno].driveno == driveno)) /* belongs to this drive */ - update_sd_state(sdno); /* update the state */ - } - } - if (newstate == drive_up) { /* want to bring it up */ - if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */ - init_drive(drive, 1); /* which changes the state again */ - } else /* taking it down or worse */ - queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */ - (union daemoninfo) drive); - if ((flags & setstate_configuring) == 0) /* configuring? */ - save_config(); /* no: save the updated configuration now */ - return 1; -} - -/* - * Try to set the subdisk state. Return 1 if - * state changed to what we wanted, -1 if it - * changed to something else, and 0 if no change. 
- * - * This routine is called both from the user (up, - * down states only) and internally. - * - * The setstate_force bit in the flags enables the - * state change even if it could be dangerous to - * data consistency. It shouldn't allow nonsense. - */ -int -set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) -{ - struct sd *sd = &SD[sdno]; - struct plex *plex; - struct volume *vol; - int oldstate = sd->state; - int status = 1; /* status to return */ - - if (newstate == oldstate) /* already there, */ - return 1; - else if (sd->state == sd_unallocated) /* no subdisk to do anything with, */ - return 0; /* can't do it */ - - if (sd->driveoffset < 0) { /* not allocated space */ - sd->state = sd_down; - if (newstate != sd_down) { - if (sd->plexno >= 0) - sdstatemap(&PLEX[sd->plexno]); /* count up subdisks */ - return -1; - } - } else { /* space allocated */ - switch (newstate) { - case sd_down: /* take it down? */ - /* - * If we're attached to a plex, and we're - * not reborn, we won't go down without - * use of force. - */ - if ((!flags & setstate_force) - && (sd->plexno >= 0) - && (sd->state != sd_reborn)) - return 0; /* don't do it */ - break; - - case sd_initialized: - if ((sd->state == sd_initializing) /* we were initializing */ - ||(flags & setstate_force)) /* or we forced it */ - break; - return 0; /* can't do it otherwise */ - - case sd_up: - if (DRIVE[sd->driveno].state != drive_up) /* can't bring the sd up if the drive isn't, */ - return 0; /* not even by force */ - if (flags & setstate_force) /* forcing it, */ - break; /* just do it, and damn the consequences */ - switch (sd->state) { - /* - * Perform the necessary tests. To allow - * the state transition, just break out of - * the switch. - */ - case sd_crashed: - case sd_reborn: - case sd_down: /* been down, no data lost */ - /* - * If we're associated with a plex, and - * the plex isn't up, or we're the only - * subdisk in the plex, we can do it. 
- */ - if ((sd->plexno >= 0) - && (((PLEX[sd->plexno].state < plex_firstup) - || (PLEX[sd->plexno].subdisks > 1)))) - break; /* do it */ - if (oldstate != sd_reborn) { - sd->state = sd_reborn; /* here it is again */ - log(LOG_INFO, - "vinum: %s is %s, not %s\n", - sd->name, - sd_state(sd->state), - sd_state(newstate)); - } - status = -1; - break; - - case sd_init: /* brand new */ - if (flags & setstate_configuring) /* we're doing this while configuring */ - break; - /* otherwise it's like being empty */ - /* FALLTHROUGH */ - - case sd_empty: - case sd_initialized: - /* - * If we're not part of a plex, or the - * plex is not part of a volume with other - * plexes which are up, we can come up - * without being inconsistent. - * - * If we're part of a parity plex, we'll - * come up if the caller uses force. This - * is the way we bring them up after - * initialization. - */ - if ((sd->plexno < 0) - || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0) - || (isparity((&PLEX[sd->plexno])) - && (flags & setstate_force))) - break; - - /* Otherwise it's just out of date */ - /* FALLTHROUGH */ - - case sd_stale: /* out of date info, need reviving */ - case sd_obsolete: - /* - - * 1. If the subdisk is not part of a - * plex, bring it up, don't revive. - * - * 2. If the subdisk is part of a - * one-plex volume or an unattached - * plex, and it's not RAID-4 or - * RAID-5, we *can't revive*. The - * subdisk doesn't change its state. - * - * 3. If the subdisk is part of a - * one-plex volume or an unattached - * plex, and it's RAID-4 or RAID-5, - * but more than one subdisk is down, - * we *still can't revive*. The - * subdisk doesn't change its state. - * - * 4. If the subdisk is part of a - * multi-plex volume, we'll change to - * reviving and let the revive - * routines find out whether it will - * work or not. If they don't, the - * revive stops with an error message, - * but the state doesn't change - * (FWIW). 
- */ - if (sd->plexno < 0) /* no plex associated, */ - break; /* bring it up */ - plex = &PLEX[sd->plexno]; - if (plex->volno >= 0) /* have a volume */ - vol = &VOL[plex->volno]; - else - vol = NULL; - /* - * We can't do it if: - * - * 1: we don't have a volume - * 2: we're the only plex in the volume - * 3: we're a RAID-4 or RAID-5 plex, and - * more than one subdisk is down. - */ - if (((vol == NULL) - || (vol->plexes == 1)) - && ((!isparity(plex)) - || (plex->sddowncount > 1))) { - if (sd->state == sd_initializing) /* it's finished initializing */ - sd->state = sd_initialized; - else - return 0; /* can't do it */ - } else { - sd->state = sd_reviving; /* put in reviving state */ - sd->revived = 0; /* nothing done yet */ - status = EAGAIN; /* need to repeat */ - } - break; - - case sd_reviving: - if (flags & setstate_force) /* insist, */ - break; - return EAGAIN; /* no, try again */ - - default: /* can't do it */ - /* - * There's no way to bring subdisks up directly from - * other states. First they need to be initialized - * or revived. - */ - return 0; - } - break; - - default: /* other ones, only internal with force */ - if ((flags & setstate_force) == 0) /* no force? What's this? */ - return 0; /* don't do it */ - } - } - if (status == 1) { /* we can do it, */ - sd->state = newstate; - if (flags & setstate_force) - log(LOG_INFO, "vinum: %s is %s by force\n", sd->name, sd_state(sd->state)); - else - log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state)); - } else /* we don't get here with status 0 */ - log(LOG_INFO, - "vinum: %s is %s, not %s\n", - sd->name, - sd_state(sd->state), - sd_state(newstate)); - if (sd->plexno >= 0) /* we belong to a plex */ - update_plex_state(sd->plexno); /* update plex state */ - if ((flags & setstate_configuring) == 0) /* save config now */ - save_config(); - return status; -} - -/* - * Set the state of a plex dependent on its subdisks. 
- * This time round, we'll let plex state just reflect - * aggregate subdisk state, so this becomes an order of - * magnitude less complicated. In particular, ignore - * the requested state. - */ -int -set_plex_state(int plexno, enum plexstate state, enum setstateflags flags) -{ - struct plex *plex; /* point to our plex */ - enum plexstate oldstate; - enum volplexstate vps; /* how do we compare with the other plexes? */ - - plex = &PLEX[plexno]; /* point to our plex */ - oldstate = plex->state; - - /* If the plex isn't allocated, we can't do it. */ - if (plex->state == plex_unallocated) - return 0; - - /* - * If it's already in the the state we want, - * and it's not up, just return. If it's up, - * we still need to do some housekeeping. - */ - if ((state == oldstate) - && (state != plex_up)) - return 1; - vps = vpstate(plex); /* how do we compare with the other plexes? */ - switch (state) { - /* - * We can't bring the plex up, even by force, - * unless it's ready. update_plex_state - * checks that. - */ - case plex_up: /* bring the plex up */ - update_plex_state(plex->plexno); /* it'll come up if it can */ - break; - - case plex_down: /* want to take it down */ - /* - * If we're the only one, or the only one - * which is up, we need force to do it. - */ - if (((vps == volplex_onlyus) - || (vps == volplex_onlyusup)) - && (!(flags & setstate_force))) - return 0; /* can't do it */ - plex->state = state; /* do it */ - invalidate_subdisks(plex, sd_down); /* and down all up subdisks */ - break; - - /* - * This is only requested internally. - * Trust ourselves - */ - case plex_faulty: - plex->state = state; /* do it */ - invalidate_subdisks(plex, sd_crashed); /* and crash all up subdisks */ - break; - - case plex_initializing: - /* XXX consider what safeguards we need here */ - if ((flags & setstate_force) == 0) - return 0; - plex->state = state; /* do it */ - break; - - /* What's this? 
*/ - default: - return 0; - } - if (plex->state != oldstate) /* we've changed, */ - log(LOG_INFO, /* tell them about it */ - "vinum: %s is %s\n", - plex->name, - plex_state(plex->state)); - /* - * Now see what we have left, and whether - * we're taking the volume down - */ - if (plex->volno >= 0) /* we have a volume */ - update_volume_state(plex->volno); /* update its state */ - if ((flags & setstate_configuring) == 0) /* save config now */ - save_config(); /* yes: save the updated configuration */ - return 1; -} - -/* Update the state of a plex dependent on its plexes. */ -int -set_volume_state(int volno, enum volumestate state, enum setstateflags flags) -{ - struct volume *vol = &VOL[volno]; /* point to our volume */ - - if (vol->state == volume_unallocated) /* no volume to do anything with, */ - return 0; - if (vol->state == state) /* we're there already */ - return 1; - - if (state == volume_up) /* want to come up */ - update_volume_state(volno); - else if (state == volume_down) { /* want to go down */ - if (((vol->flags & VF_OPEN) == 0) /* not open */ - ||((flags & setstate_force) != 0)) { /* or we're forcing */ - vol->state = volume_down; - log(LOG_INFO, - "vinum: volume %s is %s\n", - vol->name, - volume_state(vol->state)); - if ((flags & setstate_configuring) == 0) /* save config now */ - save_config(); /* yes: save the updated configuration */ - return 1; - } - } - return 0; /* no change */ -} - -/* Set the state of a subdisk based on its environment */ -void -update_sd_state(int sdno) -{ - struct sd *sd; - struct drive *drive; - enum sdstate oldstate; - - sd = &SD[sdno]; - oldstate = sd->state; - drive = &DRIVE[sd->driveno]; - - if (drive->state == drive_up) { - switch (sd->state) { - case sd_down: - case sd_crashed: - sd->state = sd_reborn; /* back up again with no loss */ - break; - - default: - break; - } - } else { /* down or worse */ - switch (sd->state) { - case sd_up: - case sd_reborn: - case sd_reviving: - case sd_empty: - sd->state = sd_crashed; 
/* lost our drive */ - break; - - default: - break; - } - } - if (sd->state != oldstate) /* state has changed, */ - log(LOG_INFO, /* say so */ - "vinum: %s is %s\n", - sd->name, - sd_state(sd->state)); - if (sd->plexno >= 0) /* we're part of a plex, */ - update_plex_state(sd->plexno); /* update its state */ -} - -/* - * Force a plex and all its subdisks - * into an 'up' state. This is a helper - * for update_plex_state. - */ -void -forceup(int plexno) -{ - struct plex *plex; - int sdno; - - plex = &PLEX[plexno]; /* point to the plex */ - plex->state = plex_up; /* and bring it up */ - - /* change the subdisks to up state */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { - SD[plex->sdnos[sdno]].state = sd_up; - log(LOG_INFO, /* tell them about it */ - "vinum: %s is up\n", - SD[plex->sdnos[sdno]].name); - } -} - -/* Set the state of a plex based on its environment */ -void -update_plex_state(int plexno) -{ - struct plex *plex; /* point to our plex */ - enum plexstate oldstate; - enum sdstates statemap; /* get a map of the subdisk states */ - enum volplexstate vps; /* how do we compare with the other plexes? */ - - plex = &PLEX[plexno]; /* point to our plex */ - oldstate = plex->state; - statemap = sdstatemap(plex); /* get a map of the subdisk states */ - vps = vpstate(plex); /* how do we compare with the other plexes? */ - - if (statemap & sd_initstate) /* something initializing? */ - plex->state = plex_initializing; /* yup, that makes the plex the same */ - else if (statemap == sd_upstate) - /* - * All the subdisks are up. 
This also means that - * they are consistent, so we can just bring - * the plex up - */ - plex->state = plex_up; - else if (isparity(plex) /* RAID-4 or RAID-5 plex */ - &&(plex->sddowncount == 1)) /* and exactly one subdisk down */ - plex->state = plex_degraded; /* limping a bit */ - else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */ - ||((statemap & ~sd_downstate) - == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) { - if ((vps & volplex_otherup) == 0) { /* no other plex is up */ - struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */ - - /* - * If we're a striped or concat plex - * associated with a volume, none of whose - * plexes are up, and we're new and untested, - * and the volume has the setupstate bit set, - * we can pretend to be in a consistent state. - * - * We need to do this in one swell foop: on - * the next call we will no longer be just - * empty. - * - * This code assumes that all the other plexes - * are also capable of coming up (i.e. all the - * sds are up), but that's OK: we'll come back - * to this function for the remaining plexes - * in the volume. - */ - if ((plex->state == plex_init) - && (plex->volno >= 0) - && (vol->flags & VF_CONFIG_SETUPSTATE)) { - for (plexno = 0; plexno < vol->plexes; plexno++) - forceup(VOL[plex->volno].plex[plexno]); - } else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */ - ||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */ - ||(plex->organization == plex_striped)) - forceup(plexno); /* we'll do it */ - /* - * This leaves a case where things don't get - * done: the plex is RAID-4 or RAID-5, and - * the subdisks are all empty. They need to - * be initialized first. 
- */ - } else { - if (statemap == sd_upstate) /* all subdisks up */ - plex->state = plex_up; /* we can come up too */ - else - plex->state = plex_faulty; - } - } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */ - plex->state = plex_flaky; - else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */ - plex->state = plex_corrupt; /* corrupt */ - else if (statemap & (sd_initstate | sd_emptystate)) /* some subdisks empty or initializing */ - plex->state = plex_initializing; - else /* nothing at all up */ - plex->state = plex_faulty; - - if (plex->state != oldstate) /* state has changed, */ - log(LOG_INFO, /* tell them about it */ - "vinum: %s is %s\n", - plex->name, - plex_state(plex->state)); - if (plex->volno >= 0) /* we're part of a volume, */ - update_volume_state(plex->volno); /* update its state */ -} - -/* Set volume state based on its components */ -void -update_volume_state(int volno) -{ - struct volume *vol; /* our volume */ - int plexno; - enum volumestate oldstate; - - vol = &VOL[volno]; /* point to our volume */ - oldstate = vol->state; - - for (plexno = 0; plexno < vol->plexes; plexno++) { - struct plex *plex = &PLEX[vol->plex[plexno]]; /* point to the plex */ - if (plex->state >= plex_corrupt) { /* something accessible, */ - vol->state = volume_up; - break; - } - } - if (plexno == vol->plexes) /* didn't find an up plex */ - vol->state = volume_down; - - if (vol->state != oldstate) { /* state changed */ - log(LOG_INFO, "vinum: %s is %s\n", vol->name, volume_state(vol->state)); - save_config(); /* save the updated configuration */ - } -} - -/* - * Called from request routines when they find - * a subdisk which is not kosher. Decide whether - * it warrants changing the state. Return - * REQUEST_DOWN if we can't use the subdisk, - * REQUEST_OK if we can. - */ -/* - * A prior version of this function checked the plex - * state as well. At the moment, consider plex states - * information for the user only. 
We'll ignore them - * and use the subdisk state only. The last version of - * this file with the old logic was 2.7. XXX - */ -enum requeststatus -checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend) -{ - struct plex *plex = &PLEX[sd->plexno]; - int writeop = (rq->bp->b_iocmd == BIO_WRITE); /* note if we're writing */ - - switch (sd->state) { - /* We shouldn't get called if the subdisk is up */ - case sd_up: - return REQUEST_OK; - - case sd_reviving: - /* - * Access to a reviving subdisk depends on the - * organization of the plex: - * - * - If it's concatenated, access the subdisk - * up to its current revive point. If we - * want to write to the subdisk overlapping - * the current revive block, set the - * conflict flag in the request, asking the - * caller to put the request on the wait - * list, which will be attended to by - * revive_block when it's done. - * - if it's striped, we can't do it (we could - * do some hairy calculations, but it's - * unlikely to work). - * - if it's RAID-4 or RAID-5, we can do it as - * long as only one subdisk is down - */ - if (plex->organization == plex_striped) /* plex is striped, */ - return REQUEST_DOWN; - else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */ - if (plex->sddowncount > 1) /* with more than one sd down, */ - return REQUEST_DOWN; - else - /* - * XXX We shouldn't do this if we can find a - * better way. 
Check the other plexes - * first, and return a DOWN if another - * plex will do it better - */ - return REQUEST_OK; /* OK, we'll find a way */ - } - if (diskaddr > (sd->revived - + sd->plexoffset - + (sd->revive_blocksize >> DEV_BSHIFT))) /* we're beyond the end */ - return REQUEST_DOWN; - else if (diskend > (sd->revived + sd->plexoffset)) { /* we finish beyond the end */ - if (writeop) { - rq->flags |= XFR_REVIVECONFLICT; /* note a potential conflict */ - rq->sdno = sd->sdno; /* and which sd last caused it */ - } else - return REQUEST_DOWN; - } - return REQUEST_OK; - - case sd_reborn: - if (writeop) - return REQUEST_OK; /* always write to a reborn disk */ - else /* don't allow a read */ - /* - * Handle the mapping. We don't want to reject - * a read request to a reborn subdisk if that's - * all we have. XXX - */ - return REQUEST_DOWN; - - case sd_down: - if (writeop) /* writing to a consistent down disk */ - set_sd_state(sd->sdno, sd_obsolete, setstate_force); /* it's not consistent now */ - return REQUEST_DOWN; - - case sd_crashed: - if (writeop) /* writing to a consistent down disk */ - set_sd_state(sd->sdno, sd_stale, setstate_force); /* it's not consistent now */ - return REQUEST_DOWN; - - default: - return REQUEST_DOWN; - } -} - -/* return a state map for the subdisks of a plex */ -enum sdstates -sdstatemap(struct plex *plex) -{ - int sdno; - enum sdstates statemap = 0; /* note the states we find */ - - plex->sddowncount = 0; /* no subdisks down yet */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { - struct sd *sd = &SD[plex->sdnos[sdno]]; /* point to the subdisk */ - - switch (sd->state) { - case sd_empty: - statemap |= sd_emptystate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_init: - statemap |= sd_initstate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_down: - statemap |= sd_downstate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_crashed: - statemap |= 
sd_crashedstate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_obsolete: - statemap |= sd_obsoletestate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_stale: - statemap |= sd_stalestate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_reborn: - statemap |= sd_rebornstate; - break; - - case sd_up: - statemap |= sd_upstate; - break; - - case sd_initializing: - statemap |= sd_initstate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_initialized: - statemap |= sd_initializedstate; - (plex->sddowncount)++; /* another unusable subdisk */ - break; - - case sd_unallocated: - case sd_uninit: - case sd_reviving: - case sd_referenced: - statemap |= sd_otherstate; - (plex->sddowncount)++; /* another unusable subdisk */ - } - } - return statemap; -} - -/* determine the state of the volume relative to this plex */ -enum volplexstate -vpstate(struct plex *plex) -{ - struct volume *vol; - enum volplexstate state = volplex_onlyusdown; /* state to return */ - int plexno; - - if (plex->volno < 0) { /* not associated with a volume */ - if (plex->state > plex_degraded) - return volplex_onlyus; /* just us */ - else - return volplex_onlyusdown; /* assume the worst */ - } - vol = &VOL[plex->volno]; /* point to our volume */ - for (plexno = 0; plexno < vol->plexes; plexno++) { - if (&PLEX[vol->plex[plexno]] == plex) { /* us */ - if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* are we up? 
*/ - state |= volplex_onlyus; /* yes */ - } else { - if (PLEX[vol->plex[plexno]].state >= plex_degraded) /* not us */ - state |= volplex_otherup; /* and when they were up, they were up */ - else - state |= volplex_alldown; /* and when they were down, they were down */ - } - } - return state; /* and when they were only halfway up */ -} /* they were neither up nor down */ - -/* Check if all bits b are set in a */ -int allset(int a, int b); - -int -allset(int a, int b) -{ - return (a & b) == b; -} - -/* Invalidate the subdisks belonging to a plex */ -void -invalidate_subdisks(struct plex *plex, enum sdstate state) -{ - int sdno; - - for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */ - struct sd *sd = &SD[plex->sdnos[sdno]]; - - switch (sd->state) { - case sd_unallocated: - case sd_uninit: - case sd_init: - case sd_initializing: - case sd_initialized: - case sd_empty: - case sd_obsolete: - case sd_stale: - case sd_crashed: - case sd_down: - case sd_referenced: - break; - - case sd_reviving: - case sd_reborn: - case sd_up: - set_sd_state(plex->sdnos[sdno], state, setstate_force); - } - } -} - -/* - * Start an object, in other words do what we can to get it up. - * This is called from vinumioctl (VINUMSTART). - * Return error indications via ioctl_reply - */ -void -start_object(struct vinum_ioctl_msg *data) -{ - int status; - int objindex = data->index; /* data gets overwritten */ - struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ - enum setstateflags flags; - - if (data->force != 0) /* are we going to use force? 
*/ - flags = setstate_force; /* yes */ - else - flags = setstate_none; /* no */ - - switch (data->type) { - case drive_object: - status = set_drive_state(objindex, drive_up, flags); - if (DRIVE[objindex].state != drive_up) /* set status on whether we really did it */ - ioctl_reply->error = EBUSY; - else - ioctl_reply->error = 0; - break; - - case sd_object: - if (DRIVE[SD[objindex].driveno].state != drive_up) { - ioctl_reply->error = EIO; - strcpy(ioctl_reply->msg, "Drive is down"); - return; - } - if (data->blocksize) - SD[objindex].revive_blocksize = data->blocksize; - if ((SD[objindex].state == sd_reviving) /* reviving, */ - ||(SD[objindex].state == sd_stale)) { /* or stale, will revive */ - SD[objindex].state = sd_reviving; /* make sure we're reviving */ - ioctl_reply->error = revive_block(objindex); /* revive another block */ - ioctl_reply->msg[0] = '\0'; /* no comment */ - return; - } else if (SD[objindex].state == sd_initializing) { /* initializing, */ - if (data->blocksize) - SD[objindex].init_blocksize = data->blocksize; - ioctl_reply->error = initsd(objindex, data->verify); /* initialize another block */ - ioctl_reply->msg[0] = '\0'; /* no comment */ - return; - } - status = set_sd_state(objindex, sd_up, flags); /* set state */ - if (status != EAGAIN) { /* not first revive or initialize, */ - if (SD[objindex].state != sd_up) /* set status on whether we really did it */ - ioctl_reply->error = EBUSY; - else - ioctl_reply->error = 0; - } else - ioctl_reply->error = status; - break; - - case plex_object: - status = set_plex_state(objindex, plex_up, flags); - if (PLEX[objindex].state != plex_up) /* set status on whether we really did it */ - ioctl_reply->error = EBUSY; - else - ioctl_reply->error = 0; - break; - - case volume_object: - status = set_volume_state(objindex, volume_up, flags); - if (VOL[objindex].state != volume_up) /* set status on whether we really did it */ - ioctl_reply->error = EBUSY; - else - ioctl_reply->error = 0; - break; - - default: - 
ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "Invalid object type"); - return; - } - /* - * There's no point in saying anything here: - * the userland program does it better - */ - ioctl_reply->msg[0] = '\0'; -} - -/* - * Stop an object, in other words do what we can to get it down - * This is called from vinumioctl (VINUMSTOP). - * Return error indications via ioctl_reply. - */ -void -stop_object(struct vinum_ioctl_msg *data) -{ - int status = 1; - int objindex = data->index; /* save the number from change */ - struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* format for returning replies */ - - switch (data->type) { - case drive_object: - status = set_drive_state(objindex, drive_down, data->force); - break; - - case sd_object: - status = set_sd_state(objindex, sd_down, data->force); - break; - - case plex_object: - status = set_plex_state(objindex, plex_down, data->force); - break; - - case volume_object: - status = set_volume_state(objindex, volume_down, data->force); - break; - - default: - ioctl_reply->error = EINVAL; - strcpy(ioctl_reply->msg, "Invalid object type"); - return; - } - ioctl_reply->msg[0] = '\0'; - if (status == 0) /* couldn't do it */ - ioctl_reply->error = EBUSY; - else - ioctl_reply->error = 0; -} - -/* - * VINUM_SETSTATE ioctl: set an object state. - * msg is the message passed by the user. 
- */ -void -setstate(struct vinum_ioctl_msg *msg) -{ - int sdno; - struct sd *sd; - struct plex *plex; - struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ - - switch (msg->state) { - case object_down: - stop_object(msg); - break; - - case object_initializing: - switch (msg->type) { - case sd_object: - sd = &SD[msg->index]; - if ((msg->index >= vinum_conf.subdisks_allocated) - || (sd->state <= sd_referenced)) { - sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); - ioctl_reply->error = EFAULT; - return; - } - set_sd_state(msg->index, sd_initializing, msg->force); - if (sd->state != sd_initializing) { - strcpy(ioctl_reply->msg, "Can't set state"); - ioctl_reply->error = EBUSY; - } else - ioctl_reply->error = 0; - break; - - case plex_object: - plex = &PLEX[msg->index]; - if ((msg->index >= vinum_conf.plexes_allocated) - || (plex->state <= plex_unallocated)) { - sprintf(ioctl_reply->msg, "Invalid plex %d", msg->index); - ioctl_reply->error = EFAULT; - return; - } - set_plex_state(msg->index, plex_initializing, msg->force); - if (plex->state != plex_initializing) { - strcpy(ioctl_reply->msg, "Can't set state"); - ioctl_reply->error = EBUSY; - } else { - ioctl_reply->error = 0; - for (sdno = 0; sdno < plex->subdisks; sdno++) { - sd = &SD[plex->sdnos[sdno]]; - set_sd_state(plex->sdnos[sdno], sd_initializing, msg->force); - if (sd->state != sd_initializing) { - strcpy(ioctl_reply->msg, "Can't set state"); - ioctl_reply->error = EBUSY; - break; - } - } - } - break; - - default: - strcpy(ioctl_reply->msg, "Invalid object"); - ioctl_reply->error = EINVAL; - } - break; - - case object_initialized: - if (msg->type == sd_object) { - sd = &SD[msg->index]; - if ((msg->index >= vinum_conf.subdisks_allocated) - || (sd->state <= sd_referenced)) { - sprintf(ioctl_reply->msg, "Invalid subdisk %d", msg->index); - ioctl_reply->error = EFAULT; - return; - } - set_sd_state(msg->index, sd_initialized, msg->force); - if (sd->state 
!= sd_initializing) { - strcpy(ioctl_reply->msg, "Can't set state"); - ioctl_reply->error = EBUSY; - } else - ioctl_reply->error = 0; - } else { - strcpy(ioctl_reply->msg, "Invalid object"); - ioctl_reply->error = EINVAL; - } - break; - - case object_up: - start_object(msg); - } -} - -/* - * Brute force set state function. Don't look at - * any dependencies, just do it. This is mainly - * intended for testing and recovery. - */ -void -setstate_by_force(struct vinum_ioctl_msg *msg) -{ - struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) msg; /* format for returning replies */ - - switch (msg->type) { - case drive_object: - DRIVE[msg->index].state = msg->state; - break; - - case sd_object: - SD[msg->index].state = msg->state; - break; - - case plex_object: - PLEX[msg->index].state = msg->state; - break; - - case volume_object: - VOL[msg->index].state = msg->state; - break; - - default: - break; - } - ioctl_reply->error = 0; -} -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumstate.h b/sys/dev/vinum/vinumstate.h deleted file mode 100644 index 572f317..0000000 --- a/sys/dev/vinum/vinumstate.h +++ /dev/null @@ -1,257 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 - * Nan Yang Computer Services Limited. All rights reserved. - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $FreeBSD$ - */ - -/* - * This file gets read by makestatetext to create text files - * with the names of the states, so don't change the file - * format - */ - -enum volumestate { - volume_unallocated, - /* present but unused. Must be 0 */ - - volume_uninit, - /* mentioned elsewhere but not known to the configuration */ - - volume_down, - - /* The volume is up and functional, but not all plexes may be available */ - volume_up, - volume_laststate = volume_up /* last value, for table dimensions */ -}; - -enum plexstate { - /* An empty entry, not a plex at all. */ - plex_unallocated, - - /* The plex has been referenced by a volume */ - plex_referenced, - /* - * The plex has been allocated, but there configuration - * is not complete - */ - plex_init, - - /* - * A plex which has gone completely down because of - * I/O errors. 
- */ - plex_faulty, - - /* - * A plex which has been taken down by the - * administrator. - */ - plex_down, - - /* A plex which is being initialized */ - plex_initializing, - - /* - * *** The remaining states represent plexes which are - * at least partially up. Keep these separate so that - * they can be checked more easily. - */ - - /* - * A plex entry which is at least partially up. Not - * all subdisks are available, and an inconsistency - * has occurred. If no other plex is uncorrupted, - * the volume is no longer consistent. - */ - plex_corrupt, - - plex_firstup = plex_corrupt, /* first "up" state */ - - /* - * A RAID-5 plex entry which is accessible, but one - * subdisk is down, requiring recovery for many - * I/O requests. - */ - plex_degraded, - - /* - * A plex which is really up, but which has a reborn - * subdisk which we don't completely trust, and - * which we don't want to read if we can avoid it - */ - plex_flaky, - - /* - * A plex entry which is completely up. All subdisks - * are up. - */ - plex_up, - - plex_laststate = plex_up /* last value, for table dimensions */ -}; - -/* subdisk states */ -enum sdstate { - /* An empty entry, not a subdisk at all. */ - sd_unallocated, - - /* - * A subdisk entry which has not been created - * completely. Some fields may be empty. - */ - sd_uninit, - - /* The subdisk has been referenced by a plex */ - sd_referenced, - - /* - * A subdisk entry which has been created completely. - * All fields are correct, but the disk hasn't - * been updated. - */ - sd_init, - - /* - * A subdisk entry which has been created completely. - * All fields are correct, and the disk has been - * updated, but there is no data on the disk. - */ - sd_empty, - - /* - * A subdisk entry which has been created completely and - * which is currently being initialized - */ - sd_initializing, - - /* - * A subdisk entry which has been initialized, - * but which can't come up because it would - * cause inconsistencies. 
- */ - sd_initialized, - - /* *** The following states represent invalid data */ - /* - * A subdisk entry which has been created completely. - * All fields are correct, the config on disk has been - * updated, and the data was valid, but since then the - * drive has been taken down, and as a result updates - * have been missed. - */ - sd_obsolete, - - /* - * A subdisk entry which has been created completely. - * All fields are correct, the disk has been updated, - * and the data was valid, but since then the drive - * has been crashed and updates have been lost. - */ - sd_stale, - - /* *** The following states represent valid, inaccessible data */ - - /* - * A subdisk entry which has been created completely. - * All fields are correct, the disk has been updated, - * and the data was valid, but since then the drive - * has gone down. No attempt has been made to write - * to the subdisk since the crash, so the data is valid. - */ - sd_crashed, - - /* - * A subdisk entry which was up, which contained - * valid data, and which was taken down by the - * administrator. The data is valid. - */ - sd_down, - - /* - * *** This is invalid data (the subdisk previously had - * a numerically lower state), but it is currently in the - * process of being revived. We can write but not read. - */ - sd_reviving, - - /* - * *** The following states represent accessible subdisks - * with valid data - */ - - /* - * A subdisk entry which has been created completely. - * All fields are correct, the disk has been updated, - * and the data was valid, but since then the drive - * has gone down and up again. No updates were lost, - * but it is possible that the subdisk has been - * damaged. We won't read from this subdisk if we - * have a choice. If this is the only subdisk which - * covers this address space in the plex, we set its - * state to sd_up under these circumstances, so this - * status implies that there is another subdisk to - * fulfil the request. 
- */ - sd_reborn, - - /* - * A subdisk entry which has been created completely. - * All fields are correct, the disk has been updated, - * and the data is valid. - */ - sd_up, - - sd_laststate = sd_up /* last value, for table dimensions */ -}; - -enum drivestate { - drive_unallocated, - /* present but unused. Must be 0 */ - - drive_referenced, - /* just mentioned in some other config entry */ - - drive_down, - /* not accessible */ - - drive_up, - /* up and running */ - - drive_laststate = drive_up /* last value, for table dimensions */ -}; - -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ diff --git a/sys/dev/vinum/vinumutil.c b/sys/dev/vinum/vinumutil.c deleted file mode 100644 index f63bbd7..0000000 --- a/sys/dev/vinum/vinumutil.c +++ /dev/null @@ -1,311 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumutil.c,v 1.17 2003/04/28 02:54:43 grog Exp $ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* This file contains utility routines used both in kernel and user context */ - -#include <dev/vinum/vinumhdr.h> -#include <dev/vinum/statetexts.h> -#ifndef _KERNEL -#include <stdio.h> -#include <string.h> -extern jmp_buf command_fail; /* return on a failed command */ -#endif - -static char numeric_state[32]; /* temporary buffer for ASCII conversions */ -#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *)) -/* Return drive state as a string */ -char * -drive_state(enum drivestate state) -{ - if (((unsigned) state) >= STATECOUNT(drive)) { - sprintf(numeric_state, "Invalid state %d", (int) state); - return numeric_state; - } else - return drivestatetext[state]; -} - -/* Return volume state as a string */ -char * -volume_state(enum volumestate state) -{ - if (((unsigned) state) >= STATECOUNT(vol)) { - sprintf(numeric_state, "Invalid state %d", (int) state); - return numeric_state; - } else - return volstatetext[state]; -} - -/* Return plex state as a string */ -char * -plex_state(enum plexstate state) -{ - if (((unsigned) state) >= STATECOUNT(plex)) { - sprintf(numeric_state, "Invalid state %d", 
(int) state); - return numeric_state; - } else - return plexstatetext[state]; -} - -/* Return plex organization as a string */ -char * -plex_org(enum plexorg org) -{ - switch (org) { - case plex_disorg: /* disorganized */ - return "disorg"; - break; - - case plex_concat: /* concatenated plex */ - return "concat"; - break; - - case plex_striped: /* striped plex */ - return "striped"; - break; - - case plex_raid4: /* RAID-4 plex */ - return "raid4"; - - case plex_raid5: /* RAID-5 plex */ - return "raid5"; - break; - - default: - sprintf(numeric_state, "Invalid org %d", (int) org); - return numeric_state; - } -} - -/* Return sd state as a string */ -char * -sd_state(enum sdstate state) -{ - if (((unsigned) state) >= STATECOUNT(sd)) { - sprintf(numeric_state, "Invalid state %d", (int) state); - return numeric_state; - } else - return sdstatetext[state]; -} - -/* Now convert in the other direction */ -/* - * These are currently used only internally, - * so we don't do too much error checking - */ -enum drivestate -DriveState(char *text) -{ - int i; - for (i = 0; i < STATECOUNT(drive); i++) - if (strcmp(text, drivestatetext[i]) == 0) /* found it */ - return (enum drivestate) i; - return -1; -} - -enum sdstate -SdState(char *text) -{ - int i; - for (i = 0; i < STATECOUNT(sd); i++) - if (strcmp(text, sdstatetext[i]) == 0) /* found it */ - return (enum sdstate) i; - return -1; -} - -enum plexstate -PlexState(char *text) -{ - int i; - for (i = 0; i < STATECOUNT(plex); i++) - if (strcmp(text, plexstatetext[i]) == 0) /* found it */ - return (enum plexstate) i; - return -1; -} - -enum volumestate -VolState(char *text) -{ - int i; - for (i = 0; i < STATECOUNT(vol); i++) - if (strcmp(text, volstatetext[i]) == 0) /* found it */ - return (enum volumestate) i; - return -1; -} - -/* - * Take a number with an optional scale factor and convert - * it to a number of bytes. - * - * The scale factors are: - * - * s sectors (of 512 bytes) - * b blocks (of 512 bytes). 
This unit is deprecated, - * because it's confusing, but maintained to avoid - * confusing Veritas users. - * k kilobytes (1024 bytes) - * m megabytes (of 1024 * 1024 bytes) - * g gigabytes (of 1024 * 1024 * 1024 bytes) - */ -u_int64_t -sizespec(char *spec) -{ - u_int64_t size; - char *s; - int sign = 1; /* -1 if negative */ - - size = 0; - if (spec != NULL) { /* we have a parameter */ - s = spec; - if (*s == '-') { /* negative, */ - sign = -1; - s++; /* skip */ - } - if ((*s >= '0') && (*s <= '9')) { /* it's numeric */ - while ((*s >= '0') && (*s <= '9')) /* it's numeric */ - size = size * 10 + *s++ - '0'; /* convert it */ - switch (*s) { - case '\0': - return size * sign; - - case 'B': - case 'b': - case 'S': - case 's': - return size * sign * 512; - - case 'K': - case 'k': - return size * sign * 1024; - - case 'M': - case 'm': - return size * sign * 1024 * 1024; - - case 'G': - case 'g': - return size * sign * 1024 * 1024 * 1024; - } - } -#ifdef _KERNEL - throw_rude_remark(EINVAL, "Invalid length specification: %s", spec); -#else - fprintf(stderr, "Invalid length specification: %s", spec); - longjmp(command_fail, 1); -#endif - } -#ifdef _KERNEL - throw_rude_remark(EINVAL, "Missing length specification"); -#else - fprintf(stderr, "Missing length specification"); - longjmp(command_fail, 1); -#endif - /* NOTREACHED */ - return -1; -} - -#ifdef _KERNEL -#define FOOTYPE struct cdev * -#else -#define FOOTYPE dev_t -#endif -/* - * Extract the volume number from a device number. Check that it's - * the correct type, and that it isn't one of the superdevs. - */ -int -Volno(FOOTYPE dev) -{ - int volno = minor(dev); - - if (OBJTYPE(dev) != VINUM_VOLUME_TYPE) - return -1; - else - volno = ((volno & 0x3fff0000) >> 8) | (volno & 0xff); - if ((volno == VINUM_SUPERDEV_VOL) - || (volno == VINUM_DAEMON_VOL)) - return -1; - else - return volno; -} - -/* - * Extract a plex number from a device number. - * Don't check the major number, but check the - * type. 
Return -1 for invalid types. - */ -int -Plexno(FOOTYPE dev) -{ - int plexno = minor(dev); - - if (OBJTYPE(dev) != VINUM_PLEX_TYPE) - return -1; - else - return ((plexno & 0x3fff0000) >> 8) | (plexno & 0xff); -} - -/* - * Extract a subdisk number from a device number. - * Don't check the major number, but check the - * type. Return -1 for invalid types. - */ -int -Sdno(FOOTYPE dev) -{ - int sdno = minor(dev); - - /* - * Care: VINUM_SD_TYPE is 2 or 3, which is why we use < instead of - * !=. It's not clear that this makes any sense abstracting it to - * this level. - */ - if (OBJTYPE(dev) < VINUM_SD_TYPE) - return -1; - else -/* - * Note that the number we return includes the low-order bit of the - * type field. This gives us twice as many potential subdisks as - * plexes or volumes. - */ - return ((sdno & 0x7fff0000) >> 8) | (sdno & 0xff); -} diff --git a/sys/dev/vinum/vinumutil.h b/sys/dev/vinum/vinumutil.h deleted file mode 100644 index 2efa42c..0000000 --- a/sys/dev/vinum/vinumutil.h +++ /dev/null @@ -1,54 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. 
Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumutil.h,v 1.1 2001/05/22 04:07:22 grog Exp grog $ - * $FreeBSD$ - */ - -/* - * Functions defined in vinumutil.c, which is used both in userland - * and in the kernel. - */ -char *drive_state(enum drivestate); -char *volume_state(enum volumestate); -char *plex_state(enum plexstate); -char *plex_org(enum plexorg); -char *sd_state(enum sdstate); -enum drivestate DriveState(char *text); -enum sdstate SdState(char *text); -enum plexstate PlexState(char *text); -enum volumestate VolState(char *text); diff --git a/sys/dev/vinum/vinumvar.h b/sys/dev/vinum/vinumvar.h deleted file mode 100644 index 8e7edd8..0000000 --- a/sys/dev/vinum/vinumvar.h +++ /dev/null @@ -1,395 +0,0 @@ -/*- - * Copyright (c) 1997, 1998, 1999 - * Nan Yang Computer Services Limited. All rights reserved. - * - * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. 
- * - * Written by Greg Lehey - * - * This software is distributed under the so-called ``Berkeley - * License'': - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Nan Yang Computer - * Services Limited. - * 4. Neither the name of the Company nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * This software is provided ``as is'', and any express or implied - * warranties, including, but not limited to, the implied warranties of - * merchantability and fitness for a particular purpose are disclaimed. - * In no event shall the company or contributors be liable for any - * direct, indirect, incidental, special, exemplary, or consequential - * damages (including, but not limited to, procurement of substitute - * goods or services; loss of use, data, or profits; or business - * interruption) however caused and on any theory of liability, whether - * in contract, strict liability, or tort (including negligence or - * otherwise) arising in any way out of the use of this software, even if - * advised of the possibility of such damage. - * - * $Id: vinumvar.h,v 1.33 2003/05/23 01:09:23 grog Exp grog $ - * $FreeBSD$ - */ - -#include <sys/time.h> -#include <dev/vinum/vinumstate.h> -#include <sys/mutex.h> - -/* Directory for device nodes. 
*/ -#define VINUM_DIR "/dev/vinum" - -/* - * Some configuration maxima. They're an enum because - * we can't define global constants. Sorry about that. - * - * These aren't as bad as they look: most of them are soft limits. - */ - -#define VINUMROOT -enum constants { - /* - * Current version of the data structures. This - * is used to ensure synchronization between - * kernel module and userland vinum(8). - */ - VINUMVERSION = 1, - VINUM_HEADER = 512, /* size of header on disk */ - MAXCONFIGLINE = 1024, /* maximum size of a single config line */ - MINVINUMSLICE = 1048576, /* minimum size of a slice */ - - ROUND_ROBIN_READPOL = -1, /* round robin read policy */ - - /* - * Type field in high-order two bits of minor - * number. Subdisks are in fact both type 2 and - * type 3, giving twice the number of subdisks. - * This causes some ugliness in the code. - */ - VINUM_VOLUME_TYPE = 0, - VINUM_PLEX_TYPE = 1, - VINUM_SD_TYPE = 2, - VINUM_SD2_TYPE = 3, - - - /* - * Define a minor device number. - * This is not used directly; instead, it's - * called by the other macros. - */ -#define VINUMMINOR(o,t) ((o & 0xff) | ((o & 0x3fff00) << 8) | (t << VINUM_TYPE_SHIFT)) - - VINUM_TYPE_SHIFT = 30, - VINUM_MAXVOL = 0x3ffffd, /* highest numbered volume */ - - /* - * The super device and the daemon device are - * magic: they're the two highest-numbered - * volumes. - */ - VINUM_SUPERDEV_VOL = 0x3ffffe, - VINUM_DAEMON_VOL = 0x3fffff, - VINUM_MAXPLEX = 0x3fffff, - VINUM_MAXSD = 0x7fffff, - -#define VINUM_SUPERDEV_MINOR VINUMMINOR (VINUM_SUPERDEV_VOL, VINUM_VOLUME_TYPE) -#define VINUM_DAEMON_MINOR VINUMMINOR (VINUM_DAEMON_VOL, VINUM_VOLUME_TYPE) - - /* - * Mask for the number part of each object. - * Plexes and volumes are the same, subdisks use - * the low-order bit of the type field and thus - * have twice the number. 
- */ - - MAJORDEV_SHIFT = 8, - - MAXPLEX = 8, /* maximum number of plexes in a volume */ - MAXSD = 256, /* maximum number of subdisks in a plex */ - MAXDRIVENAME = 32, /* maximum length of a device name */ - MAXSDNAME = 64, /* maximum length of a subdisk name */ - MAXPLEXNAME = 64, /* maximum length of a plex name */ - MAXVOLNAME = 64, /* maximum length of a volume name */ - MAXNAME = 64, /* maximum length of any name */ - - -#define OBJTYPE(x) ((minor(x) >> VINUM_TYPE_SHIFT) & 3) - - /* extract device type */ -#define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 3) - -#define VINUM_SUPERDEV_NAME VINUM_DIR"/control" /* normal super device */ -#define VINUM_DAEMON_DEV_NAME VINUM_DIR"/controld" /* super device for daemon only */ - - /* - * the number of object entries to cater for initially, and also the - * value by which they are incremented. It doesn't take long - * to extend them, so theoretically we could start with 1 of each, but - * it's untidy to allocate such small areas. These values are - * probably too small. 
- */ - - INITIAL_DRIVES = 4, - INITIAL_VOLUMES = 4, - INITIAL_PLEXES = 8, - INITIAL_SUBDISKS = 16, - INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */ - INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */ - INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */ - PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */ - PLEX_LOCKS = 256, /* number of locks to allocate to a plex */ - PLEXMUTEXES = 32, - MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */ - DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */ - VINUMHOSTNAMELEN = 32, /* host name field in label */ -}; - -/* - * Slice header - * - * Vinum drives start with this structure: - * - *\ Sector - * |--------------------------------------| - * | PDP-11 memorial boot block | 0 - * |--------------------------------------| - * | Disk label, maybe | 1 - * |--------------------------------------| - * | Slice definition (vinum_hdr) | 8 - * |--------------------------------------| - * | | - * | Configuration info, first copy | 9 - * | | - * |--------------------------------------| - * | | - * | Configuration info, second copy | 9 + size of config - * | | - * |--------------------------------------| - */ - -/* Sizes and offsets of our information */ -enum { - VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */ - VINUMHEADERLEN = 512, /* size of vinum label */ - VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */ - MAXCONFIG = 65536, /* and size of config copy */ - DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */ -}; - -/* - * hostname is 256 bytes long, but we don't need to shlep - * multiple copies in vinum. 
We use the host name just - * to identify this system, and 32 bytes should be ample - * for that purpose - */ - -struct vinum_label { - char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */ - char name[MAXDRIVENAME]; /* our name of the drive */ - struct timeval date_of_birth; /* the time it was created */ - struct timeval last_update; /* and the time of last update */ - /* - * total size in bytes of the drive. This value - * includes the headers. - */ - off_t drive_size; -}; - -struct vinum_hdr { - uint64_t magic; /* we're long on magic numbers */ -#define VINUM_MAGIC 22322600044678729LL /* should be this */ -#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */ - /* - * Size in bytes of each copy of the - * configuration info. This must be a multiple - * of the sector size. - */ - int config_length; - struct vinum_label label; /* unique label */ -}; - -/* Information returned from read_drive_label */ -enum drive_label_info { - DL_CANT_OPEN, /* invalid partition */ - DL_NOT_OURS, /* valid partition, but no vinum label */ - DL_DELETED_LABEL, /* valid partition, deleted label found */ - DL_WRONG_DRIVE, /* drive name doesn't match */ - DL_OURS /* valid partition and label found */ -}; - -/* kinds of plex organization */ -enum plexorg { - plex_disorg, /* disorganized */ - plex_concat, /* concatenated plex */ - plex_striped, /* striped plex */ - plex_raid4, /* RAID4 plex */ - plex_raid5 /* RAID5 plex */ -}; - -/* Recognize plex organizations */ -#define isstriped(p) (p->organization >= plex_striped) /* RAID 1, 4 or 5 */ -#define isparity(p) (p->organization >= plex_raid4) /* RAID 4 or 5 */ - -/* Address range definitions, for locking volumes */ -struct rangelock { - daddr_t stripe; /* address + 1 of the range being locked */ - struct buf *bp; /* user's buffer pointer */ -}; - -struct drive_freelist { /* sorted list of free space on drive */ - u_int64_t offset; /* offset of entry */ - u_int64_t sectors; /* and length in sectors 
*/ -}; - -/* - * Include the structure definitions shared - * between userland and kernel. - */ - -#ifdef _KERNEL -#include <dev/vinum/vinumobj.h> -#undef _KERNEL -#include <dev/vinum/vinumobj.h> -#define _KERNEL -#else -#include <dev/vinum/vinumobj.h> -#endif - -/* - * Table expansion. Expand table, which contains oldcount - * entries of type element, by increment entries, and change - * oldcount accordingly - */ -#ifdef VINUMDEBUG -#define EXPAND(table, element, oldcount, increment) \ -{ \ - expand_table ((void **) &table, \ - oldcount * sizeof (element), \ - (oldcount + increment) * sizeof (element), \ - __FILE__, \ - __LINE__ ); \ - oldcount += increment; \ - } -#else -#define EXPAND(table, element, oldcount, increment) \ -{ \ - expand_table ((void **) &table, \ - oldcount * sizeof (element), \ - (oldcount + increment) * sizeof (element)); \ - oldcount += increment; \ - } -#endif - -/* Information on vinum's memory usage */ -struct meminfo { - int mallocs; /* number of malloced blocks */ - int total_malloced; /* total amount malloced */ - int highwater; /* maximum number of mallocs */ - struct mc *malloced; /* pointer to kernel table */ -}; - -#define MCFILENAMELEN 16 -struct mc { - struct timeval time; - int seq; - int size; - short line; - caddr_t address; - char file[MCFILENAMELEN]; -}; - -/* - * These enums are used by the state transition - * routines. 
They're in bit map format: - * - * Bit 0: Other plexes in the volume are down - * Bit 1: Other plexes in the volume are up - * Bit 2: The current plex is up - * Maybe they should be local to - * state.c - */ -enum volplexstate { - volplex_onlyusdown = 0, /* 0: we're the only plex, and we're down */ - volplex_alldown, /* 1: another plex is down, and so are we */ - volplex_otherup, /* 2: another plex is up */ - volplex_otherupdown, /* 3: other plexes are up and down */ - volplex_onlyus, /* 4: we're up and alone */ - volplex_onlyusup, /* 5: only we are up, others are down */ - volplex_allup, /* 6: all plexes are up */ - volplex_someup /* 7: some plexes are up, including us */ -}; - -/* state map for plex */ -enum sdstates { - sd_emptystate = 1, - sd_downstate = 2, /* SD is down */ - sd_crashedstate = 4, /* SD is crashed */ - sd_obsoletestate = 8, /* SD is obsolete */ - sd_stalestate = 16, /* SD is stale */ - sd_rebornstate = 32, /* SD is reborn */ - sd_upstate = 64, /* SD is up */ - sd_initstate = 128, /* SD is initializing */ - sd_initializedstate = 256, /* SD is initialized */ - sd_otherstate = 512, /* SD is in some other state */ -}; - -/* - * This is really just a parameter to pass to - * set_<foo>_state, but since it needs to be known - * in the external definitions, we need to define - * it here - */ -enum setstateflags { - setstate_none = 0, /* no flags */ - setstate_force = 1, /* force the state change */ - setstate_configuring = 2, /* we're currently configuring, don't save */ -}; - -/* Operations for parityops to perform. */ -enum parityop { - checkparity, - rebuildparity, - rebuildandcheckparity, /* rebuildparity with the -v option */ -}; - -/* - * When doing round-robin reads from a multi-plex volume, switch to the - * next plex if the difference of the last read sector and the next sector - * to be read is this many sectors. 
- */ -#define ROUNDROBIN_SWITCH 128 /* 64k */ - -#ifdef VINUMDEBUG -/* Debugging stuff */ -enum debugflags { - DEBUG_ADDRESSES = 1, /* show buffer information during requests */ - DEBUG_NUMOUTPUT = 2, /* show the value of vp->v_numoutput */ - DEBUG_RESID = 4, /* go into debugger in complete_rqe */ - DEBUG_LASTREQS = 8, /* keep a circular buffer of last requests */ - DEBUG_REVIVECONFLICT = 16, /* print info about revive conflicts */ - DEBUG_EOFINFO = 32, /* print info about EOF detection */ - DEBUG_MEMFREE = 64, /* keep info about Frees */ - DEBUG_BIGDRIVE = 128, /* pretend our drives are 100 times the size */ - DEBUG_REMOTEGDB = 256, /* go into remote gdb */ - DEBUG_WARNINGS = 512, /* log various relatively harmless warnings */ - DEBUG_LOCKREQS = 1024, /* log locking requests */ -}; - -#ifdef _KERNEL -#ifdef __i386__ -#define longjmp LongJmp /* test our longjmps */ -#endif -#endif -#endif -/* Local Variables: */ -/* fill-column: 50 */ -/* End: */ |