diff options
Diffstat (limited to 'sys/dev/raidframe/rf_freebsdkintf.c')
-rw-r--r-- | sys/dev/raidframe/rf_freebsdkintf.c | 3294 |
1 files changed, 3294 insertions, 0 deletions
diff --git a/sys/dev/raidframe/rf_freebsdkintf.c b/sys/dev/raidframe/rf_freebsdkintf.c new file mode 100644 index 0000000..b7003b5 --- /dev/null +++ b/sys/dev/raidframe/rf_freebsdkintf.c @@ -0,0 +1,3294 @@ +/*- + * Copyright (c) 2002 Scott Long <scottl@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* $NetBSD: rf_netbsdkintf.c,v 1.105 2001/04/05 02:48:51 oster Exp $ */ +/*- + * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Greg Oster; Jason R. Thorpe. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1988 University of Utah. + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: Utah $Hdr: cd.c 1.6 90/11/28$ + * + * @(#)cd.c 8.2 (Berkeley) 11/16/93 + */ + + + + +/* + * Copyright (c) 1995 Carnegie-Mellon University. + * All rights reserved. + * + * Authors: Mark Holland, Jim Zelenka + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. 
+ */ + +/*********************************************************** + * + * rf_kintf.c -- the kernel interface routines for RAIDframe + * + ***********************************************************/ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/stat.h> +#include <sys/ioccom.h> +#include <sys/filio.h> +#include <sys/fcntl.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/disk.h> +#include <sys/diskslice.h> +#include <sys/disklabel.h> +#include <sys/conf.h> +#include <sys/lock.h> +#include <sys/reboot.h> +#include <sys/module.h> +#include <sys/devicestat.h> +#include <vm/uma.h> + +#include "opt_raid.h" +#include <dev/raidframe/rf_raid.h> +#include <dev/raidframe/rf_raidframe.h> +#include <dev/raidframe/rf_copyback.h> +#include <dev/raidframe/rf_dag.h> +#include <dev/raidframe/rf_dagflags.h> +#include <dev/raidframe/rf_desc.h> +#include <dev/raidframe/rf_diskqueue.h> +#include <dev/raidframe/rf_acctrace.h> +#include <dev/raidframe/rf_etimer.h> +#include <dev/raidframe/rf_general.h> +#include <dev/raidframe/rf_debugMem.h> +#include <dev/raidframe/rf_kintf.h> +#include <dev/raidframe/rf_options.h> +#include <dev/raidframe/rf_driver.h> +#include <dev/raidframe/rf_parityscan.h> +#include <dev/raidframe/rf_debugprint.h> +#include <dev/raidframe/rf_threadstuff.h> +#include <dev/raidframe/rf_configure.h> + +RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) + +static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a + * spare table */ +static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from + * installation process */ + +/* prototypes */ +static void KernelWakeupFunc(struct bio *); +static void InitBP(struct bio *, struct vnode *, unsigned rw_flag, + dev_t dev, RF_SectorNum_t startSect, + RF_SectorCount_t numSect, caddr_t buf, + void (*cbFunc) (struct bio *), void *cbArg, + int 
logBytesPerSector, struct proc * b_proc); +static dev_t raidinit(RF_Raid_t *); +static void rf_search_label(dev_t, struct disklabel *, + RF_AutoConfig_t **) __unused; + +static int raid_modevent(module_t, int, void*); +void raidattach(void); +d_psize_t raidsize; +d_open_t raidopen; +d_close_t raidclose; +d_ioctl_t raidioctl; +d_write_t raidwrite; +d_read_t raidread; +d_strategy_t raidstrategy; +#if 0 +d_dump_t raiddump; +#endif + +d_open_t raidctlopen; +d_close_t raidctlclose; +d_ioctl_t raidctlioctl; + +static struct cdevsw raid_cdevsw = { + raidopen, + raidclose, + raidread, + raidwrite, + raidioctl, + nopoll, + nommap, + raidstrategy, + "raid", + 200, + nodump, + nopsize, + D_DISK, +}; + +static struct cdevsw raidctl_cdevsw = { + raidctlopen, + raidctlclose, + noread, + nowrite, + raidctlioctl, + nopoll, + nommap, + nostrategy, + "raidctl", + 201, + nodump, + nopsize, + 0, +}; + +static struct cdevsw raiddisk_cdevsw; + +/* + * Pilfered from ccd.c + */ + +struct raidbuf { + struct bio rf_buf; /* new I/O buf. MUST BE FIRST!!! */ + struct bio *rf_obp; /* ptr. to original I/O buf */ + int rf_flags; /* misc. flags */ + RF_DiskQueueData_t *req;/* the request that this was part of.. */ +}; + + +#define RAIDGETBUF(sc) uma_zalloc((sc)->sc_cbufpool, M_NOWAIT) +#define RAIDPUTBUF(sc, cbp) uma_zfree((sc)->sc_cbufpool, cbp) + +#define RF_MAX_ARRAYS 32 + +/* Raid control device */ +struct raidctl_softc { + dev_t sc_dev; /* Device node */ + int sc_flags; /* flags */ + int sc_numraid; /* Number of configured raid devices */ + dev_t sc_raiddevs[RF_MAX_ARRAYS]; +}; + +struct raid_softc { + dev_t sc_dev; /* Our device */ + dev_t sc_parent_dev; + int sc_flags; /* flags */ + int sc_busycount; /* How many times are we opened? 
*/ + size_t sc_size; /* size of the raid device */ + dev_t sc_parent; /* Parent device */ + struct disk sc_dkdev; /* generic disk device info */ + uma_zone_t sc_cbufpool; /* component buffer pool */ + RF_Raid_t *raidPtr; /* Raid information struct */ + struct bio_queue_head bio_queue; /* used for the device queue */ + struct devstat device_stats; /* devstat gathering */ +}; +/* sc_flags */ +#define RAIDF_OPEN 0x01 /* unit has been initialized */ +#define RAIDF_WLABEL 0x02 /* label area is writable */ +#define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ +#define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ +#define RAIDF_LOCKED 0x80 /* unit is locked */ + +/* + * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. + * Be aware that large numbers can allow the driver to consume a lot of + * kernel memory, especially on writes, and in degraded mode reads. + * + * For example: with a stripe width of 64 blocks (32k) and 5 disks, + * a single 64K write will typically require 64K for the old data, + * 64K for the old parity, and 64K for the new parity, for a total + * of 192K (if the parity buffer is not re-used immediately). + * Even it if is used immedately, that's still 128K, which when multiplied + * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. + * + * Now in degraded mode, for example, a 64K read on the above setup may + * require data reconstruction, which will require *all* of the 4 remaining + * disks to participate -- 4 * 32K/disk == 128K again. 
+ */ + +#ifndef RAIDOUTSTANDING +#define RAIDOUTSTANDING 10 +#endif + +#define RAIDLABELDEV(dev) dkmodpart(dev, RAW_PART) +#define DISKPART(dev) dkpart(dev) + +static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disk*); +static int raidlock(struct raid_softc *); +static void raidunlock(struct raid_softc *); + +static void rf_markalldirty(RF_Raid_t *); + +static dev_t raidctl_dev; + +void rf_ReconThread(struct rf_recon_req *); +/* XXX what I want is: */ +/*void rf_ReconThread(RF_Raid_t *raidPtr); */ +void rf_RewriteParityThread(RF_Raid_t *raidPtr); +void rf_CopybackThread(RF_Raid_t *raidPtr); +void rf_ReconstructInPlaceThread(struct rf_recon_req *); +void rf_buildroothack(void *, struct raidctl_softc *); + +RF_AutoConfig_t *rf_find_raid_components(void); +RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); +static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); +static int rf_reasonable_label(RF_ComponentLabel_t *); +void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); +int rf_set_autoconfig(RF_Raid_t *, int); +int rf_set_rootpartition(RF_Raid_t *, int); +void rf_release_all_vps(RF_ConfigSet_t *); +void rf_cleanup_config_set(RF_ConfigSet_t *); +int rf_have_enough_components(RF_ConfigSet_t *); +int rf_auto_config_set(RF_ConfigSet_t *, int *, struct raidctl_softc *); +static int raidgetunit(struct raidctl_softc *, int); +static int raidshutdown(void); + +void +raidattach(void) +{ + struct raidctl_softc *parent_sc = NULL; + RF_AutoConfig_t *ac_list; /* autoconfig list */ + RF_ConfigSet_t *config_sets; + int autoconfig = 0; + + /* This is where all the initialization stuff gets done. 
*/ + + if(rf_mutex_init(&rf_sparet_wait_mutex, __FUNCTION__)) { + rf_printf(0, "RAIDframe: failed to initialize mutexes\n"); + return; + } + + rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; + + if (rf_BootRaidframe() != 0) { + rf_printf(0, "Serious error booting RAIDframe!!\n"); + return; + } + + rf_printf(0, "Kernelized RAIDframe activated\n"); + MALLOC(parent_sc, struct raidctl_softc *, sizeof(*parent_sc), + M_RAIDFRAME, M_NOWAIT|M_ZERO); + if (parent_sc == NULL) { + RF_PANIC(); + return; + } + + parent_sc->sc_dev= make_dev(&raidctl_cdevsw, 0, 0, 0, 0x644, "raidctl"); + parent_sc->sc_dev->si_drv1 = parent_sc; + raidctl_dev = parent_sc->sc_dev; + +#if RAID_AUTOCONFIG + autoconfig = 1; +#endif + + if (autoconfig) { + /* 1. locate all RAID components on the system */ + + rf_printf(0, "Searching for raid components...\n"); + ac_list = rf_find_raid_components(); + if (ac_list == NULL) + return; + + /* 2. sort them into their respective sets */ + + config_sets = rf_create_auto_sets(ac_list); + + /* 3. 
evaluate each set and configure the valid ones + This gets done in rf_buildroothack() */ + + /* schedule the creation of the thread to do the + "/ on RAID" stuff */ + + rf_buildroothack(config_sets, parent_sc); +#if 0 + kthread_create(rf_buildroothack,config_sets); + +#endif /* RAID_AUTOCONFIG */ + } +} + +void +rf_buildroothack(arg, parent_sc) + void *arg; + struct raidctl_softc *parent_sc; +{ + RF_ConfigSet_t *config_sets = arg; + RF_ConfigSet_t *cset; + RF_ConfigSet_t *next_cset; + int retcode; + int raidID; + int rootID; + int num_root; + + rootID = 0; + num_root = 0; + cset = config_sets; + while(cset != NULL ) { + next_cset = cset->next; + if (rf_have_enough_components(cset) && + cset->ac->clabel->autoconfigure==1) { + retcode = rf_auto_config_set(cset, &raidID, parent_sc); + if (!retcode) { + if (cset->rootable) { + rootID = raidID; + num_root++; + } + } else { + /* The autoconfig didn't work :( */ + rf_printf(1, "Autoconfig failed with code %d" + "for raid%d\n", retcode, raidID); + rf_release_all_vps(cset); + } + } else { + /* we're not autoconfiguring this set... + release the associated resources */ + rf_release_all_vps(cset); + } + /* cleanup */ + rf_cleanup_config_set(cset); + cset = next_cset; + } + if (boothowto & RB_ASKNAME) { + /* We don't auto-config... */ + } else { + /* They didn't ask, and we found something bootable... */ + +#if 0 + if (num_root == 1) { + booted_device = &raidrootdev[rootID]; + } else if (num_root > 1) { + /* we can't guess.. require the user to answer... 
*/ + boothowto |= RB_ASKNAME; + } +#endif + } +} + +int +raidctlopen(dev_t dev, int flags, int fmt, struct thread *td) +{ + struct raidctl_softc *parent_sc; + + parent_sc = dev->si_drv1; + + if ((parent_sc->sc_flags & RAIDF_OPEN) != 0) + return (EBUSY); + + parent_sc->sc_flags |= RAIDF_OPEN; + return (0); +} + +int +raidctlclose(dev_t dev, int flags, int fmt, struct thread *td) +{ + struct raidctl_softc *parent_sc; + + parent_sc = dev->si_drv1; + + parent_sc->sc_flags &= ~RAIDF_OPEN; + return (0); +} + +int +raidctlioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct thread *td) +{ + struct raidctl_softc *parent_sc; + struct raid_softc *sc; + RF_Config_t *u_cfg, *k_cfg; + RF_Raid_t *raidPtr; + u_char *specific_buf; + u_int unit; + int retcode = 0; + + parent_sc = dev->si_drv1; + + switch (cmd) { + /* configure the system */ + case RAIDFRAME_CONFIGURE: + + /* copy-in the configuration information */ + /* data points to a pointer to the configuration structure */ + + u_cfg = *((RF_Config_t **) data); + RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); + if (k_cfg == NULL) { + return (ENOMEM); + } + retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, + sizeof(RF_Config_t)); + if (retcode) { + RF_Free(k_cfg, sizeof(RF_Config_t)); + rf_printf(2, "raidctlioctl: retcode=%d copyin.1\n", + retcode); + return (retcode); + } + /* allocate a buffer for the layout-specific data, and copy it + * in */ + if (k_cfg->layoutSpecificSize) { + if (k_cfg->layoutSpecificSize > 10000) { + /* sanity check */ + RF_Free(k_cfg, sizeof(RF_Config_t)); + return (EINVAL); + } + RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, + (u_char *)); + if (specific_buf == NULL) { + RF_Free(k_cfg, sizeof(RF_Config_t)); + return (ENOMEM); + } + retcode = copyin(k_cfg->layoutSpecific, + (caddr_t) specific_buf, + k_cfg->layoutSpecificSize); + if (retcode) { + RF_Free(k_cfg, sizeof(RF_Config_t)); + RF_Free(specific_buf, + k_cfg->layoutSpecificSize); + rf_printf(2, "raidctlioctl: retcode=%d " + 
"copyin.2\n", retcode); + return (retcode); + } + } else + specific_buf = NULL; + k_cfg->layoutSpecific = specific_buf; + + /* should do some kind of sanity check on the configuration. + * Store the sum of all the bytes in the last byte? */ + + /* configure the system */ + + RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *)); + if (raidPtr == NULL) { + rf_printf(0, "No memory for raid device\n"); + RF_Free(k_cfg, sizeof(RF_Config_t)); + retcode = ENOMEM; + } + bzero((char *) raidPtr, sizeof(RF_Raid_t)); + + /* Request a unit number for this soon-to-be device. */ + unit = raidgetunit(parent_sc, 0); + if (unit == -1) { + rf_printf(0, "Cannot allocate raid unit\n"); + RF_Free(raidPtr, sizeof(*raidPtr)); + goto out; + } + raidPtr->raidid = unit; + + if ((retcode = rf_Configure(raidPtr, k_cfg, NULL)) == 0) { + + /* allow this many simultaneous IO's to + this RAID device */ + raidPtr->openings = RAIDOUTSTANDING; + + parent_sc->sc_raiddevs[unit] = raidinit(raidPtr); + if (parent_sc->sc_raiddevs[unit] == NULL) { + rf_printf(0, "Could not create raid device\n"); + RF_Free(raidPtr, sizeof(*raidPtr)); + goto out; + } + parent_sc->sc_numraid++; + ((struct raid_softc *)raidPtr->sc)->sc_parent_dev = dev; + rf_markalldirty(raidPtr); + } else { + parent_sc->sc_raiddevs[unit] = NULL; + RF_Free(raidPtr, sizeof(*raidPtr)); + } + +out: + /* free the buffers. No return code here. */ + if (k_cfg->layoutSpecificSize) { + RF_Free(specific_buf, k_cfg->layoutSpecificSize); + } + RF_Free(k_cfg, sizeof(RF_Config_t)); + break; + + case RAIDFRAME_SHUTDOWN: + + unit = *(u_int *)data; + if ((unit >= RF_MAX_ARRAYS) || + (parent_sc->sc_raiddevs[unit] == NULL)) + return (EINVAL); + + sc = parent_sc->sc_raiddevs[unit]->si_drv1; + if ((retcode = raidlock(sc)) != 0) + return (retcode); + + /* + * If somebody has a partition mounted, we shouldn't + * shutdown. 
+ */ + + if ((sc->sc_flags & RAIDF_OPEN) != 0) { + raidunlock(sc); + return (EBUSY); + } + + rf_printf(0, "Shutting down RAIDframe engine\n"); + retcode = rf_Shutdown(sc->raidPtr); + RF_THREADGROUP_WAIT_STOP(&sc->raidPtr->engine_tg); + + devstat_remove_entry(&sc->device_stats); + + disk_destroy(parent_sc->sc_raiddevs[unit]); + raidunlock(sc); + + /* XXX Need to be able to destroy the zone */ + uma_zdestroy(sc->sc_cbufpool); + + parent_sc->sc_numraid--; + parent_sc->sc_raiddevs[unit] = NULL; + + RF_Free(sc->raidPtr, sizeof(*raidPtr)); + RF_Free(sc, sizeof(*sc)); + + break; + + default: + retcode = ENOIOCTL; + } + + return (retcode); +} + +#if 0 /* XXX DUMP!!!! */ +int +raiddump(dev) + dev_t dev; +{ + /* Not implemented. */ + return ENXIO; +} +#endif + +/* ARGSUSED */ +int +raidopen(dev, flags, fmt, td) + dev_t dev; + int flags, fmt; + struct thread *td; +{ + struct raid_softc *sc; + struct disk *dp; + int error = 0; + + sc = dev->si_drv1; + + if ((error = raidlock(sc)) != 0) + return (error); + dp = &sc->sc_dkdev; + + rf_printf(1, "Opening raid device %s\n", dev->si_name); + + /* Generate overall disklabel */ + raidgetdefaultlabel(sc->raidPtr, sc, dp); + + if (sc->sc_busycount == 0) { + /* First one... mark things as dirty... Note that we *MUST* + have done a configure before this. I DO NOT WANT TO BE + SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED + THAT THEY BELONG TOGETHER!!!!! */ + /* XXX should check to see if we're only open for reading + here... If so, we needn't do this, but then need some + other way of keeping track of what's happened.. */ + + rf_markalldirty( sc->raidPtr ); + sc->sc_flags |= RAIDF_OPEN; + } + + /* Prevent this unit from being unconfigured while open. 
*/ + sc->sc_busycount++; + + raidunlock(sc); + + return (error); + + +} +/* ARGSUSED */ +int +raidclose(dev, flags, fmt, td) + dev_t dev; + int flags, fmt; + struct thread *td; +{ + struct raid_softc *sc; + int error = 0; + + sc = dev->si_drv1; + + if ((error = raidlock(sc)) != 0) + return (error); + + sc->sc_busycount--; + if (sc->sc_busycount == 0) { + sc->sc_flags &= ~RAIDF_OPEN; + rf_update_component_labels(sc->raidPtr, + RF_FINAL_COMPONENT_UPDATE); + } + + raidunlock(sc); + return (0); + +} + +void +raidstrategy(bp) + struct bio *bp; +{ + RF_Raid_t *raidPtr; + struct raid_softc *sc = bp->bio_dev->si_drv1; + int s; + + raidPtr = sc->raidPtr; + if (raidPtr == NULL) { + bp->bio_error = ENODEV; + bp->bio_flags |= BIO_ERROR; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; + } + if (!raidPtr->valid) { + bp->bio_error = ENODEV; + bp->bio_flags |= BIO_ERROR; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; + } + if (bp->bio_bcount == 0) { + rf_printf(2, "b_bcount is zero..\n"); + biodone(bp); + return; + } + + s = splbio(); + + bp->bio_resid = 0; + + /* stuff it onto our queue. XXX locking? 
*/ + bioq_insert_tail(&sc->bio_queue, bp); + + raidstart(raidPtr); + + splx(s); +} + +int +raidread(dev, uio, flags) + dev_t dev; + struct uio *uio; + int flags; +{ + struct raid_softc *sc; + + sc = dev->si_drv1; + + return (physio(dev, uio, BIO_READ)); + +} + +int +raidwrite(dev, uio, flags) + dev_t dev; + struct uio *uio; + int flags; +{ + struct raid_softc *sc; + int ret; + + sc = dev->si_drv1; + + rf_printf(3, "raidwrite\n"); + ret = physio(dev, uio, BIO_WRITE); + + return (ret); + +} + +int +raidioctl(dev, cmd, data, flag, td) + dev_t dev; + u_long cmd; + caddr_t data; + int flag; + struct thread *td; +{ + struct raid_softc *sc; + RF_Raid_t *raidPtr; + RF_RaidDisk_t *diskPtr; + RF_AccTotals_t *totals; + RF_DeviceConfig_t *d_cfg, **ucfgp; + struct rf_recon_req *rrcopy, *rr; + RF_ComponentLabel_t *clabel; + RF_ComponentLabel_t *ci_label; + RF_SingleComponent_t *sparePtr,*componentPtr; + RF_SingleComponent_t *hot_spare, *component; + RF_ProgressInfo_t progressInfo; + int retcode = 0; + int row, column; + int unit; + int i, j, d; + + sc = dev->si_drv1; + raidPtr = sc->raidPtr; + + rf_printf(2, "raidioctl: %s %ld\n", dev->si_name, cmd); + + switch (cmd) { + + case RAIDFRAME_GET_COMPONENT_LABEL: + /* need to read the component label for the disk indicated + by row,column in clabel */ + + /* For practice, let's get it directly fromdisk, rather + than from the in-core copy */ + RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), + (RF_ComponentLabel_t *)); + if (clabel == NULL) + return (ENOMEM); + + bzero((char *) clabel, sizeof(RF_ComponentLabel_t)); + + bcopy(data, clabel, sizeof(RF_ComponentLabel_t)); + + row = clabel->row; + column = clabel->column; + + if ((row < 0) || (row >= raidPtr->numRow) || + (column < 0) || (column >= raidPtr->numCol + + raidPtr->numSpare)) { + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); + return(EINVAL); + } + + raidread_component_label(raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, + clabel ); + + 
bcopy(clabel, data, sizeof(RF_ComponentLabel_t)); + RF_Free( clabel, sizeof(RF_ComponentLabel_t)); + return (retcode); + + case RAIDFRAME_SET_COMPONENT_LABEL: + clabel = (RF_ComponentLabel_t *) data; + + /* XXX check the label for valid stuff... */ + /* Note that some things *should not* get modified -- + the user should be re-initing the labels instead of + trying to patch things. + */ + + rf_printf(1, "Got component label:\n"); + rf_printf(1, "Version: %d\n",clabel->version); + rf_printf(1, "Serial Number: %d\n",clabel->serial_number); + rf_printf(1, "Mod counter: %d\n",clabel->mod_counter); + rf_printf(1, "Row: %d\n", clabel->row); + rf_printf(1, "Column: %d\n", clabel->column); + rf_printf(1, "Num Rows: %d\n", clabel->num_rows); + rf_printf(1, "Num Columns: %d\n", clabel->num_columns); + rf_printf(1, "Clean: %d\n", clabel->clean); + rf_printf(1, "Status: %d\n", clabel->status); + + row = clabel->row; + column = clabel->column; + + if ((row < 0) || (row >= raidPtr->numRow) || + (column < 0) || (column >= raidPtr->numCol)) { + return(EINVAL); + } + + /* XXX this isn't allowed to do anything for now :-) */ + + /* XXX and before it is, we need to fill in the rest + of the fields!?!?!?! */ +#if 0 + raidwrite_component_label( + raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, + clabel ); +#endif + return (0); + + case RAIDFRAME_INIT_LABELS: + MALLOC(ci_label, RF_ComponentLabel_t *, + sizeof(RF_ComponentLabel_t), M_RAIDFRAME, + M_WAITOK | M_ZERO); + clabel = (RF_ComponentLabel_t *) data; + /* + we only want the serial number from + the above. We get all the rest of the information + from the config that was used to create this RAID + set. 
+ */ + + raidPtr->serial_number = clabel->serial_number; + + raid_init_component_label(raidPtr, ci_label); + ci_label->serial_number = clabel->serial_number; + + for(row=0;row<raidPtr->numRow;row++) { + ci_label->row = row; + for(column=0;column<raidPtr->numCol;column++) { + diskPtr = &raidPtr->Disks[row][column]; + if (!RF_DEAD_DISK(diskPtr->status)) { + ci_label->partitionSize = + diskPtr->partitionSize; + ci_label->column = column; + raidwrite_component_label( + raidPtr->Disks[row][column].dev, + raidPtr->raid_cinfo[row][column].ci_vp, + ci_label ); + } + } + } + + FREE(ci_label, M_RAIDFRAME); + return (retcode); + case RAIDFRAME_SET_AUTOCONFIG: + d = rf_set_autoconfig(raidPtr, *(int *) data); + rf_printf(1, "New autoconfig value is: %d\n", d); + *(int *) data = d; + return (retcode); + + case RAIDFRAME_SET_ROOT: + d = rf_set_rootpartition(raidPtr, *(int *) data); + rf_printf(1, "New rootpartition value is: %d\n", d); + *(int *) data = d; + return (retcode); + + /* initialize all parity */ + case RAIDFRAME_REWRITEPARITY: + + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* Parity for RAID 0 is trivially correct */ + raidPtr->parity_good = RF_RAID_CLEAN; + return(0); + } + + if (raidPtr->parity_rewrite_in_progress == 1) { + /* Re-write is already in progress! 
*/ + return(EINVAL); + } + + retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, + rf_RewriteParityThread, + raidPtr,"raid_parity"); + return (retcode); + + + case RAIDFRAME_ADD_HOT_SPARE: + MALLOC(hot_spare, RF_SingleComponent_t *, + sizeof(RF_SingleComponent_t), M_RAIDFRAME, + M_WAITOK | M_ZERO); + sparePtr = (RF_SingleComponent_t *) data; + memcpy( hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); + retcode = rf_add_hot_spare(raidPtr, hot_spare); + FREE(hot_spare, M_RAIDFRAME); + return(retcode); + + case RAIDFRAME_REMOVE_HOT_SPARE: + return(retcode); + + case RAIDFRAME_DELETE_COMPONENT: + MALLOC(component, RF_SingleComponent_t *, + sizeof(RF_SingleComponent_t), M_RAIDFRAME, + M_WAITOK | M_ZERO); + componentPtr = (RF_SingleComponent_t *)data; + memcpy( component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_delete_component(raidPtr, component); + FREE(component, M_RAIDFRAME); + return(retcode); + + case RAIDFRAME_INCORPORATE_HOT_SPARE: + MALLOC(component, RF_SingleComponent_t *, + sizeof(RF_SingleComponent_t), M_RAIDFRAME, + M_WAITOK | M_ZERO); + componentPtr = (RF_SingleComponent_t *)data; + memcpy( component, componentPtr, + sizeof(RF_SingleComponent_t)); + retcode = rf_incorporate_hot_spare(raidPtr, component); + FREE(component, M_RAIDFRAME); + return(retcode); + + case RAIDFRAME_REBUILD_IN_PLACE: + + MALLOC(component, RF_SingleComponent_t *, + sizeof(RF_SingleComponent_t), M_RAIDFRAME, + M_WAITOK | M_ZERO); + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* Can't do this on a RAID 0!! */ + FREE(component, M_RAIDFRAME); + return(EINVAL); + } + + if (raidPtr->recon_in_progress == 1) { + /* a reconstruct is already in progress! 
*/ + FREE(component, M_RAIDFRAME); + return(EINVAL); + } + + componentPtr = (RF_SingleComponent_t *) data; + memcpy( component, componentPtr, + sizeof(RF_SingleComponent_t)); + row = component->row; + column = component->column; + unit = raidPtr->raidid; + rf_printf(0, "raid%d Rebuild: %d %d\n", unit, row, column); + if ((row < 0) || (row >= raidPtr->numRow) || + (column < 0) || (column >= raidPtr->numCol)) { + FREE(component, M_RAIDFRAME); + return(EINVAL); + } + + RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); + if (rrcopy == NULL) { + FREE(component, M_RAIDFRAME); + return(ENOMEM); + } + + rrcopy->raidPtr = (void *) raidPtr; + rrcopy->row = row; + rrcopy->col = column; + + retcode = RF_CREATE_THREAD(raidPtr->recon_thread, + rf_ReconstructInPlaceThread, + rrcopy,"raid_reconip"); + FREE(component, M_RAIDFRAME); + return(retcode); + + case RAIDFRAME_GET_UNIT: + + *(int *)data = raidPtr->raidid; + return (0); + + case RAIDFRAME_GET_INFO: + if (!raidPtr->valid) + return (ENODEV); + ucfgp = (RF_DeviceConfig_t **) data; + RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), + (RF_DeviceConfig_t *)); + if (d_cfg == NULL) + return (ENOMEM); + bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t)); + d_cfg->rows = raidPtr->numRow; + d_cfg->cols = raidPtr->numCol; + d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; + if (d_cfg->ndevs >= RF_MAX_DISKS) { + RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); + return (ENOMEM); + } + d_cfg->nspares = raidPtr->numSpare; + if (d_cfg->nspares >= RF_MAX_DISKS) { + RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); + return (ENOMEM); + } + d_cfg->maxqdepth = raidPtr->maxQueueDepth; + d = 0; + for (i = 0; i < d_cfg->rows; i++) { + for (j = 0; j < d_cfg->cols; j++) { + d_cfg->devs[d] = raidPtr->Disks[i][j]; + d++; + } + } + for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { + d_cfg->spares[i] = raidPtr->Disks[0][j]; + } + + retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); + + RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); + + return 
(retcode); + + case RAIDFRAME_CHECK_PARITY: + *(int *) data = raidPtr->parity_good; + return (0); + + case RAIDFRAME_RESET_ACCTOTALS: + bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals)); + return (0); + + case RAIDFRAME_GET_ACCTOTALS: + totals = (RF_AccTotals_t *) data; + *totals = raidPtr->acc_totals; + return (0); + + case RAIDFRAME_KEEP_ACCTOTALS: + raidPtr->keep_acc_totals = *(int *)data; + return (0); + + case RAIDFRAME_GET_SIZE: + *(int *) data = raidPtr->totalSectors; + return (0); + + /* fail a disk & optionally start reconstruction */ + case RAIDFRAME_FAIL_DISK: + + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* Can't do this on a RAID 0!! */ + return(EINVAL); + } + + rr = (struct rf_recon_req *) data; + + if (rr->row < 0 || rr->row >= raidPtr->numRow + || rr->col < 0 || rr->col >= raidPtr->numCol) + return (EINVAL); + + rf_printf(0, "%s: Failing the disk: row: %d col: %d\n", + dev->si_name, rr->row, rr->col); + + /* make a copy of the recon request so that we don't rely on + * the user's buffer */ + RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); + if (rrcopy == NULL) + return(ENOMEM); + bcopy(rr, rrcopy, sizeof(*rr)); + rrcopy->raidPtr = (void *) raidPtr; + + retcode = RF_CREATE_THREAD(raidPtr->recon_thread, + rf_ReconThread, + rrcopy,"raid_recon"); + return (0); + + /* invoke a copyback operation after recon on whatever disk + * needs it, if any */ + case RAIDFRAME_COPYBACK: + + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* This makes no sense on a RAID 0!! */ + return(EINVAL); + } + + if (raidPtr->copyback_in_progress == 1) { + /* Copyback is already in progress! 
*/ + return(EINVAL); + } + + retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, + rf_CopybackThread, + raidPtr,"raid_copyback"); + return (retcode); + + /* return the percentage completion of reconstruction */ + case RAIDFRAME_CHECK_RECON_STATUS: + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* This makes no sense on a RAID 0, so tell the + user it's done. */ + *(int *) data = 100; + return(0); + } + row = 0; /* XXX we only consider a single row... */ + if (raidPtr->status[row] != rf_rs_reconstructing) + *(int *) data = 100; + else + *(int *) data = raidPtr->reconControl[row]->percentComplete; + return (0); + case RAIDFRAME_CHECK_RECON_STATUS_EXT: + row = 0; /* XXX we only consider a single row... */ + if (raidPtr->status[row] != rf_rs_reconstructing) { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } else { + progressInfo.total = + raidPtr->reconControl[row]->numRUsTotal; + progressInfo.completed = + raidPtr->reconControl[row]->numRUsComplete; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } + bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); + return (retcode); + + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* This makes no sense on a RAID 0, so tell the + user it's done. 
*/ + *(int *) data = 100; + return(0); + } + if (raidPtr->parity_rewrite_in_progress == 1) { + *(int *) data = 100 * + raidPtr->parity_rewrite_stripes_done / + raidPtr->Layout.numStripe; + } else { + *(int *) data = 100; + } + return (0); + + case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: + if (raidPtr->parity_rewrite_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->parity_rewrite_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); + return (retcode); + + case RAIDFRAME_CHECK_COPYBACK_STATUS: + if (raidPtr->Layout.map->faultsTolerated == 0) { + /* This makes no sense on a RAID 0 */ + *(int *) data = 100; + return(0); + } + if (raidPtr->copyback_in_progress == 1) { + *(int *) data = 100 * raidPtr->copyback_stripes_done / + raidPtr->Layout.numStripe; + } else { + *(int *) data = 100; + } + return (0); + + case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: + if (raidPtr->copyback_in_progress == 1) { + progressInfo.total = raidPtr->Layout.numStripe; + progressInfo.completed = + raidPtr->copyback_stripes_done; + progressInfo.remaining = progressInfo.total - + progressInfo.completed; + } else { + progressInfo.remaining = 0; + progressInfo.completed = 100; + progressInfo.total = 100; + } + bcopy((caddr_t) &progressInfo, data, sizeof(RF_ProgressInfo_t)); + return (retcode); + + /* the sparetable daemon calls this to wait for the kernel to + * need a spare table. this ioctl does not return until a + * spare table is needed. XXX -- calling mpsleep here in the + * ioctl code is almost certainly wrong and evil. 
-- XXX XXX + * -- I should either compute the spare table in the kernel, + * or have a different -- XXX XXX -- interface (a different + * character device) for delivering the table -- XXX */ +#if 0 + case RAIDFRAME_SPARET_WAIT: + RF_LOCK_MUTEX(rf_sparet_wait_mutex); + while (!rf_sparet_wait_queue) + mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); + waitreq = rf_sparet_wait_queue; + rf_sparet_wait_queue = rf_sparet_wait_queue->next; + RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); + + /* structure assignment */ + *((RF_SparetWait_t *) data) = *waitreq; + + RF_Free(waitreq, sizeof(*waitreq)); + return (0); + + /* wakes up a process waiting on SPARET_WAIT and puts an error + * code in it that will cause the dameon to exit */ + case RAIDFRAME_ABORT_SPARET_WAIT: + RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); + waitreq->fcol = -1; + RF_LOCK_MUTEX(rf_sparet_wait_mutex); + waitreq->next = rf_sparet_wait_queue; + rf_sparet_wait_queue = waitreq; + RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); + wakeup(&rf_sparet_wait_queue); + return (0); + + /* used by the spare table daemon to deliver a spare table + * into the kernel */ + case RAIDFRAME_SEND_SPARET: + + /* install the spare table */ + retcode = rf_SetSpareTable(raidPtr, *(void **) data); + + /* respond to the requestor. the return status of the spare + * table installation is passed in the "fcol" field */ + RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); + waitreq->fcol = retcode; + RF_LOCK_MUTEX(rf_sparet_wait_mutex); + waitreq->next = rf_sparet_resp_queue; + rf_sparet_resp_queue = waitreq; + wakeup(&rf_sparet_resp_queue); + RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); + + return (retcode); +#endif + + default: + retcode = ENOIOCTL; + break; /* fall through to the os-specific code below */ + + } + + return (retcode); + +} + + +/* raidinit -- complete the rest of the initialization for the + RAIDframe device. 
*/ + + +static dev_t +raidinit(raidPtr) + RF_Raid_t *raidPtr; +{ + struct raid_softc *sc; + dev_t diskdev; + + RF_Malloc(sc, sizeof(struct raid_softc), (struct raid_softc *)); + if (sc == NULL) { + rf_printf(1, "No memory for raid device\n"); + return(NULL); + } + + sc->raidPtr = raidPtr; + + /* XXX Should check return code here */ + bioq_init(&sc->bio_queue); + sc->sc_cbufpool = uma_zcreate("raidpl", sizeof(struct raidbuf), NULL, + NULL, NULL, NULL, 0, 0); + + /* XXX There may be a weird interaction here between this, and + * protectedSectors, as used in RAIDframe. */ + + sc->sc_size = raidPtr->totalSectors; + + /* Create the disk device */ + diskdev = disk_create(raidPtr->raidid, &sc->sc_dkdev, 0, &raid_cdevsw, + &raiddisk_cdevsw); + if (diskdev == NODEV) { + rf_printf(1, "disk_create failed\n"); + return (NULL); + } + sc->sc_dkdev.d_dev->si_drv1 = sc; + sc->sc_dev = diskdev; + raidPtr->sc = sc; + + /* Register with devstat */ + devstat_add_entry(&sc->device_stats, "raid", raidPtr->raidid, 0, + DEVSTAT_NO_BLOCKSIZE | DEVSTAT_NO_ORDERED_TAGS, + DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_ARRAY); + + return (diskdev); +} + +/* wake up the daemon & tell it to get us a spare table + * XXX + * the entries in the queues should be tagged with the raidPtr + * so that in the extremely rare case that two recons happen at once, + * we know for which device were requesting a spare table + * XXX + * + * XXX This code is not currently used. 
GO + */ +int +rf_GetSpareTableFromDaemon(req) + RF_SparetWait_t *req; +{ + int retcode; + + RF_LOCK_MUTEX(rf_sparet_wait_mutex); + req->next = rf_sparet_wait_queue; + rf_sparet_wait_queue = req; + wakeup(&rf_sparet_wait_queue); + + /* mpsleep unlocks the mutex */ + while (!rf_sparet_resp_queue) { + tsleep(&rf_sparet_resp_queue, PRIBIO, + "raidframe getsparetable", 0); + } + req = rf_sparet_resp_queue; + rf_sparet_resp_queue = req->next; + RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); + + retcode = req->fcol; + RF_Free(req, sizeof(*req)); /* this is not the same req as we + * alloc'd */ + return (retcode); +} + +/* a wrapper around rf_DoAccess that extracts appropriate info from the + * bp & passes it down. + * any calls originating in the kernel must use non-blocking I/O + * do some extra sanity checking to return "appropriate" error values for + * certain conditions (to make some standard utilities work) + * + * Formerly known as: rf_DoAccessKernel + */ +void +raidstart(raidPtr) + RF_Raid_t *raidPtr; +{ + RF_SectorCount_t num_blocks, pb, sum; + RF_RaidAddr_t raid_addr; + struct raid_softc *sc; + struct bio *bp; + daddr_t blocknum; + int unit, retcode, do_async; + + unit = raidPtr->raidid; + sc = raidPtr->sc; + + /* quick check to see if anything has died recently */ + RF_LOCK_MUTEX(raidPtr->mutex); + if (raidPtr->numNewFailures > 0) { + raidPtr->numNewFailures--; + RF_UNLOCK_MUTEX(raidPtr->mutex); + rf_update_component_labels(raidPtr, + RF_NORMAL_COMPONENT_UPDATE); + } else + RF_UNLOCK_MUTEX(raidPtr->mutex); + + /* Check to see if we're at the limit... */ + RF_LOCK_MUTEX(raidPtr->mutex); + while (raidPtr->openings > 0) { + RF_UNLOCK_MUTEX(raidPtr->mutex); + + /* get the next item, if any, from the queue */ + if ((bp = bioq_first(&sc->bio_queue)) == NULL) { + /* nothing more to do */ + return; + } + bioq_remove(&sc->bio_queue, bp); + + /* Ok, for the bp we have here, bp->b_blkno is relative to the + * partition.. Need to make it absolute to the underlying + * device.. 
*/ + + blocknum = bp->bio_blkno; +#if 0 /* XXX Is this needed? */ + if (DISKPART(bp->bio_dev) != RAW_PART) { + struct partition *pp; + pp = &sc->sc_dkdev.d_label.d_partitions[DISKPART( + bp->bio_dev)]; + blocknum += pp->p_offset; + } +#endif + + rf_printf(3, "Blocks: %ld, %ld\n", (long)bp->bio_blkno, (long)blocknum); + + rf_printf(3, "bp->bio_bcount = %d\n", (int) bp->bio_bcount); + rf_printf(3, "bp->bio_resid = %d\n", (int) bp->bio_resid); + + /* *THIS* is where we adjust what block we're going to... + * but DO NOT TOUCH bp->bio_blkno!!! */ + raid_addr = blocknum; + + num_blocks = bp->bio_bcount >> raidPtr->logBytesPerSector; + pb = (bp->bio_bcount & raidPtr->sectorMask) ? 1 : 0; + sum = raid_addr + num_blocks + pb; + if (rf_debugKernelAccess) { + rf_printf(0, "raid_addr=0x%x sum=%d num_blocks=%d(+%d) " + "(%d)\n", (int)raid_addr, (int)sum, + (int)num_blocks, (int)pb, + (int)bp->bio_resid); + } + if ((sum > raidPtr->totalSectors) || (sum < raid_addr) + || (sum < num_blocks) || (sum < pb)) { + bp->bio_error = ENOSPC; + bp->bio_flags |= BIO_ERROR; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + RF_LOCK_MUTEX(raidPtr->mutex); + continue; + } + /* + * XXX rf_DoAccess() should do this, not just DoAccessKernel() + */ + + if (bp->bio_bcount & raidPtr->sectorMask) { + bp->bio_error = EINVAL; + bp->bio_flags |= BIO_ERROR; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + RF_LOCK_MUTEX(raidPtr->mutex); + continue; + + } + rf_printf(3, "Calling DoAccess..\n"); + + + RF_LOCK_MUTEX(raidPtr->mutex); + raidPtr->openings--; + RF_UNLOCK_MUTEX(raidPtr->mutex); + + /* + * Everything is async. + */ + do_async = 1; + + devstat_start_transaction(&sc->device_stats); + + /* XXX we're still at splbio() here... do we *really* + need to be? */ + + /* don't ever condition on bp->bio_cmd & BIO_WRITE. + * always condition on BIO_READ instead */ + + retcode = rf_DoAccess(raidPtr, (bp->bio_cmd & BIO_READ) ? 
+ RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, + do_async, raid_addr, num_blocks, + bp->bio_data, bp, NULL, NULL, + RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL); + + + RF_LOCK_MUTEX(raidPtr->mutex); + } + RF_UNLOCK_MUTEX(raidPtr->mutex); +} + + + + +/* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ + +int +rf_DispatchKernelIO(queue, req) + RF_DiskQueue_t *queue; + RF_DiskQueueData_t *req; +{ + int op = (req->type == RF_IO_TYPE_READ) ? BIO_READ : BIO_WRITE; + struct bio *bp; + struct raidbuf *raidbp = NULL; + struct raid_softc *sc; + + /* XXX along with the vnode, we also need the softc associated with + * this device.. */ + + req->queue = queue; + + sc = queue->raidPtr->sc; + + rf_printf(3, "DispatchKernelIO %s\n", sc->sc_dev->si_name); + + bp = req->bp; +#if 1 + /* XXX when there is a physical disk failure, someone is passing us a + * buffer that contains old stuff!! Attempt to deal with this problem + * without taking a performance hit... (not sure where the real bug + * is. It's buried in RAIDframe somewhere) :-( GO ) */ + + if (bp->bio_flags & BIO_ERROR) { + bp->bio_flags &= ~BIO_ERROR; + } + if (bp->bio_error != 0) { + bp->bio_error = 0; + } +#endif + raidbp = RAIDGETBUF(sc); + + raidbp->rf_flags = 0; /* XXX not really used anywhere... */ + + /* + * context for raidiodone + */ + raidbp->rf_obp = bp; + raidbp->req = req; + +#if 0 /* XXX */ + LIST_INIT(&raidbp->rf_buf.b_dep); +#endif + + switch (req->type) { + case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ + /* XXX need to do something extra here.. */ + /* I'm leaving this in, as I've never actually seen it used, + * and I'd like folks to report it... GO */ + rf_printf(2, "WAKEUP CALLED\n"); + queue->numOutstanding++; + + /* XXX need to glue the original buffer into this? 
*/ + + KernelWakeupFunc(&raidbp->rf_buf); + break; + + case RF_IO_TYPE_READ: + case RF_IO_TYPE_WRITE: + + if (req->tracerec) { + RF_ETIMER_START(req->tracerec->timer); + } + InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, + op | bp->bio_cmd, queue->rf_cinfo->ci_dev, + req->sectorOffset, req->numSector, + req->buf, KernelWakeupFunc, (void *) req, + queue->raidPtr->logBytesPerSector, req->b_proc); + + if (rf_debugKernelAccess) { + rf_printf(0, "dispatch: bp->bio_blkno = %ld\n", + (long) bp->bio_blkno); + } + queue->numOutstanding++; + queue->last_deq_sector = req->sectorOffset; + /* acc wouldn't have been let in if there were any pending + * reqs at any other priority */ + queue->curPriority = req->priority; + + rf_printf(3, "Going for %c to %s row %d col %d\n", + req->type, sc->sc_dev->si_name, queue->row, queue->col); + rf_printf(3, "sector %d count %d (%d bytes) %d\n", + (int) req->sectorOffset, (int) req->numSector, + (int) (req->numSector << + queue->raidPtr->logBytesPerSector), + (int) queue->raidPtr->logBytesPerSector); +#if 0 /* XXX */ + if ((raidbp->rf_buf.bio_cmd & BIO_READ) == 0) { + raidbp->rf_buf.b_vp->v_numoutput++; + } +#endif + BIO_STRATEGY(&raidbp->rf_buf, 0); + + break; + + default: + panic("bad req->type in rf_DispatchKernelIO"); + } + rf_printf(3, "Exiting from DispatchKernelIO\n"); + /* splx(s); */ /* want to test this */ + return (0); +} +/* this is the callback function associated with a I/O invoked from + kernel code. + */ +static void +KernelWakeupFunc(vbp) + struct bio *vbp; +{ + RF_DiskQueueData_t *req = NULL; + RF_DiskQueue_t *queue; + struct raidbuf *raidbp = (struct raidbuf *) vbp; + struct bio *bp; + struct raid_softc *sc; + int s; + + s = splbio(); + rf_printf(2, "recovering the request queue:\n"); + req = raidbp->req; + + bp = raidbp->rf_obp; + queue = (RF_DiskQueue_t *) req->queue; + sc = queue->raidPtr->sc; + + if (raidbp->rf_buf.bio_flags & BIO_ERROR) { + bp->bio_flags |= BIO_ERROR; + bp->bio_error = raidbp->rf_buf.bio_error ? 
+ raidbp->rf_buf.bio_error : EIO; + } + + /* XXX methinks this could be wrong... */ +#if 1 + bp->bio_resid = raidbp->rf_buf.bio_resid; +#endif + + if (req->tracerec) { + RF_ETIMER_STOP(req->tracerec->timer); + RF_ETIMER_EVAL(req->tracerec->timer); + RF_LOCK_MUTEX(rf_tracing_mutex); + req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); + req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); + req->tracerec->num_phys_ios++; + RF_UNLOCK_MUTEX(rf_tracing_mutex); + } + bp->bio_bcount = raidbp->rf_buf.bio_bcount; /* XXXX ? */ + + /* XXX Ok, let's get aggressive... If BIO_ERROR is set, let's go + * ballistic, and mark the component as hosed... */ + + if (bp->bio_flags & BIO_ERROR) { + /* Mark the disk as dead */ + /* but only mark it once... */ + if (queue->raidPtr->Disks[queue->row][queue->col].status == + rf_ds_optimal) { + rf_printf(0, "%s: IO Error. Marking %s as " + "failed.\n", sc->sc_dev->si_name, queue->raidPtr-> + Disks[queue->row][queue->col].devname); + queue->raidPtr->Disks[queue->row][queue->col].status = + rf_ds_failed; + queue->raidPtr->status[queue->row] = rf_rs_degraded; + queue->raidPtr->numFailures++; + queue->raidPtr->numNewFailures++; + } else { /* Disk is already dead... */ + /* printf("Disk already marked as dead!\n"); */ + } + + } + + RAIDPUTBUF(sc, raidbp); + + rf_DiskIOComplete(queue, req, (bp->bio_flags & BIO_ERROR) ? 1 : 0); + (req->CompleteFunc)(req->argument, (bp->bio_flags & BIO_ERROR) ? 1 : 0); + + splx(s); +} + + + +/* + * initialize a buf structure for doing an I/O in the kernel. 
 */
/*
 * Fill in a struct bio for a single component I/O issued by
 * rf_DispatchKernelIO().  cbFunc becomes bio_done (the completion
 * callback); cbArg, b_vp and b_proc are currently unused -- the
 * commented-out assignments below are leftovers from the NetBSD
 * buf-based interface.
 */
static void
InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
    logBytesPerSector, b_proc)
	struct bio *bp;
	struct vnode *b_vp;
	unsigned rw_flag;
	dev_t dev;
	RF_SectorNum_t startSect;
	RF_SectorCount_t numSect;
	caddr_t buf;
	void (*cbFunc) (struct bio *);
	void *cbArg;
	int logBytesPerSector;
	struct proc *b_proc;
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->bio_cmd = rw_flag;	/* XXX need B_PHYS here too? */
	/* byte count = sectors * 2^logBytesPerSector */
	bp->bio_bcount = numSect << logBytesPerSector;
#if 0	/* XXX */
	bp->bio_bufsize = bp->bio_bcount;
#endif
	bp->bio_error = 0;
	bp->bio_dev = dev;
	bp->bio_data = buf;
	bp->bio_blkno = startSect;
	bp->bio_resid = bp->bio_bcount;	/* XXX is this right!?!?!! */
	/* A zero-length transfer indicates a caller bug upstream. */
	if (bp->bio_bcount == 0) {
		panic("bp->bio_bcount is zero in InitBP!!\n");
	}
/*
	bp->b_proc = b_proc;
	bp->b_vp = b_vp;
*/
	bp->bio_done = cbFunc;

}

/*
 * Fabricate disk geometry for the array from the RAIDframe layout:
 * media size in bytes, the component sector size, one stripe's worth
 * of data sectors per "track", and a synthetic head count.
 */
static void
raidgetdefaultlabel(raidPtr, sc, dp)
	RF_Raid_t *raidPtr;
	struct raid_softc *sc;
	struct disk *dp;
{
	rf_printf(1, "Building a default label...\n");
	if (dp == NULL)
		panic("raidgetdefaultlabel(): dp is NULL\n");

	/* fabricate a label... */
	dp->d_mediasize = raidPtr->totalSectors * raidPtr->bytesPerSector;
	dp->d_sectorsize = raidPtr->bytesPerSector;
	dp->d_fwsectors = raidPtr->Layout.dataSectorsPerStripe;
	dp->d_fwheads = 4 * raidPtr->numCol;

}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
+ * You'll find the original of this in ccd.c + */ +int +raidlookup(path, td, vpp) + char *path; + struct thread *td; + struct vnode **vpp; /* result */ +{ + struct nameidata *nd; + struct vnode *vp; + struct vattr *va; + struct proc *p; + int error = 0, flags; + + MALLOC(nd, struct nameidata *, sizeof(struct nameidata), M_TEMP, M_NOWAIT | M_ZERO); + MALLOC(va, struct vattr *, sizeof(struct vattr), M_TEMP, M_NOWAIT | M_ZERO); + if ((nd == NULL) || (va == NULL)) { + printf("Out of memory?\n"); + return (ENOMEM); + } + + /* Sanity check the p_fd fields. This is really just a hack */ + p = td->td_proc; + if (!p->p_fd->fd_rdir || !p->p_fd->fd_cdir) + printf("Warning: p_fd fields not set\n"); + + if (!td->td_proc->p_fd->fd_rdir) + p->p_fd->fd_rdir = rootvnode; + + if (!p->p_fd->fd_cdir) + p->p_fd->fd_cdir = rootvnode; + + NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, curthread); + flags = FREAD | FWRITE; + if ((error = vn_open(nd, &flags, 0)) != 0) { + rf_printf(2, "RAIDframe: vn_open returned %d\n", error); + goto end1; + } + vp = nd->ni_vp; + if (vp->v_usecount > 1) { + rf_printf(1, "raidlookup() vp->v_usecount= %d\n", vp->v_usecount); + error = EBUSY; + goto end; + } + if ((error = VOP_GETATTR(vp, va, td->td_ucred, td)) != 0) { + rf_printf(1, "raidlookup() VOP_GETATTR returned %d", error); + goto end; + } + /* XXX: eventually we should handle VREG, too. */ + if (va->va_type != VCHR) { + rf_printf(1, "Returning ENOTBLK\n"); + error = ENOTBLK; + } + *vpp = vp; + +end: + VOP_UNLOCK(vp, 0, td); + NDFREE(nd, NDF_ONLY_PNBUF); +end1: + FREE(nd, M_TEMP); + FREE(va, M_TEMP); + return (error); +} +/* + * Wait interruptibly for an exclusive lock. + * + * XXX + * Several drivers do this; it should be abstracted and made MP-safe. + * (Hmm... 
where have we seen this warning before :-> GO ) + */ +static int +raidlock(sc) + struct raid_softc *sc; +{ + int error; + + while ((sc->sc_flags & RAIDF_LOCKED) != 0) { + sc->sc_flags |= RAIDF_WANTED; + if ((error = + tsleep(sc, PRIBIO | PCATCH, "raidlck", 0)) != 0) + return (error); + } + sc->sc_flags |= RAIDF_LOCKED; + return (0); +} +/* + * Unlock and wake up any waiters. + */ +static void +raidunlock(sc) + struct raid_softc *sc; +{ + + sc->sc_flags &= ~RAIDF_LOCKED; + if ((sc->sc_flags & RAIDF_WANTED) != 0) { + sc->sc_flags &= ~RAIDF_WANTED; + wakeup(sc); + } +} + + +#define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ +#define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ + +int +raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) +{ + RF_ComponentLabel_t *clabel; + + MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT | M_ZERO); + if (clabel == NULL) { + printf("raidmarkclean: Out of memory?\n"); + return (ENOMEM); + } + + raidread_component_label(dev, b_vp, clabel); + clabel->mod_counter = mod_counter; + clabel->clean = RF_RAID_CLEAN; + raidwrite_component_label(dev, b_vp, clabel); + FREE(clabel, M_RAIDFRAME); + return(0); +} + + +int +raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) +{ + RF_ComponentLabel_t *clabel; + + MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT | M_ZERO); + if (clabel == NULL) { + printf("raidmarkclean: Out of memory?\n"); + return (ENOMEM); + } + + raidread_component_label(dev, b_vp, clabel); + clabel->mod_counter = mod_counter; + clabel->clean = RF_RAID_DIRTY; + raidwrite_component_label(dev, b_vp, clabel); + FREE(clabel, M_RAIDFRAME); + return(0); +} + +/* ARGSUSED */ +int +raidread_component_label(dev, b_vp, clabel) + dev_t dev; + struct vnode *b_vp; + RF_ComponentLabel_t *clabel; +{ + struct buf *bp; + int error; + + /* XXX should probably ensure that we don't try to do this if + someone has changed rf_protected_sectors. 
*/ + + if (b_vp == NULL) { + /* For whatever reason, this component is not valid. + Don't try to read a component label from it. */ + return(EINVAL); + } + + /* get a block of the appropriate size... */ + bp = geteblk((int)RF_COMPONENT_INFO_SIZE); + bp->b_dev = dev; + + /* get our ducks in a row for the read */ + bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; + bp->b_bcount = RF_COMPONENT_INFO_SIZE; + bp->b_iocmd = BIO_READ; + bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; + + DEV_STRATEGY(bp, 0); + error = bufwait(bp); + + if (!error) { + memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t)); +#if 0 + rf_print_component_label( clabel ); +#endif + } else { +#if 0 + rf_printf(0, "Failed to read RAID component label!\n"); +#endif + } + + bp->b_flags |= B_INVAL | B_AGE; + brelse(bp); + return(error); +} +/* ARGSUSED */ +int +raidwrite_component_label(dev, b_vp, clabel) + dev_t dev; + struct vnode *b_vp; + RF_ComponentLabel_t *clabel; +{ + struct buf *bp; + int error; + + /* get a block of the appropriate size... 
*/ + bp = geteblk((int)RF_COMPONENT_INFO_SIZE); + bp->b_dev = dev; + + /* get our ducks in a row for the write */ + bp->b_flags = 0; + bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; + bp->b_bcount = RF_COMPONENT_INFO_SIZE; + bp->b_iocmd = BIO_WRITE; + bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; + + memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); + + memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); + + DEV_STRATEGY(bp, 0); + error = bufwait(bp); + + bp->b_flags |= B_INVAL | B_AGE; + brelse(bp); + if (error) { +#if 1 + rf_printf(0, "Failed to write RAID component info!\n"); + rf_printf(0, "b_error= %d\n", bp->b_error); +#endif + } + + return(error); +} + +void +rf_markalldirty(raidPtr) + RF_Raid_t *raidPtr; +{ + RF_ComponentLabel_t *clabel; + int r,c; + + MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT | M_ZERO); + + if (clabel == NULL) { + printf("rf_markalldirty: Out of memory?\n"); + return; + } + + raidPtr->mod_counter++; + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + /* we don't want to touch (at all) a disk that has + failed */ + if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { + raidread_component_label( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + clabel); + if (clabel->status == rf_ds_spared) { + /* XXX do something special... + but whatever you do, don't + try to access it!! 
*/ + } else { +#if 0 + clabel->status = + raidPtr->Disks[r][c].status; + raidwrite_component_label( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + clabel); +#endif + raidmarkdirty( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); + } + } + } + } + /* printf("Component labels marked dirty.\n"); */ +#if 0 + for( c = 0; c < raidPtr->numSpare ; c++) { + sparecol = raidPtr->numCol + c; + if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { + /* + + XXX this is where we get fancy and map this spare + into it's correct spot in the array. + + */ + /* + + we claim this disk is "optimal" if it's + rf_ds_used_spare, as that means it should be + directly substitutable for the disk it replaced. + We note that too... + + */ + + for(i=0;i<raidPtr->numRow;i++) { + for(j=0;j<raidPtr->numCol;j++) { + if ((raidPtr->Disks[i][j].spareRow == + r) && + (raidPtr->Disks[i][j].spareCol == + sparecol)) { + srow = r; + scol = sparecol; + break; + } + } + } + + raidread_component_label( + raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp, + &clabel); + /* make sure status is noted */ + clabel.version = RF_COMPONENT_LABEL_VERSION; + clabel.mod_counter = raidPtr->mod_counter; + clabel.serial_number = raidPtr->serial_number; + clabel.row = srow; + clabel.column = scol; + clabel.num_rows = raidPtr->numRow; + clabel.num_columns = raidPtr->numCol; + clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ + clabel.status = rf_ds_optimal; + raidwrite_component_label( + raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp, + &clabel); + raidmarkclean( raidPtr->Disks[r][sparecol].dev, + raidPtr->raid_cinfo[r][sparecol].ci_vp); + } + } + +#endif + FREE(clabel, M_RAIDFRAME); +} + + +void +rf_update_component_labels(raidPtr, final) + RF_Raid_t *raidPtr; + int final; +{ + RF_ComponentLabel_t *clabel; + int sparecol; + int r,c; + int i,j; + int srow, scol; + + srow = -1; + scol = -1; + + MALLOC(clabel, 
RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t), + M_RAIDFRAME, M_NOWAIT | M_ZERO); + if (clabel == NULL) { + printf("rf_update_component_labels: Out of memory?\n"); + return; + } + + /* XXX should do extra checks to make sure things really are clean, + rather than blindly setting the clean bit... */ + + raidPtr->mod_counter++; + + for (r = 0; r < raidPtr->numRow; r++) { + for (c = 0; c < raidPtr->numCol; c++) { + if (raidPtr->Disks[r][c].status == rf_ds_optimal) { + raidread_component_label( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + clabel); + /* make sure status is noted */ + clabel->status = rf_ds_optimal; + /* bump the counter */ + clabel->mod_counter = raidPtr->mod_counter; + + raidwrite_component_label( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( + raidPtr->Disks[r][c].dev, + raidPtr->raid_cinfo[r][c].ci_vp, + raidPtr->mod_counter); + } + } + } + /* else we don't touch it.. */ + } + } + + for( c = 0; c < raidPtr->numSpare ; c++) { + sparecol = raidPtr->numCol + c; + if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { + /* + + we claim this disk is "optimal" if it's + rf_ds_used_spare, as that means it should be + directly substitutable for the disk it replaced. + We note that too... + + */ + + for(i=0;i<raidPtr->numRow;i++) { + for(j=0;j<raidPtr->numCol;j++) { + if ((raidPtr->Disks[i][j].spareRow == + 0) && + (raidPtr->Disks[i][j].spareCol == + sparecol)) { + srow = i; + scol = j; + break; + } + } + } + + /* XXX shouldn't *really* need this... 
*/ + raidread_component_label( + raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + clabel); + /* make sure status is noted */ + + raid_init_component_label(raidPtr, clabel); + + clabel->mod_counter = raidPtr->mod_counter; + clabel->row = srow; + clabel->column = scol; + clabel->status = rf_ds_optimal; + + raidwrite_component_label( + raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + clabel); + if (final == RF_FINAL_COMPONENT_UPDATE) { + if (raidPtr->parity_good == RF_RAID_CLEAN) { + raidmarkclean( raidPtr->Disks[0][sparecol].dev, + raidPtr->raid_cinfo[0][sparecol].ci_vp, + raidPtr->mod_counter); + } + } + } + } + FREE(clabel, M_RAIDFRAME); + rf_printf(1, "Component labels updated\n"); +} + +void +rf_close_component(raidPtr, vp, auto_configured) + RF_Raid_t *raidPtr; + struct vnode *vp; + int auto_configured; +{ + struct thread *td; + + td = raidPtr->engine_thread; + + if (vp != NULL) { + if (auto_configured == 1) { + VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); + + vrele(vp); + } else { + vn_close(vp, FREAD | FWRITE, td->td_ucred, td); + } + } else { + rf_printf(1, "vnode was NULL\n"); + } +} + + +void +rf_UnconfigureVnodes(raidPtr) + RF_Raid_t *raidPtr; +{ + int r,c; + struct thread *td; + struct vnode *vp; + int acd; + + + /* We take this opportunity to close the vnodes like we should.. 
 */

	td = raidPtr->engine_thread;

	/* Close and release the vnode of every regular component. */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			rf_printf(1, "Closing vnode for row: %d col: %d\n", r, c);
			vp = raidPtr->raid_cinfo[r][c].ci_vp;
			acd = raidPtr->Disks[r][c].auto_configured;
			rf_close_component(raidPtr, vp, acd);
			raidPtr->raid_cinfo[r][c].ci_vp = NULL;
			raidPtr->Disks[r][c].auto_configured = 0;
		}
	}
	/* Spares live in row 0 at columns numCol..numCol+numSpare-1. */
	for (r = 0; r < raidPtr->numSpare; r++) {
		rf_printf(1, "Closing vnode for spare: %d\n", r);
		vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel thread body: fail the component named in REQ and, if
 * RF_FDFLAGS_RECON is set in req->flags, kick off reconstruction.
 * Frees REQ and exits via RF_THREAD_EXIT; never returns.
 *
 * NOTE(review): Giant is taken here and never explicitly dropped;
 * presumably released at thread exit -- confirm.
 */
void
rf_ReconThread(req)
	struct rf_recon_req *req;
{
	RF_Raid_t *raidPtr;

	mtx_lock(&Giant);
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
	    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* XXX get rid of this! we don't need it at all.. */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;

	/* That's all... */
	RF_THREAD_EXIT(0);	/* does not return */
}

/*
 * Kernel thread body: rewrite all parity.  On success the array is
 * marked parity-clean; waiters on parity_rewrite_in_progress (see
 * waitShutdown) are woken when we finish.  Never returns.
 */
void
rf_RewriteParityThread(raidPtr)
	RF_Raid_t *raidPtr;
{
	int retcode;

	mtx_lock(&Giant);
	raidPtr->parity_rewrite_in_progress = 1;
	retcode = rf_RewriteParity(raidPtr);
	if (retcode) {
		rf_printf(0, "raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all...
*/ + RF_THREAD_EXIT(0); /* does not return */ +} + + +void +rf_CopybackThread(raidPtr) + RF_Raid_t *raidPtr; +{ + mtx_lock(&Giant); + raidPtr->copyback_in_progress = 1; + rf_CopybackReconstructedData(raidPtr); + raidPtr->copyback_in_progress = 0; + + /* That's all... */ + RF_THREAD_EXIT(0); /* does not return */ +} + + +void +rf_ReconstructInPlaceThread(req) + struct rf_recon_req *req; +{ + int retcode; + RF_Raid_t *raidPtr; + + mtx_lock(&Giant); + raidPtr = req->raidPtr; + raidPtr->recon_in_progress = 1; + retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); + RF_Free(req, sizeof(*req)); + raidPtr->recon_in_progress = 0; + + /* That's all... */ + RF_THREAD_EXIT(0); /* does not return */ +} + +RF_AutoConfig_t * +rf_find_raid_components() +{ + RF_AutoConfig_t *ac_list = NULL; +#if 0 /* XXX GEOM */ + struct vnode *vp; + struct disklabel *label; + struct diskslice *slice; + struct diskslices *slices; + struct disk *disk; + struct thread *td; + dev_t dev; + char *devname; + int error, j; + int nslices; + + td = curthread; + + MALLOC(label, struct disklabel *, sizeof(struct disklabel), + M_RAIDFRAME, M_NOWAIT|M_ZERO); + MALLOC(slices, struct diskslices *, sizeof(struct diskslices), + M_RAIDFRAME, M_NOWAIT|M_ZERO); + if ((label == NULL) || (slices == NULL)) { + printf("rf_find_raid_components: Out of Memory?\n"); + return (NULL); + } + + /* initialize the AutoConfig list */ + ac_list = NULL; + + /* we begin by trolling through *all* the disk devices on the system */ + + disk = NULL; + while ((disk = disk_enumerate(disk))) { + + /* we don't care about floppies... 
 */
		devname = disk->d_dev->si_name;
		if (!strncmp(devname, "fd", 2) ||
		    !strncmp(devname, "cd", 2) ||
		    !strncmp(devname, "acd", 3))
			continue;

		rf_printf(1, "Examining %s\n", disk->d_dev->si_name);
		if (bdevvp(disk->d_dev, &vp))
			panic("RAIDframe can't alloc vnode");
		vref(vp);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		if (error) {
			vput(vp);
			continue;
		}

		/* Ask the whole-disk device for its slice table. */
		error = VOP_IOCTL(vp, DIOCGSLICEINFO, (caddr_t)slices,
		    FREAD, td->td_ucred, td);
		/* NOTE(review): opened with FREAD but closed with
		   FREAD | FWRITE -- flags look mismatched; verify. */
		VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
		vrele(vp);
		if (error) {
			/* No slice table. */
			continue;
		}

		nslices = slices->dss_nslices;
		if ((nslices == 0) || (nslices > MAX_SLICES))
			continue;

		/* Iterate through the slices */
		for (j = 1; j < nslices; j++) {

			rf_printf(1, "Examining slice %d\n", j);
			slice = &slices->dss_slices[j - 1];
			dev = dkmodslice(disk->d_dev, j);
			if (bdevvp(dev, &vp))
				panic("RAIDframe can't alloc vnode");

			vref(vp);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
			VOP_UNLOCK(vp, 0, td);
			if (error) {
				/* NOTE(review): unlike the whole-disk
				   path above, the vnode reference is not
				   released here -- possible leak; verify. */
				continue;
			}

			error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)label,
			    FREAD, td->td_ucred, td);
			VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
			vrele(vp);
			if (error)
				continue;

			/* Got a disklabel; look for FS_RAID partitions. */
			rf_search_label(dev, label, &ac_list);
		}
	}

	FREE(label, M_RAIDFRAME);
	FREE(slices, M_RAIDFRAME);
#endif
	return (ac_list);
}

/*
 * Walk the partitions of `label' on device `dev'; for every partition
 * of type FS_RAID try to read a RAIDframe component label.  Components
 * whose labels pass rf_reasonable_label() are prepended to *ac_list.
 * For accepted components the vnode is deliberately left open (it is
 * released later, e.g. by rf_release_all_vps()).
 */
static void
rf_search_label(dev_t dev, struct disklabel *label, RF_AutoConfig_t **ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ComponentLabel_t *clabel;
	struct vnode *vp;
	struct thread *td;
	dev_t dev1;
	int i, error, good_one;

	td = curthread;

	/* Iterate through the partitions */
	for (i=0; i < label->d_npartitions; i++) {
		/* We only support partitions marked as RAID */
		if (label->d_partitions[i].p_fstype != FS_RAID)
			continue;

		dev1 = dkmodpart(dev, i);
		if (dev1 == NULL) {

			rf_printf(1, "dev1 == null\n");
			continue;
		}
		if (bdevvp(dev1, &vp))
			panic("RAIDframe can't alloc vnode");

		vref(vp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		if (error) {
			/* Whatever... */
			continue;
		}

		good_one = 0;

		clabel = (RF_ComponentLabel_t *)
		    malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
		    M_NOWAIT);
		if (clabel == NULL) {
			/* XXX CLEANUP HERE */
			panic("RAID autoconfig: no memory!\n");
		}

		if (!raidread_component_label(dev1, vp, clabel)) {
			/* Got the label.  Is it reasonable? */
			if (rf_reasonable_label(clabel) &&
			    (clabel->partitionSize <=
			     label->d_partitions[i].p_size)) {
				rf_printf(1, "Component on: %s: %d\n",
				    dev1->si_name, label->d_partitions[i].p_size);
				rf_print_component_label(clabel);
				/* if it's reasonable, add it, else ignore it */
				ac = (RF_AutoConfig_t *)
				    malloc(sizeof(RF_AutoConfig_t),
				    M_RAIDFRAME, M_NOWAIT);
				if (ac == NULL) {
					/* XXX should panic? */
					panic("RAID autoconfig: no memory!\n");
				}

				sprintf(ac->devname, "%s", dev->si_name);
				ac->dev = dev1;
				ac->vp = vp;
				ac->clabel = clabel;
				/* Prepend to the caller's autoconfig list. */
				ac->next = *ac_list;
				*ac_list = ac;
				good_one = 1;
			}
		}
		if (!good_one) {
			/* cleanup */
			free(clabel, M_RAIDFRAME);
			/* NOTE(review): opened with FREAD, closed with
			   FREAD | FWRITE -- flags look mismatched; verify. */
			VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
			vrele(vp);
		}
	}
}

/*
 * Sanity-check a component label: known version, valid clean flag, and
 * row/column/geometry values that are non-negative, positive where
 * required, and self-consistent.  Returns 1 if the label looks usable,
 * 0 otherwise.
 */
static int
rf_reasonable_label(clabel)
	RF_ComponentLabel_t *clabel;
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    clabel->numBlocks > 0) {
		/* label looks reasonable enough... 
		 */
		return(1);
	}
	return(0);
}


/*
 * Dump the contents of a component label via rf_printf() at debug
 * level 1.
 */
void
rf_print_component_label(clabel)
	RF_ComponentLabel_t *clabel;
{
	rf_printf(1, " Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	rf_printf(1, " Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	rf_printf(1, " Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status );
	rf_printf(1, " sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	rf_printf(1, " RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	rf_printf(1, " Autoconfig: %s\n", clabel->autoconfigure ? "Yes":"No");
	rf_printf(1, " Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No" );
	rf_printf(1, " Last configured as: raid%d\n", clabel->last_unit );
#if 0
	rf_printf(1, " Config order: %d\n", clabel->config_order);
#endif

}

/*
 * Partition the flat autoconfig list into configuration sets: each set
 * groups components whose labels agree (see rf_does_it_fit()).  Returns
 * a linked list of RF_ConfigSet_t; each set's `ac' list reuses the
 * RF_AutoConfig_t nodes from ac_list (their next pointers are rewritten).
 */
RF_ConfigSet_t *
rf_create_auto_sets(ac_list)
	RF_AutoConfig_t *ac_list;
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
			    malloc(sizeof(RF_ConfigSet_t),
			    M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!\n");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? 
			 */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
				    malloc(sizeof(RF_ConfigSet_t),
				    M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!\n");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Decide whether component `ac' belongs in configuration set `cset' by
 * comparing its label against the set's first member.  Returns 1 on a
 * match, 0 otherwise.
 */
static int
rf_does_it_fit(cset, ac)
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac;
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point. 

	 */

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    (clabel1->numBlocks == clabel2->numBlocks) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Check whether configuration set `cset' has enough live components (at
 * the newest mod_counter seen in the set) to be brought up.  For RAID 1
 * both halves of a mirror pair must not be missing; RAID 4/5 tolerate
 * at most one missing component; RAID 0 tolerates none.  Returns 1 if
 * the set is configurable, 0 otherwise.
 */
int
rf_have_enough_components(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int r,c;
	int num_rows;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_rows = cset->ac->clabel->num_rows;
	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. 
	 */

	/* The set's mod_counter is the maximum over all members;
	   members with a lower count are treated as stale/failed. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	for(r=0; r<num_rows; r++) {
		even_pair_failed = 0;
		for(c=0; c<num_cols; c++) {
			/* Look for a live component at (r,c) with the
			   current mod_counter. */
			ac = auto_config;
			while(ac!=NULL) {
				if ((ac->clabel->row == r) &&
				    (ac->clabel->column == c) &&
				    (ac->clabel->mod_counter == mod_counter)) {
					/* it's this one... */
					rf_printf(1, "Found: %s at %d,%d\n",
					    ac->devname,r,c);
					break;
				}
				ac=ac->next;
			}
			if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
				if (parity_type == '1') {
					if (c%2 == 0) {	/* even component */
						even_pair_failed = 1;
					} else {	/* odd component.  If
							   we're failed, and
							   so is the even
							   component, it's
							   "Good Night, Charlie" */
						if (even_pair_failed == 1) {
							return(0);
						}
					}
				} else {
					/* normal accounting */
					num_missing++;
				}
			}
			if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... 
				 */
				even_pair_failed = 0;
			}
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for an autoconfig set: geometry and layout
 * parameters are copied from the first component's label, the device
 * names from every member of the set.
 */
void
rf_create_configuration(ac,config,raidPtr)
	RF_AutoConfig_t *ac;
	RF_Config_t *config;
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows;
	config->numCol = clabel->num_columns;
	config->numSpare = 0;	/* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0;	/* XXX ? 
	 */

	/* Fill in the device name for each member at its (row,col). */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
		    ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		/* NOTE(review): NULL used where a character constant
		   ('\0') is presumably intended -- same value here,
		   but verify. */
		config->debugVars[i][0] = NULL;
	}
}

/*
 * Set the array's autoconfigure flag and rewrite the component label
 * of every optimal component to match.  Returns the new value.
 */
int
rf_set_autoconfig(raidPtr, new_value)
	RF_Raid_t *raidPtr;
	int new_value;
{
	RF_ComponentLabel_t *clabel;
	struct vnode *vp;
	dev_t dev;
	int row, column;

	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
	    M_RAIDFRAME, M_WAITOK | M_ZERO);

	raidPtr->autoconfigure = new_value;
	for(row=0; row<raidPtr->numRow; row++) {
		for(column=0; column<raidPtr->numCol; column++) {
			if (raidPtr->Disks[row][column].status ==
			    rf_ds_optimal) {
				/* read-modify-write the on-disk label */
				dev = raidPtr->Disks[row][column].dev;
				vp = raidPtr->raid_cinfo[row][column].ci_vp;
				raidread_component_label(dev, vp, clabel);
				clabel->autoconfigure = new_value;
				raidwrite_component_label(dev, vp, clabel);
			}
		}
	}
	FREE(clabel, M_RAIDFRAME);
	return(new_value);
}

/*
 * Set the array's root_partition flag and rewrite the component label
 * of every optimal component to match.  Returns the new value.
 */
int
rf_set_rootpartition(raidPtr, new_value)
	RF_Raid_t *raidPtr;
	int new_value;
{
	RF_ComponentLabel_t *clabel;
	struct vnode *vp;
	dev_t dev;
	int row, column;

	MALLOC(clabel, RF_ComponentLabel_t *, sizeof(RF_ComponentLabel_t),
	    M_RAIDFRAME, M_WAITOK | M_ZERO);

	raidPtr->root_partition = new_value;
	for(row=0; row<raidPtr->numRow; row++) {
		for(column=0; column<raidPtr->numCol; column++) {
			if (raidPtr->Disks[row][column].status ==
			    rf_ds_optimal) {
				/* read-modify-write the on-disk label */
				dev = raidPtr->Disks[row][column].dev;
				vp = raidPtr->raid_cinfo[row][column].ci_vp;
				raidread_component_label(dev, vp, clabel);
				clabel->root_partition = new_value;
				raidwrite_component_label(dev, vp, clabel);
			}
		}
	}
	FREE(clabel, M_RAIDFRAME);
	return(new_value);
}

/*
 * Close and release the vnode of every component in the set (used when
 * an autoconfig set is rejected or torn down).
 */
void
rf_release_all_vps(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	struct thread *td;

	td = curthread;
	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */

		if (ac->vp) {
			VOP_CLOSE(ac->vp, FREAD, td->td_ucred, td);
			vrele(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory belonging to a configuration set: each member's
 * component label, each RF_AutoConfig_t node, and the set itself.
 * (Vnodes are not touched here; see rf_release_all_vps().)
 */
void
rf_cleanup_config_set(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Initialize a component label from the array's current in-core state.
 * The label is written as dirty; the clean bit is set at shutdown.
 */
void
raid_init_component_label(raidPtr, clabel)
	RF_Raid_t *raidPtr;
	RF_ComponentLabel_t *clabel;
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = raidPtr->numRow;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY;	/* not clean */
	clabel->status = rf_ds_optimal;	/* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}

/*
 * Configure and attach a RAID device from an autoconfig set.  On
 * success *unit receives the raid unit number chosen (preferring the
 * unit the set was last configured at); returns 0 on success, non-zero
 * on failure.
 */
int
rf_auto_config_set(cset, unit, parent_sc)
	RF_ConfigSet_t *cset;
	int *unit;
	struct raidctl_softc *parent_sc;
{
	int retcode = 0;
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;

	rf_printf(0, "RAIDframe autoconfigure\n");

	*unit = -1;

	/* 1. 
	   Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
	    M_NOWAIT|M_ZERO);
	if (config==NULL) {
		rf_printf(0, "Out of mem at rf_auto_config_set\n");
		/* XXX do something more intelligent here. */
		return(1);
	}

	/* XXX raidID needs to be set correctly.. */

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if (raidID < 0) {
		/* let's not wander off into lala land. */
		raidID = raidgetunit(parent_sc, 0);
	} else {
		raidID = raidgetunit(parent_sc, raidID);
	}

	if (raidID < 0) {
		/* punt... */
		/* NOTE(review): `config' allocated above is not freed on
		   this return path -- leak; verify. */
		rf_printf(0, "Unable to auto configure this set!\n");
		rf_printf(1, "Out of RAID devs!\n");
		return(1);
	}
	rf_printf(0, "Configuring raid%d:\n",raidID);
	RF_Malloc(raidPtr, sizeof(*raidPtr), (RF_Raid_t *));
	if (raidPtr == NULL) {
		/* NOTE(review): `config' is leaked here too -- verify. */
		rf_printf(0, "Out of mem at rf_auto_config_set\n");
		return (1);
	}
	bzero((char *)raidPtr, sizeof(RF_Raid_t));

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		parent_sc->sc_raiddevs[raidID] = raidinit(raidPtr);
		if (parent_sc->sc_raiddevs[raidID] == NULL) {
			rf_printf(0, "Could not create RAID device\n");
			RF_Free(raidPtr, sizeof(RF_Raid_t));
			free(config, M_RAIDFRAME);
			return (1);
		}

		parent_sc->sc_numraid++;
		((struct raid_softc *)raidPtr->sc)->sc_parent_dev =
		    parent_sc->sc_dev;
		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1;	/* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. 
			 */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtr->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}

/*
 * Record completion of the bio attached to an access descriptor in the
 * array's devstat bookkeeping.
 */
void
rf_disk_unbusy(desc)
	RF_RaidAccessDesc_t *desc;
{
	struct raid_softc *sc;
	struct bio *bp;

	sc = desc->raidPtr->sc;
	bp = (struct bio *)desc->bp;

	devstat_end_transaction_bio(&sc->device_stats, bp);
}

/*
 * Get the next available unit number from the bitmap.  You can also request
 * a particular unit number by passing it in the second arg.  If it's not
 * available, then grab the next free one.  Return -1 if none are available.
 */
static int
raidgetunit(struct raidctl_softc *parent_sc, int id)
{
	int i;

	if (id >= RF_MAX_ARRAYS)
		return (-1);

	/* Search upward from the requested unit... */
	for (i = id; i < RF_MAX_ARRAYS; i++) {
		if (parent_sc->sc_raiddevs[i] == NULL)
			return (i);
	}

	/* ...then wrap around and search below it. */
	if (id != 0) {
		for (i = 0; i < id; i++) {
			if (parent_sc->sc_raiddevs[i] == NULL)
				return (i);
		}
	}

	return (-1);
}

/*
 * Module shutdown: stop every configured array via the
 * RAIDFRAME_SHUTDOWN ioctl, then destroy the control device.  Returns
 * the first ioctl error encountered, if any.
 */
static int
raidshutdown(void)
{
	struct raidctl_softc *parent_sc;
	int i, error = 0;

	parent_sc = raidctl_dev->si_drv1;

	if (parent_sc->sc_numraid != 0) {
#if XXX_KTHREAD_EXIT_RACE
		return (EBUSY);
#else
		for (i = 0; i < RF_MAX_ARRAYS; i++) {
			if (parent_sc->sc_raiddevs[i] != NULL) {
				rf_printf(0, "Shutting down raid%d\n", i);
				error = raidctlioctl(raidctl_dev,
				    RAIDFRAME_SHUTDOWN, (caddr_t)&i, 0, NULL);
				if (error)
					return (error);
				if (parent_sc->sc_numraid == 0)
					break;
			}
		}
#endif
	}

	destroy_dev(raidctl_dev);

	return (error);
}

/*
 * Look up the component at (row, col) by device name and record its
 * block size, usable size in blocks (minus the label area, scaled by
 * rf_sizePercentage), and vnode/device pointers in the raidPtr
 * bookkeeping.  Returns 0 on success or an errno.
 */
int
raid_getcomponentsize(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
{
	struct disklabel *dlabel;
	struct vnode *vp;
	struct vattr va;
	RF_Thread_t td;
	int retcode;

	td = raidPtr->engine_thread;

	MALLOC(dlabel, struct disklabel *, sizeof(struct disklabel),
	    M_RAIDFRAME, M_NOWAIT | M_ZERO);
	if (dlabel == NULL) {
		printf("rf_getcomponentsize: Out of memory?\n");
		return (ENOMEM);
	}

	retcode = raidlookup(raidPtr->Disks[row][col].devname, td, &vp);

	if (retcode) {
		printf("raid%d: rebuilding: raidlookup on device: %s failed: %d!\n",raidPtr->raidid,
		    raidPtr->Disks[row][col].devname, retcode);

		/* XXX the component isn't responding properly...
		   must be still dead :-( */
		raidPtr->reconInProgress--;
		FREE(dlabel, M_RAIDFRAME);
		return(retcode);

	} else {

		/* Ok, so we can at least do a lookup...
		   How about actually getting a vp for it? */

		if ((retcode = VOP_GETATTR(vp, &va, rf_getucred(td),
		    td)) != 0) {
			raidPtr->reconInProgress--;
			FREE(dlabel, M_RAIDFRAME);
			return(retcode);
		}

		retcode = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)dlabel,
		    FREAD, rf_getucred(td), td);
		if (retcode) {
			/* NOTE(review): unlike the two error paths above,
			   reconInProgress is not decremented here --
			   verify whether that asymmetry is intentional. */
			FREE(dlabel, M_RAIDFRAME);
			return(retcode);
		}
		raidPtr->Disks[row][col].blockSize = dlabel->d_secsize;
		/* Reserve the label/protected sectors at the front. */
		raidPtr->Disks[row][col].numBlocks =
		    dlabel->d_partitions[dkpart(vn_todev(vp))].p_size -
		    rf_protectedSectors;

		raidPtr->raid_cinfo[row][col].ci_vp = vp;
		raidPtr->raid_cinfo[row][col].ci_dev = udev2dev(va.va_rdev, 0);
		raidPtr->Disks[row][col].dev = udev2dev(va.va_rdev, 0);

		/* we allow the user to specify that only a
		   fraction of the disks should be used this is
		   just for debug:  it speeds up
		 * the parity scan */
		raidPtr->Disks[row][col].numBlocks =
		    raidPtr->Disks[row][col].numBlocks *
		    rf_sizePercentage / 100;
	}

	FREE(dlabel, M_RAIDFRAME);
	return(retcode);
}

/*
 * Module event handler: attach the driver on load; shut down all
 * arrays on unload or system shutdown.
 */
static int
raid_modevent(mod, type, data)
	module_t mod;
	int type;
	void *data;
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		raidattach();
		break;

	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		error = raidshutdown();
		break;

	default:
		break;
	}

	return (error);
}

moduledata_t raid_mod = {
	"raidframe",
	(modeventhand_t) raid_modevent,
	0};

DECLARE_MODULE(raidframe, raid_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);